]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/bridge.c
xnu-1228.5.18.tar.gz
[apple/xnu.git] / bsd / net / bridge.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1998 Luigi Rizzo
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50 * SUCH DAMAGE.
51 *
52 * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
53 */
54
55 /*
56 * This code implements bridging in FreeBSD. It only acts on ethernet
57 * type of interfaces (others are still usable for routing).
58 * A bridging table holds the source MAC address/dest. interface for each
59 * known node. The table is indexed using a hash of the source address.
60 *
61 * Input packets are tapped near the beginning of ether_input(), and
62 * analysed by calling bridge_in(). Depending on the result, the packet
63 * can be forwarded to one or more output interfaces using bdg_forward(),
64 * and/or sent to the upper layer (e.g. in case of multicast).
65 *
66 * Output packets are intercepted near the end of ether_output(),
67 * the correct destination is selected calling bridge_dst_lookup(),
68 * and then forwarding is done using bdg_forward().
69 * Bridging is controlled by the sysctl variable net.link.ether.bridge
70 *
71 * The arp code is also modified to let a machine answer to requests
72 * irrespective of the port the request came from.
73 *
74 * In case of loops in the bridging topology, the bridge detects this
75 * event and temporarily mutes output bridging on one of the ports.
76 * Periodically, interfaces are unmuted by bdg_timeout().
77 * Muting is only implemented as a safety measure, and also as
78 * a mechanism to support a user-space implementation of the spanning
79 * tree algorithm. In the final release, unmuting will only occur
80 * because of explicit action of the user-level daemon.
81 *
82 * To build a bridging kernel, use the following option
83 * option BRIDGE
84 * and then at runtime set the sysctl variable to enable bridging.
85 *
86 * Only one interface is supposed to have addresses set (but
87 * there are no problems in practice if you set addresses for more
88 * than one interface).
89 * Bridging will act before routing, but nothing prevents a machine
90 * from doing both (modulo bugs in the implementation...).
91 *
92 * THINGS TO REMEMBER
93 * - bridging is incompatible with multicast routing on the same
94 * machine. There is not an easy fix to this.
95 * - loop detection is still not very robust.
96 * - the interface of bdg_forward() could be improved.
97 */
98
99 #include <sys/param.h>
100 #include <sys/mbuf.h>
101 #include <sys/malloc.h>
102 #include <sys/systm.h>
103 #include <sys/socket.h> /* for net/if.h */
104 #include <sys/kernel.h>
105 #include <sys/sysctl.h>
106
107 #include <net/if.h>
108 #include <net/if_types.h>
109
110 #include <netinet/in.h> /* for struct arpcom */
111 #include <netinet/in_systm.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip.h>
114 #include <netinet/if_ether.h> /* for struct arpcom */
115
116 #include "opt_ipfw.h"
117 #include "opt_ipdn.h"
118
119 #if defined(IPFIREWALL)
120 #include <net/route.h>
121 #include <netinet/ip_fw.h>
122 #if defined(DUMMYNET)
123 #include <netinet/ip_dummynet.h>
124 #endif
125 #endif
126
127 #include <net/bridge.h>
128
129 /*
130 * For debugging, you can use the following macros.
131 * remember, rdtsc() only works on Pentium-class machines
132
133 quad_t ticks;
134 DDB(ticks = rdtsc();)
135 ... interesting code ...
136 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
137
138 *
139 */
140
/* DDB(x) expands to its argument, i.e. the wrapped code is compiled in. */
#define DDB(x) x
/* DEB(x) expands to nothing, i.e. the wrapped debug code is compiled out. */
#define DEB(x)

/* Forward declarations for the file-local helpers defined further down. */
static void bdginit(void *);
static void bdgtakeifaces(void);
static void flush_table(void);
static void bdg_promisc_on(void);
static void parse_bdg_cfg(void);

/* Non-zero: bdg_forward() runs bridged packets through the firewall hook. */
static int bdg_ipfw = 0 ;
/* Bridging on/off switch; written through the sysctl_bdg() handler. */
int do_bridge = 0;
/* Forwarding table of HASH_SIZE entries, allocated by bdginit(). */
bdg_hash_table *bdg_table = NULL ;
153
154 /*
155 * System initialization
156 */
157
158 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
159
160 static struct bdg_stats bdg_stats ;
161 struct bdg_softc *ifp2sc = NULL ;
162 /* XXX make it static of size BDG_MAX_PORTS */
163
/*
 * IFP_CHK(ifp, x): execute statement(s) `x` when the bridge state slot of
 * `ifp` does not carry the 0xDEADBEEF magic written by bdgtakeifaces().
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (safe inside an un-braced if/else), and `ifp` is
 * parenthesised so that any pointer expression can be passed.
 */
#define IFP_CHK(ifp, x) \
	do { \
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
166
167 /*
168 * turn off promisc mode, optionally clear the IFF_USED flag.
169 * The flag is turned on by parse_bdg_config
170 */
171 static void
172 bdg_promisc_off(int clear_used)
173 {
174 struct ifnet *ifp ;
175 ifnet_head_lock_shared();
176 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
177 if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
178 int s, ret ;
179 s = splimp();
180 ret = ifnet_set_promiscuous(ifp, 0);
181 splx(s);
182 ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
183 DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
184 ifp->if_name, ifp->if_unit,
185 ifp->if_flags, ifp2sc[ifp->if_index].flags);)
186 }
187 if (clear_used) {
188 ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
189 bdg_stats.s[ifp->if_index].name[0] = '\0';
190 }
191 }
192 ifnet_head_done();
193 }
194
195 /*
196 * set promisc mode on the interfaces we use.
197 */
198 static void
199 bdg_promisc_on()
200 {
201 struct ifnet *ifp ;
202 int s ;
203
204 ifnet_head_lock_shared();
205 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
206 if ( !BDG_USED(ifp) )
207 continue ;
208 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
209 s = splimp();
210 if_up(ifp);
211 splx(s);
212 }
213 if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
214 int ret ;
215 s = splimp();
216 ret = ifnet_set_promiscuous(ifp, 1);
217 splx(s);
218 ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
219 printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
220 ifp->if_name, ifp->if_unit,
221 ifp->if_flags, ifp2sc[ifp->if_index].flags);
222 }
223 if (BDG_MUTED(ifp)) {
224 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
225 BDG_UNMUTE(ifp) ;
226 }
227 }
228 ifnet_head_done();
229 }
230
231 static int
232 sysctl_bdg(SYSCTL_HANDLER_ARGS)
233 {
234 int error, oldval = do_bridge ;
235
236 error = sysctl_handle_int(oidp,
237 oidp->oid_arg1, oidp->oid_arg2, req);
238 DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
239 oidp->oid_name, oidp->oid_arg2,
240 oldval, do_bridge); )
241
242 if (bdg_table == NULL)
243 do_bridge = 0 ;
244 if (oldval != do_bridge) {
245 bdg_promisc_off( 1 ); /* reset previously used interfaces */
246 flush_table();
247 if (do_bridge) {
248 parse_bdg_cfg();
249 bdg_promisc_on();
250 }
251 }
252 return error ;
253 }
254
255 static char bridge_cfg[256] = { "" } ;
256
257 /*
258 * parse the config string, set IFF_USED, name and cluster_id
259 * for all interfaces found.
260 */
261 static void
262 parse_bdg_cfg()
263 {
264 char *p, *beg ;
265 int i, l, cluster;
266 struct bdg_softc *b;
267
268 for (p= bridge_cfg; *p ; p++) {
269 /* interface names begin with [a-z] and continue up to ':' */
270 if (*p < 'a' || *p > 'z')
271 continue ;
272 for ( beg = p ; *p && *p != ':' ; p++ )
273 ;
274 if (*p == 0) /* end of string, ':' not found */
275 return ;
276 l = p - beg ; /* length of name string */
277 p++ ;
278 DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
279 for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
280 cluster = cluster*10 + (*p -'0');
281 /*
282 * now search in bridge strings
283 */
284 for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
285 char buf[32];
286 struct ifnet *ifp = b->ifp ;
287
288 if (ifp == NULL)
289 continue;
290 sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
291 if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
292 b->cluster_id = htons(cluster) ;
293 b->flags |= IFF_USED ;
294 sprintf(bdg_stats.s[ifp->if_index].name,
295 "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
296
297 DEB(printf("--++ found %s\n",
298 bdg_stats.s[ifp->if_index].name);)
299 break ;
300 }
301 }
302 if (*p == '\0')
303 break ;
304 }
305 }
306
307 static int
308 sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
309 {
310 int error = 0 ;
311 char oldval[256] ;
312
313 strlcpy(oldval, bridge_cfg, sizeof (oldval));
314
315 error = sysctl_handle_string(oidp,
316 bridge_cfg, oidp->oid_arg2, req);
317 DEB(
318 printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
319 oidp->oid_name, oidp->oid_arg2,
320 error,
321 oldval, bridge_cfg);
322 )
323 if (strcmp(oldval, bridge_cfg)) {
324 bdg_promisc_off( 1 ); /* reset previously-used interfaces */
325 flush_table();
326 parse_bdg_cfg(); /* and set new ones... */
327 if (do_bridge)
328 bdg_promisc_on(); /* re-enable interfaces */
329 }
330 return error ;
331 }
332
333 static int
334 sysctl_refresh(SYSCTL_HANDLER_ARGS)
335 {
336 if (req->newptr)
337 bdgtakeifaces();
338
339 return 0;
340 }
341
342
/* All bridge knobs are declared under the _net_link_ether sysctl parent. */
SYSCTL_DECL(_net_link_ether);
/* Configuration string; changes are handled by sysctl_bdg_cfg(). */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
    &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
    "Bridge configuration");

/* Bridging on/off switch; changes are handled by sysctl_bdg(). */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
    &do_bridge, 0, &sysctl_bdg, "I", "Bridging");

SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
    &bdg_ipfw,0,"Pass bridged pkts through firewall");

/*
 * SY(parent, var, comment): define a file-local int `var` and export it
 * read/write under `parent`.  The expansion already ends in ';'.
 */
#define SY(parent, var, comment) \
    static int var ; \
    SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);

/* Incremented by bdg_forward() when no firewall action applies to a packet. */
int bdg_ipfw_drops;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
    CTLFLAG_RW, &bdg_ipfw_drops,0,"");

/* Incremented by bridge_in() when a table slot holds a different address. */
int bdg_ipfw_colls;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
    CTLFLAG_RW, &bdg_ipfw_colls,0,"");

/* Write-only node: writing to it calls sysctl_refresh(). */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
    NULL, 0, &sysctl_refresh, "I", "iface refresh");

#if 1 /* diagnostic vars */

SY(_net_link_ether, verbose, "Be verbose");
SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");

SY(_net_link_ether, bdg_thru, "Packets through bridge");

SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");

/* bdg_copy is the initial value of `shared` in bdg_forward(). */
SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
SY(_net_link_ether, bdg_predict, "Correctly predicted header location");

SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
#endif

/* Read-only export of the whole bdg_stats structure. */
SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
    CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");

/* Loop counter: bumped by bridge_in(), reset to 0 by bdg_timeout(). */
static int bdg_loops ;
390
391 /*
392 * completely flush the bridge table.
393 */
394 static void
395 flush_table()
396 {
397 int s,i;
398
399 if (bdg_table == NULL)
400 return ;
401 s = splimp();
402 for (i=0; i< HASH_SIZE; i++)
403 bdg_table[i].name= NULL; /* clear table */
404 splx(s);
405 }
406
407 /*
408 * called periodically to flush entries etc.
409 */
410 static void
411 bdg_timeout(void *dummy)
412 {
413 static int slowtimer = 0 ;
414
415 if (do_bridge) {
416 static int age_index = 0 ; /* index of table position to age */
417 int l = age_index + HASH_SIZE/4 ;
418 /*
419 * age entries in the forwarding table.
420 */
421 if (l > HASH_SIZE)
422 l = HASH_SIZE ;
423 for (; age_index < l ; age_index++)
424 if (bdg_table[age_index].used)
425 bdg_table[age_index].used = 0 ;
426 else if (bdg_table[age_index].name) {
427 /* printf("xx flushing stale entry %d\n", age_index); */
428 bdg_table[age_index].name = NULL ;
429 }
430 if (age_index >= HASH_SIZE)
431 age_index = 0 ;
432
433 if (--slowtimer <= 0 ) {
434 slowtimer = 5 ;
435
436 bdg_promisc_on() ; /* we just need unmute, really */
437 bdg_loops = 0 ;
438 }
439 }
440 timeout(bdg_timeout, (void *)0, 2*hz );
441 }
442
/*
 * local MAC addresses are held in a small array. This makes comparisons
 * much faster.
 *
 * bdgtakeifaces() fills bdg_addresses with the link-layer address of each
 * ethernet interface it finds and leaves the number of filled entries in
 * bdg_ports.
 */
bdg_addr bdg_addresses[BDG_MAX_PORTS];
int bdg_ports ;
449
450 /*
451 * initialization of bridge code. This needs to be done after all
452 * interfaces have been configured.
453 */
454 static void
455 bdginit(void *dummy)
456 {
457
458 if (bdg_table == NULL)
459 bdg_table = (struct hash_table *)
460 _MALLOC(HASH_SIZE * sizeof(struct hash_table),
461 M_IFADDR, M_WAITOK);
462 flush_table();
463
464 ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
465 M_IFADDR, M_WAITOK );
466 bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
467
468 bzero(&bdg_stats, sizeof(bdg_stats) );
469 bdgtakeifaces();
470 bdg_timeout(0);
471 do_bridge=0;
472 }
473
474 void
475 bdgtakeifaces(void)
476 {
477 int i ;
478 struct ifnet *ifp;
479 bdg_addr *p = bdg_addresses ;
480 struct bdg_softc *bp;
481
482 bdg_ports = 0 ;
483 *bridge_cfg = '\0';
484
485 printf("BRIDGE 010131, have %d interfaces\n", if_index);
486 ifnet_head_lock_shared();
487 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
488 i++, ifp = TAILQ_NEXT(ifp, if_link) )
489 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
490 ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN);
491 bp = &ifp2sc[ifp->if_index] ;
492 sprintf(bridge_cfg + strlen(bridge_cfg),
493 "%s%d:1,", ifp->if_name, ifp->if_unit);
494 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
495 ifp->if_index,
496 bdg_stats.s[ifp->if_index].name,
497 (int)ifp->if_type, (int) ifp->if_physical,
498 (int)ifp->if_addrlen,
499 p->etheraddr, "." );
500 p++ ;
501 bp->ifp = ifp ;
502 bp->flags = IFF_USED ;
503 bp->cluster_id = htons(1) ;
504 bp->magic = 0xDEADBEEF ;
505
506 sprintf(bdg_stats.s[ifp->if_index].name,
507 "%s%d:%d", ifp->if_name, ifp->if_unit,
508 ntohs(bp->cluster_id));
509 bdg_ports ++ ;
510 }
511 ifnet_head_done();
512 }
513
514 /*
515 * bridge_in() is invoked to perform bridging decision on input packets.
516 *
517 * On Input:
518 * eh Ethernet header of the incoming packet.
519 *
520 * On Return: destination of packet, one of
521 * BDG_BCAST broadcast
522 * BDG_MCAST multicast
523 * BDG_LOCAL is only for a local address (do not forward)
524 * BDG_DROP drop the packet
525 * ifp ifp of the destination interface.
526 *
527 * Forwarding is not done directly to give a chance to some drivers
528 * to fetch more of the packet, or simply drop it completely.
529 */
530
531 struct ifnet *
532 bridge_in(struct ifnet *ifp, struct ether_header *eh)
533 {
534 int index;
535 struct ifnet *dst , *old ;
536 int dropit = BDG_MUTED(ifp) ;
537
538 /*
539 * hash the source address
540 */
541 index= HASH_FN(eh->ether_shost);
542 bdg_table[index].used = 1 ;
543 old = bdg_table[index].name ;
544 if ( old ) { /* the entry is valid. */
545 IFP_CHK(old, printf("bridge_in-- reading table\n") );
546
547 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
548 bdg_ipfw_colls++ ;
549 bdg_table[index].name = NULL ;
550 } else if (old != ifp) {
551 /*
552 * found a loop. Either a machine has moved, or there
553 * is a misconfiguration/reconfiguration of the network.
554 * First, do not forward this packet!
555 * Record the relocation anyways; then, if loops persist,
556 * suspect a reconfiguration and disable forwarding
557 * from the old interface.
558 */
559 bdg_table[index].name = ifp ; /* relocate address */
560 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
561 bdg_loops, eh->ether_shost, ".",
562 ifp->if_name, ifp->if_unit,
563 old->if_name, old->if_unit,
564 BDG_MUTED(old) ? "muted":"active");
565 dropit = 1 ;
566 if ( !BDG_MUTED(old) ) {
567 if (++bdg_loops > 10)
568 BDG_MUTE(old) ;
569 }
570 }
571 }
572
573 /*
574 * now write the source address into the table
575 */
576 if (bdg_table[index].name == NULL) {
577 DEB(printf("new addr %6D at %d for %s%d\n",
578 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
579 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
580 bdg_table[index].name = ifp ;
581 }
582 dst = bridge_dst_lookup(eh);
583 /* Return values:
584 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
585 * For muted interfaces, the first 3 are changed in BDG_LOCAL,
586 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
587 * to BDG_DROP in case ifp == src . These mods are not necessary
588 * for outgoing packets from ether_output().
589 */
590 BDG_STAT(ifp, BDG_IN);
591 switch ((int)dst) {
592 case (int)BDG_BCAST:
593 case (int)BDG_MCAST:
594 case (int)BDG_LOCAL:
595 case (int)BDG_UNKNOWN:
596 case (int)BDG_DROP:
597 BDG_STAT(ifp, dst);
598 break ;
599 default :
600 if (dst == ifp || dropit )
601 BDG_STAT(ifp, BDG_DROP);
602 else
603 BDG_STAT(ifp, BDG_FORWARD);
604 break ;
605 }
606
607 if ( dropit ) {
608 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
609 return BDG_LOCAL ;
610 else
611 return BDG_DROP ;
612 } else {
613 return (dst == ifp ? BDG_DROP : dst ) ;
614 }
615 }
616
617 /*
618 * Forward to dst, excluding src port and muted interfaces.
619 * If src == NULL, the pkt comes from ether_output, and dst is the real
620 * interface the packet is originally sent to. In this case we must forward
621 * it to the whole cluster. We never call bdg_forward ether_output on
622 * interfaces which are not part of a cluster.
623 *
624 * The packet is freed if possible (i.e. surely not of interest for
625 * the upper layer), otherwise a copy is left for use by the caller
626 * (pointer in m0).
627 *
628 * It would be more efficient to make bdg_forward() always consume
629 * the packet, leaving to the caller the task to check if it needs a copy
630 * and get one in case. As it is now, bdg_forward() can sometimes make
631 * a copy whereas it is not necessary.
632 *
633 * XXX be careful about eh, it can be a pointer into *m
634 */
635 struct mbuf *
636 bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
637 {
638 struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
639 struct ifnet *ifp, *last = NULL ;
640 int s ;
641 int shared = bdg_copy ; /* someone else is using the mbuf */
642 int once = 0; /* loop only once */
643 struct ifnet *real_dst = dst ; /* real dst from ether_output */
644 #ifdef IPFIREWALL
645 struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
646 #endif
647
648 /*
649 * XXX eh is usually a pointer within the mbuf (some ethernet drivers
650 * do that), so we better copy it before doing anything with the mbuf,
651 * or we might corrupt the header.
652 */
653 struct ether_header save_eh = *eh ;
654
655 #if defined(IPFIREWALL) && defined(DUMMYNET)
656 if (m0->m_type == MT_DUMMYNET) {
657 /* extract info from dummynet header */
658 rule = (struct ip_fw_chain *)(m0->m_data) ;
659 m0 = m0->m_next ;
660 src = m0->m_pkthdr.rcvif;
661 shared = 0 ; /* For sure this is our own mbuf. */
662 } else
663 #endif
664 bdg_thru++; /* only count once */
665
666 if (src == NULL) /* packet from ether_output */
667 dst = bridge_dst_lookup(eh);
668 if (dst == BDG_DROP) { /* this should not happen */
669 printf("xx bdg_forward for BDG_DROP\n");
670 m_freem(m0);
671 return NULL;
672 }
673 if (dst == BDG_LOCAL) { /* this should not happen as well */
674 printf("xx ouch, bdg_forward for local pkt\n");
675 return m0;
676 }
677 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
678 ifp = ifnet_head.tqh_first ; /* scan all ports */
679 once = 0 ;
680 if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
681 shared = 1 ;
682 } else {
683 ifp = dst ;
684 once = 1 ;
685 }
686 if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
687 panic("bdg_forward: bad dst");
688
689 #ifdef IPFIREWALL
690 /*
691 * Do filtering in a very similar way to what is done in ip_output.
692 * Only if firewall is loaded, enabled, and the packet is not
693 * from ether_output() (src==NULL, or we would filter it twice).
694 * Additional restrictions may apply e.g. non-IP, short packets,
695 * and pkts already gone through a pipe.
696 */
697 if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
698 struct ip *ip ;
699 int i;
700
701 if (rule != NULL) /* dummynet packet, already partially processed */
702 goto forward; /* HACK! I should obey the fw_one_pass */
703 if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
704 goto forward ; /* not an IP packet, ipfw is not appropriate */
705 if (m0->m_pkthdr.len < sizeof(struct ip) )
706 goto forward ; /* header too short for an IP pkt, cannot filter */
707 /*
708 * i need some amt of data to be contiguous, and in case others need
709 * the packet (shared==1) also better be in the first mbuf.
710 */
711 i = min(m0->m_pkthdr.len, max_protohdr) ;
712 if ( shared || m0->m_len < i) {
713 m0 = m_pullup(m0, i) ;
714 if (m0 == NULL) {
715 printf("-- bdg: pullup failed.\n") ;
716 return NULL ;
717 }
718 }
719
720 /*
721 * before calling the firewall, swap fields the same as IP does.
722 * here we assume the pkt is an IP one and the header is contiguous
723 */
724 ip = mtod(m0, struct ip *);
725 NTOHS(ip->ip_len);
726 NTOHS(ip->ip_off);
727
728 /*
729 * The third parameter to the firewall code is the dst. interface.
730 * Since we apply checks only on input pkts we use NULL.
731 * The firewall knows this is a bridged packet as the cookie ptr
732 * is NULL.
733 */
734 i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
735 if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
736 return m0 ;
737 /*
738 * If we get here, the firewall has passed the pkt, but the mbuf
739 * pointer might have changed. Restore ip and the fields NTOHS()'d.
740 */
741 ip = mtod(m0, struct ip *);
742 HTONS(ip->ip_len);
743 HTONS(ip->ip_off);
744
745 if (i == 0) /* a PASS rule. */
746 goto forward ;
747 #ifdef DUMMYNET
748 if (i & IP_FW_PORT_DYNT_FLAG) {
749 /*
750 * Pass the pkt to dummynet, which consumes it.
751 * If shared, make a copy and keep the original.
752 * Need to prepend the ethernet header, optimize the common
753 * case of eh pointing already into the original mbuf.
754 */
755 struct mbuf *m ;
756 if (shared) {
757 m = m_copypacket(m0, M_DONTWAIT);
758 if (m == NULL) {
759 printf("bdg_fwd: copy(1) failed\n");
760 return m0;
761 }
762 } else {
763 m = m0 ; /* pass the original to dummynet */
764 m0 = NULL ; /* and nothing back to the caller */
765 }
766 if ( (void *)(eh + 1) == (void *)m->m_data) {
767 m->m_data -= ETHER_HDR_LEN ;
768 m->m_len += ETHER_HDR_LEN ;
769 m->m_pkthdr.len += ETHER_HDR_LEN ;
770 bdg_predict++;
771 } else {
772 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
773 if (!m && verbose) printf("M_PREPEND failed\n");
774 if (m == NULL) /* nope... */
775 return m0 ;
776 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
777 }
778 dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
779 return m0 ;
780 }
781 #endif
782 /*
783 * XXX add divert/forward actions...
784 */
785 /* if none of the above matches, we have to drop the pkt */
786 bdg_ipfw_drops++ ;
787 printf("bdg_forward: No rules match, so dropping packet!\n");
788 return m0 ;
789 }
790 forward:
791 #endif /* IPFIREWALL */
792 /*
793 * Again, bring up the headers in case of shared bufs to avoid
794 * corruptions in the future.
795 */
796 if ( shared ) {
797 int i = min(m0->m_pkthdr.len, max_protohdr) ;
798
799 m0 = m_pullup(m0, i) ;
800 if (m0 == NULL) {
801 printf("-- bdg: pullup2 failed.\n") ;
802 return NULL ;
803 }
804 }
805 /* now real_dst is used to determine the cluster where to forward */
806 if (src != NULL) /* pkt comes from ether_input */
807 real_dst = src ;
808 for (;;) {
809 if (last) { /* need to forward packet leftover from previous loop */
810 struct mbuf *m ;
811 if (shared == 0 && once ) { /* no need to copy */
812 m = m0 ;
813 m0 = NULL ; /* original is gone */
814 } else {
815 m = m_copypacket(m0, M_DONTWAIT);
816 if (m == NULL) {
817 printf("bdg_forward: sorry, m_copypacket failed!\n");
818 return m0 ; /* the original is still there... */
819 }
820 }
821 /*
822 * Add header (optimized for the common case of eh pointing
823 * already into the mbuf) and execute last part of ether_output:
824 * queue pkt and start output if interface not yet active.
825 */
826 if ( (void *)(eh + 1) == (void *)m->m_data) {
827 m->m_data -= ETHER_HDR_LEN ;
828 m->m_len += ETHER_HDR_LEN ;
829 m->m_pkthdr.len += ETHER_HDR_LEN ;
830 bdg_predict++;
831 } else {
832 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
833 if (!m && verbose) printf("M_PREPEND failed\n");
834 if (m == NULL)
835 return m0;
836 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
837 }
838 s = splimp();
839 if (IF_QFULL(&last->if_snd)) {
840 IF_DROP(&last->if_snd);
841 #if 0
842 BDG_MUTE(last); /* should I also mute ? */
843 #endif
844 splx(s);
845 m_freem(m); /* consume the pkt anyways */
846 } else {
847 last->if_obytes += m->m_pkthdr.len ;
848 if (m->m_flags & M_MCAST)
849 last->if_omcasts++;
850 if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
851 bdg_split_pkts++;
852
853 IF_ENQUEUE(&last->if_snd, m);
854 if ((last->if_flags & IFF_OACTIVE) == 0)
855 (*last->if_start)(last);
856 splx(s);
857 }
858 BDG_STAT(last, BDG_OUT);
859 last = NULL ;
860 if (once)
861 break ;
862 }
863 if (ifp == NULL)
864 break ;
865 /*
866 * If the interface is used for bridging, not muted, not full,
867 * up and running, is not the source interface, and belongs to
868 * the same cluster as the 'real_dst', then send here.
869 */
870 if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) &&
871 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
872 ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
873 last = ifp ;
874 ifp = TAILQ_NEXT(ifp, if_link) ;
875 if (ifp == NULL)
876 once = 1 ;
877 }
878 DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
879 if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
880 return m0 ;
881 }