X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..2d21ac55c334faf3a56e5634905ed6987fc787d4:/bsd/net/bridge.c?ds=sidebyside diff --git a/bsd/net/bridge.c b/bsd/net/bridge.c index 53cf96617..74d6a18d2 100644 --- a/bsd/net/bridge.c +++ b/bsd/net/bridge.c @@ -1,23 +1,29 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * Copyright (c) 1998 Luigi Rizzo @@ -43,6 +49,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * + * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $ */ /* @@ -51,14 +58,13 @@ * A bridging table holds the source MAC address/dest. interface for each * known node. The table is indexed using an hash of the source address. * - * Input packets are tapped near the end of the input routine in each - * driver (near the call to bpf_mtap, or before the call to ether_input) - * and analysed calling bridge_in(). Depending on the result, the packet + * Input packets are tapped near the beginning of ether_input(), and + * analysed by calling bridge_in(). Depending on the result, the packet * can be forwarded to one or more output interfaces using bdg_forward(), * and/or sent to the upper layer (e.g. in case of multicast). * * Output packets are intercepted near the end of ether_output(), - * the correct destination is selected calling bdg_dst_lookup(), + * the correct destination is selected calling bridge_dst_lookup(), * and then forwarding is done using bdg_forward(). * Bridging is controlled by the sysctl variable net.link.ether.bridge * @@ -67,9 +73,8 @@ * * In case of loops in the bridging topology, the bridge detects this * event and temporarily mutes output bridging on one of the ports. - * Periodically, interfaces are unmuted by bdg_timeout(). (For the - * mute flag i am temporarily using IFF_LINK2 but this has to - * change.) Muting is only implemented as a safety measure, and also as + * Periodically, interfaces are unmuted by bdg_timeout(). + * Muting is only implemented as a safety measure, and also as * a mechanism to support a user-space implementation of the spanning * tree algorithm. In the final release, unmuting will only occur * because of explicit action of the user-level daemon. @@ -85,11 +90,6 @@ * from doing both (modulo bugs in the implementation...). * * THINGS TO REMEMBER - * - bridging requires some (small) modifications to the interface - * driver. Currently (980911) the "ed", "de", "tx", "lnc" drivers - * have been modified and tested. "fxp", "ep", "fe" have been - * modified but not tested. See the "ed" and "de" drivers as - * examples on how to operate. * - bridging is incompatible with multicast routing on the same * machine. There is not an easy fix to this. * - loop detection is still not very robust. @@ -116,11 +116,13 @@ #include "opt_ipfw.h" #include "opt_ipdn.h" -#if defined(IPFIREWALL) && defined(DUMMYNET) +#if defined(IPFIREWALL) #include #include +#if defined(DUMMYNET) #include #endif +#endif #include @@ -139,79 +141,248 @@ #define DDB(x) x #define DEB(x) -/* - * System initialization - */ - static void bdginit(void *); +static void bdgtakeifaces(void); static void flush_table(void); - -SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) +static void bdg_promisc_on(void); +static void parse_bdg_cfg(void); static int bdg_ipfw = 0 ; int do_bridge = 0; bdg_hash_table *bdg_table = NULL ; /* - * we need additional info for the bridge. The bdg_ifp2sc[] array - * provides a pointer to this struct using the if_index. - * bdg_softc has a backpointer to the struct ifnet, the bridge - * flags, and a group (bridging occurs only between port of the - * same group). + * System initialization + */ + +SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) + +static struct bdg_stats bdg_stats ; +struct bdg_softc *ifp2sc = NULL ; +/* XXX make it static of size BDG_MAX_PORTS */ + +#define IFP_CHK(ifp, x) \ + if (ifp2sc[ifp->if_index].magic != 0xDEADBEEF) { x ; } + +/* + * turn off promisc mode, optionally clear the IFF_USED flag. + * The flag is turned on by parse_bdg_config */ -struct bdg_softc { +static void +bdg_promisc_off(int clear_used) +{ struct ifnet *ifp ; - /* ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */ - int flags ; - int group ; -} ; - -static struct bdg_softc **ifp2sc = NULL ; - -#if 0 /* new code using ifp2sc */ -#define SAMEGROUP(ifp,src) (src == NULL || \ - ifp2sc[ifp->if_index]->group == ifp2sc[src->if_index]->group ) -#define MUTED(ifp) (ifp2sc[ifp->if_index]->flags & IFF_MUTE) -#define MUTE(ifp) ifp2sc[ifp->if_index]->flags |= IFF_MUTE -#define UNMUTE(ifp) ifp2sc[ifp->if_index]->flags &= ~IFF_MUTE -#else -#define SAMEGROUP(a,b) 1 -#define MUTED(ifp) (ifp->if_flags & IFF_MUTE) -#define MUTE(ifp) ifp->if_flags |= IFF_MUTE -#define UNMUTE(ifp) ifp->if_flags &= ~IFF_MUTE -#endif + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { + int s, ret ; + s = splimp(); + ret = ifnet_set_promiscuous(ifp, 0); + splx(s); + ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; + DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", + ifp->if_name, ifp->if_unit, + ifp->if_flags, ifp2sc[ifp->if_index].flags);) + } + if (clear_used) { + ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; + bdg_stats.s[ifp->if_index].name[0] = '\0'; + } + } + ifnet_head_done(); +} + +/* + * set promisc mode on the interfaces we use. + */ +static void +bdg_promisc_on() +{ + struct ifnet *ifp ; + int s ; + + ifnet_head_lock_shared(); + TAILQ_FOREACH(ifp, &ifnet_head, if_link) { + if ( !BDG_USED(ifp) ) + continue ; + if ( 0 == ( ifp->if_flags & IFF_UP) ) { + s = splimp(); + if_up(ifp); + splx(s); + } + if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { + int ret ; + s = splimp(); + ret = ifnet_set_promiscuous(ifp, 1); + splx(s); + ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; + printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", + ifp->if_name, ifp->if_unit, + ifp->if_flags, ifp2sc[ifp->if_index].flags); + } + if (BDG_MUTED(ifp)) { + printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); + BDG_UNMUTE(ifp) ; + } + } + ifnet_head_done(); +} static int -sysctl_bdg SYSCTL_HANDLER_ARGS +sysctl_bdg(SYSCTL_HANDLER_ARGS) { int error, oldval = do_bridge ; error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); - printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", + DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", oidp->oid_name, oidp->oid_arg2, - oldval, do_bridge); + oldval, do_bridge); ) + if (bdg_table == NULL) do_bridge = 0 ; if (oldval != do_bridge) { + bdg_promisc_off( 1 ); /* reset previously used interfaces */ + flush_table(); + if (do_bridge) { + parse_bdg_cfg(); + bdg_promisc_on(); + } + } + return error ; +} + +static char bridge_cfg[256] = { "" } ; + +/* + * parse the config string, set IFF_USED, name and cluster_id + * for all interfaces found. + */ +static void +parse_bdg_cfg() +{ + char *p, *beg ; + int i, l, cluster; + struct bdg_softc *b; + + for (p= bridge_cfg; *p ; p++) { + /* interface names begin with [a-z] and continue up to ':' */ + if (*p < 'a' || *p > 'z') + continue ; + for ( beg = p ; *p && *p != ':' ; p++ ) + ; + if (*p == 0) /* end of string, ':' not found */ + return ; + l = p - beg ; /* length of name string */ + p++ ; + DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) + for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) + cluster = cluster*10 + (*p -'0'); + /* + * now search in bridge strings + */ + for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { + char buf[32]; + struct ifnet *ifp = b->ifp ; + + if (ifp == NULL) + continue; + sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); + if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ + b->cluster_id = htons(cluster) ; + b->flags |= IFF_USED ; + sprintf(bdg_stats.s[ifp->if_index].name, + "%s%d:%d", ifp->if_name, ifp->if_unit, cluster); + + DEB(printf("--++ found %s\n", + bdg_stats.s[ifp->if_index].name);) + break ; + } + } + if (*p == '\0') + break ; + } +} + +static int +sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) +{ + int error = 0 ; + char oldval[256] ; + + strlcpy(oldval, bridge_cfg, sizeof (oldval)); + + error = sysctl_handle_string(oidp, + bridge_cfg, oidp->oid_arg2, req); + DEB( + printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", + oidp->oid_name, oidp->oid_arg2, + error, + oldval, bridge_cfg); + ) + if (strcmp(oldval, bridge_cfg)) { + bdg_promisc_off( 1 ); /* reset previously-used interfaces */ flush_table(); + parse_bdg_cfg(); /* and set new ones... */ + if (do_bridge) + bdg_promisc_on(); /* re-enable interfaces */ } return error ; } +static int +sysctl_refresh(SYSCTL_HANDLER_ARGS) +{ + if (req->newptr) + bdgtakeifaces(); + + return 0; +} + + SYSCTL_DECL(_net_link_ether); +SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW, + &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A", + "Bridge configuration"); + SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW, - &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); + &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); + +SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, + &bdg_ipfw,0,"Pass bridged pkts through firewall"); + +#define SY(parent, var, comment) \ + static int var ; \ + SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment); + +int bdg_ipfw_drops; +SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop, + CTLFLAG_RW, &bdg_ipfw_drops,0,""); + +int bdg_ipfw_colls; +SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions, + CTLFLAG_RW, &bdg_ipfw_colls,0,""); + +SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR, + NULL, 0, &sysctl_refresh, "I", "iface refresh"); -SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, &bdg_ipfw,0,""); #if 1 /* diagnostic vars */ -int bdg_in_count = 0 , bdg_in_ticks = 0 , bdg_fw_count = 0, bdg_fw_ticks = 0 ; -SYSCTL_INT(_net_link_ether, OID_AUTO, bdginc, CTLFLAG_RW, &bdg_in_count,0,""); -SYSCTL_INT(_net_link_ether, OID_AUTO, bdgint, CTLFLAG_RW, &bdg_in_ticks,0,""); -SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwc, CTLFLAG_RW, &bdg_fw_count,0,""); -SYSCTL_INT(_net_link_ether, OID_AUTO, bdgfwt, CTLFLAG_RW, &bdg_fw_ticks,0,""); + +SY(_net_link_ether, verbose, "Be verbose"); +SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward"); + +SY(_net_link_ether, bdg_thru, "Packets through bridge"); + +SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward"); + +SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward"); +SY(_net_link_ether, bdg_predict, "Correctly predicted header location"); + +SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg"); +SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item"); +SY(_net_link_ether, bdg_fw_count, "Cycle counter count"); #endif -static struct bdg_stats bdg_stats ; + SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats, CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics"); @@ -233,29 +404,14 @@ flush_table() splx(s); } -/* wrapper for funnel */ -void -bdg_timeout_funneled(void * dummy) -{ - boolean_t funnel_state; - - funnel_state = thread_funnel_set(network_flock, TRUE); - bdg_timeout(dummy); - funnel_state = thread_funnel_set(network_flock, FALSE); -} - /* * called periodically to flush entries etc. */ static void bdg_timeout(void *dummy) { - struct ifnet *ifp ; - int s ; static int slowtimer = 0 ; - boolean_t funnel_state; - - + if (do_bridge) { static int age_index = 0 ; /* index of table position to age */ int l = age_index + HASH_SIZE/4 ; @@ -277,103 +433,89 @@ bdg_timeout(void *dummy) if (--slowtimer <= 0 ) { slowtimer = 5 ; - for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_link.tqe_next) { - if (ifp->if_type != IFT_ETHER) - continue ; - if ( 0 == ( ifp->if_flags & IFF_UP) ) { - s = splimp(); - if_up(ifp); - splx(s); - } - if ( 0 == ( ifp->if_flags & IFF_PROMISC) ) { - int ret ; - s = splimp(); - ret = ifpromisc(ifp, 1); - splx(s); - printf(">> now %s%d flags 0x%x promisc %d\n", - ifp->if_name, ifp->if_unit, - ifp->if_flags, ret); - } - if (MUTED(ifp)) { - printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); - UNMUTE(ifp) ; - } - } + bdg_promisc_on() ; /* we just need unmute, really */ bdg_loops = 0 ; } } - timeout(bdg_timeout_funneled, (void *)0, 2*hz ); - + timeout(bdg_timeout, (void *)0, 2*hz ); } /* * local MAC addresses are held in a small array. This makes comparisons * much faster. */ -unsigned char bdg_addresses[6*BDG_MAX_PORTS]; +bdg_addr bdg_addresses[BDG_MAX_PORTS]; int bdg_ports ; /* - * initialization of bridge code. + * initialization of bridge code. This needs to be done after all + * interfaces have been configured. */ static void -bdginit(dummy) - void *dummy; +bdginit(void *dummy) { - int i ; - struct ifnet *ifp; - struct arpcom *ac ; - u_char *eth_addr ; - /* - * initialization of bridge code - */ + if (bdg_table == NULL) bdg_table = (struct hash_table *) _MALLOC(HASH_SIZE * sizeof(struct hash_table), M_IFADDR, M_WAITOK); flush_table(); - ifp2sc = _MALLOC(if_index * sizeof(struct bdg_softc *), M_IFADDR, M_WAITOK ); - bzero(ifp2sc, if_index * sizeof(struct bdg_softc *) ); + ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), + M_IFADDR, M_WAITOK ); + bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); bzero(&bdg_stats, sizeof(bdg_stats) ); + bdgtakeifaces(); + bdg_timeout(0); + do_bridge=0; +} + +void +bdgtakeifaces(void) +{ + int i ; + struct ifnet *ifp; + bdg_addr *p = bdg_addresses ; + struct bdg_softc *bp; + bdg_ports = 0 ; - eth_addr = bdg_addresses ; + *bridge_cfg = '\0'; - printf("BRIDGE 981214, have %d interfaces\n", if_index); + printf("BRIDGE 010131, have %d interfaces\n", if_index); + ifnet_head_lock_shared(); for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; - i++, ifp = ifp->if_link.tqe_next) - if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ - ac = (struct arpcom *)ifp; - sprintf(bdg_stats.s[ifp->if_index].name, - "%s%d", ifp->if_name, ifp->if_unit); - printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", - ifp->if_index, - bdg_stats.s[ifp->if_index].name, - (int)ifp->if_type, (int) ifp->if_physical, - (int)ifp->if_addrlen, - ac->ac_enaddr, "." ); - bcopy(ac->ac_enaddr, eth_addr, 6); - eth_addr += 6 ; - - ifp2sc[bdg_ports] = _MALLOC(sizeof(struct bdg_softc), - M_IFADDR, M_WAITOK ); - ifp2sc[bdg_ports]->ifp = ifp ; - ifp2sc[bdg_ports]->flags = 0 ; - ifp2sc[bdg_ports]->group = 0 ; - bdg_ports ++ ; - } - bdg_timeout(0); - do_bridge=0; + i++, ifp = TAILQ_NEXT(ifp, if_link) ) + if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ + ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN); + bp = &ifp2sc[ifp->if_index] ; + sprintf(bridge_cfg + strlen(bridge_cfg), + "%s%d:1,", ifp->if_name, ifp->if_unit); + printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", + ifp->if_index, + bdg_stats.s[ifp->if_index].name, + (int)ifp->if_type, (int) ifp->if_physical, + (int)ifp->if_addrlen, + p->etheraddr, "." ); + p++ ; + bp->ifp = ifp ; + bp->flags = IFF_USED ; + bp->cluster_id = htons(1) ; + bp->magic = 0xDEADBEEF ; + + sprintf(bdg_stats.s[ifp->if_index].name, + "%s%d:%d", ifp->if_name, ifp->if_unit, + ntohs(bp->cluster_id)); + bdg_ports ++ ; + } + ifnet_head_done(); } /* * bridge_in() is invoked to perform bridging decision on input packets. + * * On Input: - * m packet to be bridged. The mbuf need not to hold the - * whole packet, only the first 14 bytes suffice. We - * assume them to be contiguous. No alignment assumptions - * because they are not a problem on i386 class machines. + * eh Ethernet header of the incoming packet. * * On Return: destination of packet, one of * BDG_BCAST broadcast @@ -386,16 +528,12 @@ bdginit(dummy) * to fetch more of the packet, or simply drop it completely. */ - struct ifnet * -bridge_in(struct mbuf *m) +bridge_in(struct ifnet *ifp, struct ether_header *eh) { int index; - struct ifnet *ifp = m->m_pkthdr.rcvif, *dst , *old ; - int dropit = MUTED(ifp) ; - struct ether_header *eh; - - eh = mtod(m, struct ether_header *); + struct ifnet *dst , *old ; + int dropit = BDG_MUTED(ifp) ; /* * hash the source address @@ -404,8 +542,10 @@ bridge_in(struct mbuf *m) bdg_table[index].used = 1 ; old = bdg_table[index].name ; if ( old ) { /* the entry is valid. */ + IFP_CHK(old, printf("bridge_in-- reading table\n") ); + if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { - printf("collision at %d\n", index); + bdg_ipfw_colls++ ; bdg_table[index].name = NULL ; } else if (old != ifp) { /* @@ -421,11 +561,11 @@ bridge_in(struct mbuf *m) bdg_loops, eh->ether_shost, ".", ifp->if_name, ifp->if_unit, old->if_name, old->if_unit, - old->if_flags & IFF_MUTE ? "muted":"ignore"); + BDG_MUTED(old) ? "muted":"active"); dropit = 1 ; - if ( !MUTED(old) ) { + if ( !BDG_MUTED(old) ) { if (++bdg_loops > 10) - MUTE(old) ; + BDG_MUTE(old) ; } } } @@ -439,7 +579,7 @@ bridge_in(struct mbuf *m) bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); bdg_table[index].name = ifp ; } - dst = bridge_dst_lookup(m); + dst = bridge_dst_lookup(eh); /* Return values: * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. * For muted interfaces, the first 3 are changed in BDG_LOCAL, @@ -475,205 +615,267 @@ bridge_in(struct mbuf *m) } /* - * Forward to dst, excluding src port and (if not a single interface) - * muted interfaces. The packet is freed if marked as such - * and not for a local destination. - * A cleaner implementation would be to make bdg_forward() - * always consume the packet, leaving to the caller the task - * to make a copy if it needs it. As it is now, bdg_forward() - * can keep a copy alive in some cases. + * Forward to dst, excluding src port and muted interfaces. + * If src == NULL, the pkt comes from ether_output, and dst is the real + * interface the packet is originally sent to. In this case we must forward + * it to the whole cluster. We never call bdg_forward ether_output on + * interfaces which are not part of a cluster. + * + * The packet is freed if possible (i.e. surely not of interest for + * the upper layer), otherwise a copy is left for use by the caller + * (pointer in m0). + * + * It would be more efficient to make bdg_forward() always consume + * the packet, leaving to the caller the task to check if it needs a copy + * and get one in case. As it is now, bdg_forward() can sometimes make + * a copy whereas it is not necessary. + * + * XXX be careful about eh, it can be a pointer into *m */ -int -bdg_forward (struct mbuf **m0, struct ifnet *dst) +struct mbuf * +bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) { - struct ifnet *src = (*m0)->m_pkthdr.rcvif; /* could be NULL in output */ - struct ifnet *ifp ; - struct ip *ip; - int error=0, s ; - int once = 0; /* execute the loop only once */ - int canfree = 1 ; /* can free the buf at the end */ - struct mbuf *m ; + struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */ + struct ifnet *ifp, *last = NULL ; + int s ; + int shared = bdg_copy ; /* someone else is using the mbuf */ + int once = 0; /* loop only once */ + struct ifnet *real_dst = dst ; /* real dst from ether_output */ +#ifdef IPFIREWALL + struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */ +#endif - struct ether_header *eh = mtod(*m0, struct ether_header *); /* XXX */ + /* + * XXX eh is usually a pointer within the mbuf (some ethernet drivers + * do that), so we better copy it before doing anything with the mbuf, + * or we might corrupt the header. + */ + struct ether_header save_eh = *eh ; +#if defined(IPFIREWALL) && defined(DUMMYNET) + if (m0->m_type == MT_DUMMYNET) { + /* extract info from dummynet header */ + rule = (struct ip_fw_chain *)(m0->m_data) ; + m0 = m0->m_next ; + src = m0->m_pkthdr.rcvif; + shared = 0 ; /* For sure this is our own mbuf. */ + } else +#endif + bdg_thru++; /* only count once */ + + if (src == NULL) /* packet from ether_output */ + dst = bridge_dst_lookup(eh); if (dst == BDG_DROP) { /* this should not happen */ - printf("xx bdg_forward for BDG_DROP)\n"); - m_freem(*m0) ; - *m0 = NULL ; - return 0; + printf("xx bdg_forward for BDG_DROP\n"); + m_freem(m0); + return NULL; } if (dst == BDG_LOCAL) { /* this should not happen as well */ - printf("xx ouch, bdg_forward for local pkt\n"); - return 0; + printf("xx ouch, bdg_forward for local pkt\n"); + return m0; } if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { - ifp = ifnet.tqh_first ; - once = 0 ; - if (dst != BDG_UNKNOWN) - canfree = 0 ; + ifp = ifnet_head.tqh_first ; /* scan all ports */ + once = 0 ; + if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ + shared = 1 ; } else { - ifp = dst ; - once = 1 ; /* and also canfree */ + ifp = dst ; + once = 1 ; } -#if IPFIREWALL + if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) + panic("bdg_forward: bad dst"); + +#ifdef IPFIREWALL /* - * do filtering in a very similar way to what is done - * in ip_output. Only for IP packets, and only pass/fail/dummynet - * is supported. The tricky thing is to make sure that enough of - * the packet (basically, Eth+IP+TCP/UDP headers) is contiguous - * so that calls to m_pullup in ip_fw_chk will not kill the - * ethernet header. + * Do filtering in a very similar way to what is done in ip_output. + * Only if firewall is loaded, enabled, and the packet is not + * from ether_output() (src==NULL, or we would filter it twice). + * Additional restrictions may apply e.g. non-IP, short packets, + * and pkts already gone through a pipe. */ - if (ip_fw_chk_ptr) { - u_int16_t dummy ; - struct ip_fw_chain *rule; - int off; - - m = *m0 ; - if (m->m_type == MT_DUMMYNET) { - /* - * the packet was already tagged, so part of the - * processing was already done, and we need to go down. - */ - rule = (struct ip_fw_chain *)(m->m_data) ; - (*m0) = m = m->m_next ; - - src = m->m_pkthdr.rcvif; /* could be NULL in output */ - eh = mtod(m, struct ether_header *); /* XXX */ - canfree = 1 ; /* for sure, a copy is not needed later. */ - goto forward; /* HACK! */ - } else - rule = NULL ; - if (bdg_ipfw == 0) - goto forward ; - if (src == NULL) - goto forward ; /* do not apply to packets from ether_output */ - if (canfree == 0 ) /* need to make a copy */ - m = m_copypacket(*m0, M_DONTWAIT); - if (m == NULL) { - /* fail... */ - return 0 ; + if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) { + struct ip *ip ; + int i; + + if (rule != NULL) /* dummynet packet, already partially processed */ + goto forward; /* HACK! I should obey the fw_one_pass */ + if (ntohs(save_eh.ether_type) != ETHERTYPE_IP) + goto forward ; /* not an IP packet, ipfw is not appropriate */ + if (m0->m_pkthdr.len < sizeof(struct ip) ) + goto forward ; /* header too short for an IP pkt, cannot filter */ + /* + * i need some amt of data to be contiguous, and in case others need + * the packet (shared==1) also better be in the first mbuf. + */ + i = min(m0->m_pkthdr.len, max_protohdr) ; + if ( shared || m0->m_len < i) { + m0 = m_pullup(m0, i) ; + if (m0 == NULL) { + printf("-- bdg: pullup failed.\n") ; + return NULL ; + } } - - dummy = 0 ; + /* * before calling the firewall, swap fields the same as IP does. * here we assume the pkt is an IP one and the header is contiguous */ - eh = mtod(m, struct ether_header *); - ip = (struct ip *)(eh + 1 ) ; + ip = mtod(m0, struct ip *); NTOHS(ip->ip_len); - NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* - * The third parameter to the firewall code is the dst. interface. + * The third parameter to the firewall code is the dst. interface. * Since we apply checks only on input pkts we use NULL. + * The firewall knows this is a bridged packet as the cookie ptr + * is NULL. */ - off = (*ip_fw_chk_ptr)(NULL, 0, NULL, &dummy, &m, &rule, NULL) ; - if (m == NULL) { /* pkt discarded by firewall */ - if (canfree) - *m0 = NULL ; - return 0 ; - } + i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL); + if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */ + return m0 ; /* - * on return, the mbuf pointer might have changed. Restore - * *m0 (if it was the same as m), eh, ip and then - * restore original ordering. + * If we get here, the firewall has passed the pkt, but the mbuf + * pointer might have changed. Restore ip and the fields NTOHS()'d. */ - eh = mtod(m, struct ether_header *); - ip = (struct ip *)(eh + 1 ) ; - if (canfree) /* m was a reference to *m0, so update *m0 */ - *m0 = m ; + ip = mtod(m0, struct ip *); HTONS(ip->ip_len); - HTONS(ip->ip_id); HTONS(ip->ip_off); - if (off == 0) { - if (canfree == 0) - m_freem(m); + + if (i == 0) /* a PASS rule. */ goto forward ; - } -#if DUMMYNET - if (off & 0x10000) { +#ifdef DUMMYNET + if (i & IP_FW_PORT_DYNT_FLAG) { /* - * pass the pkt to dummynet. Need to include m, dst, rule. - * Dummynet consumes the packet in all cases. + * Pass the pkt to dummynet, which consumes it. + * If shared, make a copy and keep the original. + * Need to prepend the ethernet header, optimize the common + * case of eh pointing already into the original mbuf. */ - dummynet_io((off & 0xffff), DN_TO_BDG_FWD, m, dst, NULL, 0, rule); - if (canfree) /* dummynet has consumed the original one */ - *m0 = NULL ; - return 0 ; + struct mbuf *m ; + if (shared) { + m = m_copypacket(m0, M_DONTWAIT); + if (m == NULL) { + printf("bdg_fwd: copy(1) failed\n"); + return m0; + } + } else { + m = m0 ; /* pass the original to dummynet */ + m0 = NULL ; /* and nothing back to the caller */ + } + if ( (void *)(eh + 1) == (void *)m->m_data) { + m->m_data -= ETHER_HDR_LEN ; + m->m_len += ETHER_HDR_LEN ; + m->m_pkthdr.len += ETHER_HDR_LEN ; + bdg_predict++; + } else { + M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); + if (!m && verbose) printf("M_PREPEND failed\n"); + if (m == NULL) /* nope... */ + return m0 ; + bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); + } + dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0); + return m0 ; } #endif + /* + * XXX add divert/forward actions... + */ /* if none of the above matches, we have to drop the pkt */ - if (m) - m_freem(m); - if (canfree && m != *m0) { - m_freem(*m0); - *m0 = NULL ; - } - return 0 ; + bdg_ipfw_drops++ ; + printf("bdg_forward: No rules match, so dropping packet!\n"); + return m0 ; } forward: -#endif /* COMPAT_IPFW */ - if (canfree && once) - m = *m0 ; - else - m = NULL ; - - for ( ; ifp ; ifp = ifp->if_link.tqe_next ) { - if (ifp != src && ifp->if_type == IFT_ETHER && - (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && - SAMEGROUP(ifp, src) && !MUTED(ifp) ) { - if (m == NULL) { /* do i need to make a copy ? */ - if (canfree && ifp->if_link.tqe_next == NULL) /* last one! */ - m = *m0 ; - else /* on a P5-90, m_packetcopy takes 540 ticks */ - m = m_copypacket(*m0, M_DONTWAIT); +#endif /* IPFIREWALL */ + /* + * Again, bring up the headers in case of shared bufs to avoid + * corruptions in the future. + */ + if ( shared ) { + int i = min(m0->m_pkthdr.len, max_protohdr) ; + + m0 = m_pullup(m0, i) ; + if (m0 == NULL) { + printf("-- bdg: pullup2 failed.\n") ; + return NULL ; + } + } + /* now real_dst is used to determine the cluster where to forward */ + if (src != NULL) /* pkt comes from ether_input */ + real_dst = src ; + for (;;) { + if (last) { /* need to forward packet leftover from previous loop */ + struct mbuf *m ; + if (shared == 0 && once ) { /* no need to copy */ + m = m0 ; + m0 = NULL ; /* original is gone */ + } else { + m = m_copypacket(m0, M_DONTWAIT); if (m == NULL) { - printf("bdg_forward: sorry, m_copy failed!\n"); - return ENOBUFS ; + printf("bdg_forward: sorry, m_copypacket failed!\n"); + return m0 ; /* the original is still there... */ } } /* - * execute last part of ether_output. + * Add header (optimized for the common case of eh pointing + * already into the mbuf) and execute last part of ether_output: + * queue pkt and start output if interface not yet active. */ + if ( (void *)(eh + 1) == (void *)m->m_data) { + m->m_data -= ETHER_HDR_LEN ; + m->m_len += ETHER_HDR_LEN ; + m->m_pkthdr.len += ETHER_HDR_LEN ; + bdg_predict++; + } else { + M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); + if (!m && verbose) printf("M_PREPEND failed\n"); + if (m == NULL) + return m0; + bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); + } s = splimp(); - /* - * Queue message on interface, and start output if interface - * not yet active. - */ - if (IF_QFULL(&ifp->if_snd)) { - IF_DROP(&ifp->if_snd); - MUTE(ifp); /* good measure... */ + if (IF_QFULL(&last->if_snd)) { + IF_DROP(&last->if_snd); +#if 0 + BDG_MUTE(last); /* should I also mute ? */ +#endif splx(s); - error = ENOBUFS ; + m_freem(m); /* consume the pkt anyways */ } else { - ifp->if_obytes += m->m_pkthdr.len ; + last->if_obytes += m->m_pkthdr.len ; if (m->m_flags & M_MCAST) - ifp->if_omcasts++; - IF_ENQUEUE(&ifp->if_snd, m); - if ((ifp->if_flags & IFF_OACTIVE) == 0) - (*ifp->if_start)(ifp); + last->if_omcasts++; + if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */ + bdg_split_pkts++; + + IF_ENQUEUE(&last->if_snd, m); + if ((last->if_flags & IFF_OACTIVE) == 0) + (*last->if_start)(last); splx(s); - if (m == *m0) - *m0 = NULL ; /* the packet is gone... */ - m = NULL ; } - BDG_STAT(ifp, BDG_OUT); + BDG_STAT(last, BDG_OUT); + last = NULL ; + if (once) + break ; } - if (once) + if (ifp == NULL) break ; + /* + * If the interface is used for bridging, not muted, not full, + * up and running, is not the source interface, and belongs to + * the same cluster as the 'real_dst', then send here. + */ + if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) && + (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && + ifp != src && BDG_SAMECLUSTER(ifp, real_dst) ) + last = ifp ; + ifp = TAILQ_NEXT(ifp, if_link) ; + if (ifp == NULL) + once = 1 ; } - - /* cleanup any mbuf leftover. */ - if (m) - m_freem(m); - if (m == *m0) - *m0 = NULL ; - if (canfree && *m0) { - m_freem(*m0); - *m0 = NULL ; - } - return error ; + DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; + if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) + return m0 ; }