2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * Copyright (c) 1998 Luigi Rizzo
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions
28 * 1. Redistributions of source code must retain the above copyright
29 * notice, this list of conditions and the following disclaimer.
30 * 2. Redistributions in binary form must reproduce the above copyright
31 * notice, this list of conditions and the following disclaimer in the
32 * documentation and/or other materials provided with the distribution.
34 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
35 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
38 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
40 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
42 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
43 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
49 * This code implements bridging in FreeBSD. It only acts on ethernet
50 * type of interfaces (others are still usable for routing).
51 * A bridging table holds the source MAC address/dest. interface for each
52 * known node. The table is indexed using a hash of the source address.
54 * Input packets are tapped near the end of the input routine in each
55 * driver (near the call to bpf_mtap, or before the call to ether_input)
56 * and analysed calling bridge_in(). Depending on the result, the packet
57 * can be forwarded to one or more output interfaces using bdg_forward(),
58 * and/or sent to the upper layer (e.g. in case of multicast).
60 * Output packets are intercepted near the end of ether_output(),
61 * the correct destination is selected calling bdg_dst_lookup(),
62 * and then forwarding is done using bdg_forward().
63 * Bridging is controlled by the sysctl variable net.link.ether.bridge
65 * The ARP code is also modified to let a machine answer requests
66 * irrespective of the port the request came from.
68 * In case of loops in the bridging topology, the bridge detects this
69 * event and temporarily mutes output bridging on one of the ports.
70 * Periodically, interfaces are unmuted by bdg_timeout(). (For the
71 * mute flag I am temporarily using IFF_LINK2, but this has to
72 * change.) Muting is only implemented as a safety measure, and also as
73 * a mechanism to support a user-space implementation of the spanning
74 * tree algorithm. In the final release, unmuting will only occur
75 * because of explicit action of the user-level daemon.
77 * To build a bridging kernel, use the following option
79 * and then at runtime set the sysctl variable to enable bridging.
81 * Only one interface is supposed to have addresses set (but
82 * there are no problems in practice if you set addresses for more
83 * than one interface).
84 * Bridging will act before routing, but nothing prevents a machine
85 * from doing both (modulo bugs in the implementation...).
88 * - bridging requires some (small) modifications to the interface
89 * driver. Currently (980911) the "ed", "de", "tx", "lnc" drivers
90 * have been modified and tested. "fxp", "ep", "fe" have been
91 * modified but not tested. See the "ed" and "de" drivers as
92 * examples on how to operate.
93 * - bridging is incompatible with multicast routing on the same
94 * machine. There is not an easy fix to this.
95 * - loop detection is still not very robust.
96 * - the interface of bdg_forward() could be improved.
99 #include <sys/param.h>
100 #include <sys/mbuf.h>
101 #include <sys/malloc.h>
102 #include <sys/systm.h>
103 #include <sys/socket.h> /* for net/if.h */
104 #include <sys/kernel.h>
105 #include <sys/sysctl.h>
108 #include <net/if_types.h>
110 #include <netinet/in.h> /* for struct arpcom */
111 #include <netinet/in_systm.h>
112 #include <netinet/in_var.h>
113 #include <netinet/ip.h>
114 #include <netinet/if_ether.h> /* for struct arpcom */
116 #include "opt_ipfw.h"
117 #include "opt_ipdn.h"
119 #if defined(IPFIREWALL) && defined(DUMMYNET)
120 #include <net/route.h>
121 #include <netinet/ip_fw.h>
122 #include <netinet/ip_dummynet.h>
125 #include <net/bridge.h>
128 * For debugging, you can use the following macros.
129 * remember, rdtsc() only works on Pentium-class machines
132 DDB(ticks = rdtsc();)
133 ... interesting code ...
134 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
143 * System initialization
/*
 * Forward declarations of the two file-local routines (bdginit and
 * flush_table), the SYSINIT registration of bdginit (subsystem
 * SI_SUB_PROTO_IF, order SI_ORDER_FIRST, NULL argument), and two
 * file-scope variables: bdg_ipfw, which starts at 0, and the bdg_table
 * pointer, which starts out NULL.
 */
146 static void bdginit(void *);
147 static void flush_table(void);
149 SYSINIT(interfaces
, SI_SUB_PROTO_IF
, SI_ORDER_FIRST
, bdginit
, NULL
)
151 static int bdg_ipfw
= 0 ;
153 bdg_hash_table
*bdg_table
= NULL
;
156 * we need additional info for the bridge. The bdg_ifp2sc[] array
157 * provides a pointer to this struct using the if_index.
158 * bdg_softc has a backpointer to the struct ifnet, the bridge
159 * flags, and a group (bridging occurs only between ports of the
164 /* ((struct arpcom *)ifp)->ac_enaddr is the eth. addr */
/*
 * ifp2sc is a file-local array of pointers to struct bdg_softc; it is
 * NULL until allocated.
 *
 * Two sets of helper macros follow.  The first set, opened by "#if 0",
 * looks up group and mute state through ifp2sc[] using if_index.  The
 * second set treats every pair of interfaces as being in the same group
 * (SAMEGROUP is the constant 1) and keeps the mute bit, IFF_MUTE,
 * directly in ifp->if_flags.
 *
 * NOTE(review): the #else / #endif lines that separate the two sets are
 * not visible in this view.
 * NOTE(review): MUTE() and UNMUTE() expand to unparenthesized assignment
 * expressions, so they are only safe when used as whole statements.
 */
169 static struct bdg_softc
**ifp2sc
= NULL
;
171 #if 0 /* new code using ifp2sc */
172 #define SAMEGROUP(ifp,src) (src == NULL || \
173 ifp2sc[ifp->if_index]->group == ifp2sc[src->if_index]->group )
174 #define MUTED(ifp) (ifp2sc[ifp->if_index]->flags & IFF_MUTE)
175 #define MUTE(ifp) ifp2sc[ifp->if_index]->flags |= IFF_MUTE
176 #define UNMUTE(ifp) ifp2sc[ifp->if_index]->flags &= ~IFF_MUTE
178 #define SAMEGROUP(a,b) 1
179 #define MUTED(ifp) (ifp->if_flags & IFF_MUTE)
180 #define MUTE(ifp) ifp->if_flags |= IFF_MUTE
181 #define UNMUTE(ifp) ifp->if_flags &= ~IFF_MUTE
/*
 * sysctl handler for the bridge variable.
 *
 * Remembers the current value of do_bridge in oldval, hands the request
 * to sysctl_handle_int() with the oid's arg1/arg2, and logs the call.
 * It then tests whether bdg_table is still NULL and whether do_bridge
 * differs from oldval.
 *
 * NOTE(review): the statements executed in those two branches, and the
 * return statement, are not visible in this view.
 */
185 sysctl_bdg SYSCTL_HANDLER_ARGS
187 int error
, oldval
= do_bridge
;
189 error
= sysctl_handle_int(oidp
,
190 oidp
->oid_arg1
, oidp
->oid_arg2
, req
);
191 printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
192 oidp
->oid_name
, oidp
->oid_arg2
,
194 if (bdg_table
== NULL
)
196 if (oldval
!= do_bridge
) {
/*
 * sysctl nodes registered under _net_link_ether:
 *   bridge       read/write int backed by do_bridge, handled by sysctl_bdg
 *   bridge_ipfw  read/write int backed by bdg_ipfw
 *   bdginc, bdgint, bdgfwc, bdgfwt
 *                read/write ints backed by the diagnostic variables
 *                bdg_in_count, bdg_in_ticks, bdg_fw_count, bdg_fw_ticks
 *   bdgstats     read-only struct bdg_stats
 *
 * bdg_loops is a file-local counter; bridge_in() increments it when it
 * sees a known source address arrive on a different interface.
 */
202 SYSCTL_DECL(_net_link_ether
);
203 SYSCTL_PROC(_net_link_ether
, OID_AUTO
, bridge
, CTLTYPE_INT
|CTLFLAG_RW
,
204 &do_bridge
, 0, &sysctl_bdg
, "I", "Bridging");
206 SYSCTL_INT(_net_link_ether
, OID_AUTO
, bridge_ipfw
, CTLFLAG_RW
, &bdg_ipfw
,0,"");
207 #if 1 /* diagnostic vars */
208 int bdg_in_count
= 0 , bdg_in_ticks
= 0 , bdg_fw_count
= 0, bdg_fw_ticks
= 0 ;
209 SYSCTL_INT(_net_link_ether
, OID_AUTO
, bdginc
, CTLFLAG_RW
, &bdg_in_count
,0,"");
210 SYSCTL_INT(_net_link_ether
, OID_AUTO
, bdgint
, CTLFLAG_RW
, &bdg_in_ticks
,0,"");
211 SYSCTL_INT(_net_link_ether
, OID_AUTO
, bdgfwc
, CTLFLAG_RW
, &bdg_fw_count
,0,"");
212 SYSCTL_INT(_net_link_ether
, OID_AUTO
, bdgfwt
, CTLFLAG_RW
, &bdg_fw_ticks
,0,"");
214 static struct bdg_stats bdg_stats
;
215 SYSCTL_STRUCT(_net_link_ether
, PF_BDG
, bdgstats
,
216 CTLFLAG_RD
, &bdg_stats
, bdg_stats
, "bridge statistics");
218 static int bdg_loops
;
221 * completely flush the bridge table.
/*
 * Body of the table flush.  After a check for bdg_table == NULL, the
 * name field of each of the HASH_SIZE entries is set to NULL; the
 * etheraddr and used fields are left untouched by the visible code.
 *
 * NOTE(review): the function header, the declaration of i, and the
 * action taken when bdg_table is NULL are not visible in this view.
 */
228 if (bdg_table
== NULL
)
231 for (i
=0; i
< HASH_SIZE
; i
++)
232 bdg_table
[i
].name
= NULL
; /* clear table */
/*
 * Callback registered with timeout() by bdg_timeout().  It brackets its
 * work between thread_funnel_set(network_flock, TRUE) and
 * thread_funnel_set(network_flock, FALSE), storing each result in
 * funnel_state.
 *
 * NOTE(review): the statement between the two funnel calls is not
 * visible in this view; presumably it calls bdg_timeout(dummy) -- confirm.
 */
236 /* wrapper for funnel */
238 bdg_timeout_funneled(void * dummy
)
240 boolean_t funnel_state
;
242 funnel_state
= thread_funnel_set(network_flock
, TRUE
);
244 funnel_state
= thread_funnel_set(network_flock
, FALSE
);
248 * called periodically to flush entries etc.
/*
 * Periodic housekeeping for the bridge.
 *
 * Table aging: each call walks HASH_SIZE/4 slots starting at the static
 * age_index.  A slot whose "used" mark is set has the mark cleared; a
 * slot with no mark but a non-NULL name has its name set to NULL.
 * age_index is then compared against HASH_SIZE.
 *
 * Interface pass: when the static slowtimer counts down to <= 0, the
 * ifnet list is walked.  The visible code tests for IFT_ETHER, for
 * IFF_UP and for IFF_PROMISC, calls ifpromisc(ifp, 1), and prints
 * messages about the promiscuous state and about unmuting.
 *
 * Finally the routine re-arms itself by scheduling bdg_timeout_funneled
 * through timeout() with a delay of 2*hz.
 *
 * NOTE(review): several statements are not visible in this view,
 * including the action when age_index >= HASH_SIZE, the reload of
 * slowtimer, and the bodies of the interface-flag tests.
 */
251 bdg_timeout(void *dummy
)
255 static int slowtimer
= 0 ;
256 boolean_t funnel_state
;
260 static int age_index
= 0 ; /* index of table position to age */
261 int l
= age_index
+ HASH_SIZE
/4 ;
263 * age entries in the forwarding table.
267 for (; age_index
< l
; age_index
++)
268 if (bdg_table
[age_index
].used
)
269 bdg_table
[age_index
].used
= 0 ;
270 else if (bdg_table
[age_index
].name
) {
271 /* printf("xx flushing stale entry %d\n", age_index); */
272 bdg_table
[age_index
].name
= NULL
;
274 if (age_index
>= HASH_SIZE
)
277 if (--slowtimer
<= 0 ) {
280 for (ifp
= ifnet
.tqh_first
; ifp
; ifp
= ifp
->if_link
.tqe_next
) {
281 if (ifp
->if_type
!= IFT_ETHER
)
283 if ( 0 == ( ifp
->if_flags
& IFF_UP
) ) {
288 if ( 0 == ( ifp
->if_flags
& IFF_PROMISC
) ) {
291 ret
= ifpromisc(ifp
, 1);
293 printf(">> now %s%d flags 0x%x promisc %d\n",
294 ifp
->if_name
, ifp
->if_unit
,
298 printf(">> unmuting %s%d\n", ifp
->if_name
, ifp
->if_unit
);
305 timeout(bdg_timeout_funneled
, (void *)0, 2*hz
);
310 * local MAC addresses are held in a small array. This makes comparisons
/*
 * Flat byte array sized 6*BDG_MAX_PORTS.  bdginit() copies each
 * Ethernet interface's 6-byte ac_enaddr into it through the eth_addr
 * cursor.
 */
313 unsigned char bdg_addresses
[6*BDG_MAX_PORTS
];
317 * initialization of bridge code.
328 * initialization of bridge code
/*
 * Body of the bridge initialization routine.
 *
 * Steps visible here:
 *  - if bdg_table is NULL, allocate HASH_SIZE hash-table entries;
 *  - allocate ifp2sc[] with if_index pointer slots and zero it;
 *  - zero bdg_stats;
 *  - point eth_addr at the start of bdg_addresses;
 *  - walk the interface list for at most if_index entries; for each
 *    IFT_ETHER interface, format "<name><unit>" into
 *    bdg_stats.s[ifp->if_index].name, print a summary line, copy the
 *    6-byte ac_enaddr to eth_addr, and allocate a struct bdg_softc
 *    stored at ifp2sc[bdg_ports] with ifp set and flags/group zeroed.
 *
 * NOTE(review): here ifp2sc[] is indexed by bdg_ports, whereas the
 * "#if 0" helper macros index it by if_index -- confirm which is
 * intended.
 * NOTE(review): the results of _MALLOC are used without a visible NULL
 * check, and the name is written with an unbounded sprintf().
 * NOTE(review): the function header, local declarations, the advance of
 * eth_addr and bdg_ports, and the end of the routine are not visible in
 * this view.
 */
330 if (bdg_table
== NULL
)
331 bdg_table
= (struct hash_table
*)
332 _MALLOC(HASH_SIZE
* sizeof(struct hash_table
),
336 ifp2sc
= _MALLOC(if_index
* sizeof(struct bdg_softc
*), M_IFADDR
, M_WAITOK
);
337 bzero(ifp2sc
, if_index
* sizeof(struct bdg_softc
*) );
339 bzero(&bdg_stats
, sizeof(bdg_stats
) );
341 eth_addr
= bdg_addresses
;
343 printf("BRIDGE 981214, have %d interfaces\n", if_index
);
344 for (i
= 0 , ifp
= ifnet
.tqh_first
; i
< if_index
;
345 i
++, ifp
= ifp
->if_link
.tqe_next
)
346 if (ifp
->if_type
== IFT_ETHER
) { /* ethernet ? */
347 ac
= (struct arpcom
*)ifp
;
348 sprintf(bdg_stats
.s
[ifp
->if_index
].name
,
349 "%s%d", ifp
->if_name
, ifp
->if_unit
);
350 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
352 bdg_stats
.s
[ifp
->if_index
].name
,
353 (int)ifp
->if_type
, (int) ifp
->if_physical
,
354 (int)ifp
->if_addrlen
,
355 ac
->ac_enaddr
, "." );
356 bcopy(ac
->ac_enaddr
, eth_addr
, 6);
359 ifp2sc
[bdg_ports
] = _MALLOC(sizeof(struct bdg_softc
),
360 M_IFADDR
, M_WAITOK
);
361 ifp2sc
[bdg_ports
]->ifp
= ifp
;
362 ifp2sc
[bdg_ports
]->flags
= 0 ;
363 ifp2sc
[bdg_ports
]->group
= 0 ;
371 * bridge_in() is invoked to perform bridging decision on input packets.
373 * m packet to be bridged. The mbuf need not hold the
374 * whole packet, only the first 14 bytes suffice. We
375 * assume them to be contiguous. No alignment assumptions
376 * because they are not a problem on i386 class machines.
378 * On Return: destination of packet, one of
379 * BDG_BCAST broadcast
380 * BDG_MCAST multicast
381 * BDG_LOCAL is only for a local address (do not forward)
382 * BDG_DROP drop the packet
383 * ifp ifp of the destination interface.
385 * Forwarding is not done directly to give a chance to some drivers
386 * to fetch more of the packet, or simply drop it completely.
/*
 * Bridging decision for a received packet.
 *
 * Learning phase: the receiving interface is taken from
 * m->m_pkthdr.rcvif and the Ethernet header from the start of the mbuf.
 * The source MAC is hashed with HASH_FN() and the slot is marked used.
 *  - Slot occupied by a different MAC (BDG_MATCH fails): log a
 *    collision and clear the slot's name.
 *  - Slot holds the same MAC but a different interface: repoint the
 *    entry at the receiving interface, log a "loop" message, and bump
 *    bdg_loops, which is then compared against 10.
 *  - Slot empty (name == NULL): store the source MAC and the receiving
 *    interface.
 *
 * Decision phase: bridge_dst_lookup(m) yields the destination, the
 * BDG_IN statistic is recorded, and BDG_DROP or BDG_FORWARD statistics
 * are recorded depending on dst == ifp or the muted state captured in
 * dropit at entry.  The final visible return yields BDG_DROP when the
 * destination is the receiving interface and dst otherwise.
 *
 * NOTE(review): the return type, the declaration of index, the switch
 * statement around the BDG_UNKNOWN case, and several branch bodies
 * (including the action when bdg_loops exceeds 10) are not visible in
 * this view.
 */
391 bridge_in(struct mbuf
*m
)
394 struct ifnet
*ifp
= m
->m_pkthdr
.rcvif
, *dst
, *old
;
395 int dropit
= MUTED(ifp
) ;
396 struct ether_header
*eh
;
398 eh
= mtod(m
, struct ether_header
*);
401 * hash the source address
403 index
= HASH_FN(eh
->ether_shost
);
404 bdg_table
[index
].used
= 1 ;
405 old
= bdg_table
[index
].name
;
406 if ( old
) { /* the entry is valid. */
407 if (!BDG_MATCH( eh
->ether_shost
, bdg_table
[index
].etheraddr
) ) {
408 printf("collision at %d\n", index
);
409 bdg_table
[index
].name
= NULL
;
410 } else if (old
!= ifp
) {
412 * found a loop. Either a machine has moved, or there
413 * is a misconfiguration/reconfiguration of the network.
414 * First, do not forward this packet!
415 * Record the relocation anyways; then, if loops persist,
416 * suspect a reconfiguration and disable forwarding
417 * from the old interface.
419 bdg_table
[index
].name
= ifp
; /* relocate address */
420 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
421 bdg_loops
, eh
->ether_shost
, ".",
422 ifp
->if_name
, ifp
->if_unit
,
423 old
->if_name
, old
->if_unit
,
424 old
->if_flags
& IFF_MUTE
? "muted":"ignore");
427 if (++bdg_loops
> 10)
434 * now write the source address into the table
436 if (bdg_table
[index
].name
== NULL
) {
437 DEB(printf("new addr %6D at %d for %s%d\n",
438 eh
->ether_shost
, ".", index
, ifp
->if_name
, ifp
->if_unit
);)
439 bcopy(eh
->ether_shost
, bdg_table
[index
].etheraddr
, 6);
440 bdg_table
[index
].name
= ifp
;
442 dst
= bridge_dst_lookup(m
);
444 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
445 * For muted interfaces, the first 3 are changed in BDG_LOCAL,
446 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
447 * to BDG_DROP in case ifp == src . These mods are not necessary
448 * for outgoing packets from ether_output().
450 BDG_STAT(ifp
, BDG_IN
);
455 case (int)BDG_UNKNOWN
:
460 if (dst
== ifp
|| dropit
)
461 BDG_STAT(ifp
, BDG_DROP
);
463 BDG_STAT(ifp
, BDG_FORWARD
);
468 if (dst
== BDG_BCAST
|| dst
== BDG_MCAST
|| dst
== BDG_LOCAL
)
473 return (dst
== ifp
? BDG_DROP
: dst
) ;
478 * Forward to dst, excluding src port and (if not a single interface)
479 * muted interfaces. The packet is freed if marked as such
480 * and not for a local destination.
481 * A cleaner implementation would be to make bdg_forward()
482 * always consume the packet, leaving to the caller the task
483 * to make a copy if it needs it. As it is now, bdg_forward()
484 * can keep a copy alive in some cases.
487 bdg_forward (struct mbuf
**m0
, struct ifnet
*dst
)
489 struct ifnet
*src
= (*m0
)->m_pkthdr
.rcvif
; /* could be NULL in output */
493 int once
= 0; /* execute the loop only once */
494 int canfree
= 1 ; /* can free the buf at the end */
497 struct ether_header
*eh
= mtod(*m0
, struct ether_header
*); /* XXX */
499 if (dst
== BDG_DROP
) { /* this should not happen */
500 printf("xx bdg_forward for BDG_DROP)\n");
505 if (dst
== BDG_LOCAL
) { /* this should not happen as well */
506 printf("xx ouch, bdg_forward for local pkt\n");
509 if (dst
== BDG_BCAST
|| dst
== BDG_MCAST
|| dst
== BDG_UNKNOWN
) {
510 ifp
= ifnet
.tqh_first
;
512 if (dst
!= BDG_UNKNOWN
)
516 once
= 1 ; /* and also canfree */
520 * do filtering in a very similar way to what is done
521 * in ip_output. Only for IP packets, and only pass/fail/dummynet
522 * is supported. The tricky thing is to make sure that enough of
523 * the packet (basically, Eth+IP+TCP/UDP headers) is contiguous
524 * so that calls to m_pullup in ip_fw_chk will not kill the
529 struct ip_fw_chain
*rule
;
533 if (m
->m_type
== MT_DUMMYNET
) {
535 * the packet was already tagged, so part of the
536 * processing was already done, and we need to go down.
538 rule
= (struct ip_fw_chain
*)(m
->m_data
) ;
539 (*m0
) = m
= m
->m_next
;
541 src
= m
->m_pkthdr
.rcvif
; /* could be NULL in output */
542 eh
= mtod(m
, struct ether_header
*); /* XXX */
543 canfree
= 1 ; /* for sure, a copy is not needed later. */
544 goto forward
; /* HACK! */
550 goto forward
; /* do not apply to packets from ether_output */
551 if (canfree
== 0 ) /* need to make a copy */
552 m
= m_copypacket(*m0
, M_DONTWAIT
);
560 * before calling the firewall, swap fields the same as IP does.
561 * here we assume the pkt is an IP one and the header is contiguous
563 eh
= mtod(m
, struct ether_header
*);
564 ip
= (struct ip
*)(eh
+ 1 ) ;
570 * The third parameter to the firewall code is the dst. interface.
571 * Since we apply checks only on input pkts we use NULL.
573 off
= (*ip_fw_chk_ptr
)(NULL
, 0, NULL
, &dummy
, &m
, &rule
, NULL
) ;
574 if (m
== NULL
) { /* pkt discarded by firewall */
580 * on return, the mbuf pointer might have changed. Restore
581 * *m0 (if it was the same as m), eh, ip and then
582 * restore original ordering.
584 eh
= mtod(m
, struct ether_header
*);
585 ip
= (struct ip
*)(eh
+ 1 ) ;
586 if (canfree
) /* m was a reference to *m0, so update *m0 */
599 * pass the pkt to dummynet. Need to include m, dst, rule.
600 * Dummynet consumes the packet in all cases.
602 dummynet_io((off
& 0xffff), DN_TO_BDG_FWD
, m
, dst
, NULL
, 0, rule
);
603 if (canfree
) /* dummynet has consumed the original one */
608 /* if none of the above matches, we have to drop the pkt */
611 if (canfree
&& m
!= *m0
) {
618 #endif /* COMPAT_IPFW */
624 for ( ; ifp
; ifp
= ifp
->if_link
.tqe_next
) {
625 if (ifp
!= src
&& ifp
->if_type
== IFT_ETHER
&&
626 (ifp
->if_flags
& (IFF_UP
|IFF_RUNNING
)) == (IFF_UP
|IFF_RUNNING
) &&
627 SAMEGROUP(ifp
, src
) && !MUTED(ifp
) ) {
628 if (m
== NULL
) { /* do i need to make a copy ? */
629 if (canfree
&& ifp
->if_link
.tqe_next
== NULL
) /* last one! */
631 else /* on a P5-90, m_packetcopy takes 540 ticks */
632 m
= m_copypacket(*m0
, M_DONTWAIT
);
634 printf("bdg_forward: sorry, m_copy failed!\n");
639 * execute last part of ether_output.
643 * Queue message on interface, and start output if interface
646 if (IF_QFULL(&ifp
->if_snd
)) {
647 IF_DROP(&ifp
->if_snd
);
648 MUTE(ifp
); /* good measure... */
652 ifp
->if_obytes
+= m
->m_pkthdr
.len
;
653 if (m
->m_flags
& M_MCAST
)
655 IF_ENQUEUE(&ifp
->if_snd
, m
);
656 if ((ifp
->if_flags
& IFF_OACTIVE
) == 0)
657 (*ifp
->if_start
)(ifp
);
660 *m0
= NULL
; /* the packet is gone... */
663 BDG_STAT(ifp
, BDG_OUT
);
669 /* cleanup any mbuf leftover. */
674 if (canfree
&& *m0
) {