]> git.saurik.com Git - apple/xnu.git/blob - bsd/net/bridge.c
301c18c49fe684864c4bd6ff4f6cd56cc8cab746
[apple/xnu.git] / bsd / net / bridge.c
1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * Copyright (c) 1998 Luigi Rizzo
32 *
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
41 *
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 *
54 * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $
55 */
56
57 /*
58 * This code implements bridging in FreeBSD. It only acts on ethernet
59 * type of interfaces (others are still usable for routing).
60 * A bridging table holds the source MAC address/dest. interface for each
61 * known node. The table is indexed using a hash of the source address.
62 *
63 * Input packets are tapped near the beginning of ether_input(), and
64 * analysed by calling bridge_in(). Depending on the result, the packet
65 * can be forwarded to one or more output interfaces using bdg_forward(),
66 * and/or sent to the upper layer (e.g. in case of multicast).
67 *
68 * Output packets are intercepted near the end of ether_output(),
69 * the correct destination is selected calling bridge_dst_lookup(),
70 * and then forwarding is done using bdg_forward().
71 * Bridging is controlled by the sysctl variable net.link.ether.bridge
72 *
73 * The arp code is also modified to let a machine answer to requests
74 * irrespective of the port the request came from.
75 *
76 * In case of loops in the bridging topology, the bridge detects this
77 * event and temporarily mutes output bridging on one of the ports.
78 * Periodically, interfaces are unmuted by bdg_timeout().
79 * Muting is only implemented as a safety measure, and also as
80 * a mechanism to support a user-space implementation of the spanning
81 * tree algorithm. In the final release, unmuting will only occur
82 * because of explicit action of the user-level daemon.
83 *
84 * To build a bridging kernel, use the following option
85 * option BRIDGE
86 * and then at runtime set the sysctl variable to enable bridging.
87 *
88 * Only one interface is supposed to have addresses set (but
89 * there are no problems in practice if you set addresses for more
90 * than one interface).
91 * Bridging will act before routing, but nothing prevents a machine
92 * from doing both (modulo bugs in the implementation...).
93 *
94 * THINGS TO REMEMBER
95 * - bridging is incompatible with multicast routing on the same
96 * machine. There is not an easy fix to this.
97 * - loop detection is still not very robust.
98 * - the interface of bdg_forward() could be improved.
99 */
100
101 #include <sys/param.h>
102 #include <sys/mbuf.h>
103 #include <sys/malloc.h>
104 #include <sys/systm.h>
105 #include <sys/socket.h> /* for net/if.h */
106 #include <sys/kernel.h>
107 #include <sys/sysctl.h>
108
109 #include <net/if.h>
110 #include <net/if_types.h>
111
112 #include <netinet/in.h> /* for struct arpcom */
113 #include <netinet/in_systm.h>
114 #include <netinet/in_var.h>
115 #include <netinet/ip.h>
116 #include <netinet/if_ether.h> /* for struct arpcom */
117
118 #include "opt_ipfw.h"
119 #include "opt_ipdn.h"
120
121 #if defined(IPFIREWALL)
122 #include <net/route.h>
123 #include <netinet/ip_fw.h>
124 #if defined(DUMMYNET)
125 #include <netinet/ip_dummynet.h>
126 #endif
127 #endif
128
129 #include <net/bridge.h>
130
131 /*
132 * For debugging, you can use the following macros.
133 * remember, rdtsc() only works on Pentium-class machines
134
135 quad_t ticks;
136 DDB(ticks = rdtsc();)
137 ... interesting code ...
138 DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;)
139
140 *
141 */
142
143 #define DDB(x) x
144 #define DEB(x)
145
146 static void bdginit(void *);
147 static void bdgtakeifaces(void);
148 static void flush_table(void);
149 static void bdg_promisc_on(void);
150 static void parse_bdg_cfg(void);
151
152 static int bdg_ipfw = 0 ;
153 int do_bridge = 0;
154 bdg_hash_table *bdg_table = NULL ;
155
156 /*
157 * System initialization
158 */
159
160 SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)
161
162 static struct bdg_stats bdg_stats ;
163 struct bdg_softc *ifp2sc = NULL ;
164 /* XXX make it static of size BDG_MAX_PORTS */
165
/*
 * IFP_CHK(ifp, x): execute statement x when the ifp2sc[] slot selected by
 * ifp->if_index does not carry the 0xDEADBEEF magic value.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else), and the ifp argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define IFP_CHK(ifp, x)							\
	do {								\
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
168
169 /*
170 * turn off promisc mode, optionally clear the IFF_USED flag.
171 * The flag is turned on by parse_bdg_cfg()
172 */
173 static void
174 bdg_promisc_off(int clear_used)
175 {
176 struct ifnet *ifp ;
177 ifnet_head_lock_shared();
178 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
179 if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
180 int s, ret ;
181 s = splimp();
182 ret = ifnet_set_promiscuous(ifp, 0);
183 splx(s);
184 ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ;
185 DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n",
186 ifp->if_name, ifp->if_unit,
187 ifp->if_flags, ifp2sc[ifp->if_index].flags);)
188 }
189 if (clear_used) {
190 ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ;
191 bdg_stats.s[ifp->if_index].name[0] = '\0';
192 }
193 }
194 ifnet_head_done();
195 }
196
197 /*
198 * set promisc mode on the interfaces we use.
199 */
200 static void
201 bdg_promisc_on()
202 {
203 struct ifnet *ifp ;
204 int s ;
205
206 ifnet_head_lock_shared();
207 TAILQ_FOREACH(ifp, &ifnet_head, if_link) {
208 if ( !BDG_USED(ifp) )
209 continue ;
210 if ( 0 == ( ifp->if_flags & IFF_UP) ) {
211 s = splimp();
212 if_up(ifp);
213 splx(s);
214 }
215 if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) {
216 int ret ;
217 s = splimp();
218 ret = ifnet_set_promiscuous(ifp, 1);
219 splx(s);
220 ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ;
221 printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n",
222 ifp->if_name, ifp->if_unit,
223 ifp->if_flags, ifp2sc[ifp->if_index].flags);
224 }
225 if (BDG_MUTED(ifp)) {
226 printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit);
227 BDG_UNMUTE(ifp) ;
228 }
229 }
230 ifnet_head_done();
231 }
232
233 static int
234 sysctl_bdg(SYSCTL_HANDLER_ARGS)
235 {
236 int error, oldval = do_bridge ;
237
238 error = sysctl_handle_int(oidp,
239 oidp->oid_arg1, oidp->oid_arg2, req);
240 DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n",
241 oidp->oid_name, oidp->oid_arg2,
242 oldval, do_bridge); )
243
244 if (bdg_table == NULL)
245 do_bridge = 0 ;
246 if (oldval != do_bridge) {
247 bdg_promisc_off( 1 ); /* reset previously used interfaces */
248 flush_table();
249 if (do_bridge) {
250 parse_bdg_cfg();
251 bdg_promisc_on();
252 }
253 }
254 return error ;
255 }
256
257 static char bridge_cfg[256] = { "" } ;
258
259 /*
260 * parse the config string, set IFF_USED, name and cluster_id
261 * for all interfaces found.
262 */
263 static void
264 parse_bdg_cfg()
265 {
266 char *p, *beg ;
267 int i, l, cluster;
268 struct bdg_softc *b;
269
270 for (p= bridge_cfg; *p ; p++) {
271 /* interface names begin with [a-z] and continue up to ':' */
272 if (*p < 'a' || *p > 'z')
273 continue ;
274 for ( beg = p ; *p && *p != ':' ; p++ )
275 ;
276 if (*p == 0) /* end of string, ':' not found */
277 return ;
278 l = p - beg ; /* length of name string */
279 p++ ;
280 DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);)
281 for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++)
282 cluster = cluster*10 + (*p -'0');
283 /*
284 * now search in bridge strings
285 */
286 for (i=0, b = ifp2sc ; i < if_index ; i++, b++) {
287 char buf[32];
288 struct ifnet *ifp = b->ifp ;
289
290 if (ifp == NULL)
291 continue;
292 sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit);
293 if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */
294 b->cluster_id = htons(cluster) ;
295 b->flags |= IFF_USED ;
296 sprintf(bdg_stats.s[ifp->if_index].name,
297 "%s%d:%d", ifp->if_name, ifp->if_unit, cluster);
298
299 DEB(printf("--++ found %s\n",
300 bdg_stats.s[ifp->if_index].name);)
301 break ;
302 }
303 }
304 if (*p == '\0')
305 break ;
306 }
307 }
308
309 static int
310 sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS)
311 {
312 int error = 0 ;
313 char oldval[256] ;
314
315 strcpy(oldval, bridge_cfg) ;
316
317 error = sysctl_handle_string(oidp,
318 bridge_cfg, oidp->oid_arg2, req);
319 DEB(
320 printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n",
321 oidp->oid_name, oidp->oid_arg2,
322 error,
323 oldval, bridge_cfg);
324 )
325 if (strcmp(oldval, bridge_cfg)) {
326 bdg_promisc_off( 1 ); /* reset previously-used interfaces */
327 flush_table();
328 parse_bdg_cfg(); /* and set new ones... */
329 if (do_bridge)
330 bdg_promisc_on(); /* re-enable interfaces */
331 }
332 return error ;
333 }
334
335 static int
336 sysctl_refresh(SYSCTL_HANDLER_ARGS)
337 {
338 if (req->newptr)
339 bdgtakeifaces();
340
341 return 0;
342 }
343
344
345 SYSCTL_DECL(_net_link_ether);
346 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
347 &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
348 "Bridge configuration");
349
350 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
351 &do_bridge, 0, &sysctl_bdg, "I", "Bridging");
352
353 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
354 &bdg_ipfw,0,"Pass bridged pkts through firewall");
355
356 #define SY(parent, var, comment) \
357 static int var ; \
358 SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);
359
360 int bdg_ipfw_drops;
361 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
362 CTLFLAG_RW, &bdg_ipfw_drops,0,"");
363
364 int bdg_ipfw_colls;
365 SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
366 CTLFLAG_RW, &bdg_ipfw_colls,0,"");
367
368 SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
369 NULL, 0, &sysctl_refresh, "I", "iface refresh");
370
371 #if 1 /* diagnostic vars */
372
373 SY(_net_link_ether, verbose, "Be verbose");
374 SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");
375
376 SY(_net_link_ether, bdg_thru, "Packets through bridge");
377
378 SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");
379
380 SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
381 SY(_net_link_ether, bdg_predict, "Correctly predicted header location");
382
383 SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
384 SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
385 SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
386 #endif
387
388 SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
389 CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");
390
391 static int bdg_loops ;
392
393 /*
394 * completely flush the bridge table.
395 */
396 static void
397 flush_table()
398 {
399 int s,i;
400
401 if (bdg_table == NULL)
402 return ;
403 s = splimp();
404 for (i=0; i< HASH_SIZE; i++)
405 bdg_table[i].name= NULL; /* clear table */
406 splx(s);
407 }
408
409 /*
410 * called periodically to flush entries etc.
411 */
412 static void
413 bdg_timeout(void *dummy)
414 {
415 static int slowtimer = 0 ;
416
417 if (do_bridge) {
418 static int age_index = 0 ; /* index of table position to age */
419 int l = age_index + HASH_SIZE/4 ;
420 /*
421 * age entries in the forwarding table.
422 */
423 if (l > HASH_SIZE)
424 l = HASH_SIZE ;
425 for (; age_index < l ; age_index++)
426 if (bdg_table[age_index].used)
427 bdg_table[age_index].used = 0 ;
428 else if (bdg_table[age_index].name) {
429 /* printf("xx flushing stale entry %d\n", age_index); */
430 bdg_table[age_index].name = NULL ;
431 }
432 if (age_index >= HASH_SIZE)
433 age_index = 0 ;
434
435 if (--slowtimer <= 0 ) {
436 slowtimer = 5 ;
437
438 bdg_promisc_on() ; /* we just need unmute, really */
439 bdg_loops = 0 ;
440 }
441 }
442 timeout(bdg_timeout, (void *)0, 2*hz );
443 }
444
445 /*
446 * local MAC addresses are held in a small array. This makes comparisons
447 * much faster.
448 */
449 bdg_addr bdg_addresses[BDG_MAX_PORTS];
450 int bdg_ports ;
451
452 /*
453 * initialization of bridge code. This needs to be done after all
454 * interfaces have been configured.
455 */
456 static void
457 bdginit(void *dummy)
458 {
459
460 if (bdg_table == NULL)
461 bdg_table = (struct hash_table *)
462 _MALLOC(HASH_SIZE * sizeof(struct hash_table),
463 M_IFADDR, M_WAITOK);
464 flush_table();
465
466 ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc),
467 M_IFADDR, M_WAITOK );
468 bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) );
469
470 bzero(&bdg_stats, sizeof(bdg_stats) );
471 bdgtakeifaces();
472 bdg_timeout(0);
473 do_bridge=0;
474 }
475
476 void
477 bdgtakeifaces(void)
478 {
479 int i ;
480 struct ifnet *ifp;
481 bdg_addr *p = bdg_addresses ;
482 struct bdg_softc *bp;
483
484 bdg_ports = 0 ;
485 *bridge_cfg = '\0';
486
487 printf("BRIDGE 010131, have %d interfaces\n", if_index);
488 ifnet_head_lock_shared();
489 for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ;
490 i++, ifp = TAILQ_NEXT(ifp, if_link) )
491 if (ifp->if_type == IFT_ETHER) { /* ethernet ? */
492 ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN);
493 bp = &ifp2sc[ifp->if_index] ;
494 sprintf(bridge_cfg + strlen(bridge_cfg),
495 "%s%d:1,", ifp->if_name, ifp->if_unit);
496 printf("-- index %d %s type %d phy %d addrl %d addr %6D\n",
497 ifp->if_index,
498 bdg_stats.s[ifp->if_index].name,
499 (int)ifp->if_type, (int) ifp->if_physical,
500 (int)ifp->if_addrlen,
501 p->etheraddr, "." );
502 p++ ;
503 bp->ifp = ifp ;
504 bp->flags = IFF_USED ;
505 bp->cluster_id = htons(1) ;
506 bp->magic = 0xDEADBEEF ;
507
508 sprintf(bdg_stats.s[ifp->if_index].name,
509 "%s%d:%d", ifp->if_name, ifp->if_unit,
510 ntohs(bp->cluster_id));
511 bdg_ports ++ ;
512 }
513 ifnet_head_done();
514 }
515
516 /*
517 * bridge_in() is invoked to perform bridging decision on input packets.
518 *
519 * On Input:
520 * eh Ethernet header of the incoming packet.
521 *
522 * On Return: destination of packet, one of
523 * BDG_BCAST broadcast
524 * BDG_MCAST multicast
525 * BDG_LOCAL is only for a local address (do not forward)
526 * BDG_DROP drop the packet
527 * ifp ifp of the destination interface.
528 *
529 * Forwarding is not done directly to give a chance to some drivers
530 * to fetch more of the packet, or simply drop it completely.
531 */
532
533 struct ifnet *
534 bridge_in(struct ifnet *ifp, struct ether_header *eh)
535 {
536 int index;
537 struct ifnet *dst , *old ;
538 int dropit = BDG_MUTED(ifp) ;
539
540 /*
541 * hash the source address
542 */
543 index= HASH_FN(eh->ether_shost);
544 bdg_table[index].used = 1 ;
545 old = bdg_table[index].name ;
546 if ( old ) { /* the entry is valid. */
547 IFP_CHK(old, printf("bridge_in-- reading table\n") );
548
549 if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) {
550 bdg_ipfw_colls++ ;
551 bdg_table[index].name = NULL ;
552 } else if (old != ifp) {
553 /*
554 * found a loop. Either a machine has moved, or there
555 * is a misconfiguration/reconfiguration of the network.
556 * First, do not forward this packet!
557 * Record the relocation anyways; then, if loops persist,
558 * suspect a reconfiguration and disable forwarding
559 * from the old interface.
560 */
561 bdg_table[index].name = ifp ; /* relocate address */
562 printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n",
563 bdg_loops, eh->ether_shost, ".",
564 ifp->if_name, ifp->if_unit,
565 old->if_name, old->if_unit,
566 BDG_MUTED(old) ? "muted":"active");
567 dropit = 1 ;
568 if ( !BDG_MUTED(old) ) {
569 if (++bdg_loops > 10)
570 BDG_MUTE(old) ;
571 }
572 }
573 }
574
575 /*
576 * now write the source address into the table
577 */
578 if (bdg_table[index].name == NULL) {
579 DEB(printf("new addr %6D at %d for %s%d\n",
580 eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);)
581 bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6);
582 bdg_table[index].name = ifp ;
583 }
584 dst = bridge_dst_lookup(eh);
585 /* Return values:
586 * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp.
587 * For muted interfaces, the first 3 are changed in BDG_LOCAL,
588 * and others to BDG_DROP. Also, for incoming packets, ifp is changed
589 * to BDG_DROP in case ifp == src . These mods are not necessary
590 * for outgoing packets from ether_output().
591 */
592 BDG_STAT(ifp, BDG_IN);
593 switch ((int)dst) {
594 case (int)BDG_BCAST:
595 case (int)BDG_MCAST:
596 case (int)BDG_LOCAL:
597 case (int)BDG_UNKNOWN:
598 case (int)BDG_DROP:
599 BDG_STAT(ifp, dst);
600 break ;
601 default :
602 if (dst == ifp || dropit )
603 BDG_STAT(ifp, BDG_DROP);
604 else
605 BDG_STAT(ifp, BDG_FORWARD);
606 break ;
607 }
608
609 if ( dropit ) {
610 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL)
611 return BDG_LOCAL ;
612 else
613 return BDG_DROP ;
614 } else {
615 return (dst == ifp ? BDG_DROP : dst ) ;
616 }
617 }
618
619 /*
620 * Forward to dst, excluding src port and muted interfaces.
621 * If src == NULL, the pkt comes from ether_output, and dst is the real
622 * interface the packet is originally sent to. In this case we must forward
623 * it to the whole cluster. We never call bdg_forward from ether_output on
624 * interfaces which are not part of a cluster.
625 *
626 * The packet is freed if possible (i.e. surely not of interest for
627 * the upper layer), otherwise a copy is left for use by the caller
628 * (pointer in m0).
629 *
630 * It would be more efficient to make bdg_forward() always consume
631 * the packet, leaving to the caller the task to check if it needs a copy
632 * and get one in case. As it is now, bdg_forward() can sometimes make
633 * a copy whereas it is not necessary.
634 *
635 * XXX be careful about eh, it can be a pointer into *m
636 */
/*
 * bdg_forward -- queue a packet on the bridge output interface(s).
 *
 * m0:  the packet; m0->m_pkthdr.rcvif names the receiving interface and
 *      is NULL for packets coming from the output path.
 * eh:  Ethernet header of the packet; it is copied into save_eh before
 *      the mbuf is touched.
 * dst: BDG_BCAST, BDG_MCAST, BDG_UNKNOWN (scan all interfaces), or one
 *      destination interface. When the receiving interface is NULL, dst
 *      is recomputed with bridge_dst_lookup().
 *
 * Returns the mbuf the caller still owns (possibly a different pointer
 * after m_pullup()), or NULL when the packet was handed off or freed
 * here, or when m_pullup() failed.
 */
637 struct mbuf *
638 bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst)
639 {
640 struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */
641 struct ifnet *ifp, *last = NULL ;
642 int s ;
643 int shared = bdg_copy ; /* someone else is using the mbuf */
644 int once = 0; /* loop only once */
645 struct ifnet *real_dst = dst ; /* real dst from ether_output */
646 #ifdef IPFIREWALL
647 struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */
648 #endif
649
650 /*
651 * XXX eh is usually a pointer within the mbuf (some ethernet drivers
652 * do that), so we better copy it before doing anything with the mbuf,
653 * or we might corrupt the header.
654 */
655 struct ether_header save_eh = *eh ;
656
657 #if defined(IPFIREWALL) && defined(DUMMYNET)
658 if (m0->m_type == MT_DUMMYNET) {
659 /* extract info from dummynet header */
660 rule = (struct ip_fw_chain *)(m0->m_data) ;
661 m0 = m0->m_next ;
662 src = m0->m_pkthdr.rcvif;
663 shared = 0 ; /* For sure this is our own mbuf. */
664 } else
665 #endif
/*
 * With the dummynet code compiled in, the statement below is the body of
 * the 'else' above, so packets re-injected by dummynet are not counted
 * a second time.
 */
666 bdg_thru++; /* only count once */
667
668 if (src == NULL) /* packet from ether_output */
669 dst = bridge_dst_lookup(eh);
670 if (dst == BDG_DROP) { /* this should not happen */
671 printf("xx bdg_forward for BDG_DROP\n");
672 m_freem(m0);
673 return NULL;
674 }
675 if (dst == BDG_LOCAL) { /* this should not happen as well */
676 printf("xx ouch, bdg_forward for local pkt\n");
677 return m0;
678 }
/*
 * Choose the starting interface: the head of the interface list for
 * broadcast/multicast/unknown destinations (all ports are scanned), or
 * the single destination interface otherwise.
 */
679 if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) {
680 ifp = ifnet_head.tqh_first ; /* scan all ports */
681 once = 0 ;
682 if (dst != BDG_UNKNOWN) /* need a copy for the local stack */
683 shared = 1 ;
684 } else {
685 ifp = dst ;
686 once = 1 ;
687 }
/*
 * NOTE(review): the sanity check below compares pointers after a cast to
 * u_int; presumably the BDG_* codes are small integers cast to pointers.
 * Confirm the behavior where pointers are wider than u_int.
 */
688 if ( (u_int)(ifp) <= (u_int)BDG_FORWARD )
689 panic("bdg_forward: bad dst");
690
691 #ifdef IPFIREWALL
692 /*
693 * Do filtering in a very similar way to what is done in ip_output.
694 * Only if firewall is loaded, enabled, and the packet is not
695 * from ether_output() (src==NULL, or we would filter it twice).
696 * Additional restrictions may apply e.g. non-IP, short packets,
697 * and pkts already gone through a pipe.
698 */
699 if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) {
700 struct ip *ip ;
701 int i;
702
703 if (rule != NULL) /* dummynet packet, already partially processed */
704 goto forward; /* HACK! I should obey the fw_one_pass */
705 if (ntohs(save_eh.ether_type) != ETHERTYPE_IP)
706 goto forward ; /* not an IP packet, ipfw is not appropriate */
707 if (m0->m_pkthdr.len < sizeof(struct ip) )
708 goto forward ; /* header too short for an IP pkt, cannot filter */
709 /*
710 * i need some amt of data to be contiguous, and in case others need
711 * the packet (shared==1) also better be in the first mbuf.
712 */
713 i = min(m0->m_pkthdr.len, max_protohdr) ;
714 if ( shared || m0->m_len < i) {
715 m0 = m_pullup(m0, i) ;
716 if (m0 == NULL) {
717 printf("-- bdg: pullup failed.\n") ;
718 return NULL ;
719 }
720 }
721
722 /*
723 * before calling the firewall, swap fields the same as IP does.
724 * here we assume the pkt is an IP one and the header is contiguous
725 */
726 ip = mtod(m0, struct ip *);
727 NTOHS(ip->ip_len);
728 NTOHS(ip->ip_off);
729
730 /*
731 * The third parameter to the firewall code is the dst. interface.
732 * Since we apply checks only on input pkts we use NULL.
733 * The firewall knows this is a bridged packet as the cookie ptr
734 * is NULL.
735 */
736 i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL);
/* On a deny verdict, or when the firewall took the mbuf, m0 is returned as is. */
737 if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */
738 return m0 ;
739 /*
740 * If we get here, the firewall has passed the pkt, but the mbuf
741 * pointer might have changed. Restore ip and the fields NTOHS()'d.
742 */
743 ip = mtod(m0, struct ip *);
744 HTONS(ip->ip_len);
745 HTONS(ip->ip_off);
746
747 if (i == 0) /* a PASS rule. */
748 goto forward ;
749 #ifdef DUMMYNET
750 if (i & IP_FW_PORT_DYNT_FLAG) {
751 /*
752 * Pass the pkt to dummynet, which consumes it.
753 * If shared, make a copy and keep the original.
754 * Need to prepend the ethernet header, optimize the common
755 * case of eh pointing already into the original mbuf.
756 */
757 struct mbuf *m ;
758 if (shared) {
759 m = m_copypacket(m0, M_DONTWAIT);
760 if (m == NULL) {
761 printf("bdg_fwd: copy(1) failed\n");
762 return m0;
763 }
764 } else {
765 m = m0 ; /* pass the original to dummynet */
766 m0 = NULL ; /* and nothing back to the caller */
767 }
/* Header sits right in front of the data: just move m_data back over it. */
768 if ( (void *)(eh + 1) == (void *)m->m_data) {
769 m->m_data -= ETHER_HDR_LEN ;
770 m->m_len += ETHER_HDR_LEN ;
771 m->m_pkthdr.len += ETHER_HDR_LEN ;
772 bdg_predict++;
773 } else {
774 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
775 if (!m && verbose) printf("M_PREPEND failed\n");
776 if (m == NULL) /* nope... */
777 return m0 ;
778 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
779 }
780 dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0);
781 return m0 ;
782 }
783 #endif
784 /*
785 * XXX add divert/forward actions...
786 */
787 /* if none of the above matches, we have to drop the pkt */
788 bdg_ipfw_drops++ ;
789 printf("bdg_forward: No rules match, so dropping packet!\n");
/* NOTE(review): m0 is returned, not freed, on this path -- presumably the caller disposes of it; confirm. */
790 return m0 ;
791 }
792 forward:
793 #endif /* IPFIREWALL */
794 /*
795 * Again, bring up the headers in case of shared bufs to avoid
796 * corruptions in the future.
797 */
798 if ( shared ) {
799 int i = min(m0->m_pkthdr.len, max_protohdr) ;
800
801 m0 = m_pullup(m0, i) ;
802 if (m0 == NULL) {
803 printf("-- bdg: pullup2 failed.\n") ;
804 return NULL ;
805 }
806 }
807 /* now real_dst is used to determine the cluster where to forward */
808 if (src != NULL) /* pkt comes from ether_input */
809 real_dst = src ;
/*
 * Transmission is deferred by one iteration: each pass first sends on
 * 'last' (the interface accepted by the previous pass) and then examines
 * 'ifp'. Once 'once' is set the pending send is the final one, so the
 * original mbuf can be handed over instead of a copy when it is not
 * shared.
 */
810 for (;;) {
811 if (last) { /* need to forward packet leftover from previous loop */
812 struct mbuf *m ;
813 if (shared == 0 && once ) { /* no need to copy */
814 m = m0 ;
815 m0 = NULL ; /* original is gone */
816 } else {
817 m = m_copypacket(m0, M_DONTWAIT);
818 if (m == NULL) {
819 printf("bdg_forward: sorry, m_copypacket failed!\n");
820 return m0 ; /* the original is still there... */
821 }
822 }
823 /*
824 * Add header (optimized for the common case of eh pointing
825 * already into the mbuf) and execute last part of ether_output:
826 * queue pkt and start output if interface not yet active.
827 */
828 if ( (void *)(eh + 1) == (void *)m->m_data) {
829 m->m_data -= ETHER_HDR_LEN ;
830 m->m_len += ETHER_HDR_LEN ;
831 m->m_pkthdr.len += ETHER_HDR_LEN ;
832 bdg_predict++;
833 } else {
834 M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT);
835 if (!m && verbose) printf("M_PREPEND failed\n");
836 if (m == NULL)
837 return m0;
838 bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN);
839 }
840 s = splimp();
841 if (IF_QFULL(&last->if_snd)) {
842 IF_DROP(&last->if_snd);
843 #if 0
844 BDG_MUTE(last); /* should I also mute ? */
845 #endif
846 splx(s);
847 m_freem(m); /* consume the pkt anyways */
848 } else {
849 last->if_obytes += m->m_pkthdr.len ;
850 if (m->m_flags & M_MCAST)
851 last->if_omcasts++;
852 if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */
853 bdg_split_pkts++;
854
855 IF_ENQUEUE(&last->if_snd, m);
856 if ((last->if_flags & IFF_OACTIVE) == 0)
857 (*last->if_start)(last);
858 splx(s);
859 }
860 BDG_STAT(last, BDG_OUT);
861 last = NULL ;
862 if (once)
863 break ;
864 }
865 if (ifp == NULL)
866 break ;
867 /*
868 * If the interface is used for bridging, not muted, not full,
869 * up and running, is not the source interface, and belongs to
870 * the same cluster as the 'real_dst', then send here.
871 */
872 if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) &&
873 (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) &&
874 ifp != src && BDG_SAMECLUSTER(ifp, real_dst) )
875 last = ifp ;
876 ifp = TAILQ_NEXT(ifp, if_link) ;
/* End of the interface list: the next pass performs the final send. */
877 if (ifp == NULL)
878 once = 1 ;
879 }
880 DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;
881 if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; )
882 return m0 ;
883 }