| 1 | /* |
| 2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. |
| 3 | * |
| 4 | * @APPLE_LICENSE_HEADER_START@ |
| 5 | * |
| 6 | * The contents of this file constitute Original Code as defined in and |
| 7 | * are subject to the Apple Public Source License Version 1.1 (the |
| 8 | * "License"). You may not use this file except in compliance with the |
| 9 | * License. Please obtain a copy of the License at |
| 10 | * http://www.apple.com/publicsource and read it before using this file. |
| 11 | * |
| 12 | * This Original Code and all software distributed under the License are |
| 13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
| 14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
| 15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, |
| 16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
| 17 | * License for the specific language governing rights and limitations |
| 18 | * under the License. |
| 19 | * |
| 20 | * @APPLE_LICENSE_HEADER_END@ |
| 21 | */ |
| 22 | /* |
| 23 | * Copyright (c) 1998 Luigi Rizzo |
| 24 | * |
| 25 | * Redistribution and use in source and binary forms, with or without |
| 26 | * modification, are permitted provided that the following conditions |
| 27 | * are met: |
| 28 | * 1. Redistributions of source code must retain the above copyright |
| 29 | * notice, this list of conditions and the following disclaimer. |
| 30 | * 2. Redistributions in binary form must reproduce the above copyright |
| 31 | * notice, this list of conditions and the following disclaimer in the |
| 32 | * documentation and/or other materials provided with the distribution. |
| 33 | * |
| 34 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND |
| 35 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 36 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 37 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 38 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 39 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 40 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 41 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 42 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 43 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 44 | * SUCH DAMAGE. |
| 45 | * |
| 46 | * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $ |
| 47 | */ |
| 48 | |
| 49 | /* |
| 50 | * This code implements bridging in FreeBSD. It only acts on ethernet |
| 51 | * type of interfaces (others are still usable for routing). |
| 52 | * A bridging table holds the source MAC address/dest. interface for each |
| 53 | * known node. The table is indexed using a hash of the source address. |
| 54 | * |
| 55 | * Input packets are tapped near the beginning of ether_input(), and |
| 56 | * analysed by calling bridge_in(). Depending on the result, the packet |
| 57 | * can be forwarded to one or more output interfaces using bdg_forward(), |
| 58 | * and/or sent to the upper layer (e.g. in case of multicast). |
| 59 | * |
| 60 | * Output packets are intercepted near the end of ether_output(), |
| 61 | * the correct destination is selected calling bridge_dst_lookup(), |
| 62 | * and then forwarding is done using bdg_forward(). |
| 63 | * Bridging is controlled by the sysctl variable net.link.ether.bridge |
| 64 | * |
| 65 | * The arp code is also modified to let a machine answer to requests |
| 66 | * irrespective of the port the request came from. |
| 67 | * |
| 68 | * In case of loops in the bridging topology, the bridge detects this |
| 69 | * event and temporarily mutes output bridging on one of the ports. |
| 70 | * Periodically, interfaces are unmuted by bdg_timeout(). |
| 71 | * Muting is only implemented as a safety measure, and also as |
| 72 | * a mechanism to support a user-space implementation of the spanning |
| 73 | * tree algorithm. In the final release, unmuting will only occur |
| 74 | * because of explicit action of the user-level daemon. |
| 75 | * |
| 76 | * To build a bridging kernel, use the following option |
| 77 | * option BRIDGE |
| 78 | * and then at runtime set the sysctl variable to enable bridging. |
| 79 | * |
| 80 | * Only one interface is supposed to have addresses set (but |
| 81 | * there are no problems in practice if you set addresses for more |
| 82 | * than one interface). |
| 83 | * Bridging will act before routing, but nothing prevents a machine |
| 84 | * from doing both (modulo bugs in the implementation...). |
| 85 | * |
| 86 | * THINGS TO REMEMBER |
| 87 | * - bridging is incompatible with multicast routing on the same |
| 88 | * machine. There is not an easy fix to this. |
| 89 | * - loop detection is still not very robust. |
| 90 | * - the interface of bdg_forward() could be improved. |
| 91 | */ |
| 92 | |
| 93 | #include <sys/param.h> |
| 94 | #include <sys/mbuf.h> |
| 95 | #include <sys/malloc.h> |
| 96 | #include <sys/systm.h> |
| 97 | #include <sys/socket.h> /* for net/if.h */ |
| 98 | #include <sys/kernel.h> |
| 99 | #include <sys/sysctl.h> |
| 100 | |
| 101 | #include <net/if.h> |
| 102 | #include <net/if_types.h> |
| 103 | |
| 104 | #include <netinet/in.h> /* for struct arpcom */ |
| 105 | #include <netinet/in_systm.h> |
| 106 | #include <netinet/in_var.h> |
| 107 | #include <netinet/ip.h> |
| 108 | #include <netinet/if_ether.h> /* for struct arpcom */ |
| 109 | |
| 110 | #include "opt_ipfw.h" |
| 111 | #include "opt_ipdn.h" |
| 112 | |
| 113 | #if defined(IPFIREWALL) |
| 114 | #include <net/route.h> |
| 115 | #include <netinet/ip_fw.h> |
| 116 | #if defined(DUMMYNET) |
| 117 | #include <netinet/ip_dummynet.h> |
| 118 | #endif |
| 119 | #endif |
| 120 | |
| 121 | #include <net/bridge.h> |
| 122 | |
| 123 | /* |
| 124 | * For debugging, you can use the following macros. |
| 125 | * remember, rdtsc() only works on Pentium-class machines |
| 126 | |
| 127 | quad_t ticks; |
| 128 | DDB(ticks = rdtsc();) |
| 129 | ... interesting code ... |
| 130 | DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) |
| 131 | |
| 132 | * |
| 133 | */ |
| 134 | |
/*
 * Debug helpers: DDB(x) expands to its argument (the code is compiled in),
 * DEB(x) expands to nothing (the code is compiled out).
 */
#define DDB(x) x
#define DEB(x)

/* Forward declarations of the file-local helpers defined further down. */
static void bdginit(void *);
static void bdgtakeifaces(void);
static void flush_table(void);
static void bdg_promisc_on(void);
static void parse_bdg_cfg(void);

/* Non-zero: bridged packets are passed through the firewall (sysctl bridge_ipfw). */
static int bdg_ipfw = 0 ;
/* Non-zero: bridging is enabled (sysctl net.link.ether.bridge). */
int do_bridge = 0;
/* Forwarding table, HASH_SIZE entries, allocated by bdginit(). */
bdg_hash_table *bdg_table = NULL ;

/*
 * System initialization
 */

/* Registers bdginit() to be run at SI_SUB_PROTO_IF / SI_ORDER_FIRST. */
SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL)

/* Statistics, exported read-only through the bdgstats sysctl. */
static struct bdg_stats bdg_stats ;
/*
 * Per-interface bridge state, indexed by ifp->if_index.  Allocated by
 * bdginit() with room for BDG_MAX_PORTS entries.
 */
struct bdg_softc *ifp2sc = NULL ;
/* XXX make it static of size BDG_MAX_PORTS */
| 157 | |
/*
 * Sanity check: execute statement(s) x when the softc slot that belongs to
 * ifp does not carry the 0xDEADBEEF magic value.
 *
 * The body is wrapped in do { } while (0) so that "IFP_CHK(...);" is a
 * single statement and can be used in an unbraced if/else, and the ifp
 * argument is parenthesized so that expressions such as &foo work.
 */
#define IFP_CHK(ifp, x) \
	do { \
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
| 160 | |
| 161 | /* |
| 162 | * turn off promisc mode, optionally clear the IFF_USED flag. |
| 163 | * The flag is turned on by parse_bdg_config |
| 164 | */ |
| 165 | static void |
| 166 | bdg_promisc_off(int clear_used) |
| 167 | { |
| 168 | struct ifnet *ifp ; |
| 169 | ifnet_head_lock_shared(); |
| 170 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { |
| 171 | if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { |
| 172 | int s, ret ; |
| 173 | s = splimp(); |
| 174 | ret = ifnet_set_promiscuous(ifp, 0); |
| 175 | splx(s); |
| 176 | ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; |
| 177 | DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", |
| 178 | ifp->if_name, ifp->if_unit, |
| 179 | ifp->if_flags, ifp2sc[ifp->if_index].flags);) |
| 180 | } |
| 181 | if (clear_used) { |
| 182 | ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; |
| 183 | bdg_stats.s[ifp->if_index].name[0] = '\0'; |
| 184 | } |
| 185 | } |
| 186 | ifnet_head_done(); |
| 187 | } |
| 188 | |
| 189 | /* |
| 190 | * set promisc mode on the interfaces we use. |
| 191 | */ |
| 192 | static void |
| 193 | bdg_promisc_on() |
| 194 | { |
| 195 | struct ifnet *ifp ; |
| 196 | int s ; |
| 197 | |
| 198 | ifnet_head_lock_shared(); |
| 199 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { |
| 200 | if ( !BDG_USED(ifp) ) |
| 201 | continue ; |
| 202 | if ( 0 == ( ifp->if_flags & IFF_UP) ) { |
| 203 | s = splimp(); |
| 204 | if_up(ifp); |
| 205 | splx(s); |
| 206 | } |
| 207 | if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { |
| 208 | int ret ; |
| 209 | s = splimp(); |
| 210 | ret = ifnet_set_promiscuous(ifp, 1); |
| 211 | splx(s); |
| 212 | ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; |
| 213 | printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", |
| 214 | ifp->if_name, ifp->if_unit, |
| 215 | ifp->if_flags, ifp2sc[ifp->if_index].flags); |
| 216 | } |
| 217 | if (BDG_MUTED(ifp)) { |
| 218 | printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); |
| 219 | BDG_UNMUTE(ifp) ; |
| 220 | } |
| 221 | } |
| 222 | ifnet_head_done(); |
| 223 | } |
| 224 | |
| 225 | static int |
| 226 | sysctl_bdg(SYSCTL_HANDLER_ARGS) |
| 227 | { |
| 228 | int error, oldval = do_bridge ; |
| 229 | |
| 230 | error = sysctl_handle_int(oidp, |
| 231 | oidp->oid_arg1, oidp->oid_arg2, req); |
| 232 | DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", |
| 233 | oidp->oid_name, oidp->oid_arg2, |
| 234 | oldval, do_bridge); ) |
| 235 | |
| 236 | if (bdg_table == NULL) |
| 237 | do_bridge = 0 ; |
| 238 | if (oldval != do_bridge) { |
| 239 | bdg_promisc_off( 1 ); /* reset previously used interfaces */ |
| 240 | flush_table(); |
| 241 | if (do_bridge) { |
| 242 | parse_bdg_cfg(); |
| 243 | bdg_promisc_on(); |
| 244 | } |
| 245 | } |
| 246 | return error ; |
| 247 | } |
| 248 | |
| 249 | static char bridge_cfg[256] = { "" } ; |
| 250 | |
| 251 | /* |
| 252 | * parse the config string, set IFF_USED, name and cluster_id |
| 253 | * for all interfaces found. |
| 254 | */ |
| 255 | static void |
| 256 | parse_bdg_cfg() |
| 257 | { |
| 258 | char *p, *beg ; |
| 259 | int i, l, cluster; |
| 260 | struct bdg_softc *b; |
| 261 | |
| 262 | for (p= bridge_cfg; *p ; p++) { |
| 263 | /* interface names begin with [a-z] and continue up to ':' */ |
| 264 | if (*p < 'a' || *p > 'z') |
| 265 | continue ; |
| 266 | for ( beg = p ; *p && *p != ':' ; p++ ) |
| 267 | ; |
| 268 | if (*p == 0) /* end of string, ':' not found */ |
| 269 | return ; |
| 270 | l = p - beg ; /* length of name string */ |
| 271 | p++ ; |
| 272 | DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) |
| 273 | for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) |
| 274 | cluster = cluster*10 + (*p -'0'); |
| 275 | /* |
| 276 | * now search in bridge strings |
| 277 | */ |
| 278 | for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { |
| 279 | char buf[32]; |
| 280 | struct ifnet *ifp = b->ifp ; |
| 281 | |
| 282 | if (ifp == NULL) |
| 283 | continue; |
| 284 | sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); |
| 285 | if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ |
| 286 | b->cluster_id = htons(cluster) ; |
| 287 | b->flags |= IFF_USED ; |
| 288 | sprintf(bdg_stats.s[ifp->if_index].name, |
| 289 | "%s%d:%d", ifp->if_name, ifp->if_unit, cluster); |
| 290 | |
| 291 | DEB(printf("--++ found %s\n", |
| 292 | bdg_stats.s[ifp->if_index].name);) |
| 293 | break ; |
| 294 | } |
| 295 | } |
| 296 | if (*p == '\0') |
| 297 | break ; |
| 298 | } |
| 299 | } |
| 300 | |
| 301 | static int |
| 302 | sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) |
| 303 | { |
| 304 | int error = 0 ; |
| 305 | char oldval[256] ; |
| 306 | |
| 307 | strcpy(oldval, bridge_cfg) ; |
| 308 | |
| 309 | error = sysctl_handle_string(oidp, |
| 310 | bridge_cfg, oidp->oid_arg2, req); |
| 311 | DEB( |
| 312 | printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", |
| 313 | oidp->oid_name, oidp->oid_arg2, |
| 314 | error, |
| 315 | oldval, bridge_cfg); |
| 316 | ) |
| 317 | if (strcmp(oldval, bridge_cfg)) { |
| 318 | bdg_promisc_off( 1 ); /* reset previously-used interfaces */ |
| 319 | flush_table(); |
| 320 | parse_bdg_cfg(); /* and set new ones... */ |
| 321 | if (do_bridge) |
| 322 | bdg_promisc_on(); /* re-enable interfaces */ |
| 323 | } |
| 324 | return error ; |
| 325 | } |
| 326 | |
| 327 | static int |
| 328 | sysctl_refresh(SYSCTL_HANDLER_ARGS) |
| 329 | { |
| 330 | if (req->newptr) |
| 331 | bdgtakeifaces(); |
| 332 | |
| 333 | return 0; |
| 334 | } |
| 335 | |
| 336 | |
/*
 * sysctl nodes of the bridge, all below _net_link_ether.
 */
SYSCTL_DECL(_net_link_ether);
/* bridge_cfg: read/write configuration string, handled by sysctl_bdg_cfg() */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW,
    &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A",
    "Bridge configuration");

/* bridge: read/write enable flag (do_bridge), handled by sysctl_bdg() */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW,
    &do_bridge, 0, &sysctl_bdg, "I", "Bridging");

SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW,
    &bdg_ipfw,0,"Pass bridged pkts through firewall");

/*
 * SY(parent, var, comment): define a file-local int named var and export
 * it read/write as a sysctl of the same name under parent.
 */
#define SY(parent, var, comment) \
static int var ; \
SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment);

/* incremented by bdg_forward() when no handled firewall action applies */
int bdg_ipfw_drops;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop,
    CTLFLAG_RW, &bdg_ipfw_drops,0,"");

/*
 * incremented by bridge_in() when a hash slot holds a different source
 * address than the one being learned
 */
int bdg_ipfw_colls;
SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions,
    CTLFLAG_RW, &bdg_ipfw_colls,0,"");

/* bridge_refresh: writing it rescans the interfaces, see sysctl_refresh() */
SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR,
    NULL, 0, &sysctl_refresh, "I", "iface refresh");

#if 1 /* diagnostic vars */

SY(_net_link_ether, verbose, "Be verbose");
SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward");

SY(_net_link_ether, bdg_thru, "Packets through bridge");

SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward");

SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward");
SY(_net_link_ether, bdg_predict, "Correctly predicted header location");

SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg");
SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item");
SY(_net_link_ether, bdg_fw_count, "Cycle counter count");
#endif

/* bdgstats: read-only export of the whole bdg_stats structure */
SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats,
    CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics");

/* loops seen by bridge_in(); reset periodically by bdg_timeout() */
static int bdg_loops ;
| 384 | |
| 385 | /* |
| 386 | * completely flush the bridge table. |
| 387 | */ |
| 388 | static void |
| 389 | flush_table() |
| 390 | { |
| 391 | int s,i; |
| 392 | |
| 393 | if (bdg_table == NULL) |
| 394 | return ; |
| 395 | s = splimp(); |
| 396 | for (i=0; i< HASH_SIZE; i++) |
| 397 | bdg_table[i].name= NULL; /* clear table */ |
| 398 | splx(s); |
| 399 | } |
| 400 | |
| 401 | /* |
| 402 | * called periodically to flush entries etc. |
| 403 | */ |
| 404 | static void |
| 405 | bdg_timeout(void *dummy) |
| 406 | { |
| 407 | static int slowtimer = 0 ; |
| 408 | |
| 409 | if (do_bridge) { |
| 410 | static int age_index = 0 ; /* index of table position to age */ |
| 411 | int l = age_index + HASH_SIZE/4 ; |
| 412 | /* |
| 413 | * age entries in the forwarding table. |
| 414 | */ |
| 415 | if (l > HASH_SIZE) |
| 416 | l = HASH_SIZE ; |
| 417 | for (; age_index < l ; age_index++) |
| 418 | if (bdg_table[age_index].used) |
| 419 | bdg_table[age_index].used = 0 ; |
| 420 | else if (bdg_table[age_index].name) { |
| 421 | /* printf("xx flushing stale entry %d\n", age_index); */ |
| 422 | bdg_table[age_index].name = NULL ; |
| 423 | } |
| 424 | if (age_index >= HASH_SIZE) |
| 425 | age_index = 0 ; |
| 426 | |
| 427 | if (--slowtimer <= 0 ) { |
| 428 | slowtimer = 5 ; |
| 429 | |
| 430 | bdg_promisc_on() ; /* we just need unmute, really */ |
| 431 | bdg_loops = 0 ; |
| 432 | } |
| 433 | } |
| 434 | timeout(bdg_timeout, (void *)0, 2*hz ); |
| 435 | } |
| 436 | |
| 437 | /* |
| 438 | * local MAC addresses are held in a small array. This makes comparisons |
| 439 | * much faster. |
| 440 | */ |
| 441 | bdg_addr bdg_addresses[BDG_MAX_PORTS]; |
| 442 | int bdg_ports ; |
| 443 | |
| 444 | /* |
| 445 | * initialization of bridge code. This needs to be done after all |
| 446 | * interfaces have been configured. |
| 447 | */ |
| 448 | static void |
| 449 | bdginit(void *dummy) |
| 450 | { |
| 451 | |
| 452 | if (bdg_table == NULL) |
| 453 | bdg_table = (struct hash_table *) |
| 454 | _MALLOC(HASH_SIZE * sizeof(struct hash_table), |
| 455 | M_IFADDR, M_WAITOK); |
| 456 | flush_table(); |
| 457 | |
| 458 | ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), |
| 459 | M_IFADDR, M_WAITOK ); |
| 460 | bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); |
| 461 | |
| 462 | bzero(&bdg_stats, sizeof(bdg_stats) ); |
| 463 | bdgtakeifaces(); |
| 464 | bdg_timeout(0); |
| 465 | do_bridge=0; |
| 466 | } |
| 467 | |
| 468 | void |
| 469 | bdgtakeifaces(void) |
| 470 | { |
| 471 | int i ; |
| 472 | struct ifnet *ifp; |
| 473 | bdg_addr *p = bdg_addresses ; |
| 474 | struct bdg_softc *bp; |
| 475 | |
| 476 | bdg_ports = 0 ; |
| 477 | *bridge_cfg = '\0'; |
| 478 | |
| 479 | printf("BRIDGE 010131, have %d interfaces\n", if_index); |
| 480 | ifnet_head_lock_shared(); |
| 481 | for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; |
| 482 | i++, ifp = TAILQ_NEXT(ifp, if_link) ) |
| 483 | if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ |
| 484 | ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN); |
| 485 | bp = &ifp2sc[ifp->if_index] ; |
| 486 | sprintf(bridge_cfg + strlen(bridge_cfg), |
| 487 | "%s%d:1,", ifp->if_name, ifp->if_unit); |
| 488 | printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", |
| 489 | ifp->if_index, |
| 490 | bdg_stats.s[ifp->if_index].name, |
| 491 | (int)ifp->if_type, (int) ifp->if_physical, |
| 492 | (int)ifp->if_addrlen, |
| 493 | p->etheraddr, "." ); |
| 494 | p++ ; |
| 495 | bp->ifp = ifp ; |
| 496 | bp->flags = IFF_USED ; |
| 497 | bp->cluster_id = htons(1) ; |
| 498 | bp->magic = 0xDEADBEEF ; |
| 499 | |
| 500 | sprintf(bdg_stats.s[ifp->if_index].name, |
| 501 | "%s%d:%d", ifp->if_name, ifp->if_unit, |
| 502 | ntohs(bp->cluster_id)); |
| 503 | bdg_ports ++ ; |
| 504 | } |
| 505 | ifnet_head_done(); |
| 506 | } |
| 507 | |
| 508 | /* |
| 509 | * bridge_in() is invoked to perform bridging decision on input packets. |
| 510 | * |
| 511 | * On Input: |
| 512 | * eh Ethernet header of the incoming packet. |
| 513 | * |
| 514 | * On Return: destination of packet, one of |
| 515 | * BDG_BCAST broadcast |
| 516 | * BDG_MCAST multicast |
| 517 | * BDG_LOCAL is only for a local address (do not forward) |
| 518 | * BDG_DROP drop the packet |
| 519 | * ifp ifp of the destination interface. |
| 520 | * |
| 521 | * Forwarding is not done directly to give a chance to some drivers |
| 522 | * to fetch more of the packet, or simply drop it completely. |
| 523 | */ |
| 524 | |
| 525 | struct ifnet * |
| 526 | bridge_in(struct ifnet *ifp, struct ether_header *eh) |
| 527 | { |
| 528 | int index; |
| 529 | struct ifnet *dst , *old ; |
| 530 | int dropit = BDG_MUTED(ifp) ; |
| 531 | |
| 532 | /* |
| 533 | * hash the source address |
| 534 | */ |
| 535 | index= HASH_FN(eh->ether_shost); |
| 536 | bdg_table[index].used = 1 ; |
| 537 | old = bdg_table[index].name ; |
| 538 | if ( old ) { /* the entry is valid. */ |
| 539 | IFP_CHK(old, printf("bridge_in-- reading table\n") ); |
| 540 | |
| 541 | if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { |
| 542 | bdg_ipfw_colls++ ; |
| 543 | bdg_table[index].name = NULL ; |
| 544 | } else if (old != ifp) { |
| 545 | /* |
| 546 | * found a loop. Either a machine has moved, or there |
| 547 | * is a misconfiguration/reconfiguration of the network. |
| 548 | * First, do not forward this packet! |
| 549 | * Record the relocation anyways; then, if loops persist, |
| 550 | * suspect a reconfiguration and disable forwarding |
| 551 | * from the old interface. |
| 552 | */ |
| 553 | bdg_table[index].name = ifp ; /* relocate address */ |
| 554 | printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n", |
| 555 | bdg_loops, eh->ether_shost, ".", |
| 556 | ifp->if_name, ifp->if_unit, |
| 557 | old->if_name, old->if_unit, |
| 558 | BDG_MUTED(old) ? "muted":"active"); |
| 559 | dropit = 1 ; |
| 560 | if ( !BDG_MUTED(old) ) { |
| 561 | if (++bdg_loops > 10) |
| 562 | BDG_MUTE(old) ; |
| 563 | } |
| 564 | } |
| 565 | } |
| 566 | |
| 567 | /* |
| 568 | * now write the source address into the table |
| 569 | */ |
| 570 | if (bdg_table[index].name == NULL) { |
| 571 | DEB(printf("new addr %6D at %d for %s%d\n", |
| 572 | eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);) |
| 573 | bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); |
| 574 | bdg_table[index].name = ifp ; |
| 575 | } |
| 576 | dst = bridge_dst_lookup(eh); |
| 577 | /* Return values: |
| 578 | * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. |
| 579 | * For muted interfaces, the first 3 are changed in BDG_LOCAL, |
| 580 | * and others to BDG_DROP. Also, for incoming packets, ifp is changed |
| 581 | * to BDG_DROP in case ifp == src . These mods are not necessary |
| 582 | * for outgoing packets from ether_output(). |
| 583 | */ |
| 584 | BDG_STAT(ifp, BDG_IN); |
| 585 | switch ((int)dst) { |
| 586 | case (int)BDG_BCAST: |
| 587 | case (int)BDG_MCAST: |
| 588 | case (int)BDG_LOCAL: |
| 589 | case (int)BDG_UNKNOWN: |
| 590 | case (int)BDG_DROP: |
| 591 | BDG_STAT(ifp, dst); |
| 592 | break ; |
| 593 | default : |
| 594 | if (dst == ifp || dropit ) |
| 595 | BDG_STAT(ifp, BDG_DROP); |
| 596 | else |
| 597 | BDG_STAT(ifp, BDG_FORWARD); |
| 598 | break ; |
| 599 | } |
| 600 | |
| 601 | if ( dropit ) { |
| 602 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL) |
| 603 | return BDG_LOCAL ; |
| 604 | else |
| 605 | return BDG_DROP ; |
| 606 | } else { |
| 607 | return (dst == ifp ? BDG_DROP : dst ) ; |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | /* |
| 612 | * Forward to dst, excluding src port and muted interfaces. |
| 613 | * If src == NULL, the pkt comes from ether_output, and dst is the real |
| 614 | * interface the packet is originally sent to. In this case we must forward |
| 615 | * it to the whole cluster. We never call bdg_forward from ether_output on |
| 616 | * interfaces which are not part of a cluster. |
| 617 | * |
| 618 | * The packet is freed if possible (i.e. surely not of interest for |
| 619 | * the upper layer), otherwise a copy is left for use by the caller |
| 620 | * (pointer in m0). |
| 621 | * |
| 622 | * It would be more efficient to make bdg_forward() always consume |
| 623 | * the packet, leaving to the caller the task to check if it needs a copy |
| 624 | * and get one in case. As it is now, bdg_forward() can sometimes make |
| 625 | * a copy whereas it is not necessary. |
| 626 | * |
| 627 | * XXX be careful about eh, it can be a pointer into *m |
| 628 | */ |
| 629 | struct mbuf * |
| 630 | bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) |
| 631 | { |
| 632 | struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */ |
| 633 | struct ifnet *ifp, *last = NULL ; |
| 634 | int s ; |
| 635 | int shared = bdg_copy ; /* someone else is using the mbuf */ |
| 636 | int once = 0; /* loop only once */ |
| 637 | struct ifnet *real_dst = dst ; /* real dst from ether_output */ |
| 638 | #ifdef IPFIREWALL |
| 639 | struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */ |
| 640 | #endif |
| 641 | |
| 642 | /* |
| 643 | * XXX eh is usually a pointer within the mbuf (some ethernet drivers |
| 644 | * do that), so we better copy it before doing anything with the mbuf, |
| 645 | * or we might corrupt the header. |
| 646 | */ |
| 647 | struct ether_header save_eh = *eh ; |
| 648 | |
| 649 | #if defined(IPFIREWALL) && defined(DUMMYNET) |
| 650 | if (m0->m_type == MT_DUMMYNET) { |
| 651 | /* extract info from dummynet header */ |
| 652 | rule = (struct ip_fw_chain *)(m0->m_data) ; |
| 653 | m0 = m0->m_next ; |
| 654 | src = m0->m_pkthdr.rcvif; |
| 655 | shared = 0 ; /* For sure this is our own mbuf. */ |
| 656 | } else |
| 657 | #endif |
| 658 | bdg_thru++; /* only count once */ |
| 659 | |
| 660 | if (src == NULL) /* packet from ether_output */ |
| 661 | dst = bridge_dst_lookup(eh); |
| 662 | if (dst == BDG_DROP) { /* this should not happen */ |
| 663 | printf("xx bdg_forward for BDG_DROP\n"); |
| 664 | m_freem(m0); |
| 665 | return NULL; |
| 666 | } |
| 667 | if (dst == BDG_LOCAL) { /* this should not happen as well */ |
| 668 | printf("xx ouch, bdg_forward for local pkt\n"); |
| 669 | return m0; |
| 670 | } |
| 671 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { |
| 672 | ifp = ifnet_head.tqh_first ; /* scan all ports */ |
| 673 | once = 0 ; |
| 674 | if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ |
| 675 | shared = 1 ; |
| 676 | } else { |
| 677 | ifp = dst ; |
| 678 | once = 1 ; |
| 679 | } |
| 680 | if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) |
| 681 | panic("bdg_forward: bad dst"); |
| 682 | |
| 683 | #ifdef IPFIREWALL |
| 684 | /* |
| 685 | * Do filtering in a very similar way to what is done in ip_output. |
| 686 | * Only if firewall is loaded, enabled, and the packet is not |
| 687 | * from ether_output() (src==NULL, or we would filter it twice). |
| 688 | * Additional restrictions may apply e.g. non-IP, short packets, |
| 689 | * and pkts already gone through a pipe. |
| 690 | */ |
| 691 | if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) { |
| 692 | struct ip *ip ; |
| 693 | int i; |
| 694 | |
| 695 | if (rule != NULL) /* dummynet packet, already partially processed */ |
| 696 | goto forward; /* HACK! I should obey the fw_one_pass */ |
| 697 | if (ntohs(save_eh.ether_type) != ETHERTYPE_IP) |
| 698 | goto forward ; /* not an IP packet, ipfw is not appropriate */ |
| 699 | if (m0->m_pkthdr.len < sizeof(struct ip) ) |
| 700 | goto forward ; /* header too short for an IP pkt, cannot filter */ |
| 701 | /* |
| 702 | * i need some amt of data to be contiguous, and in case others need |
| 703 | * the packet (shared==1) also better be in the first mbuf. |
| 704 | */ |
| 705 | i = min(m0->m_pkthdr.len, max_protohdr) ; |
| 706 | if ( shared || m0->m_len < i) { |
| 707 | m0 = m_pullup(m0, i) ; |
| 708 | if (m0 == NULL) { |
| 709 | printf("-- bdg: pullup failed.\n") ; |
| 710 | return NULL ; |
| 711 | } |
| 712 | } |
| 713 | |
| 714 | /* |
| 715 | * before calling the firewall, swap fields the same as IP does. |
| 716 | * here we assume the pkt is an IP one and the header is contiguous |
| 717 | */ |
| 718 | ip = mtod(m0, struct ip *); |
| 719 | NTOHS(ip->ip_len); |
| 720 | NTOHS(ip->ip_off); |
| 721 | |
| 722 | /* |
| 723 | * The third parameter to the firewall code is the dst. interface. |
| 724 | * Since we apply checks only on input pkts we use NULL. |
| 725 | * The firewall knows this is a bridged packet as the cookie ptr |
| 726 | * is NULL. |
| 727 | */ |
| 728 | i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL); |
| 729 | if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */ |
| 730 | return m0 ; |
| 731 | /* |
| 732 | * If we get here, the firewall has passed the pkt, but the mbuf |
| 733 | * pointer might have changed. Restore ip and the fields NTOHS()'d. |
| 734 | */ |
| 735 | ip = mtod(m0, struct ip *); |
| 736 | HTONS(ip->ip_len); |
| 737 | HTONS(ip->ip_off); |
| 738 | |
| 739 | if (i == 0) /* a PASS rule. */ |
| 740 | goto forward ; |
| 741 | #ifdef DUMMYNET |
| 742 | if (i & IP_FW_PORT_DYNT_FLAG) { |
| 743 | /* |
| 744 | * Pass the pkt to dummynet, which consumes it. |
| 745 | * If shared, make a copy and keep the original. |
| 746 | * Need to prepend the ethernet header, optimize the common |
| 747 | * case of eh pointing already into the original mbuf. |
| 748 | */ |
| 749 | struct mbuf *m ; |
| 750 | if (shared) { |
| 751 | m = m_copypacket(m0, M_DONTWAIT); |
| 752 | if (m == NULL) { |
| 753 | printf("bdg_fwd: copy(1) failed\n"); |
| 754 | return m0; |
| 755 | } |
| 756 | } else { |
| 757 | m = m0 ; /* pass the original to dummynet */ |
| 758 | m0 = NULL ; /* and nothing back to the caller */ |
| 759 | } |
| 760 | if ( (void *)(eh + 1) == (void *)m->m_data) { |
| 761 | m->m_data -= ETHER_HDR_LEN ; |
| 762 | m->m_len += ETHER_HDR_LEN ; |
| 763 | m->m_pkthdr.len += ETHER_HDR_LEN ; |
| 764 | bdg_predict++; |
| 765 | } else { |
| 766 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); |
| 767 | if (!m && verbose) printf("M_PREPEND failed\n"); |
| 768 | if (m == NULL) /* nope... */ |
| 769 | return m0 ; |
| 770 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); |
| 771 | } |
| 772 | dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0); |
| 773 | return m0 ; |
| 774 | } |
| 775 | #endif |
| 776 | /* |
| 777 | * XXX add divert/forward actions... |
| 778 | */ |
| 779 | /* if none of the above matches, we have to drop the pkt */ |
| 780 | bdg_ipfw_drops++ ; |
| 781 | printf("bdg_forward: No rules match, so dropping packet!\n"); |
| 782 | return m0 ; |
| 783 | } |
| 784 | forward: |
| 785 | #endif /* IPFIREWALL */ |
| 786 | /* |
| 787 | * Again, bring up the headers in case of shared bufs to avoid |
| 788 | * corruptions in the future. |
| 789 | */ |
| 790 | if ( shared ) { |
| 791 | int i = min(m0->m_pkthdr.len, max_protohdr) ; |
| 792 | |
| 793 | m0 = m_pullup(m0, i) ; |
| 794 | if (m0 == NULL) { |
| 795 | printf("-- bdg: pullup2 failed.\n") ; |
| 796 | return NULL ; |
| 797 | } |
| 798 | } |
| 799 | /* now real_dst is used to determine the cluster where to forward */ |
| 800 | if (src != NULL) /* pkt comes from ether_input */ |
| 801 | real_dst = src ; |
| 802 | for (;;) { |
| 803 | if (last) { /* need to forward packet leftover from previous loop */ |
| 804 | struct mbuf *m ; |
| 805 | if (shared == 0 && once ) { /* no need to copy */ |
| 806 | m = m0 ; |
| 807 | m0 = NULL ; /* original is gone */ |
| 808 | } else { |
| 809 | m = m_copypacket(m0, M_DONTWAIT); |
| 810 | if (m == NULL) { |
| 811 | printf("bdg_forward: sorry, m_copypacket failed!\n"); |
| 812 | return m0 ; /* the original is still there... */ |
| 813 | } |
| 814 | } |
| 815 | /* |
| 816 | * Add header (optimized for the common case of eh pointing |
| 817 | * already into the mbuf) and execute last part of ether_output: |
| 818 | * queue pkt and start output if interface not yet active. |
| 819 | */ |
| 820 | if ( (void *)(eh + 1) == (void *)m->m_data) { |
| 821 | m->m_data -= ETHER_HDR_LEN ; |
| 822 | m->m_len += ETHER_HDR_LEN ; |
| 823 | m->m_pkthdr.len += ETHER_HDR_LEN ; |
| 824 | bdg_predict++; |
| 825 | } else { |
| 826 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); |
| 827 | if (!m && verbose) printf("M_PREPEND failed\n"); |
| 828 | if (m == NULL) |
| 829 | return m0; |
| 830 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); |
| 831 | } |
| 832 | s = splimp(); |
| 833 | if (IF_QFULL(&last->if_snd)) { |
| 834 | IF_DROP(&last->if_snd); |
| 835 | #if 0 |
| 836 | BDG_MUTE(last); /* should I also mute ? */ |
| 837 | #endif |
| 838 | splx(s); |
| 839 | m_freem(m); /* consume the pkt anyways */ |
| 840 | } else { |
| 841 | last->if_obytes += m->m_pkthdr.len ; |
| 842 | if (m->m_flags & M_MCAST) |
| 843 | last->if_omcasts++; |
| 844 | if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */ |
| 845 | bdg_split_pkts++; |
| 846 | |
| 847 | IF_ENQUEUE(&last->if_snd, m); |
| 848 | if ((last->if_flags & IFF_OACTIVE) == 0) |
| 849 | (*last->if_start)(last); |
| 850 | splx(s); |
| 851 | } |
| 852 | BDG_STAT(last, BDG_OUT); |
| 853 | last = NULL ; |
| 854 | if (once) |
| 855 | break ; |
| 856 | } |
| 857 | if (ifp == NULL) |
| 858 | break ; |
| 859 | /* |
| 860 | * If the interface is used for bridging, not muted, not full, |
| 861 | * up and running, is not the source interface, and belongs to |
| 862 | * the same cluster as the 'real_dst', then send here. |
| 863 | */ |
| 864 | if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) && |
| 865 | (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && |
| 866 | ifp != src && BDG_SAMECLUSTER(ifp, real_dst) ) |
| 867 | last = ifp ; |
| 868 | ifp = TAILQ_NEXT(ifp, if_link) ; |
| 869 | if (ifp == NULL) |
| 870 | once = 1 ; |
| 871 | } |
| 872 | DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; |
| 873 | if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) |
| 874 | return m0 ; |
| 875 | } |