]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
37839358 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
37839358 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
37839358 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 1998 Luigi Rizzo | |
24 | * | |
25 | * Redistribution and use in source and binary forms, with or without | |
26 | * modification, are permitted provided that the following conditions | |
27 | * are met: | |
28 | * 1. Redistributions of source code must retain the above copyright | |
29 | * notice, this list of conditions and the following disclaimer. | |
30 | * 2. Redistributions in binary form must reproduce the above copyright | |
31 | * notice, this list of conditions and the following disclaimer in the | |
32 | * documentation and/or other materials provided with the distribution. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
35 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
36 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
37 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
38 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
39 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
40 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
41 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
42 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
43 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
44 | * SUCH DAMAGE. | |
45 | * | |
9bccf70c | 46 | * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $ |
1c79356b A |
47 | */ |
48 | ||
49 | /* | |
50 | * This code implements bridging in FreeBSD. It only acts on ethernet | |
51 | * type of interfaces (others are still usable for routing). | |
52 | * A bridging table holds the source MAC address/dest. interface for each | |
53 | * known node. The table is indexed using a hash of the source address. | |
54 | * | |
9bccf70c A |
55 | * Input packets are tapped near the beginning of ether_input(), and |
56 | * analysed by calling bridge_in(). Depending on the result, the packet | |
1c79356b A |
57 | * can be forwarded to one or more output interfaces using bdg_forward(), |
58 | * and/or sent to the upper layer (e.g. in case of multicast). | |
59 | * | |
60 | * Output packets are intercepted near the end of ether_output(), | |
9bccf70c | 61 | * the correct destination is selected calling bridge_dst_lookup(), |
1c79356b A |
62 | * and then forwarding is done using bdg_forward(). |
63 | * Bridging is controlled by the sysctl variable net.link.ether.bridge | |
64 | * | |
65 | * The arp code is also modified to let a machine answer to requests | |
66 | * irrespective of the port the request came from. | |
67 | * | |
68 | * In case of loops in the bridging topology, the bridge detects this | |
69 | * event and temporarily mutes output bridging on one of the ports. | |
9bccf70c A |
70 | * Periodically, interfaces are unmuted by bdg_timeout(). |
71 | * Muting is only implemented as a safety measure, and also as | |
1c79356b A |
72 | * a mechanism to support a user-space implementation of the spanning |
73 | * tree algorithm. In the final release, unmuting will only occur | |
74 | * because of explicit action of the user-level daemon. | |
75 | * | |
76 | * To build a bridging kernel, use the following option | |
77 | * option BRIDGE | |
78 | * and then at runtime set the sysctl variable to enable bridging. | |
79 | * | |
80 | * Only one interface is supposed to have addresses set (but | |
81 | * there are no problems in practice if you set addresses for more | |
82 | * than one interface). | |
83 | * Bridging will act before routing, but nothing prevents a machine | |
84 | * from doing both (modulo bugs in the implementation...). | |
85 | * | |
86 | * THINGS TO REMEMBER | |
1c79356b A |
87 | * - bridging is incompatible with multicast routing on the same |
88 | * machine. There is not an easy fix to this. | |
89 | * - loop detection is still not very robust. | |
90 | * - the interface of bdg_forward() could be improved. | |
91 | */ | |
92 | ||
93 | #include <sys/param.h> | |
94 | #include <sys/mbuf.h> | |
95 | #include <sys/malloc.h> | |
96 | #include <sys/systm.h> | |
97 | #include <sys/socket.h> /* for net/if.h */ | |
98 | #include <sys/kernel.h> | |
99 | #include <sys/sysctl.h> | |
100 | ||
101 | #include <net/if.h> | |
102 | #include <net/if_types.h> | |
103 | ||
104 | #include <netinet/in.h> /* for struct arpcom */ | |
105 | #include <netinet/in_systm.h> | |
106 | #include <netinet/in_var.h> | |
107 | #include <netinet/ip.h> | |
108 | #include <netinet/if_ether.h> /* for struct arpcom */ | |
109 | ||
110 | #include "opt_ipfw.h" | |
111 | #include "opt_ipdn.h" | |
112 | ||
9bccf70c | 113 | #if defined(IPFIREWALL) |
1c79356b A |
114 | #include <net/route.h> |
115 | #include <netinet/ip_fw.h> | |
9bccf70c | 116 | #if defined(DUMMYNET) |
1c79356b A |
117 | #include <netinet/ip_dummynet.h> |
118 | #endif | |
9bccf70c | 119 | #endif |
1c79356b A |
120 | |
121 | #include <net/bridge.h> | |
122 | ||
123 | /* | |
124 | * For debugging, you can use the following macros. | |
125 | * remember, rdtsc() only works on Pentium-class machines | |
126 | ||
127 | quad_t ticks; | |
128 | DDB(ticks = rdtsc();) | |
129 | ... interesting code ... | |
130 | DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) | |
131 | ||
132 | * | |
133 | */ | |
134 | ||
135 | #define DDB(x) x | |
136 | #define DEB(x) | |
137 | ||
1c79356b | 138 | static void bdginit(void *); |
9bccf70c | 139 | static void bdgtakeifaces(void); |
1c79356b | 140 | static void flush_table(void); |
9bccf70c A |
141 | static void bdg_promisc_on(void); |
142 | static void parse_bdg_cfg(void); | |
1c79356b A |
143 | |
144 | static int bdg_ipfw = 0 ; | |
145 | int do_bridge = 0; | |
146 | bdg_hash_table *bdg_table = NULL ; | |
147 | ||
148 | /* | |
9bccf70c A |
149 | * System initialization |
150 | */ | |
151 | ||
152 | SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) | |
153 | ||
154 | static struct bdg_stats bdg_stats ; | |
155 | struct bdg_softc *ifp2sc = NULL ; | |
156 | /* XXX make it static of size BDG_MAX_PORTS */ | |
157 | ||
/*
 * IFP_CHK(ifp, x): execute statement `x` when the bridge softc slot
 * belonging to `ifp` does not hold the 0xDEADBEEF magic value.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely inside an unbraced if/else;
 * the `ifp` argument is parenthesized so any pointer expression works.
 */
#define IFP_CHK(ifp, x) \
	do { \
		if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) { x ; } \
	} while (0)
160 | ||
161 | /* | |
162 | * turn off promisc mode, optionally clear the IFF_USED flag. | |
163 | * The flag is turned on by parse_bdg_config | |
1c79356b | 164 | */ |
9bccf70c A |
165 | static void |
166 | bdg_promisc_off(int clear_used) | |
167 | { | |
1c79356b | 168 | struct ifnet *ifp ; |
91447636 A |
169 | ifnet_head_lock_shared(); |
170 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { | |
171 | if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
172 | int s, ret ; | |
173 | s = splimp(); | |
174 | ret = ifnet_set_promiscuous(ifp, 0); | |
175 | splx(s); | |
176 | ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; | |
177 | DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", | |
178 | ifp->if_name, ifp->if_unit, | |
179 | ifp->if_flags, ifp2sc[ifp->if_index].flags);) | |
180 | } | |
181 | if (clear_used) { | |
182 | ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; | |
183 | bdg_stats.s[ifp->if_index].name[0] = '\0'; | |
184 | } | |
9bccf70c | 185 | } |
91447636 | 186 | ifnet_head_done(); |
9bccf70c A |
187 | } |
188 | ||
189 | /* | |
190 | * set promisc mode on the interfaces we use. | |
191 | */ | |
192 | static void | |
193 | bdg_promisc_on() | |
194 | { | |
195 | struct ifnet *ifp ; | |
196 | int s ; | |
197 | ||
91447636 A |
198 | ifnet_head_lock_shared(); |
199 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { | |
200 | if ( !BDG_USED(ifp) ) | |
201 | continue ; | |
202 | if ( 0 == ( ifp->if_flags & IFF_UP) ) { | |
203 | s = splimp(); | |
204 | if_up(ifp); | |
205 | splx(s); | |
206 | } | |
207 | if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
208 | int ret ; | |
209 | s = splimp(); | |
210 | ret = ifnet_set_promiscuous(ifp, 1); | |
211 | splx(s); | |
212 | ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; | |
213 | printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", | |
214 | ifp->if_name, ifp->if_unit, | |
215 | ifp->if_flags, ifp2sc[ifp->if_index].flags); | |
216 | } | |
217 | if (BDG_MUTED(ifp)) { | |
218 | printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); | |
219 | BDG_UNMUTE(ifp) ; | |
220 | } | |
9bccf70c | 221 | } |
91447636 | 222 | ifnet_head_done(); |
9bccf70c | 223 | } |
1c79356b A |
224 | |
225 | static int | |
9bccf70c | 226 | sysctl_bdg(SYSCTL_HANDLER_ARGS) |
1c79356b A |
227 | { |
228 | int error, oldval = do_bridge ; | |
229 | ||
230 | error = sysctl_handle_int(oidp, | |
231 | oidp->oid_arg1, oidp->oid_arg2, req); | |
9bccf70c | 232 | DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", |
1c79356b | 233 | oidp->oid_name, oidp->oid_arg2, |
9bccf70c A |
234 | oldval, do_bridge); ) |
235 | ||
1c79356b A |
236 | if (bdg_table == NULL) |
237 | do_bridge = 0 ; | |
238 | if (oldval != do_bridge) { | |
9bccf70c | 239 | bdg_promisc_off( 1 ); /* reset previously used interfaces */ |
1c79356b | 240 | flush_table(); |
9bccf70c A |
241 | if (do_bridge) { |
242 | parse_bdg_cfg(); | |
243 | bdg_promisc_on(); | |
244 | } | |
1c79356b A |
245 | } |
246 | return error ; | |
247 | } | |
248 | ||
9bccf70c A |
249 | static char bridge_cfg[256] = { "" } ; |
250 | ||
251 | /* | |
252 | * parse the config string, set IFF_USED, name and cluster_id | |
253 | * for all interfaces found. | |
254 | */ | |
255 | static void | |
256 | parse_bdg_cfg() | |
257 | { | |
258 | char *p, *beg ; | |
259 | int i, l, cluster; | |
260 | struct bdg_softc *b; | |
261 | ||
262 | for (p= bridge_cfg; *p ; p++) { | |
263 | /* interface names begin with [a-z] and continue up to ':' */ | |
264 | if (*p < 'a' || *p > 'z') | |
265 | continue ; | |
266 | for ( beg = p ; *p && *p != ':' ; p++ ) | |
267 | ; | |
268 | if (*p == 0) /* end of string, ':' not found */ | |
269 | return ; | |
270 | l = p - beg ; /* length of name string */ | |
271 | p++ ; | |
272 | DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) | |
273 | for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) | |
274 | cluster = cluster*10 + (*p -'0'); | |
275 | /* | |
276 | * now search in bridge strings | |
277 | */ | |
278 | for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { | |
279 | char buf[32]; | |
280 | struct ifnet *ifp = b->ifp ; | |
281 | ||
282 | if (ifp == NULL) | |
283 | continue; | |
284 | sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); | |
285 | if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ | |
286 | b->cluster_id = htons(cluster) ; | |
287 | b->flags |= IFF_USED ; | |
288 | sprintf(bdg_stats.s[ifp->if_index].name, | |
289 | "%s%d:%d", ifp->if_name, ifp->if_unit, cluster); | |
290 | ||
291 | DEB(printf("--++ found %s\n", | |
292 | bdg_stats.s[ifp->if_index].name);) | |
293 | break ; | |
294 | } | |
295 | } | |
296 | if (*p == '\0') | |
297 | break ; | |
298 | } | |
299 | } | |
300 | ||
301 | static int | |
302 | sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) | |
303 | { | |
304 | int error = 0 ; | |
305 | char oldval[256] ; | |
306 | ||
307 | strcpy(oldval, bridge_cfg) ; | |
308 | ||
309 | error = sysctl_handle_string(oidp, | |
310 | bridge_cfg, oidp->oid_arg2, req); | |
311 | DEB( | |
312 | printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", | |
313 | oidp->oid_name, oidp->oid_arg2, | |
314 | error, | |
315 | oldval, bridge_cfg); | |
316 | ) | |
317 | if (strcmp(oldval, bridge_cfg)) { | |
318 | bdg_promisc_off( 1 ); /* reset previously-used interfaces */ | |
319 | flush_table(); | |
320 | parse_bdg_cfg(); /* and set new ones... */ | |
321 | if (do_bridge) | |
322 | bdg_promisc_on(); /* re-enable interfaces */ | |
323 | } | |
324 | return error ; | |
325 | } | |
326 | ||
327 | static int | |
328 | sysctl_refresh(SYSCTL_HANDLER_ARGS) | |
329 | { | |
330 | if (req->newptr) | |
331 | bdgtakeifaces(); | |
332 | ||
333 | return 0; | |
334 | } | |
335 | ||
336 | ||
1c79356b | 337 | SYSCTL_DECL(_net_link_ether); |
9bccf70c A |
338 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW, |
339 | &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A", | |
340 | "Bridge configuration"); | |
341 | ||
1c79356b | 342 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW, |
9bccf70c A |
343 | &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); |
344 | ||
345 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, | |
346 | &bdg_ipfw,0,"Pass bridged pkts through firewall"); | |
347 | ||
348 | #define SY(parent, var, comment) \ | |
349 | static int var ; \ | |
350 | SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment); | |
351 | ||
352 | int bdg_ipfw_drops; | |
353 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop, | |
354 | CTLFLAG_RW, &bdg_ipfw_drops,0,""); | |
355 | ||
356 | int bdg_ipfw_colls; | |
357 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions, | |
358 | CTLFLAG_RW, &bdg_ipfw_colls,0,""); | |
359 | ||
360 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR, | |
361 | NULL, 0, &sysctl_refresh, "I", "iface refresh"); | |
1c79356b | 362 | |
1c79356b | 363 | #if 1 /* diagnostic vars */ |
9bccf70c A |
364 | |
365 | SY(_net_link_ether, verbose, "Be verbose"); | |
366 | SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward"); | |
367 | ||
368 | SY(_net_link_ether, bdg_thru, "Packets through bridge"); | |
369 | ||
370 | SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward"); | |
371 | ||
372 | SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward"); | |
373 | SY(_net_link_ether, bdg_predict, "Correctly predicted header location"); | |
374 | ||
375 | SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg"); | |
376 | SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item"); | |
377 | SY(_net_link_ether, bdg_fw_count, "Cycle counter count"); | |
1c79356b | 378 | #endif |
9bccf70c | 379 | |
1c79356b A |
380 | SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats, |
381 | CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics"); | |
382 | ||
383 | static int bdg_loops ; | |
384 | ||
385 | /* | |
386 | * completely flush the bridge table. | |
387 | */ | |
388 | static void | |
389 | flush_table() | |
390 | { | |
391 | int s,i; | |
392 | ||
393 | if (bdg_table == NULL) | |
394 | return ; | |
395 | s = splimp(); | |
396 | for (i=0; i< HASH_SIZE; i++) | |
397 | bdg_table[i].name= NULL; /* clear table */ | |
398 | splx(s); | |
399 | } | |
400 | ||
1c79356b A |
401 | /* |
402 | * called periodically to flush entries etc. | |
403 | */ | |
404 | static void | |
405 | bdg_timeout(void *dummy) | |
406 | { | |
1c79356b | 407 | static int slowtimer = 0 ; |
9bccf70c | 408 | |
1c79356b A |
409 | if (do_bridge) { |
410 | static int age_index = 0 ; /* index of table position to age */ | |
411 | int l = age_index + HASH_SIZE/4 ; | |
412 | /* | |
413 | * age entries in the forwarding table. | |
414 | */ | |
415 | if (l > HASH_SIZE) | |
416 | l = HASH_SIZE ; | |
417 | for (; age_index < l ; age_index++) | |
418 | if (bdg_table[age_index].used) | |
419 | bdg_table[age_index].used = 0 ; | |
420 | else if (bdg_table[age_index].name) { | |
421 | /* printf("xx flushing stale entry %d\n", age_index); */ | |
422 | bdg_table[age_index].name = NULL ; | |
423 | } | |
424 | if (age_index >= HASH_SIZE) | |
425 | age_index = 0 ; | |
426 | ||
427 | if (--slowtimer <= 0 ) { | |
428 | slowtimer = 5 ; | |
429 | ||
9bccf70c | 430 | bdg_promisc_on() ; /* we just need unmute, really */ |
1c79356b A |
431 | bdg_loops = 0 ; |
432 | } | |
433 | } | |
91447636 | 434 | timeout(bdg_timeout, (void *)0, 2*hz ); |
1c79356b A |
435 | } |
436 | ||
437 | /* | |
438 | * local MAC addresses are held in a small array. This makes comparisons | |
439 | * much faster. | |
440 | */ | |
9bccf70c | 441 | bdg_addr bdg_addresses[BDG_MAX_PORTS]; |
1c79356b A |
442 | int bdg_ports ; |
443 | ||
444 | /* | |
9bccf70c A |
445 | * initialization of bridge code. This needs to be done after all |
446 | * interfaces have been configured. | |
1c79356b A |
447 | */ |
448 | static void | |
9bccf70c | 449 | bdginit(void *dummy) |
1c79356b | 450 | { |
9bccf70c | 451 | |
1c79356b A |
452 | if (bdg_table == NULL) |
453 | bdg_table = (struct hash_table *) | |
454 | _MALLOC(HASH_SIZE * sizeof(struct hash_table), | |
455 | M_IFADDR, M_WAITOK); | |
456 | flush_table(); | |
457 | ||
9bccf70c A |
458 | ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), |
459 | M_IFADDR, M_WAITOK ); | |
460 | bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); | |
1c79356b A |
461 | |
462 | bzero(&bdg_stats, sizeof(bdg_stats) ); | |
9bccf70c A |
463 | bdgtakeifaces(); |
464 | bdg_timeout(0); | |
465 | do_bridge=0; | |
466 | } | |
467 | ||
468 | void | |
469 | bdgtakeifaces(void) | |
470 | { | |
471 | int i ; | |
472 | struct ifnet *ifp; | |
9bccf70c A |
473 | bdg_addr *p = bdg_addresses ; |
474 | struct bdg_softc *bp; | |
475 | ||
1c79356b | 476 | bdg_ports = 0 ; |
9bccf70c | 477 | *bridge_cfg = '\0'; |
1c79356b | 478 | |
9bccf70c | 479 | printf("BRIDGE 010131, have %d interfaces\n", if_index); |
91447636 | 480 | ifnet_head_lock_shared(); |
1c79356b | 481 | for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; |
9bccf70c | 482 | i++, ifp = TAILQ_NEXT(ifp, if_link) ) |
91447636 A |
483 | if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ |
484 | ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN); | |
485 | bp = &ifp2sc[ifp->if_index] ; | |
486 | sprintf(bridge_cfg + strlen(bridge_cfg), | |
487 | "%s%d:1,", ifp->if_name, ifp->if_unit); | |
488 | printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", | |
489 | ifp->if_index, | |
490 | bdg_stats.s[ifp->if_index].name, | |
491 | (int)ifp->if_type, (int) ifp->if_physical, | |
492 | (int)ifp->if_addrlen, | |
493 | p->etheraddr, "." ); | |
494 | p++ ; | |
495 | bp->ifp = ifp ; | |
496 | bp->flags = IFF_USED ; | |
497 | bp->cluster_id = htons(1) ; | |
498 | bp->magic = 0xDEADBEEF ; | |
499 | ||
500 | sprintf(bdg_stats.s[ifp->if_index].name, | |
501 | "%s%d:%d", ifp->if_name, ifp->if_unit, | |
502 | ntohs(bp->cluster_id)); | |
503 | bdg_ports ++ ; | |
504 | } | |
505 | ifnet_head_done(); | |
1c79356b A |
506 | } |
507 | ||
508 | /* | |
509 | * bridge_in() is invoked to perform bridging decision on input packets. | |
9bccf70c | 510 | * |
1c79356b | 511 | * On Input: |
9bccf70c | 512 | * eh Ethernet header of the incoming packet. |
1c79356b A |
513 | * |
514 | * On Return: destination of packet, one of | |
515 | * BDG_BCAST broadcast | |
516 | * BDG_MCAST multicast | |
517 | * BDG_LOCAL is only for a local address (do not forward) | |
518 | * BDG_DROP drop the packet | |
519 | * ifp ifp of the destination interface. | |
520 | * | |
521 | * Forwarding is not done directly to give a chance to some drivers | |
522 | * to fetch more of the packet, or simply drop it completely. | |
523 | */ | |
524 | ||
1c79356b | 525 | struct ifnet * |
9bccf70c | 526 | bridge_in(struct ifnet *ifp, struct ether_header *eh) |
1c79356b A |
527 | { |
528 | int index; | |
9bccf70c A |
529 | struct ifnet *dst , *old ; |
530 | int dropit = BDG_MUTED(ifp) ; | |
1c79356b A |
531 | |
532 | /* | |
533 | * hash the source address | |
534 | */ | |
535 | index= HASH_FN(eh->ether_shost); | |
536 | bdg_table[index].used = 1 ; | |
537 | old = bdg_table[index].name ; | |
538 | if ( old ) { /* the entry is valid. */ | |
9bccf70c A |
539 | IFP_CHK(old, printf("bridge_in-- reading table\n") ); |
540 | ||
1c79356b | 541 | if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { |
9bccf70c | 542 | bdg_ipfw_colls++ ; |
1c79356b A |
543 | bdg_table[index].name = NULL ; |
544 | } else if (old != ifp) { | |
545 | /* | |
546 | * found a loop. Either a machine has moved, or there | |
547 | * is a misconfiguration/reconfiguration of the network. | |
548 | * First, do not forward this packet! | |
549 | * Record the relocation anyways; then, if loops persist, | |
550 | * suspect a reconfiguration and disable forwarding | |
551 | * from the old interface. | |
552 | */ | |
553 | bdg_table[index].name = ifp ; /* relocate address */ | |
554 | printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n", | |
555 | bdg_loops, eh->ether_shost, ".", | |
556 | ifp->if_name, ifp->if_unit, | |
557 | old->if_name, old->if_unit, | |
9bccf70c | 558 | BDG_MUTED(old) ? "muted":"active"); |
1c79356b | 559 | dropit = 1 ; |
9bccf70c | 560 | if ( !BDG_MUTED(old) ) { |
1c79356b | 561 | if (++bdg_loops > 10) |
9bccf70c | 562 | BDG_MUTE(old) ; |
1c79356b A |
563 | } |
564 | } | |
565 | } | |
566 | ||
567 | /* | |
568 | * now write the source address into the table | |
569 | */ | |
570 | if (bdg_table[index].name == NULL) { | |
571 | DEB(printf("new addr %6D at %d for %s%d\n", | |
572 | eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);) | |
573 | bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); | |
574 | bdg_table[index].name = ifp ; | |
575 | } | |
9bccf70c | 576 | dst = bridge_dst_lookup(eh); |
1c79356b A |
577 | /* Return values: |
578 | * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. | |
579 | * For muted interfaces, the first 3 are changed in BDG_LOCAL, | |
580 | * and others to BDG_DROP. Also, for incoming packets, ifp is changed | |
581 | * to BDG_DROP in case ifp == src . These mods are not necessary | |
582 | * for outgoing packets from ether_output(). | |
583 | */ | |
584 | BDG_STAT(ifp, BDG_IN); | |
585 | switch ((int)dst) { | |
586 | case (int)BDG_BCAST: | |
587 | case (int)BDG_MCAST: | |
588 | case (int)BDG_LOCAL: | |
589 | case (int)BDG_UNKNOWN: | |
590 | case (int)BDG_DROP: | |
591 | BDG_STAT(ifp, dst); | |
592 | break ; | |
593 | default : | |
594 | if (dst == ifp || dropit ) | |
595 | BDG_STAT(ifp, BDG_DROP); | |
596 | else | |
597 | BDG_STAT(ifp, BDG_FORWARD); | |
598 | break ; | |
599 | } | |
600 | ||
601 | if ( dropit ) { | |
602 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL) | |
603 | return BDG_LOCAL ; | |
604 | else | |
605 | return BDG_DROP ; | |
606 | } else { | |
607 | return (dst == ifp ? BDG_DROP : dst ) ; | |
608 | } | |
609 | } | |
610 | ||
611 | /* | |
9bccf70c A |
612 | * Forward to dst, excluding src port and muted interfaces. |
613 | * If src == NULL, the pkt comes from ether_output, and dst is the real | |
614 | * interface the packet is originally sent to. In this case we must forward | |
615 | * it to the whole cluster. We never call bdg_forward ether_output on | |
616 | * interfaces which are not part of a cluster. | |
617 | * | |
618 | * The packet is freed if possible (i.e. surely not of interest for | |
619 | * the upper layer), otherwise a copy is left for use by the caller | |
620 | * (pointer in m0). | |
621 | * | |
622 | * It would be more efficient to make bdg_forward() always consume | |
623 | * the packet, leaving to the caller the task to check if it needs a copy | |
624 | * and get one in case. As it is now, bdg_forward() can sometimes make | |
625 | * a copy whereas it is not necessary. | |
626 | * | |
627 | * XXX be careful about eh, it can be a pointer into *m | |
1c79356b | 628 | */ |
9bccf70c A |
629 | struct mbuf * |
630 | bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) | |
1c79356b | 631 | { |
9bccf70c A |
632 | struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */ |
633 | struct ifnet *ifp, *last = NULL ; | |
634 | int s ; | |
635 | int shared = bdg_copy ; /* someone else is using the mbuf */ | |
636 | int once = 0; /* loop only once */ | |
637 | struct ifnet *real_dst = dst ; /* real dst from ether_output */ | |
638 | #ifdef IPFIREWALL | |
639 | struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */ | |
640 | #endif | |
1c79356b | 641 | |
9bccf70c A |
642 | /* |
643 | * XXX eh is usually a pointer within the mbuf (some ethernet drivers | |
644 | * do that), so we better copy it before doing anything with the mbuf, | |
645 | * or we might corrupt the header. | |
646 | */ | |
647 | struct ether_header save_eh = *eh ; | |
648 | ||
649 | #if defined(IPFIREWALL) && defined(DUMMYNET) | |
650 | if (m0->m_type == MT_DUMMYNET) { | |
651 | /* extract info from dummynet header */ | |
652 | rule = (struct ip_fw_chain *)(m0->m_data) ; | |
653 | m0 = m0->m_next ; | |
654 | src = m0->m_pkthdr.rcvif; | |
655 | shared = 0 ; /* For sure this is our own mbuf. */ | |
656 | } else | |
657 | #endif | |
658 | bdg_thru++; /* only count once */ | |
1c79356b | 659 | |
9bccf70c | 660 | if (src == NULL) /* packet from ether_output */ |
91447636 | 661 | dst = bridge_dst_lookup(eh); |
1c79356b | 662 | if (dst == BDG_DROP) { /* this should not happen */ |
91447636 A |
663 | printf("xx bdg_forward for BDG_DROP\n"); |
664 | m_freem(m0); | |
665 | return NULL; | |
1c79356b A |
666 | } |
667 | if (dst == BDG_LOCAL) { /* this should not happen as well */ | |
91447636 A |
668 | printf("xx ouch, bdg_forward for local pkt\n"); |
669 | return m0; | |
1c79356b A |
670 | } |
671 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { | |
91447636 A |
672 | ifp = ifnet_head.tqh_first ; /* scan all ports */ |
673 | once = 0 ; | |
674 | if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ | |
675 | shared = 1 ; | |
1c79356b | 676 | } else { |
91447636 A |
677 | ifp = dst ; |
678 | once = 1 ; | |
1c79356b | 679 | } |
9bccf70c | 680 | if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) |
91447636 | 681 | panic("bdg_forward: bad dst"); |
9bccf70c A |
682 | |
683 | #ifdef IPFIREWALL | |
1c79356b | 684 | /* |
9bccf70c A |
685 | * Do filtering in a very similar way to what is done in ip_output. |
686 | * Only if firewall is loaded, enabled, and the packet is not | |
687 | * from ether_output() (src==NULL, or we would filter it twice). | |
688 | * Additional restrictions may apply e.g. non-IP, short packets, | |
689 | * and pkts already gone through a pipe. | |
1c79356b | 690 | */ |
9bccf70c A |
691 | if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) { |
692 | struct ip *ip ; | |
693 | int i; | |
694 | ||
695 | if (rule != NULL) /* dummynet packet, already partially processed */ | |
696 | goto forward; /* HACK! I should obey the fw_one_pass */ | |
697 | if (ntohs(save_eh.ether_type) != ETHERTYPE_IP) | |
698 | goto forward ; /* not an IP packet, ipfw is not appropriate */ | |
699 | if (m0->m_pkthdr.len < sizeof(struct ip) ) | |
700 | goto forward ; /* header too short for an IP pkt, cannot filter */ | |
701 | /* | |
702 | * i need some amt of data to be contiguous, and in case others need | |
703 | * the packet (shared==1) also better be in the first mbuf. | |
704 | */ | |
705 | i = min(m0->m_pkthdr.len, max_protohdr) ; | |
706 | if ( shared || m0->m_len < i) { | |
707 | m0 = m_pullup(m0, i) ; | |
708 | if (m0 == NULL) { | |
709 | printf("-- bdg: pullup failed.\n") ; | |
710 | return NULL ; | |
711 | } | |
1c79356b | 712 | } |
9bccf70c | 713 | |
1c79356b A |
714 | /* |
715 | * before calling the firewall, swap fields the same as IP does. | |
716 | * here we assume the pkt is an IP one and the header is contiguous | |
717 | */ | |
9bccf70c | 718 | ip = mtod(m0, struct ip *); |
1c79356b | 719 | NTOHS(ip->ip_len); |
1c79356b A |
720 | NTOHS(ip->ip_off); |
721 | ||
722 | /* | |
9bccf70c | 723 | * The third parameter to the firewall code is the dst. interface. |
1c79356b | 724 | * Since we apply checks only on input pkts we use NULL. |
9bccf70c A |
725 | * The firewall knows this is a bridged packet as the cookie ptr |
726 | * is NULL. | |
1c79356b | 727 | */ |
9bccf70c A |
728 | i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL); |
729 | if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */ | |
730 | return m0 ; | |
1c79356b | 731 | /* |
9bccf70c A |
732 | * If we get here, the firewall has passed the pkt, but the mbuf |
733 | * pointer might have changed. Restore ip and the fields NTOHS()'d. | |
1c79356b | 734 | */ |
9bccf70c | 735 | ip = mtod(m0, struct ip *); |
1c79356b | 736 | HTONS(ip->ip_len); |
1c79356b | 737 | HTONS(ip->ip_off); |
9bccf70c A |
738 | |
739 | if (i == 0) /* a PASS rule. */ | |
1c79356b | 740 | goto forward ; |
9bccf70c A |
741 | #ifdef DUMMYNET |
742 | if (i & IP_FW_PORT_DYNT_FLAG) { | |
1c79356b | 743 | /* |
9bccf70c A |
744 | * Pass the pkt to dummynet, which consumes it. |
745 | * If shared, make a copy and keep the original. | |
746 | * Need to prepend the ethernet header, optimize the common | |
747 | * case of eh pointing already into the original mbuf. | |
1c79356b | 748 | */ |
9bccf70c A |
749 | struct mbuf *m ; |
750 | if (shared) { | |
751 | m = m_copypacket(m0, M_DONTWAIT); | |
752 | if (m == NULL) { | |
753 | printf("bdg_fwd: copy(1) failed\n"); | |
754 | return m0; | |
755 | } | |
756 | } else { | |
757 | m = m0 ; /* pass the original to dummynet */ | |
758 | m0 = NULL ; /* and nothing back to the caller */ | |
759 | } | |
760 | if ( (void *)(eh + 1) == (void *)m->m_data) { | |
761 | m->m_data -= ETHER_HDR_LEN ; | |
762 | m->m_len += ETHER_HDR_LEN ; | |
763 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
764 | bdg_predict++; | |
765 | } else { | |
766 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
767 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
768 | if (m == NULL) /* nope... */ | |
769 | return m0 ; | |
770 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
771 | } | |
772 | dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0); | |
773 | return m0 ; | |
1c79356b A |
774 | } |
775 | #endif | |
9bccf70c A |
776 | /* |
777 | * XXX add divert/forward actions... | |
778 | */ | |
1c79356b | 779 | /* if none of the above matches, we have to drop the pkt */ |
9bccf70c A |
780 | bdg_ipfw_drops++ ; |
781 | printf("bdg_forward: No rules match, so dropping packet!\n"); | |
782 | return m0 ; | |
1c79356b A |
783 | } |
784 | forward: | |
9bccf70c A |
785 | #endif /* IPFIREWALL */ |
786 | /* | |
787 | * Again, bring up the headers in case of shared bufs to avoid | |
788 | * corruptions in the future. | |
789 | */ | |
790 | if ( shared ) { | |
791 | int i = min(m0->m_pkthdr.len, max_protohdr) ; | |
792 | ||
793 | m0 = m_pullup(m0, i) ; | |
794 | if (m0 == NULL) { | |
795 | printf("-- bdg: pullup2 failed.\n") ; | |
796 | return NULL ; | |
797 | } | |
798 | } | |
799 | /* now real_dst is used to determine the cluster where to forward */ | |
800 | if (src != NULL) /* pkt comes from ether_input */ | |
801 | real_dst = src ; | |
802 | for (;;) { | |
803 | if (last) { /* need to forward packet leftover from previous loop */ | |
804 | struct mbuf *m ; | |
805 | if (shared == 0 && once ) { /* no need to copy */ | |
806 | m = m0 ; | |
807 | m0 = NULL ; /* original is gone */ | |
808 | } else { | |
809 | m = m_copypacket(m0, M_DONTWAIT); | |
1c79356b | 810 | if (m == NULL) { |
9bccf70c A |
811 | printf("bdg_forward: sorry, m_copypacket failed!\n"); |
812 | return m0 ; /* the original is still there... */ | |
1c79356b A |
813 | } |
814 | } | |
815 | /* | |
9bccf70c A |
816 | * Add header (optimized for the common case of eh pointing |
817 | * already into the mbuf) and execute last part of ether_output: | |
818 | * queue pkt and start output if interface not yet active. | |
1c79356b | 819 | */ |
9bccf70c A |
820 | if ( (void *)(eh + 1) == (void *)m->m_data) { |
821 | m->m_data -= ETHER_HDR_LEN ; | |
822 | m->m_len += ETHER_HDR_LEN ; | |
823 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
824 | bdg_predict++; | |
825 | } else { | |
826 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
827 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
828 | if (m == NULL) | |
829 | return m0; | |
830 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
831 | } | |
1c79356b | 832 | s = splimp(); |
9bccf70c A |
833 | if (IF_QFULL(&last->if_snd)) { |
834 | IF_DROP(&last->if_snd); | |
835 | #if 0 | |
836 | BDG_MUTE(last); /* should I also mute ? */ | |
837 | #endif | |
1c79356b | 838 | splx(s); |
9bccf70c | 839 | m_freem(m); /* consume the pkt anyways */ |
1c79356b | 840 | } else { |
9bccf70c | 841 | last->if_obytes += m->m_pkthdr.len ; |
1c79356b | 842 | if (m->m_flags & M_MCAST) |
9bccf70c A |
843 | last->if_omcasts++; |
844 | if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */ | |
845 | bdg_split_pkts++; | |
846 | ||
847 | IF_ENQUEUE(&last->if_snd, m); | |
848 | if ((last->if_flags & IFF_OACTIVE) == 0) | |
849 | (*last->if_start)(last); | |
1c79356b | 850 | splx(s); |
1c79356b | 851 | } |
9bccf70c A |
852 | BDG_STAT(last, BDG_OUT); |
853 | last = NULL ; | |
854 | if (once) | |
855 | break ; | |
1c79356b | 856 | } |
9bccf70c | 857 | if (ifp == NULL) |
1c79356b | 858 | break ; |
9bccf70c A |
859 | /* |
860 | * If the interface is used for bridging, not muted, not full, | |
861 | * up and running, is not the source interface, and belongs to | |
862 | * the same cluster as the 'real_dst', then send here. | |
863 | */ | |
864 | if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) && | |
865 | (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && | |
866 | ifp != src && BDG_SAMECLUSTER(ifp, real_dst) ) | |
867 | last = ifp ; | |
868 | ifp = TAILQ_NEXT(ifp, if_link) ; | |
869 | if (ifp == NULL) | |
870 | once = 1 ; | |
1c79356b | 871 | } |
9bccf70c A |
872 | DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; |
873 | if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) | |
874 | return m0 ; | |
1c79356b | 875 | } |