]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
e5568f75 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
e5568f75 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
e5568f75 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * Copyright (c) 1998 Luigi Rizzo | |
24 | * | |
25 | * Redistribution and use in source and binary forms, with or without | |
26 | * modification, are permitted provided that the following conditions | |
27 | * are met: | |
28 | * 1. Redistributions of source code must retain the above copyright | |
29 | * notice, this list of conditions and the following disclaimer. | |
30 | * 2. Redistributions in binary form must reproduce the above copyright | |
31 | * notice, this list of conditions and the following disclaimer in the | |
32 | * documentation and/or other materials provided with the distribution. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
35 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
36 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
37 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
38 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
39 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
40 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
41 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
42 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
43 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
44 | * SUCH DAMAGE. | |
45 | * | |
9bccf70c | 46 | * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $ |
1c79356b A |
47 | */ |
48 | ||
49 | /* | |
50 | * This code implements bridging in FreeBSD. It only acts on ethernet | |
51 | * type of interfaces (others are still usable for routing). | |
52 | * A bridging table holds the source MAC address/dest. interface for each | |
53 | * known node. The table is indexed using a hash of the source address. | |
54 | * | |
9bccf70c A |
55 | * Input packets are tapped near the beginning of ether_input(), and |
56 | * analysed by calling bridge_in(). Depending on the result, the packet | |
1c79356b A |
57 | * can be forwarded to one or more output interfaces using bdg_forward(), |
58 | * and/or sent to the upper layer (e.g. in case of multicast). | |
59 | * | |
60 | * Output packets are intercepted near the end of ether_output(), | |
9bccf70c | 61 | * the correct destination is selected calling bridge_dst_lookup(), |
1c79356b A |
62 | * and then forwarding is done using bdg_forward(). |
63 | * Bridging is controlled by the sysctl variable net.link.ether.bridge | |
64 | * | |
65 | * The arp code is also modified to let a machine answer to requests | |
66 | * irrespective of the port the request came from. | |
67 | * | |
68 | * In case of loops in the bridging topology, the bridge detects this | |
69 | * event and temporarily mutes output bridging on one of the ports. | |
9bccf70c A |
70 | * Periodically, interfaces are unmuted by bdg_timeout(). |
71 | * Muting is only implemented as a safety measure, and also as | |
1c79356b A |
72 | * a mechanism to support a user-space implementation of the spanning |
73 | * tree algorithm. In the final release, unmuting will only occur | |
74 | * because of explicit action of the user-level daemon. | |
75 | * | |
76 | * To build a bridging kernel, use the following option | |
77 | * option BRIDGE | |
78 | * and then at runtime set the sysctl variable to enable bridging. | |
79 | * | |
80 | * Only one interface is supposed to have addresses set (but | |
81 | * there are no problems in practice if you set addresses for more | |
82 | * than one interface). | |
83 | * Bridging will act before routing, but nothing prevents a machine | |
84 | * from doing both (modulo bugs in the implementation...). | |
85 | * | |
86 | * THINGS TO REMEMBER | |
1c79356b A |
87 | * - bridging is incompatible with multicast routing on the same |
88 | * machine. There is not an easy fix to this. | |
89 | * - loop detection is still not very robust. | |
90 | * - the interface of bdg_forward() could be improved. | |
91 | */ | |
92 | ||
93 | #include <sys/param.h> | |
94 | #include <sys/mbuf.h> | |
95 | #include <sys/malloc.h> | |
96 | #include <sys/systm.h> | |
97 | #include <sys/socket.h> /* for net/if.h */ | |
98 | #include <sys/kernel.h> | |
99 | #include <sys/sysctl.h> | |
100 | ||
101 | #include <net/if.h> | |
102 | #include <net/if_types.h> | |
103 | ||
104 | #include <netinet/in.h> /* for struct arpcom */ | |
105 | #include <netinet/in_systm.h> | |
106 | #include <netinet/in_var.h> | |
107 | #include <netinet/ip.h> | |
108 | #include <netinet/if_ether.h> /* for struct arpcom */ | |
109 | ||
110 | #include "opt_ipfw.h" | |
111 | #include "opt_ipdn.h" | |
112 | ||
9bccf70c | 113 | #if defined(IPFIREWALL) |
1c79356b A |
114 | #include <net/route.h> |
115 | #include <netinet/ip_fw.h> | |
9bccf70c | 116 | #if defined(DUMMYNET) |
1c79356b A |
117 | #include <netinet/ip_dummynet.h> |
118 | #endif | |
9bccf70c | 119 | #endif |
1c79356b A |
120 | |
121 | #include <net/bridge.h> | |
122 | ||
123 | /* | |
124 | * For debugging, you can use the following macros. | |
125 | * remember, rdtsc() only works on Pentium-class machines | |
126 | ||
127 | quad_t ticks; | |
128 | DDB(ticks = rdtsc();) | |
129 | ... interesting code ... | |
130 | DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) | |
131 | ||
132 | * | |
133 | */ | |
134 | ||
135 | #define DDB(x) x | |
136 | #define DEB(x) | |
137 | ||
1c79356b | 138 | static void bdginit(void *); |
9bccf70c | 139 | static void bdgtakeifaces(void); |
1c79356b | 140 | static void flush_table(void); |
9bccf70c A |
141 | static void bdg_promisc_on(void); |
142 | static void parse_bdg_cfg(void); | |
1c79356b A |
143 | |
144 | static int bdg_ipfw = 0 ; | |
145 | int do_bridge = 0; | |
146 | bdg_hash_table *bdg_table = NULL ; | |
147 | ||
148 | /* | |
9bccf70c A |
149 | * System initialization |
150 | */ | |
151 | ||
152 | SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) | |
153 | ||
154 | static struct bdg_stats bdg_stats ; | |
155 | struct bdg_softc *ifp2sc = NULL ; | |
156 | /* XXX make it static of size BDG_MAX_PORTS */ | |
157 | ||
158 | #define IFP_CHK(ifp, x) \ | |
159 | if (ifp2sc[ifp->if_index].magic != 0xDEADBEEF) { x ; } | |
160 | ||
161 | /* | |
162 | * turn off promisc mode, optionally clear the IFF_USED flag. | |
163 | * The flag is turned on by parse_bdg_cfg(). | |
1c79356b | 164 | */ |
9bccf70c A |
165 | static void |
166 | bdg_promisc_off(int clear_used) | |
167 | { | |
1c79356b | 168 | struct ifnet *ifp ; |
9bccf70c A |
169 | TAILQ_FOREACH(ifp, &ifnet, if_link) { |
170 | if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
171 | int s, ret ; | |
172 | s = splimp(); | |
173 | ret = ifpromisc(ifp, 0); | |
174 | splx(s); | |
175 | ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; | |
176 | DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", | |
177 | ifp->if_name, ifp->if_unit, | |
178 | ifp->if_flags, ifp2sc[ifp->if_index].flags);) | |
179 | } | |
180 | if (clear_used) { | |
181 | ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; | |
182 | bdg_stats.s[ifp->if_index].name[0] = '\0'; | |
183 | } | |
184 | } | |
185 | } | |
186 | ||
187 | /* | |
188 | * set promisc mode on the interfaces we use. | |
189 | */ | |
190 | static void | |
191 | bdg_promisc_on() | |
192 | { | |
193 | struct ifnet *ifp ; | |
194 | int s ; | |
195 | ||
196 | TAILQ_FOREACH(ifp, &ifnet, if_link) { | |
197 | if ( !BDG_USED(ifp) ) | |
198 | continue ; | |
199 | if ( 0 == ( ifp->if_flags & IFF_UP) ) { | |
200 | s = splimp(); | |
201 | if_up(ifp); | |
202 | splx(s); | |
203 | } | |
204 | if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
205 | int ret ; | |
206 | s = splimp(); | |
207 | ret = ifpromisc(ifp, 1); | |
208 | splx(s); | |
209 | ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; | |
210 | printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", | |
211 | ifp->if_name, ifp->if_unit, | |
212 | ifp->if_flags, ifp2sc[ifp->if_index].flags); | |
213 | } | |
214 | if (BDG_MUTED(ifp)) { | |
215 | printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); | |
216 | BDG_UNMUTE(ifp) ; | |
217 | } | |
218 | } | |
219 | } | |
1c79356b A |
220 | |
221 | static int | |
9bccf70c | 222 | sysctl_bdg(SYSCTL_HANDLER_ARGS) |
1c79356b A |
223 | { |
224 | int error, oldval = do_bridge ; | |
225 | ||
226 | error = sysctl_handle_int(oidp, | |
227 | oidp->oid_arg1, oidp->oid_arg2, req); | |
9bccf70c | 228 | DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", |
1c79356b | 229 | oidp->oid_name, oidp->oid_arg2, |
9bccf70c A |
230 | oldval, do_bridge); ) |
231 | ||
1c79356b A |
232 | if (bdg_table == NULL) |
233 | do_bridge = 0 ; | |
234 | if (oldval != do_bridge) { | |
9bccf70c | 235 | bdg_promisc_off( 1 ); /* reset previously used interfaces */ |
1c79356b | 236 | flush_table(); |
9bccf70c A |
237 | if (do_bridge) { |
238 | parse_bdg_cfg(); | |
239 | bdg_promisc_on(); | |
240 | } | |
1c79356b A |
241 | } |
242 | return error ; | |
243 | } | |
244 | ||
9bccf70c A |
245 | static char bridge_cfg[256] = { "" } ; |
246 | ||
247 | /* | |
248 | * parse the config string, set IFF_USED, name and cluster_id | |
249 | * for all interfaces found. | |
250 | */ | |
251 | static void | |
252 | parse_bdg_cfg() | |
253 | { | |
254 | char *p, *beg ; | |
255 | int i, l, cluster; | |
256 | struct bdg_softc *b; | |
257 | ||
258 | for (p= bridge_cfg; *p ; p++) { | |
259 | /* interface names begin with [a-z] and continue up to ':' */ | |
260 | if (*p < 'a' || *p > 'z') | |
261 | continue ; | |
262 | for ( beg = p ; *p && *p != ':' ; p++ ) | |
263 | ; | |
264 | if (*p == 0) /* end of string, ':' not found */ | |
265 | return ; | |
266 | l = p - beg ; /* length of name string */ | |
267 | p++ ; | |
268 | DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) | |
269 | for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) | |
270 | cluster = cluster*10 + (*p -'0'); | |
271 | /* | |
272 | * now search in bridge strings | |
273 | */ | |
274 | for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { | |
275 | char buf[32]; | |
276 | struct ifnet *ifp = b->ifp ; | |
277 | ||
278 | if (ifp == NULL) | |
279 | continue; | |
280 | sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); | |
281 | if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ | |
282 | b->cluster_id = htons(cluster) ; | |
283 | b->flags |= IFF_USED ; | |
284 | sprintf(bdg_stats.s[ifp->if_index].name, | |
285 | "%s%d:%d", ifp->if_name, ifp->if_unit, cluster); | |
286 | ||
287 | DEB(printf("--++ found %s\n", | |
288 | bdg_stats.s[ifp->if_index].name);) | |
289 | break ; | |
290 | } | |
291 | } | |
292 | if (*p == '\0') | |
293 | break ; | |
294 | } | |
295 | } | |
296 | ||
297 | static int | |
298 | sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) | |
299 | { | |
300 | int error = 0 ; | |
301 | char oldval[256] ; | |
302 | ||
303 | strcpy(oldval, bridge_cfg) ; | |
304 | ||
305 | error = sysctl_handle_string(oidp, | |
306 | bridge_cfg, oidp->oid_arg2, req); | |
307 | DEB( | |
308 | printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", | |
309 | oidp->oid_name, oidp->oid_arg2, | |
310 | error, | |
311 | oldval, bridge_cfg); | |
312 | ) | |
313 | if (strcmp(oldval, bridge_cfg)) { | |
314 | bdg_promisc_off( 1 ); /* reset previously-used interfaces */ | |
315 | flush_table(); | |
316 | parse_bdg_cfg(); /* and set new ones... */ | |
317 | if (do_bridge) | |
318 | bdg_promisc_on(); /* re-enable interfaces */ | |
319 | } | |
320 | return error ; | |
321 | } | |
322 | ||
323 | static int | |
324 | sysctl_refresh(SYSCTL_HANDLER_ARGS) | |
325 | { | |
326 | if (req->newptr) | |
327 | bdgtakeifaces(); | |
328 | ||
329 | return 0; | |
330 | } | |
331 | ||
332 | ||
1c79356b | 333 | SYSCTL_DECL(_net_link_ether); |
9bccf70c A |
334 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW, |
335 | &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A", | |
336 | "Bridge configuration"); | |
337 | ||
1c79356b | 338 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW, |
9bccf70c A |
339 | &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); |
340 | ||
341 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, | |
342 | &bdg_ipfw,0,"Pass bridged pkts through firewall"); | |
343 | ||
344 | #define SY(parent, var, comment) \ | |
345 | static int var ; \ | |
346 | SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment); | |
347 | ||
348 | int bdg_ipfw_drops; | |
349 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop, | |
350 | CTLFLAG_RW, &bdg_ipfw_drops,0,""); | |
351 | ||
352 | int bdg_ipfw_colls; | |
353 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions, | |
354 | CTLFLAG_RW, &bdg_ipfw_colls,0,""); | |
355 | ||
356 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR, | |
357 | NULL, 0, &sysctl_refresh, "I", "iface refresh"); | |
1c79356b | 358 | |
1c79356b | 359 | #if 1 /* diagnostic vars */ |
9bccf70c A |
360 | |
361 | SY(_net_link_ether, verbose, "Be verbose"); | |
362 | SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward"); | |
363 | ||
364 | SY(_net_link_ether, bdg_thru, "Packets through bridge"); | |
365 | ||
366 | SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward"); | |
367 | ||
368 | SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward"); | |
369 | SY(_net_link_ether, bdg_predict, "Correctly predicted header location"); | |
370 | ||
371 | SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg"); | |
372 | SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item"); | |
373 | SY(_net_link_ether, bdg_fw_count, "Cycle counter count"); | |
1c79356b | 374 | #endif |
9bccf70c | 375 | |
1c79356b A |
376 | SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats, |
377 | CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics"); | |
378 | ||
379 | static int bdg_loops ; | |
380 | ||
381 | /* | |
382 | * completely flush the bridge table. | |
383 | */ | |
384 | static void | |
385 | flush_table() | |
386 | { | |
387 | int s,i; | |
388 | ||
389 | if (bdg_table == NULL) | |
390 | return ; | |
391 | s = splimp(); | |
392 | for (i=0; i< HASH_SIZE; i++) | |
393 | bdg_table[i].name= NULL; /* clear table */ | |
394 | splx(s); | |
395 | } | |
396 | ||
397 | /* wrapper for funnel */ | |
398 | void | |
399 | bdg_timeout_funneled(void * dummy) | |
400 | { | |
401 | boolean_t funnel_state; | |
402 | ||
403 | funnel_state = thread_funnel_set(network_flock, TRUE); | |
404 | bdg_timeout(dummy); | |
405 | funnel_state = thread_funnel_set(network_flock, FALSE); | |
406 | } | |
407 | ||
408 | /* | |
409 | * called periodically to flush entries etc. | |
410 | */ | |
411 | static void | |
412 | bdg_timeout(void *dummy) | |
413 | { | |
1c79356b | 414 | static int slowtimer = 0 ; |
9bccf70c | 415 | |
1c79356b A |
416 | if (do_bridge) { |
417 | static int age_index = 0 ; /* index of table position to age */ | |
418 | int l = age_index + HASH_SIZE/4 ; | |
419 | /* | |
420 | * age entries in the forwarding table. | |
421 | */ | |
422 | if (l > HASH_SIZE) | |
423 | l = HASH_SIZE ; | |
424 | for (; age_index < l ; age_index++) | |
425 | if (bdg_table[age_index].used) | |
426 | bdg_table[age_index].used = 0 ; | |
427 | else if (bdg_table[age_index].name) { | |
428 | /* printf("xx flushing stale entry %d\n", age_index); */ | |
429 | bdg_table[age_index].name = NULL ; | |
430 | } | |
431 | if (age_index >= HASH_SIZE) | |
432 | age_index = 0 ; | |
433 | ||
434 | if (--slowtimer <= 0 ) { | |
435 | slowtimer = 5 ; | |
436 | ||
9bccf70c | 437 | bdg_promisc_on() ; /* we just need unmute, really */ |
1c79356b A |
438 | bdg_loops = 0 ; |
439 | } | |
440 | } | |
441 | timeout(bdg_timeout_funneled, (void *)0, 2*hz ); | |
1c79356b A |
442 | } |
443 | ||
444 | /* | |
445 | * local MAC addresses are held in a small array. This makes comparisons | |
446 | * much faster. | |
447 | */ | |
9bccf70c | 448 | bdg_addr bdg_addresses[BDG_MAX_PORTS]; |
1c79356b A |
449 | int bdg_ports ; |
450 | ||
451 | /* | |
9bccf70c A |
452 | * initialization of bridge code. This needs to be done after all |
453 | * interfaces have been configured. | |
1c79356b A |
454 | */ |
455 | static void | |
9bccf70c | 456 | bdginit(void *dummy) |
1c79356b | 457 | { |
9bccf70c | 458 | |
1c79356b A |
459 | if (bdg_table == NULL) |
460 | bdg_table = (struct hash_table *) | |
461 | _MALLOC(HASH_SIZE * sizeof(struct hash_table), | |
462 | M_IFADDR, M_WAITOK); | |
463 | flush_table(); | |
464 | ||
9bccf70c A |
465 | ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), |
466 | M_IFADDR, M_WAITOK ); | |
467 | bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); | |
1c79356b A |
468 | |
469 | bzero(&bdg_stats, sizeof(bdg_stats) ); | |
9bccf70c A |
470 | bdgtakeifaces(); |
471 | bdg_timeout(0); | |
472 | do_bridge=0; | |
473 | } | |
474 | ||
475 | void | |
476 | bdgtakeifaces(void) | |
477 | { | |
478 | int i ; | |
479 | struct ifnet *ifp; | |
480 | struct arpcom *ac ; | |
481 | bdg_addr *p = bdg_addresses ; | |
482 | struct bdg_softc *bp; | |
483 | ||
1c79356b | 484 | bdg_ports = 0 ; |
9bccf70c | 485 | *bridge_cfg = '\0'; |
1c79356b | 486 | |
9bccf70c | 487 | printf("BRIDGE 010131, have %d interfaces\n", if_index); |
1c79356b | 488 | for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; |
9bccf70c | 489 | i++, ifp = TAILQ_NEXT(ifp, if_link) ) |
1c79356b | 490 | if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ |
9bccf70c | 491 | bp = &ifp2sc[ifp->if_index] ; |
1c79356b | 492 | ac = (struct arpcom *)ifp; |
9bccf70c A |
493 | sprintf(bridge_cfg + strlen(bridge_cfg), |
494 | "%s%d:1,", ifp->if_name, ifp->if_unit); | |
495 | printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", | |
496 | ifp->if_index, | |
497 | bdg_stats.s[ifp->if_index].name, | |
498 | (int)ifp->if_type, (int) ifp->if_physical, | |
499 | (int)ifp->if_addrlen, | |
500 | ac->ac_enaddr, "." ); | |
501 | bcopy(ac->ac_enaddr, p->etheraddr, 6); | |
502 | p++ ; | |
503 | bp->ifp = ifp ; | |
504 | bp->flags = IFF_USED ; | |
505 | bp->cluster_id = htons(1) ; | |
506 | bp->magic = 0xDEADBEEF ; | |
507 | ||
508 | sprintf(bdg_stats.s[ifp->if_index].name, | |
509 | "%s%d:%d", ifp->if_name, ifp->if_unit, | |
510 | ntohs(bp->cluster_id)); | |
511 | bdg_ports ++ ; | |
512 | } | |
513 | ||
1c79356b A |
514 | } |
515 | ||
516 | /* | |
517 | * bridge_in() is invoked to perform bridging decision on input packets. | |
9bccf70c | 518 | * |
1c79356b | 519 | * On Input: |
9bccf70c | 520 | * eh Ethernet header of the incoming packet. |
1c79356b A |
521 | * |
522 | * On Return: destination of packet, one of | |
523 | * BDG_BCAST broadcast | |
524 | * BDG_MCAST multicast | |
525 | * BDG_LOCAL is only for a local address (do not forward) | |
526 | * BDG_DROP drop the packet | |
527 | * ifp ifp of the destination interface. | |
528 | * | |
529 | * Forwarding is not done directly to give a chance to some drivers | |
530 | * to fetch more of the packet, or simply drop it completely. | |
531 | */ | |
532 | ||
1c79356b | 533 | struct ifnet * |
9bccf70c | 534 | bridge_in(struct ifnet *ifp, struct ether_header *eh) |
1c79356b A |
535 | { |
536 | int index; | |
9bccf70c A |
537 | struct ifnet *dst , *old ; |
538 | int dropit = BDG_MUTED(ifp) ; | |
1c79356b A |
539 | |
540 | /* | |
541 | * hash the source address | |
542 | */ | |
543 | index= HASH_FN(eh->ether_shost); | |
544 | bdg_table[index].used = 1 ; | |
545 | old = bdg_table[index].name ; | |
546 | if ( old ) { /* the entry is valid. */ | |
9bccf70c A |
547 | IFP_CHK(old, printf("bridge_in-- reading table\n") ); |
548 | ||
1c79356b | 549 | if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { |
9bccf70c | 550 | bdg_ipfw_colls++ ; |
1c79356b A |
551 | bdg_table[index].name = NULL ; |
552 | } else if (old != ifp) { | |
553 | /* | |
554 | * found a loop. Either a machine has moved, or there | |
555 | * is a misconfiguration/reconfiguration of the network. | |
556 | * First, do not forward this packet! | |
557 | * Record the relocation anyways; then, if loops persist, | |
558 | * suspect a reconfiguration and disable forwarding | |
559 | * from the old interface. | |
560 | */ | |
561 | bdg_table[index].name = ifp ; /* relocate address */ | |
562 | printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n", | |
563 | bdg_loops, eh->ether_shost, ".", | |
564 | ifp->if_name, ifp->if_unit, | |
565 | old->if_name, old->if_unit, | |
9bccf70c | 566 | BDG_MUTED(old) ? "muted":"active"); |
1c79356b | 567 | dropit = 1 ; |
9bccf70c | 568 | if ( !BDG_MUTED(old) ) { |
1c79356b | 569 | if (++bdg_loops > 10) |
9bccf70c | 570 | BDG_MUTE(old) ; |
1c79356b A |
571 | } |
572 | } | |
573 | } | |
574 | ||
575 | /* | |
576 | * now write the source address into the table | |
577 | */ | |
578 | if (bdg_table[index].name == NULL) { | |
579 | DEB(printf("new addr %6D at %d for %s%d\n", | |
580 | eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);) | |
581 | bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); | |
582 | bdg_table[index].name = ifp ; | |
583 | } | |
9bccf70c | 584 | dst = bridge_dst_lookup(eh); |
1c79356b A |
585 | /* Return values: |
586 | * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. | |
587 | * For muted interfaces, the first 3 are changed in BDG_LOCAL, | |
588 | * and others to BDG_DROP. Also, for incoming packets, ifp is changed | |
589 | * to BDG_DROP in case ifp == src . These mods are not necessary | |
590 | * for outgoing packets from ether_output(). | |
591 | */ | |
592 | BDG_STAT(ifp, BDG_IN); | |
593 | switch ((int)dst) { | |
594 | case (int)BDG_BCAST: | |
595 | case (int)BDG_MCAST: | |
596 | case (int)BDG_LOCAL: | |
597 | case (int)BDG_UNKNOWN: | |
598 | case (int)BDG_DROP: | |
599 | BDG_STAT(ifp, dst); | |
600 | break ; | |
601 | default : | |
602 | if (dst == ifp || dropit ) | |
603 | BDG_STAT(ifp, BDG_DROP); | |
604 | else | |
605 | BDG_STAT(ifp, BDG_FORWARD); | |
606 | break ; | |
607 | } | |
608 | ||
609 | if ( dropit ) { | |
610 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL) | |
611 | return BDG_LOCAL ; | |
612 | else | |
613 | return BDG_DROP ; | |
614 | } else { | |
615 | return (dst == ifp ? BDG_DROP : dst ) ; | |
616 | } | |
617 | } | |
618 | ||
619 | /* | |
9bccf70c A |
620 | * Forward to dst, excluding src port and muted interfaces. |
621 | * If src == NULL, the pkt comes from ether_output, and dst is the real | |
622 | * interface the packet is originally sent to. In this case we must forward | |
623 | * it to the whole cluster. We never call bdg_forward from ether_output on | |
624 | * interfaces which are not part of a cluster. | |
625 | * | |
626 | * The packet is freed if possible (i.e. surely not of interest for | |
627 | * the upper layer), otherwise a copy is left for use by the caller | |
628 | * (pointer in m0). | |
629 | * | |
630 | * It would be more efficient to make bdg_forward() always consume | |
631 | * the packet, leaving to the caller the task to check if it needs a copy | |
632 | * and get one in case. As it is now, bdg_forward() can sometimes make | |
633 | * a copy whereas it is not necessary. | |
634 | * | |
635 | * XXX be careful about eh, it can be a pointer into *m | |
1c79356b | 636 | */ |
9bccf70c A |
637 | struct mbuf * |
638 | bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) | |
1c79356b | 639 | { |
9bccf70c A |
640 | struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */ |
641 | struct ifnet *ifp, *last = NULL ; | |
642 | int s ; | |
643 | int shared = bdg_copy ; /* someone else is using the mbuf */ | |
644 | int once = 0; /* loop only once */ | |
645 | struct ifnet *real_dst = dst ; /* real dst from ether_output */ | |
646 | #ifdef IPFIREWALL | |
647 | struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */ | |
648 | #endif | |
1c79356b | 649 | |
9bccf70c A |
650 | /* |
651 | * XXX eh is usually a pointer within the mbuf (some ethernet drivers | |
652 | * do that), so we better copy it before doing anything with the mbuf, | |
653 | * or we might corrupt the header. | |
654 | */ | |
655 | struct ether_header save_eh = *eh ; | |
656 | ||
657 | #if defined(IPFIREWALL) && defined(DUMMYNET) | |
658 | if (m0->m_type == MT_DUMMYNET) { | |
659 | /* extract info from dummynet header */ | |
660 | rule = (struct ip_fw_chain *)(m0->m_data) ; | |
661 | m0 = m0->m_next ; | |
662 | src = m0->m_pkthdr.rcvif; | |
663 | shared = 0 ; /* For sure this is our own mbuf. */ | |
664 | } else | |
665 | #endif | |
666 | bdg_thru++; /* only count once */ | |
1c79356b | 667 | |
9bccf70c A |
668 | if (src == NULL) /* packet from ether_output */ |
669 | dst = bridge_dst_lookup(eh); | |
1c79356b | 670 | if (dst == BDG_DROP) { /* this should not happen */ |
9bccf70c A |
671 | printf("xx bdg_forward for BDG_DROP\n"); |
672 | m_freem(m0); | |
673 | return NULL; | |
1c79356b A |
674 | } |
675 | if (dst == BDG_LOCAL) { /* this should not happen as well */ | |
676 | printf("xx ouch, bdg_forward for local pkt\n"); | |
9bccf70c | 677 | return m0; |
1c79356b A |
678 | } |
679 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { | |
9bccf70c | 680 | ifp = ifnet.tqh_first ; /* scan all ports */ |
1c79356b | 681 | once = 0 ; |
9bccf70c A |
682 | if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ |
683 | shared = 1 ; | |
1c79356b A |
684 | } else { |
685 | ifp = dst ; | |
9bccf70c | 686 | once = 1 ; |
1c79356b | 687 | } |
9bccf70c A |
688 | if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) |
689 | panic("bdg_forward: bad dst"); | |
690 | ||
691 | #ifdef IPFIREWALL | |
1c79356b | 692 | /* |
9bccf70c A |
693 | * Do filtering in a very similar way to what is done in ip_output. |
694 | * Only if firewall is loaded, enabled, and the packet is not | |
695 | * from ether_output() (src==NULL, or we would filter it twice). | |
696 | * Additional restrictions may apply e.g. non-IP, short packets, | |
697 | * and pkts already gone through a pipe. | |
1c79356b | 698 | */ |
9bccf70c A |
699 | if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) { |
700 | struct ip *ip ; | |
701 | int i; | |
702 | ||
703 | if (rule != NULL) /* dummynet packet, already partially processed */ | |
704 | goto forward; /* HACK! I should obey the fw_one_pass */ | |
705 | if (ntohs(save_eh.ether_type) != ETHERTYPE_IP) | |
706 | goto forward ; /* not an IP packet, ipfw is not appropriate */ | |
707 | if (m0->m_pkthdr.len < sizeof(struct ip) ) | |
708 | goto forward ; /* header too short for an IP pkt, cannot filter */ | |
709 | /* | |
710 | * i need some amt of data to be contiguous, and in case others need | |
711 | * the packet (shared==1) also better be in the first mbuf. | |
712 | */ | |
713 | i = min(m0->m_pkthdr.len, max_protohdr) ; | |
714 | if ( shared || m0->m_len < i) { | |
715 | m0 = m_pullup(m0, i) ; | |
716 | if (m0 == NULL) { | |
717 | printf("-- bdg: pullup failed.\n") ; | |
718 | return NULL ; | |
719 | } | |
1c79356b | 720 | } |
9bccf70c | 721 | |
1c79356b A |
722 | /* |
723 | * before calling the firewall, swap fields the same as IP does. | |
724 | * here we assume the pkt is an IP one and the header is contiguous | |
725 | */ | |
9bccf70c | 726 | ip = mtod(m0, struct ip *); |
1c79356b | 727 | NTOHS(ip->ip_len); |
1c79356b A |
728 | NTOHS(ip->ip_off); |
729 | ||
730 | /* | |
9bccf70c | 731 | * The third parameter to the firewall code is the dst. interface. |
1c79356b | 732 | * Since we apply checks only on input pkts we use NULL. |
9bccf70c A |
733 | * The firewall knows this is a bridged packet as the cookie ptr |
734 | * is NULL. | |
1c79356b | 735 | */ |
9bccf70c A |
736 | i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL); |
737 | if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */ | |
738 | return m0 ; | |
1c79356b | 739 | /* |
9bccf70c A |
740 | * If we get here, the firewall has passed the pkt, but the mbuf |
741 | * pointer might have changed. Restore ip and the fields NTOHS()'d. | |
1c79356b | 742 | */ |
9bccf70c | 743 | ip = mtod(m0, struct ip *); |
1c79356b | 744 | HTONS(ip->ip_len); |
1c79356b | 745 | HTONS(ip->ip_off); |
9bccf70c A |
746 | |
747 | if (i == 0) /* a PASS rule. */ | |
1c79356b | 748 | goto forward ; |
9bccf70c A |
749 | #ifdef DUMMYNET |
750 | if (i & IP_FW_PORT_DYNT_FLAG) { | |
1c79356b | 751 | /* |
9bccf70c A |
752 | * Pass the pkt to dummynet, which consumes it. |
753 | * If shared, make a copy and keep the original. | |
754 | * Need to prepend the ethernet header, optimize the common | |
755 | * case of eh pointing already into the original mbuf. | |
1c79356b | 756 | */ |
9bccf70c A |
757 | struct mbuf *m ; |
758 | if (shared) { | |
759 | m = m_copypacket(m0, M_DONTWAIT); | |
760 | if (m == NULL) { | |
761 | printf("bdg_fwd: copy(1) failed\n"); | |
762 | return m0; | |
763 | } | |
764 | } else { | |
765 | m = m0 ; /* pass the original to dummynet */ | |
766 | m0 = NULL ; /* and nothing back to the caller */ | |
767 | } | |
768 | if ( (void *)(eh + 1) == (void *)m->m_data) { | |
769 | m->m_data -= ETHER_HDR_LEN ; | |
770 | m->m_len += ETHER_HDR_LEN ; | |
771 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
772 | bdg_predict++; | |
773 | } else { | |
774 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
775 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
776 | if (m == NULL) /* nope... */ | |
777 | return m0 ; | |
778 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
779 | } | |
780 | dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0); | |
781 | return m0 ; | |
1c79356b A |
782 | } |
783 | #endif | |
9bccf70c A |
784 | /* |
785 | * XXX add divert/forward actions... | |
786 | */ | |
1c79356b | 787 | /* if none of the above matches, we have to drop the pkt */ |
9bccf70c A |
788 | bdg_ipfw_drops++ ; |
789 | printf("bdg_forward: No rules match, so dropping packet!\n"); | |
790 | return m0 ; | |
1c79356b A |
791 | } |
792 | forward: | |
9bccf70c A |
793 | #endif /* IPFIREWALL */ |
794 | /* | |
795 | * Again, bring up the headers in case of shared bufs to avoid | |
796 | * corruptions in the future. | |
797 | */ | |
798 | if ( shared ) { | |
799 | int i = min(m0->m_pkthdr.len, max_protohdr) ; | |
800 | ||
801 | m0 = m_pullup(m0, i) ; | |
802 | if (m0 == NULL) { | |
803 | printf("-- bdg: pullup2 failed.\n") ; | |
804 | return NULL ; | |
805 | } | |
806 | } | |
807 | /* now real_dst is used to determine the cluster where to forward */ | |
808 | if (src != NULL) /* pkt comes from ether_input */ | |
809 | real_dst = src ; | |
810 | for (;;) { | |
811 | if (last) { /* need to forward packet leftover from previous loop */ | |
812 | struct mbuf *m ; | |
813 | if (shared == 0 && once ) { /* no need to copy */ | |
814 | m = m0 ; | |
815 | m0 = NULL ; /* original is gone */ | |
816 | } else { | |
817 | m = m_copypacket(m0, M_DONTWAIT); | |
1c79356b | 818 | if (m == NULL) { |
9bccf70c A |
819 | printf("bdg_forward: sorry, m_copypacket failed!\n"); |
820 | return m0 ; /* the original is still there... */ | |
1c79356b A |
821 | } |
822 | } | |
823 | /* | |
9bccf70c A |
824 | * Add header (optimized for the common case of eh pointing |
825 | * already into the mbuf) and execute last part of ether_output: | |
826 | * queue pkt and start output if interface not yet active. | |
1c79356b | 827 | */ |
9bccf70c A |
828 | if ( (void *)(eh + 1) == (void *)m->m_data) { |
829 | m->m_data -= ETHER_HDR_LEN ; | |
830 | m->m_len += ETHER_HDR_LEN ; | |
831 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
832 | bdg_predict++; | |
833 | } else { | |
834 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
835 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
836 | if (m == NULL) | |
837 | return m0; | |
838 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
839 | } | |
1c79356b | 840 | s = splimp(); |
9bccf70c A |
841 | if (IF_QFULL(&last->if_snd)) { |
842 | IF_DROP(&last->if_snd); | |
843 | #if 0 | |
844 | BDG_MUTE(last); /* should I also mute ? */ | |
845 | #endif | |
1c79356b | 846 | splx(s); |
9bccf70c | 847 | m_freem(m); /* consume the pkt anyways */ |
1c79356b | 848 | } else { |
9bccf70c | 849 | last->if_obytes += m->m_pkthdr.len ; |
1c79356b | 850 | if (m->m_flags & M_MCAST) |
9bccf70c A |
851 | last->if_omcasts++; |
852 | if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */ | |
853 | bdg_split_pkts++; | |
854 | ||
855 | IF_ENQUEUE(&last->if_snd, m); | |
856 | if ((last->if_flags & IFF_OACTIVE) == 0) | |
857 | (*last->if_start)(last); | |
1c79356b | 858 | splx(s); |
1c79356b | 859 | } |
9bccf70c A |
860 | BDG_STAT(last, BDG_OUT); |
861 | last = NULL ; | |
862 | if (once) | |
863 | break ; | |
1c79356b | 864 | } |
9bccf70c | 865 | if (ifp == NULL) |
1c79356b | 866 | break ; |
9bccf70c A |
867 | /* |
868 | * If the interface is used for bridging, not muted, not full, | |
869 | * up and running, is not the source interface, and belongs to | |
870 | * the same cluster as the 'real_dst', then send here. | |
871 | */ | |
872 | if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) && | |
873 | (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && | |
874 | ifp != src && BDG_SAMECLUSTER(ifp, real_dst) ) | |
875 | last = ifp ; | |
876 | ifp = TAILQ_NEXT(ifp, if_link) ; | |
877 | if (ifp == NULL) | |
878 | once = 1 ; | |
1c79356b | 879 | } |
9bccf70c A |
880 | DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; |
881 | if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) | |
882 | return m0 ; | |
1c79356b | 883 | } |