]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * Copyright (c) 1998 Luigi Rizzo | |
30 | * | |
31 | * Redistribution and use in source and binary forms, with or without | |
32 | * modification, are permitted provided that the following conditions | |
33 | * are met: | |
34 | * 1. Redistributions of source code must retain the above copyright | |
35 | * notice, this list of conditions and the following disclaimer. | |
36 | * 2. Redistributions in binary form must reproduce the above copyright | |
37 | * notice, this list of conditions and the following disclaimer in the | |
38 | * documentation and/or other materials provided with the distribution. | |
39 | * | |
40 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND | |
41 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
42 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
43 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
44 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
45 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
46 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
47 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
48 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
49 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
50 | * SUCH DAMAGE. | |
51 | * | |
52 | * $FreeBSD: src/sys/net/bridge.c,v 1.16.2.14 2001/02/09 23:13:41 luigi Exp $ | |
53 | */ | |
54 | ||
55 | /* | |
56 | * This code implements bridging in FreeBSD. It only acts on ethernet | |
57 | * type of interfaces (others are still usable for routing). | |
58 | * A bridging table holds the source MAC address/dest. interface for each | |
59 | * known node. The table is indexed using a hash of the source address. | |
60 | * | |
61 | * Input packets are tapped near the beginning of ether_input(), and | |
62 | * analysed by calling bridge_in(). Depending on the result, the packet | |
63 | * can be forwarded to one or more output interfaces using bdg_forward(), | |
64 | * and/or sent to the upper layer (e.g. in case of multicast). | |
65 | * | |
66 | * Output packets are intercepted near the end of ether_output(), | |
67 | * the correct destination is selected calling bridge_dst_lookup(), | |
68 | * and then forwarding is done using bdg_forward(). | |
69 | * Bridging is controlled by the sysctl variable net.link.ether.bridge | |
70 | * | |
71 | * The arp code is also modified to let a machine answer to requests | |
72 | * irrespective of the port the request came from. | |
73 | * | |
74 | * In case of loops in the bridging topology, the bridge detects this | |
75 | * event and temporarily mutes output bridging on one of the ports. | |
76 | * Periodically, interfaces are unmuted by bdg_timeout(). | |
77 | * Muting is only implemented as a safety measure, and also as | |
78 | * a mechanism to support a user-space implementation of the spanning | |
79 | * tree algorithm. In the final release, unmuting will only occur | |
80 | * because of explicit action of the user-level daemon. | |
81 | * | |
82 | * To build a bridging kernel, use the following option | |
83 | * option BRIDGE | |
84 | * and then at runtime set the sysctl variable to enable bridging. | |
85 | * | |
86 | * Only one interface is supposed to have addresses set (but | |
87 | * there are no problems in practice if you set addresses for more | |
88 | * than one interface). | |
89 | * Bridging will act before routing, but nothing prevents a machine | |
90 | * from doing both (modulo bugs in the implementation...). | |
91 | * | |
92 | * THINGS TO REMEMBER | |
93 | * - bridging is incompatible with multicast routing on the same | |
94 | * machine. There is not an easy fix to this. | |
95 | * - loop detection is still not very robust. | |
96 | * - the interface of bdg_forward() could be improved. | |
97 | */ | |
98 | ||
99 | #include <sys/param.h> | |
100 | #include <sys/mbuf.h> | |
101 | #include <sys/malloc.h> | |
102 | #include <sys/systm.h> | |
103 | #include <sys/socket.h> /* for net/if.h */ | |
104 | #include <sys/kernel.h> | |
105 | #include <sys/sysctl.h> | |
106 | ||
107 | #include <net/if.h> | |
108 | #include <net/if_types.h> | |
109 | ||
110 | #include <netinet/in.h> /* for struct arpcom */ | |
111 | #include <netinet/in_systm.h> | |
112 | #include <netinet/in_var.h> | |
113 | #include <netinet/ip.h> | |
114 | #include <netinet/if_ether.h> /* for struct arpcom */ | |
115 | ||
116 | #include "opt_ipfw.h" | |
117 | #include "opt_ipdn.h" | |
118 | ||
119 | #if defined(IPFIREWALL) | |
120 | #include <net/route.h> | |
121 | #include <netinet/ip_fw.h> | |
122 | #if defined(DUMMYNET) | |
123 | #include <netinet/ip_dummynet.h> | |
124 | #endif | |
125 | #endif | |
126 | ||
127 | #include <net/bridge.h> | |
128 | ||
129 | /* | |
130 | * For debugging, you can use the following macros. | |
131 | * remember, rdtsc() only works on Pentium-class machines | |
132 | ||
133 | quad_t ticks; | |
134 | DDB(ticks = rdtsc();) | |
135 | ... interesting code ... | |
136 | DDB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ;) | |
137 | ||
138 | * | |
139 | */ | |
140 | ||
141 | #define DDB(x) x | |
142 | #define DEB(x) | |
143 | ||
144 | static void bdginit(void *); | |
145 | static void bdgtakeifaces(void); | |
146 | static void flush_table(void); | |
147 | static void bdg_promisc_on(void); | |
148 | static void parse_bdg_cfg(void); | |
149 | ||
150 | static int bdg_ipfw = 0 ; | |
151 | int do_bridge = 0; | |
152 | bdg_hash_table *bdg_table = NULL ; | |
153 | ||
154 | /* | |
155 | * System initialization | |
156 | */ | |
157 | ||
158 | SYSINIT(interfaces, SI_SUB_PROTO_IF, SI_ORDER_FIRST, bdginit, NULL) | |
159 | ||
160 | static struct bdg_stats bdg_stats ; | |
161 | struct bdg_softc *ifp2sc = NULL ; | |
162 | /* XXX make it static of size BDG_MAX_PORTS */ | |
163 | ||
/*
 * Execute statement `x' when the ifp2sc[] slot of `ifp' does not carry the
 * 0xDEADBEEF magic that bdgtakeifaces() writes into slots it has set up.
 *
 * The expansion is wrapped in do { } while (0) so that it is a single
 * statement (it can sit in an unbraced if/else without capturing the
 * `else'), and the `ifp' argument is parenthesised.  Callers must
 * terminate the invocation with a semicolon.
 */
#define IFP_CHK(ifp, x)                                         \
    do {                                                        \
        if (ifp2sc[(ifp)->if_index].magic != 0xDEADBEEF) {      \
            x ;                                                 \
        }                                                       \
    } while (0)
166 | ||
167 | /* | |
168 | * turn off promisc mode, optionally clear the IFF_USED flag. | |
169 | * The flag is turned on by parse_bdg_config | |
170 | */ | |
171 | static void | |
172 | bdg_promisc_off(int clear_used) | |
173 | { | |
174 | struct ifnet *ifp ; | |
175 | ifnet_head_lock_shared(); | |
176 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { | |
177 | if ( (ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
178 | int s, ret ; | |
179 | s = splimp(); | |
180 | ret = ifnet_set_promiscuous(ifp, 0); | |
181 | splx(s); | |
182 | ifp2sc[ifp->if_index].flags &= ~(IFF_BDG_PROMISC|IFF_MUTE) ; | |
183 | DEB(printf(">> now %s%d promisc OFF if_flags 0x%x bdg_flags 0x%x\n", | |
184 | ifp->if_name, ifp->if_unit, | |
185 | ifp->if_flags, ifp2sc[ifp->if_index].flags);) | |
186 | } | |
187 | if (clear_used) { | |
188 | ifp2sc[ifp->if_index].flags &= ~(IFF_USED) ; | |
189 | bdg_stats.s[ifp->if_index].name[0] = '\0'; | |
190 | } | |
191 | } | |
192 | ifnet_head_done(); | |
193 | } | |
194 | ||
195 | /* | |
196 | * set promisc mode on the interfaces we use. | |
197 | */ | |
198 | static void | |
199 | bdg_promisc_on() | |
200 | { | |
201 | struct ifnet *ifp ; | |
202 | int s ; | |
203 | ||
204 | ifnet_head_lock_shared(); | |
205 | TAILQ_FOREACH(ifp, &ifnet_head, if_link) { | |
206 | if ( !BDG_USED(ifp) ) | |
207 | continue ; | |
208 | if ( 0 == ( ifp->if_flags & IFF_UP) ) { | |
209 | s = splimp(); | |
210 | if_up(ifp); | |
211 | splx(s); | |
212 | } | |
213 | if ( !(ifp2sc[ifp->if_index].flags & IFF_BDG_PROMISC) ) { | |
214 | int ret ; | |
215 | s = splimp(); | |
216 | ret = ifnet_set_promiscuous(ifp, 1); | |
217 | splx(s); | |
218 | ifp2sc[ifp->if_index].flags |= IFF_BDG_PROMISC ; | |
219 | printf(">> now %s%d promisc ON if_flags 0x%x bdg_flags 0x%x\n", | |
220 | ifp->if_name, ifp->if_unit, | |
221 | ifp->if_flags, ifp2sc[ifp->if_index].flags); | |
222 | } | |
223 | if (BDG_MUTED(ifp)) { | |
224 | printf(">> unmuting %s%d\n", ifp->if_name, ifp->if_unit); | |
225 | BDG_UNMUTE(ifp) ; | |
226 | } | |
227 | } | |
228 | ifnet_head_done(); | |
229 | } | |
230 | ||
231 | static int | |
232 | sysctl_bdg(SYSCTL_HANDLER_ARGS) | |
233 | { | |
234 | int error, oldval = do_bridge ; | |
235 | ||
236 | error = sysctl_handle_int(oidp, | |
237 | oidp->oid_arg1, oidp->oid_arg2, req); | |
238 | DEB( printf("called sysctl for bridge name %s arg2 %d val %d->%d\n", | |
239 | oidp->oid_name, oidp->oid_arg2, | |
240 | oldval, do_bridge); ) | |
241 | ||
242 | if (bdg_table == NULL) | |
243 | do_bridge = 0 ; | |
244 | if (oldval != do_bridge) { | |
245 | bdg_promisc_off( 1 ); /* reset previously used interfaces */ | |
246 | flush_table(); | |
247 | if (do_bridge) { | |
248 | parse_bdg_cfg(); | |
249 | bdg_promisc_on(); | |
250 | } | |
251 | } | |
252 | return error ; | |
253 | } | |
254 | ||
255 | static char bridge_cfg[256] = { "" } ; | |
256 | ||
257 | /* | |
258 | * parse the config string, set IFF_USED, name and cluster_id | |
259 | * for all interfaces found. | |
260 | */ | |
261 | static void | |
262 | parse_bdg_cfg() | |
263 | { | |
264 | char *p, *beg ; | |
265 | int i, l, cluster; | |
266 | struct bdg_softc *b; | |
267 | ||
268 | for (p= bridge_cfg; *p ; p++) { | |
269 | /* interface names begin with [a-z] and continue up to ':' */ | |
270 | if (*p < 'a' || *p > 'z') | |
271 | continue ; | |
272 | for ( beg = p ; *p && *p != ':' ; p++ ) | |
273 | ; | |
274 | if (*p == 0) /* end of string, ':' not found */ | |
275 | return ; | |
276 | l = p - beg ; /* length of name string */ | |
277 | p++ ; | |
278 | DEB(printf("-- match beg(%d) <%s> p <%s>\n", l, beg, p);) | |
279 | for (cluster = 0 ; *p && *p >= '0' && *p <= '9' ; p++) | |
280 | cluster = cluster*10 + (*p -'0'); | |
281 | /* | |
282 | * now search in bridge strings | |
283 | */ | |
284 | for (i=0, b = ifp2sc ; i < if_index ; i++, b++) { | |
285 | char buf[32]; | |
286 | struct ifnet *ifp = b->ifp ; | |
287 | ||
288 | if (ifp == NULL) | |
289 | continue; | |
290 | sprintf(buf, "%s%d", ifp->if_name, ifp->if_unit); | |
291 | if (!strncmp(beg, buf, l)) { /* XXX not correct for >10 if! */ | |
292 | b->cluster_id = htons(cluster) ; | |
293 | b->flags |= IFF_USED ; | |
294 | sprintf(bdg_stats.s[ifp->if_index].name, | |
295 | "%s%d:%d", ifp->if_name, ifp->if_unit, cluster); | |
296 | ||
297 | DEB(printf("--++ found %s\n", | |
298 | bdg_stats.s[ifp->if_index].name);) | |
299 | break ; | |
300 | } | |
301 | } | |
302 | if (*p == '\0') | |
303 | break ; | |
304 | } | |
305 | } | |
306 | ||
307 | static int | |
308 | sysctl_bdg_cfg(SYSCTL_HANDLER_ARGS) | |
309 | { | |
310 | int error = 0 ; | |
311 | char oldval[256] ; | |
312 | ||
313 | strlcpy(oldval, bridge_cfg, sizeof (oldval)); | |
314 | ||
315 | error = sysctl_handle_string(oidp, | |
316 | bridge_cfg, oidp->oid_arg2, req); | |
317 | DEB( | |
318 | printf("called sysctl for bridge name %s arg2 %d err %d val %s->%s\n", | |
319 | oidp->oid_name, oidp->oid_arg2, | |
320 | error, | |
321 | oldval, bridge_cfg); | |
322 | ) | |
323 | if (strcmp(oldval, bridge_cfg)) { | |
324 | bdg_promisc_off( 1 ); /* reset previously-used interfaces */ | |
325 | flush_table(); | |
326 | parse_bdg_cfg(); /* and set new ones... */ | |
327 | if (do_bridge) | |
328 | bdg_promisc_on(); /* re-enable interfaces */ | |
329 | } | |
330 | return error ; | |
331 | } | |
332 | ||
333 | static int | |
334 | sysctl_refresh(SYSCTL_HANDLER_ARGS) | |
335 | { | |
336 | if (req->newptr) | |
337 | bdgtakeifaces(); | |
338 | ||
339 | return 0; | |
340 | } | |
341 | ||
342 | ||
343 | SYSCTL_DECL(_net_link_ether); | |
344 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_cfg, CTLTYPE_STRING|CTLFLAG_RW, | |
345 | &bridge_cfg, sizeof(bridge_cfg), &sysctl_bdg_cfg, "A", | |
346 | "Bridge configuration"); | |
347 | ||
348 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge, CTLTYPE_INT|CTLFLAG_RW, | |
349 | &do_bridge, 0, &sysctl_bdg, "I", "Bridging"); | |
350 | ||
351 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw, CTLFLAG_RW, | |
352 | &bdg_ipfw,0,"Pass bridged pkts through firewall"); | |
353 | ||
354 | #define SY(parent, var, comment) \ | |
355 | static int var ; \ | |
356 | SYSCTL_INT(parent, OID_AUTO, var, CTLFLAG_RW, &(var), 0, comment); | |
357 | ||
358 | int bdg_ipfw_drops; | |
359 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_drop, | |
360 | CTLFLAG_RW, &bdg_ipfw_drops,0,""); | |
361 | ||
362 | int bdg_ipfw_colls; | |
363 | SYSCTL_INT(_net_link_ether, OID_AUTO, bridge_ipfw_collisions, | |
364 | CTLFLAG_RW, &bdg_ipfw_colls,0,""); | |
365 | ||
366 | SYSCTL_PROC(_net_link_ether, OID_AUTO, bridge_refresh, CTLTYPE_INT|CTLFLAG_WR, | |
367 | NULL, 0, &sysctl_refresh, "I", "iface refresh"); | |
368 | ||
369 | #if 1 /* diagnostic vars */ | |
370 | ||
371 | SY(_net_link_ether, verbose, "Be verbose"); | |
372 | SY(_net_link_ether, bdg_split_pkts, "Packets split in bdg_forward"); | |
373 | ||
374 | SY(_net_link_ether, bdg_thru, "Packets through bridge"); | |
375 | ||
376 | SY(_net_link_ether, bdg_copied, "Packets copied in bdg_forward"); | |
377 | ||
378 | SY(_net_link_ether, bdg_copy, "Force copy in bdg_forward"); | |
379 | SY(_net_link_ether, bdg_predict, "Correctly predicted header location"); | |
380 | ||
381 | SY(_net_link_ether, bdg_fw_avg, "Cycle counter avg"); | |
382 | SY(_net_link_ether, bdg_fw_ticks, "Cycle counter item"); | |
383 | SY(_net_link_ether, bdg_fw_count, "Cycle counter count"); | |
384 | #endif | |
385 | ||
386 | SYSCTL_STRUCT(_net_link_ether, PF_BDG, bdgstats, | |
387 | CTLFLAG_RD, &bdg_stats , bdg_stats, "bridge statistics"); | |
388 | ||
389 | static int bdg_loops ; | |
390 | ||
391 | /* | |
392 | * completely flush the bridge table. | |
393 | */ | |
394 | static void | |
395 | flush_table() | |
396 | { | |
397 | int s,i; | |
398 | ||
399 | if (bdg_table == NULL) | |
400 | return ; | |
401 | s = splimp(); | |
402 | for (i=0; i< HASH_SIZE; i++) | |
403 | bdg_table[i].name= NULL; /* clear table */ | |
404 | splx(s); | |
405 | } | |
406 | ||
407 | /* | |
408 | * called periodically to flush entries etc. | |
409 | */ | |
410 | static void | |
411 | bdg_timeout(void *dummy) | |
412 | { | |
413 | static int slowtimer = 0 ; | |
414 | ||
415 | if (do_bridge) { | |
416 | static int age_index = 0 ; /* index of table position to age */ | |
417 | int l = age_index + HASH_SIZE/4 ; | |
418 | /* | |
419 | * age entries in the forwarding table. | |
420 | */ | |
421 | if (l > HASH_SIZE) | |
422 | l = HASH_SIZE ; | |
423 | for (; age_index < l ; age_index++) | |
424 | if (bdg_table[age_index].used) | |
425 | bdg_table[age_index].used = 0 ; | |
426 | else if (bdg_table[age_index].name) { | |
427 | /* printf("xx flushing stale entry %d\n", age_index); */ | |
428 | bdg_table[age_index].name = NULL ; | |
429 | } | |
430 | if (age_index >= HASH_SIZE) | |
431 | age_index = 0 ; | |
432 | ||
433 | if (--slowtimer <= 0 ) { | |
434 | slowtimer = 5 ; | |
435 | ||
436 | bdg_promisc_on() ; /* we just need unmute, really */ | |
437 | bdg_loops = 0 ; | |
438 | } | |
439 | } | |
440 | timeout(bdg_timeout, (void *)0, 2*hz ); | |
441 | } | |
442 | ||
443 | /* | |
444 | * local MAC addresses are held in a small array. This makes comparisons | |
445 | * much faster. | |
446 | */ | |
447 | bdg_addr bdg_addresses[BDG_MAX_PORTS]; | |
448 | int bdg_ports ; | |
449 | ||
450 | /* | |
451 | * initialization of bridge code. This needs to be done after all | |
452 | * interfaces have been configured. | |
453 | */ | |
454 | static void | |
455 | bdginit(void *dummy) | |
456 | { | |
457 | ||
458 | if (bdg_table == NULL) | |
459 | bdg_table = (struct hash_table *) | |
460 | _MALLOC(HASH_SIZE * sizeof(struct hash_table), | |
461 | M_IFADDR, M_WAITOK); | |
462 | flush_table(); | |
463 | ||
464 | ifp2sc = _MALLOC(BDG_MAX_PORTS * sizeof(struct bdg_softc), | |
465 | M_IFADDR, M_WAITOK ); | |
466 | bzero(ifp2sc, BDG_MAX_PORTS * sizeof(struct bdg_softc) ); | |
467 | ||
468 | bzero(&bdg_stats, sizeof(bdg_stats) ); | |
469 | bdgtakeifaces(); | |
470 | bdg_timeout(0); | |
471 | do_bridge=0; | |
472 | } | |
473 | ||
474 | void | |
475 | bdgtakeifaces(void) | |
476 | { | |
477 | int i ; | |
478 | struct ifnet *ifp; | |
479 | bdg_addr *p = bdg_addresses ; | |
480 | struct bdg_softc *bp; | |
481 | ||
482 | bdg_ports = 0 ; | |
483 | *bridge_cfg = '\0'; | |
484 | ||
485 | printf("BRIDGE 010131, have %d interfaces\n", if_index); | |
486 | ifnet_head_lock_shared(); | |
487 | for (i = 0 , ifp = ifnet.tqh_first ; i < if_index ; | |
488 | i++, ifp = TAILQ_NEXT(ifp, if_link) ) | |
489 | if (ifp->if_type == IFT_ETHER) { /* ethernet ? */ | |
490 | ifnet_lladdr_copy_bytes(ifp, p->etheraddr, ETHER_ADDR_LEN); | |
491 | bp = &ifp2sc[ifp->if_index] ; | |
492 | sprintf(bridge_cfg + strlen(bridge_cfg), | |
493 | "%s%d:1,", ifp->if_name, ifp->if_unit); | |
494 | printf("-- index %d %s type %d phy %d addrl %d addr %6D\n", | |
495 | ifp->if_index, | |
496 | bdg_stats.s[ifp->if_index].name, | |
497 | (int)ifp->if_type, (int) ifp->if_physical, | |
498 | (int)ifp->if_addrlen, | |
499 | p->etheraddr, "." ); | |
500 | p++ ; | |
501 | bp->ifp = ifp ; | |
502 | bp->flags = IFF_USED ; | |
503 | bp->cluster_id = htons(1) ; | |
504 | bp->magic = 0xDEADBEEF ; | |
505 | ||
506 | sprintf(bdg_stats.s[ifp->if_index].name, | |
507 | "%s%d:%d", ifp->if_name, ifp->if_unit, | |
508 | ntohs(bp->cluster_id)); | |
509 | bdg_ports ++ ; | |
510 | } | |
511 | ifnet_head_done(); | |
512 | } | |
513 | ||
514 | /* | |
515 | * bridge_in() is invoked to perform bridging decision on input packets. | |
516 | * | |
517 | * On Input: | |
518 | * eh Ethernet header of the incoming packet. | |
519 | * | |
520 | * On Return: destination of packet, one of | |
521 | * BDG_BCAST broadcast | |
522 | * BDG_MCAST multicast | |
523 | * BDG_LOCAL is only for a local address (do not forward) | |
524 | * BDG_DROP drop the packet | |
525 | * ifp ifp of the destination interface. | |
526 | * | |
527 | * Forwarding is not done directly to give a chance to some drivers | |
528 | * to fetch more of the packet, or simply drop it completely. | |
529 | */ | |
530 | ||
531 | struct ifnet * | |
532 | bridge_in(struct ifnet *ifp, struct ether_header *eh) | |
533 | { | |
534 | int index; | |
535 | struct ifnet *dst , *old ; | |
536 | int dropit = BDG_MUTED(ifp) ; | |
537 | ||
538 | /* | |
539 | * hash the source address | |
540 | */ | |
541 | index= HASH_FN(eh->ether_shost); | |
542 | bdg_table[index].used = 1 ; | |
543 | old = bdg_table[index].name ; | |
544 | if ( old ) { /* the entry is valid. */ | |
545 | IFP_CHK(old, printf("bridge_in-- reading table\n") ); | |
546 | ||
547 | if (!BDG_MATCH( eh->ether_shost, bdg_table[index].etheraddr) ) { | |
548 | bdg_ipfw_colls++ ; | |
549 | bdg_table[index].name = NULL ; | |
550 | } else if (old != ifp) { | |
551 | /* | |
552 | * found a loop. Either a machine has moved, or there | |
553 | * is a misconfiguration/reconfiguration of the network. | |
554 | * First, do not forward this packet! | |
555 | * Record the relocation anyways; then, if loops persist, | |
556 | * suspect a reconfiguration and disable forwarding | |
557 | * from the old interface. | |
558 | */ | |
559 | bdg_table[index].name = ifp ; /* relocate address */ | |
560 | printf("-- loop (%d) %6D to %s%d from %s%d (%s)\n", | |
561 | bdg_loops, eh->ether_shost, ".", | |
562 | ifp->if_name, ifp->if_unit, | |
563 | old->if_name, old->if_unit, | |
564 | BDG_MUTED(old) ? "muted":"active"); | |
565 | dropit = 1 ; | |
566 | if ( !BDG_MUTED(old) ) { | |
567 | if (++bdg_loops > 10) | |
568 | BDG_MUTE(old) ; | |
569 | } | |
570 | } | |
571 | } | |
572 | ||
573 | /* | |
574 | * now write the source address into the table | |
575 | */ | |
576 | if (bdg_table[index].name == NULL) { | |
577 | DEB(printf("new addr %6D at %d for %s%d\n", | |
578 | eh->ether_shost, ".", index, ifp->if_name, ifp->if_unit);) | |
579 | bcopy(eh->ether_shost, bdg_table[index].etheraddr, 6); | |
580 | bdg_table[index].name = ifp ; | |
581 | } | |
582 | dst = bridge_dst_lookup(eh); | |
583 | /* Return values: | |
584 | * BDG_BCAST, BDG_MCAST, BDG_LOCAL, BDG_UNKNOWN, BDG_DROP, ifp. | |
585 | * For muted interfaces, the first 3 are changed in BDG_LOCAL, | |
586 | * and others to BDG_DROP. Also, for incoming packets, ifp is changed | |
587 | * to BDG_DROP in case ifp == src . These mods are not necessary | |
588 | * for outgoing packets from ether_output(). | |
589 | */ | |
590 | BDG_STAT(ifp, BDG_IN); | |
591 | switch ((int)dst) { | |
592 | case (int)BDG_BCAST: | |
593 | case (int)BDG_MCAST: | |
594 | case (int)BDG_LOCAL: | |
595 | case (int)BDG_UNKNOWN: | |
596 | case (int)BDG_DROP: | |
597 | BDG_STAT(ifp, dst); | |
598 | break ; | |
599 | default : | |
600 | if (dst == ifp || dropit ) | |
601 | BDG_STAT(ifp, BDG_DROP); | |
602 | else | |
603 | BDG_STAT(ifp, BDG_FORWARD); | |
604 | break ; | |
605 | } | |
606 | ||
607 | if ( dropit ) { | |
608 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_LOCAL) | |
609 | return BDG_LOCAL ; | |
610 | else | |
611 | return BDG_DROP ; | |
612 | } else { | |
613 | return (dst == ifp ? BDG_DROP : dst ) ; | |
614 | } | |
615 | } | |
616 | ||
617 | /* | |
618 | * Forward to dst, excluding src port and muted interfaces. | |
619 | * If src == NULL, the pkt comes from ether_output, and dst is the real | |
620 | * interface the packet is originally sent to. In this case we must forward | |
621 | * it to the whole cluster. We never call bdg_forward ether_output on | |
622 | * interfaces which are not part of a cluster. | |
623 | * | |
624 | * The packet is freed if possible (i.e. surely not of interest for | |
625 | * the upper layer), otherwise a copy is left for use by the caller | |
626 | * (pointer in m0). | |
627 | * | |
628 | * It would be more efficient to make bdg_forward() always consume | |
629 | * the packet, leaving to the caller the task to check if it needs a copy | |
630 | * and get one in case. As it is now, bdg_forward() can sometimes make | |
631 | * a copy whereas it is not necessary. | |
632 | * | |
633 | * XXX be careful about eh, it can be a pointer into *m | |
634 | */ | |
635 | struct mbuf * | |
636 | bdg_forward(struct mbuf *m0, struct ether_header *const eh, struct ifnet *dst) | |
637 | { | |
638 | struct ifnet *src = m0->m_pkthdr.rcvif; /* could be NULL in output */ | |
639 | struct ifnet *ifp, *last = NULL ; | |
640 | int s ; | |
641 | int shared = bdg_copy ; /* someone else is using the mbuf */ | |
642 | int once = 0; /* loop only once */ | |
643 | struct ifnet *real_dst = dst ; /* real dst from ether_output */ | |
644 | #ifdef IPFIREWALL | |
645 | struct ip_fw_chain *rule = NULL ; /* did we match a firewall rule ? */ | |
646 | #endif | |
647 | ||
648 | /* | |
649 | * XXX eh is usually a pointer within the mbuf (some ethernet drivers | |
650 | * do that), so we better copy it before doing anything with the mbuf, | |
651 | * or we might corrupt the header. | |
652 | */ | |
653 | struct ether_header save_eh = *eh ; | |
654 | ||
655 | #if defined(IPFIREWALL) && defined(DUMMYNET) | |
656 | if (m0->m_type == MT_DUMMYNET) { | |
657 | /* extract info from dummynet header */ | |
658 | rule = (struct ip_fw_chain *)(m0->m_data) ; | |
659 | m0 = m0->m_next ; | |
660 | src = m0->m_pkthdr.rcvif; | |
661 | shared = 0 ; /* For sure this is our own mbuf. */ | |
662 | } else | |
663 | #endif | |
664 | bdg_thru++; /* only count once */ | |
665 | ||
666 | if (src == NULL) /* packet from ether_output */ | |
667 | dst = bridge_dst_lookup(eh); | |
668 | if (dst == BDG_DROP) { /* this should not happen */ | |
669 | printf("xx bdg_forward for BDG_DROP\n"); | |
670 | m_freem(m0); | |
671 | return NULL; | |
672 | } | |
673 | if (dst == BDG_LOCAL) { /* this should not happen as well */ | |
674 | printf("xx ouch, bdg_forward for local pkt\n"); | |
675 | return m0; | |
676 | } | |
677 | if (dst == BDG_BCAST || dst == BDG_MCAST || dst == BDG_UNKNOWN) { | |
678 | ifp = ifnet_head.tqh_first ; /* scan all ports */ | |
679 | once = 0 ; | |
680 | if (dst != BDG_UNKNOWN) /* need a copy for the local stack */ | |
681 | shared = 1 ; | |
682 | } else { | |
683 | ifp = dst ; | |
684 | once = 1 ; | |
685 | } | |
686 | if ( (u_int)(ifp) <= (u_int)BDG_FORWARD ) | |
687 | panic("bdg_forward: bad dst"); | |
688 | ||
689 | #ifdef IPFIREWALL | |
690 | /* | |
691 | * Do filtering in a very similar way to what is done in ip_output. | |
692 | * Only if firewall is loaded, enabled, and the packet is not | |
693 | * from ether_output() (src==NULL, or we would filter it twice). | |
694 | * Additional restrictions may apply e.g. non-IP, short packets, | |
695 | * and pkts already gone through a pipe. | |
696 | */ | |
697 | if (ip_fw_chk_ptr && bdg_ipfw != 0 && src != NULL) { | |
698 | struct ip *ip ; | |
699 | int i; | |
700 | ||
701 | if (rule != NULL) /* dummynet packet, already partially processed */ | |
702 | goto forward; /* HACK! I should obey the fw_one_pass */ | |
703 | if (ntohs(save_eh.ether_type) != ETHERTYPE_IP) | |
704 | goto forward ; /* not an IP packet, ipfw is not appropriate */ | |
705 | if (m0->m_pkthdr.len < sizeof(struct ip) ) | |
706 | goto forward ; /* header too short for an IP pkt, cannot filter */ | |
707 | /* | |
708 | * i need some amt of data to be contiguous, and in case others need | |
709 | * the packet (shared==1) also better be in the first mbuf. | |
710 | */ | |
711 | i = min(m0->m_pkthdr.len, max_protohdr) ; | |
712 | if ( shared || m0->m_len < i) { | |
713 | m0 = m_pullup(m0, i) ; | |
714 | if (m0 == NULL) { | |
715 | printf("-- bdg: pullup failed.\n") ; | |
716 | return NULL ; | |
717 | } | |
718 | } | |
719 | ||
720 | /* | |
721 | * before calling the firewall, swap fields the same as IP does. | |
722 | * here we assume the pkt is an IP one and the header is contiguous | |
723 | */ | |
724 | ip = mtod(m0, struct ip *); | |
725 | NTOHS(ip->ip_len); | |
726 | NTOHS(ip->ip_off); | |
727 | ||
728 | /* | |
729 | * The third parameter to the firewall code is the dst. interface. | |
730 | * Since we apply checks only on input pkts we use NULL. | |
731 | * The firewall knows this is a bridged packet as the cookie ptr | |
732 | * is NULL. | |
733 | */ | |
734 | i = (*ip_fw_chk_ptr)(&ip, 0, NULL, NULL /* cookie */, &m0, &rule, NULL); | |
735 | if ( (i & IP_FW_PORT_DENY_FLAG) || m0 == NULL) /* drop */ | |
736 | return m0 ; | |
737 | /* | |
738 | * If we get here, the firewall has passed the pkt, but the mbuf | |
739 | * pointer might have changed. Restore ip and the fields NTOHS()'d. | |
740 | */ | |
741 | ip = mtod(m0, struct ip *); | |
742 | HTONS(ip->ip_len); | |
743 | HTONS(ip->ip_off); | |
744 | ||
745 | if (i == 0) /* a PASS rule. */ | |
746 | goto forward ; | |
747 | #ifdef DUMMYNET | |
748 | if (i & IP_FW_PORT_DYNT_FLAG) { | |
749 | /* | |
750 | * Pass the pkt to dummynet, which consumes it. | |
751 | * If shared, make a copy and keep the original. | |
752 | * Need to prepend the ethernet header, optimize the common | |
753 | * case of eh pointing already into the original mbuf. | |
754 | */ | |
755 | struct mbuf *m ; | |
756 | if (shared) { | |
757 | m = m_copypacket(m0, M_DONTWAIT); | |
758 | if (m == NULL) { | |
759 | printf("bdg_fwd: copy(1) failed\n"); | |
760 | return m0; | |
761 | } | |
762 | } else { | |
763 | m = m0 ; /* pass the original to dummynet */ | |
764 | m0 = NULL ; /* and nothing back to the caller */ | |
765 | } | |
766 | if ( (void *)(eh + 1) == (void *)m->m_data) { | |
767 | m->m_data -= ETHER_HDR_LEN ; | |
768 | m->m_len += ETHER_HDR_LEN ; | |
769 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
770 | bdg_predict++; | |
771 | } else { | |
772 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
773 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
774 | if (m == NULL) /* nope... */ | |
775 | return m0 ; | |
776 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
777 | } | |
778 | dummynet_io((i & 0xffff),DN_TO_BDG_FWD,m,real_dst,NULL,0,rule,0); | |
779 | return m0 ; | |
780 | } | |
781 | #endif | |
782 | /* | |
783 | * XXX add divert/forward actions... | |
784 | */ | |
785 | /* if none of the above matches, we have to drop the pkt */ | |
786 | bdg_ipfw_drops++ ; | |
787 | printf("bdg_forward: No rules match, so dropping packet!\n"); | |
788 | return m0 ; | |
789 | } | |
790 | forward: | |
791 | #endif /* IPFIREWALL */ | |
792 | /* | |
793 | * Again, bring up the headers in case of shared bufs to avoid | |
794 | * corruptions in the future. | |
795 | */ | |
796 | if ( shared ) { | |
797 | int i = min(m0->m_pkthdr.len, max_protohdr) ; | |
798 | ||
799 | m0 = m_pullup(m0, i) ; | |
800 | if (m0 == NULL) { | |
801 | printf("-- bdg: pullup2 failed.\n") ; | |
802 | return NULL ; | |
803 | } | |
804 | } | |
805 | /* now real_dst is used to determine the cluster where to forward */ | |
806 | if (src != NULL) /* pkt comes from ether_input */ | |
807 | real_dst = src ; | |
808 | for (;;) { | |
809 | if (last) { /* need to forward packet leftover from previous loop */ | |
810 | struct mbuf *m ; | |
811 | if (shared == 0 && once ) { /* no need to copy */ | |
812 | m = m0 ; | |
813 | m0 = NULL ; /* original is gone */ | |
814 | } else { | |
815 | m = m_copypacket(m0, M_DONTWAIT); | |
816 | if (m == NULL) { | |
817 | printf("bdg_forward: sorry, m_copypacket failed!\n"); | |
818 | return m0 ; /* the original is still there... */ | |
819 | } | |
820 | } | |
821 | /* | |
822 | * Add header (optimized for the common case of eh pointing | |
823 | * already into the mbuf) and execute last part of ether_output: | |
824 | * queue pkt and start output if interface not yet active. | |
825 | */ | |
826 | if ( (void *)(eh + 1) == (void *)m->m_data) { | |
827 | m->m_data -= ETHER_HDR_LEN ; | |
828 | m->m_len += ETHER_HDR_LEN ; | |
829 | m->m_pkthdr.len += ETHER_HDR_LEN ; | |
830 | bdg_predict++; | |
831 | } else { | |
832 | M_PREPEND(m, ETHER_HDR_LEN, M_DONTWAIT); | |
833 | if (!m && verbose) printf("M_PREPEND failed\n"); | |
834 | if (m == NULL) | |
835 | return m0; | |
836 | bcopy(&save_eh, mtod(m, struct ether_header *), ETHER_HDR_LEN); | |
837 | } | |
838 | s = splimp(); | |
839 | if (IF_QFULL(&last->if_snd)) { | |
840 | IF_DROP(&last->if_snd); | |
841 | #if 0 | |
842 | BDG_MUTE(last); /* should I also mute ? */ | |
843 | #endif | |
844 | splx(s); | |
845 | m_freem(m); /* consume the pkt anyways */ | |
846 | } else { | |
847 | last->if_obytes += m->m_pkthdr.len ; | |
848 | if (m->m_flags & M_MCAST) | |
849 | last->if_omcasts++; | |
850 | if (m->m_pkthdr.len != m->m_len) /* this pkt is on >1 bufs */ | |
851 | bdg_split_pkts++; | |
852 | ||
853 | IF_ENQUEUE(&last->if_snd, m); | |
854 | if ((last->if_flags & IFF_OACTIVE) == 0) | |
855 | (*last->if_start)(last); | |
856 | splx(s); | |
857 | } | |
858 | BDG_STAT(last, BDG_OUT); | |
859 | last = NULL ; | |
860 | if (once) | |
861 | break ; | |
862 | } | |
863 | if (ifp == NULL) | |
864 | break ; | |
865 | /* | |
866 | * If the interface is used for bridging, not muted, not full, | |
867 | * up and running, is not the source interface, and belongs to | |
868 | * the same cluster as the 'real_dst', then send here. | |
869 | */ | |
870 | if ( BDG_USED(ifp) && !BDG_MUTED(ifp) && !IF_QFULL(&ifp->if_snd) && | |
871 | (ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING) && | |
872 | ifp != src && BDG_SAMECLUSTER(ifp, real_dst) ) | |
873 | last = ifp ; | |
874 | ifp = TAILQ_NEXT(ifp, if_link) ; | |
875 | if (ifp == NULL) | |
876 | once = 1 ; | |
877 | } | |
878 | DEB(bdg_fw_ticks += (u_long)(rdtsc() - ticks) ; bdg_fw_count++ ; | |
879 | if (bdg_fw_count != 0) bdg_fw_avg = bdg_fw_ticks/bdg_fw_count; ) | |
880 | return m0 ; | |
881 | } |