]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2007 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
30 | * support for mandatory and extensible security protections. This notice | |
31 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
32 | * Version 2.0. | |
33 | */ | |
34 | /* | |
35 | * IP multicast forwarding procedures | |
36 | * | |
37 | * Written by David Waitzman, BBN Labs, August 1988. | |
38 | * Modified by Steve Deering, Stanford, February 1989. | |
39 | * Modified by Mark J. Steiglitz, Stanford, May, 1991 | |
40 | * Modified by Van Jacobson, LBL, January 1993 | |
41 | * Modified by Ajit Thyagarajan, PARC, August 1993 | |
42 | * Modified by Bill Fenner, PARC, April 1995 | |
43 | * | |
44 | * MROUTING Revision: 3.5 | |
45 | * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $ | |
46 | */ | |
47 | ||
48 | ||
49 | #include <sys/param.h> | |
50 | #include <sys/systm.h> | |
51 | #include <sys/malloc.h> | |
52 | #include <sys/mbuf.h> | |
53 | #include <sys/socket.h> | |
54 | #include <sys/socketvar.h> | |
55 | #include <sys/protosw.h> | |
56 | #include <sys/time.h> | |
57 | #include <sys/kernel.h> | |
58 | #include <sys/sockio.h> | |
59 | #include <sys/syslog.h> | |
60 | ||
61 | #include <machine/endian.h> | |
62 | ||
63 | #include <net/if.h> | |
64 | #include <net/route.h> | |
65 | #include <net/kpi_protocol.h> | |
66 | #include <netinet/in.h> | |
67 | #include <netinet/in_systm.h> | |
68 | #include <netinet/ip.h> | |
69 | #include <netinet/ip_var.h> | |
70 | #include <netinet/in_var.h> | |
71 | #include <netinet/igmp.h> | |
72 | #include <netinet/ip_mroute.h> | |
73 | #include <netinet/udp.h> | |
74 | ||
75 | #if CONFIG_MACF_NET | |
76 | #include <security/mac_framework.h> | |
77 | #endif | |
78 | ||
79 | ||
80 | #if !MROUTING | |
81 | extern u_int32_t _ip_mcast_src(int vifi); | |
82 | extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, | |
83 | struct mbuf *m, struct ip_moptions *imo); | |
84 | extern int _ip_mrouter_done(void); | |
85 | extern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); | |
86 | extern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); | |
87 | extern int _mrt_ioctl(int req, caddr_t data, struct proc *p); | |
88 | ||
89 | /* | |
90 | * Dummy routines and globals used when multicast routing is not compiled in. | |
91 | */ | |
92 | ||
93 | struct socket *ip_mrouter = NULL; | |
94 | u_int rsvpdebug = 0; | |
95 | ||
/*
 * Dummy set-option handler used when multicast routing is not compiled in.
 * Both arguments are ignored; the request is always refused with EOPNOTSUPP.
 */
int
_ip_mrouter_set(__unused struct socket *so, __unused struct sockopt *sopt)
{
	return EOPNOTSUPP;
}

/* Hook through which the rest of the stack reaches the handler above. */
int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
104 | ||
105 | ||
/*
 * Dummy get-option handler used when multicast routing is not compiled in.
 * Both arguments are ignored; the request is always refused with EOPNOTSUPP.
 *
 * The second parameter must be spelled "struct sockopt *": a bare
 * "sockopt" is not a type name in C and does not match the hook below.
 */
int
_ip_mrouter_get(__unused struct socket *so,
    __unused struct sockopt *sopt)
{
	return(EOPNOTSUPP);
}

int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
114 | ||
/*
 * Dummy shutdown routine used when multicast routing is not compiled in.
 * There is nothing to tear down, so it simply reports success (0).
 */
int
_ip_mrouter_done(void)
{
	return 0;
}

/* Hook through which the rest of the stack reaches the routine above. */
int (*ip_mrouter_done)(void) = _ip_mrouter_done;
122 | ||
/*
 * Dummy forwarding routine used when multicast routing is not compiled in.
 * All arguments are ignored and 0 is returned.
 *
 * The last parameter must be spelled "struct ip_moptions *": a bare
 * "ip_moptions" is not a type name in C and does not match the hook below.
 */
int
_ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp,
    __unused struct mbuf *m, __unused struct ip_moptions *imo)
{
	return(0);
}

int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
    struct ip_moptions *) = _ip_mforward;
132 | ||
/*
 * Dummy ioctl handler used when multicast routing is not compiled in.
 * All arguments are ignored; the request is always refused with EOPNOTSUPP.
 */
int
_mrt_ioctl(__unused int req, __unused caddr_t data, __unused struct proc *p)
{
	return (EOPNOTSUPP);
}

/* Hook through which the rest of the stack reaches the handler above. */
int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
140 | ||
141 | void | |
142 | rsvp_input(struct mbuf *m, int iphlen) /* XXX must fixup manually */ | |
143 | { | |
144 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
145 | * of the group to which the RSVP packet is addressed. But in this | |
146 | * case we want to throw the packet away. | |
147 | */ | |
148 | if (!rsvp_on) { | |
149 | m_freem(m); | |
150 | return; | |
151 | } | |
152 | ||
153 | if (ip_rsvpd != NULL) { | |
154 | if (rsvpdebug) | |
155 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
156 | rip_input(m, iphlen); | |
157 | return; | |
158 | } | |
159 | /* Drop the packet */ | |
160 | m_freem(m); | |
161 | } | |
162 | ||
/*
 * IP-in-IP input for the build without multicast routing: the packet is
 * passed straight to rip_input() with the same arguments.
 */
void
ipip_input(struct mbuf *m, int iphlen)	/* XXX must fixup manually */
{
	rip_input(m, iphlen);
}
166 | ||
167 | int (*legal_vif_num)(int) = 0; | |
168 | ||
/*
 * This should never be called, since IP_MULTICAST_VIF should fail, but
 * just in case it does get called, the code a little lower in ip_output
 * will assign the packet a local address.
 *
 * The vif index is ignored (and marked __unused like the other dummy
 * routines in this section); INADDR_ANY is always returned.
 */
u_int32_t
_ip_mcast_src(__unused int vifi)
{
	return INADDR_ANY;
}

u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src;
177 | ||
/*
 * Dummy routine for the build without multicast routing.  Both arguments
 * are ignored and EINVAL is always returned.
 *
 * Written as a prototype-style definition (the old identifier-list form is
 * obsolescent) with the unused parameters marked as such.
 */
int
ip_rsvp_vif_init(__unused struct socket *so, __unused struct sockopt *sopt)
{
	return(EINVAL);
}
185 | ||
/*
 * Dummy routine for the build without multicast routing.  Both arguments
 * are ignored and EINVAL is always returned.
 *
 * Written as a prototype-style definition (the old identifier-list form is
 * obsolescent) with the unused parameters marked as such.
 */
int
ip_rsvp_vif_done(__unused struct socket *so, __unused struct sockopt *sopt)
{
	return(EINVAL);
}
193 | ||
/*
 * Dummy routine for the build without multicast routing.  The socket
 * argument is ignored and nothing is done.
 *
 * Written as a prototype-style definition (the old identifier-list form is
 * obsolescent) with the unused parameter marked as such.
 */
void
ip_rsvp_force_done(__unused struct socket *so)
{
}
200 | ||
201 | #else /* MROUTING */ | |
202 | ||
203 | #define M_HASCL(m) ((m)->m_flags & M_EXT) | |
204 | ||
205 | #define INSIZ sizeof(struct in_addr) | |
206 | #define same(a1, a2) \ | |
207 | (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) | |
208 | ||
209 | ||
210 | /* | |
211 | * Globals. All but ip_mrouter and ip_mrtproto could be static, | |
212 | * except for netstat or debugging purposes. | |
213 | */ | |
214 | #ifndef MROUTE_LKM | |
215 | struct socket *ip_mrouter = NULL; | |
216 | static struct mrtstat mrtstat; | |
217 | #else /* MROUTE_LKM */ | |
218 | extern void X_ipip_input(struct mbuf *m, int iphlen); | |
219 | extern struct mrtstat mrtstat; | |
220 | static int ip_mrtproto; | |
221 | #endif | |
222 | ||
223 | #define NO_RTE_FOUND 0x1 | |
224 | #define RTE_FOUND 0x2 | |
225 | ||
226 | static struct mfc *mfctable[CONFIG_MFCTBLSIZ]; | |
227 | static u_char nexpire[CONFIG_MFCTBLSIZ]; | |
228 | static struct vif viftable[CONFIG_MAXVIFS]; | |
229 | static u_int mrtdebug = 0; /* debug level */ | |
230 | #define DEBUG_MFC 0x02 | |
231 | #define DEBUG_FORWARD 0x04 | |
232 | #define DEBUG_EXPIRE 0x08 | |
233 | #define DEBUG_XMIT 0x10 | |
234 | static u_int tbfdebug = 0; /* tbf debug level */ | |
235 | static u_int rsvpdebug = 0; /* rsvp debug level */ | |
236 | ||
237 | #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ | |
238 | #define UPCALL_EXPIRE 6 /* number of timeouts */ | |
239 | ||
240 | /* | |
241 | * Define the token bucket filter structures | |
242 | * tbftable -> each vif has one of these for storing info | |
243 | */ | |
244 | ||
245 | static struct tbf tbftable[CONFIG_MAXVIFS]; | |
246 | #define TBF_REPROCESS (hz / 100) /* 100x / second */ | |
247 | ||
248 | /* | |
249 | * 'Interfaces' associated with decapsulator (so we can tell | |
250 | * packets that went through it from ones that get reflected | |
251 | * by a broken gateway). These interfaces are never linked into | |
252 | * the system ifnet list & no routes point to them. I.e., packets | |
253 | * can't be sent this way. They only exist as a placeholder for | |
254 | * multicast source verification. | |
255 | */ | |
256 | static struct ifnet multicast_decap_if[CONFIG_MAXVIFS]; | |
257 | ||
258 | #define ENCAP_TTL 64 | |
259 | #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ | |
260 | ||
261 | /* prototype IP hdr for encapsulated packets */ | |
262 | static struct ip multicast_encap_iphdr = { | |
263 | #if BYTE_ORDER == LITTLE_ENDIAN | |
264 | sizeof(struct ip) >> 2, IPVERSION, | |
265 | #else | |
266 | IPVERSION, sizeof(struct ip) >> 2, | |
267 | #endif | |
268 | 0, /* tos */ | |
269 | sizeof(struct ip), /* total length */ | |
270 | 0, /* id */ | |
271 | 0, /* frag offset */ | |
272 | ENCAP_TTL, ENCAP_PROTO, | |
273 | 0, /* checksum */ | |
274 | { 0 }, { 0 } | |
275 | }; | |
276 | ||
277 | /* | |
278 | * Private variables. | |
279 | */ | |
280 | static vifi_t numvifs = 0; | |
281 | static int have_encap_tunnel = 0; | |
282 | ||
283 | /* | |
284 | * one-back cache used by ipip_input to locate a tunnel's vif | |
285 | * given a datagram's src ip address. | |
286 | */ | |
287 | static u_int32_t last_encap_src; | |
288 | static struct vif *last_encap_vif; | |
289 | ||
290 | static u_int32_t X_ip_mcast_src(int vifi); | |
291 | static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); | |
292 | static int X_ip_mrouter_done(void); | |
293 | static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); | |
294 | static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); | |
295 | static int X_legal_vif_num(int vif); | |
296 | static int X_mrt_ioctl(int cmd, caddr_t data); | |
297 | ||
298 | static int get_sg_cnt(struct sioc_sg_req *); | |
299 | static int get_vif_cnt(struct sioc_vif_req *); | |
300 | static int ip_mrouter_init(struct socket *, int); | |
301 | static int add_vif(struct vifctl *); | |
302 | static int del_vif(vifi_t); | |
303 | static int add_mfc(struct mfcctl *); | |
304 | static int del_mfc(struct mfcctl *); | |
305 | static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); | |
306 | static int set_assert(int); | |
307 | static void expire_upcalls(void *); | |
308 | static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, | |
309 | vifi_t); | |
310 | static void phyint_send(struct ip *, struct vif *, struct mbuf *); | |
311 | static void encap_send(struct ip *, struct vif *, struct mbuf *); | |
312 | static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t); | |
313 | static void tbf_queue(struct vif *, struct mbuf *); | |
314 | static void tbf_process_q(struct vif *); | |
315 | static void tbf_reprocess_q(void *); | |
316 | static int tbf_dq_sel(struct vif *, struct ip *); | |
317 | static void tbf_send_packet(struct vif *, struct mbuf *); | |
318 | static void tbf_update_tokens(struct vif *); | |
319 | static int priority(struct vif *, struct ip *); | |
320 | void multiencap_decap(struct mbuf *); | |
321 | ||
322 | /* | |
323 | * whether or not special PIM assert processing is enabled. | |
324 | */ | |
325 | static int pim_assert; | |
326 | /* | |
327 | * Rate limit for assert notification messages, in usec | |
328 | */ | |
329 | #define ASSERT_MSG_TIME 3000000 | |
330 | ||
/*
 * Hash function for a (source, group) entry: each address is folded with
 * itself shifted right by 10 and by 20 bits, the two results are XORed
 * together and the value is reduced with MFCHASHMOD().
 */
#define MFCHASH(a, g)							\
	MFCHASHMOD((a) ^ ((a) >> 10) ^ ((a) >> 20) ^			\
	    (g) ^ ((g) >> 10) ^ ((g) >> 20))
336 | ||
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * o, g - origin and group addresses, compared against mfc_origin.s_addr
 *        and mfc_mcastgrp.s_addr of each entry on the hash chain
 * rt   - lvalue that receives the first matching entry whose mfc_stall
 *        queue is empty, or NULL when there is none
 *
 * Every use bumps mrts_mfc_lookups; a failed lookup also bumps
 * mrts_mfc_misses.  The body is wrapped in do { } while (0) so the macro
 * behaves as a single statement, and the arguments are parenthesized.
 */
#define MFCFIND(o, g, rt) do {						\
	struct mfc *_rt = mfctable[MFCHASH(o, g)];			\
	(rt) = NULL;							\
	++mrtstat.mrts_mfc_lookups;					\
	while (_rt) {							\
		if ((_rt->mfc_origin.s_addr == (o)) &&			\
		    (_rt->mfc_mcastgrp.s_addr == (g)) &&		\
		    (_rt->mfc_stall == NULL)) {				\
			(rt) = _rt;					\
			break;						\
		}							\
		_rt = _rt->mfc_next;					\
	}								\
	if ((rt) == NULL) {						\
		++mrtstat.mrts_mfc_misses;				\
	}								\
} while (0)
359 | ||
360 | ||
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA(a, b, delta) stores (a - b) in microseconds into the lvalue
 * delta.  Differences of one or two whole seconds are handled by plain
 * additions; any other non-zero difference falls back to a multiply.
 *
 * The body is wrapped in do { } while (0) so the macro acts as a single
 * statement (safe in an unbraced if/else), and delta is parenthesized.
 */
#define TV_DELTA(a, b, delta) do {					\
	int xxs;							\
									\
	(delta) = (a).tv_usec - (b).tv_usec;				\
	if ((xxs = (a).tv_sec - (b).tv_sec)) {				\
		switch (xxs) {						\
		case 2:							\
			(delta) += 1000000;				\
			/* fall through */				\
		case 1:							\
			(delta) += 1000000;				\
			break;						\
		default:						\
			(delta) += (1000000 * xxs);			\
		}							\
	}								\
} while (0)
382 | ||
/* True when timeval a is strictly earlier than timeval b. */
#define TV_LT(a, b)							\
	((a).tv_sec < (b).tv_sec ||					\
	    ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
385 | ||
386 | #if UPCALL_TIMING | |
387 | u_int32_t upcall_data[51]; | |
388 | static void collate(struct timeval *); | |
389 | #endif /* UPCALL_TIMING */ | |
390 | ||
391 | ||
392 | /* | |
393 | * Handle MRT setsockopt commands to modify the multicast routing tables. | |
394 | */ | |
395 | static int | |
396 | X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) | |
397 | { | |
398 | int error, optval; | |
399 | vifi_t vifi; | |
400 | struct vifctl vifc; | |
401 | struct mfcctl mfc; | |
402 | ||
403 | if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) | |
404 | return (EPERM); | |
405 | ||
406 | error = 0; | |
407 | switch (sopt->sopt_name) { | |
408 | case MRT_INIT: | |
409 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
410 | sizeof optval); | |
411 | if (error) | |
412 | break; | |
413 | error = ip_mrouter_init(so, optval); | |
414 | break; | |
415 | ||
416 | case MRT_DONE: | |
417 | error = ip_mrouter_done(); | |
418 | break; | |
419 | ||
420 | case MRT_ADD_VIF: | |
421 | error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); | |
422 | if (error) | |
423 | break; | |
424 | error = add_vif(&vifc); | |
425 | break; | |
426 | ||
427 | case MRT_DEL_VIF: | |
428 | error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); | |
429 | if (error) | |
430 | break; | |
431 | error = del_vif(vifi); | |
432 | break; | |
433 | ||
434 | case MRT_ADD_MFC: | |
435 | case MRT_DEL_MFC: | |
436 | error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); | |
437 | if (error) | |
438 | break; | |
439 | if (sopt->sopt_name == MRT_ADD_MFC) | |
440 | error = add_mfc(&mfc); | |
441 | else | |
442 | error = del_mfc(&mfc); | |
443 | break; | |
444 | ||
445 | case MRT_ASSERT: | |
446 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
447 | sizeof optval); | |
448 | if (error) | |
449 | break; | |
450 | set_assert(optval); | |
451 | break; | |
452 | ||
453 | default: | |
454 | error = EOPNOTSUPP; | |
455 | break; | |
456 | } | |
457 | return (error); | |
458 | } | |
459 | ||
460 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
461 | int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set; | |
462 | #endif | |
463 | ||
464 | /* | |
465 | * Handle MRT getsockopt commands | |
466 | */ | |
467 | static int | |
468 | X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt) | |
469 | { | |
470 | int error; | |
471 | static int vers = 0x0305; /* !!! why is this here? XXX */ | |
472 | ||
473 | switch (sopt->sopt_name) { | |
474 | case MRT_VERSION: | |
475 | error = sooptcopyout(sopt, &vers, sizeof vers); | |
476 | break; | |
477 | ||
478 | case MRT_ASSERT: | |
479 | error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); | |
480 | break; | |
481 | default: | |
482 | error = EOPNOTSUPP; | |
483 | break; | |
484 | } | |
485 | return (error); | |
486 | } | |
487 | ||
488 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
489 | int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get; | |
490 | #endif | |
491 | ||
492 | /* | |
493 | * Handle ioctl commands to obtain information from the cache | |
494 | */ | |
495 | static int | |
496 | X_mrt_ioctl(int cmd, caddr_t data) | |
497 | { | |
498 | int error = 0; | |
499 | ||
500 | switch (cmd) { | |
501 | case (SIOCGETVIFCNT): | |
502 | return (get_vif_cnt((struct sioc_vif_req *)data)); | |
503 | break; | |
504 | case (SIOCGETSGCNT): | |
505 | return (get_sg_cnt((struct sioc_sg_req *)data)); | |
506 | break; | |
507 | default: | |
508 | return (EINVAL); | |
509 | break; | |
510 | } | |
511 | return error; | |
512 | } | |
513 | ||
514 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
515 | int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl; | |
516 | #endif | |
517 | ||
518 | /* | |
519 | * returns the packet, byte, rpf-failure count for the source group provided | |
520 | */ | |
521 | static int | |
522 | get_sg_cnt(struct sioc_sg_req *req) | |
523 | { | |
524 | struct mfc *rt; | |
525 | ||
526 | MFCFIND(req->src.s_addr, req->grp.s_addr, rt); | |
527 | if (rt != NULL) { | |
528 | req->pktcnt = rt->mfc_pkt_cnt; | |
529 | req->bytecnt = rt->mfc_byte_cnt; | |
530 | req->wrong_if = rt->mfc_wrong_if; | |
531 | } else | |
532 | req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; | |
533 | ||
534 | return 0; | |
535 | } | |
536 | ||
537 | /* | |
538 | * returns the input and output packet and byte counts on the vif provided | |
539 | */ | |
540 | static int | |
541 | get_vif_cnt(struct sioc_vif_req *req) | |
542 | { | |
543 | vifi_t vifi = req->vifi; | |
544 | ||
545 | if (vifi >= numvifs) return EINVAL; | |
546 | ||
547 | req->icount = viftable[vifi].v_pkt_in; | |
548 | req->ocount = viftable[vifi].v_pkt_out; | |
549 | req->ibytes = viftable[vifi].v_bytes_in; | |
550 | req->obytes = viftable[vifi].v_bytes_out; | |
551 | ||
552 | return 0; | |
553 | } | |
554 | ||
555 | /* | |
556 | * Enable multicast routing | |
557 | */ | |
558 | static int | |
559 | ip_mrouter_init(struct socket *so, int vers) | |
560 | { | |
561 | if (mrtdebug) | |
562 | log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n", | |
563 | so->so_type, so->so_proto->pr_protocol); | |
564 | ||
565 | if (so->so_type != SOCK_RAW || | |
566 | so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; | |
567 | ||
568 | if (vers != 1) | |
569 | return ENOPROTOOPT; | |
570 | ||
571 | if (ip_mrouter != NULL) return EADDRINUSE; | |
572 | ||
573 | ip_mrouter = so; | |
574 | ||
575 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
576 | bzero((caddr_t)nexpire, sizeof(nexpire)); | |
577 | ||
578 | pim_assert = 0; | |
579 | ||
580 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
581 | ||
582 | if (mrtdebug) | |
583 | log(LOG_DEBUG, "ip_mrouter_init\n"); | |
584 | ||
585 | return 0; | |
586 | } | |
587 | ||
588 | /* | |
589 | * Disable multicast routing | |
590 | */ | |
591 | static int | |
592 | X_ip_mrouter_done(void) | |
593 | { | |
594 | vifi_t vifi; | |
595 | int i; | |
596 | struct ifnet *ifp; | |
597 | struct ifreq ifr; | |
598 | struct mfc *rt; | |
599 | struct rtdetq *rte; | |
600 | ||
601 | /* | |
602 | * For each phyint in use, disable promiscuous reception of all IP | |
603 | * multicasts. | |
604 | */ | |
605 | for (vifi = 0; vifi < numvifs; vifi++) { | |
606 | if (viftable[vifi].v_lcl_addr.s_addr != 0 && | |
607 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
608 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
609 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr | |
610 | = INADDR_ANY; | |
611 | ifp = viftable[vifi].v_ifp; | |
612 | if_allmulti(ifp, 0); | |
613 | } | |
614 | } | |
615 | bzero((caddr_t)tbftable, sizeof(tbftable)); | |
616 | bzero((caddr_t)viftable, sizeof(viftable)); | |
617 | numvifs = 0; | |
618 | pim_assert = 0; | |
619 | ||
620 | untimeout(expire_upcalls, (caddr_t)NULL); | |
621 | ||
622 | /* | |
623 | * Free all multicast forwarding cache entries. | |
624 | */ | |
625 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
626 | for (rt = mfctable[i]; rt != NULL; ) { | |
627 | struct mfc *nr = rt->mfc_next; | |
628 | ||
629 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
630 | struct rtdetq *n = rte->next; | |
631 | ||
632 | m_freem(rte->m); | |
633 | FREE(rte, M_MRTABLE); | |
634 | rte = n; | |
635 | } | |
636 | FREE(rt, M_MRTABLE); | |
637 | rt = nr; | |
638 | } | |
639 | } | |
640 | ||
641 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
642 | ||
643 | /* | |
644 | * Reset de-encapsulation cache | |
645 | */ | |
646 | last_encap_src = 0; | |
647 | last_encap_vif = NULL; | |
648 | have_encap_tunnel = 0; | |
649 | ||
650 | ip_mrouter = NULL; | |
651 | ||
652 | if (mrtdebug) | |
653 | log(LOG_DEBUG, "ip_mrouter_done\n"); | |
654 | ||
655 | return 0; | |
656 | } | |
657 | ||
658 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
659 | int (*ip_mrouter_done)(void) = X_ip_mrouter_done; | |
660 | #endif | |
661 | ||
662 | /* | |
663 | * Set PIM assert processing global | |
664 | */ | |
665 | static int | |
666 | set_assert(int i) | |
667 | { | |
668 | if ((i != 1) && (i != 0)) | |
669 | return EINVAL; | |
670 | ||
671 | pim_assert = i; | |
672 | ||
673 | return 0; | |
674 | } | |
675 | ||
676 | /* | |
677 | * Add a vif to the vif table | |
678 | */ | |
679 | static int | |
680 | add_vif(struct vifctl *vifcp) | |
681 | { | |
682 | struct vif *vifp = viftable + vifcp->vifc_vifi; | |
683 | static struct sockaddr_in sin = { sizeof sin, AF_INET, | |
684 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
685 | struct ifaddr *ifa; | |
686 | struct ifnet *ifp; | |
687 | int error, s; | |
688 | struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; | |
689 | ||
690 | if (vifcp->vifc_vifi >= CONFIG_MAXVIFS) return EINVAL; | |
691 | if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; | |
692 | ||
693 | /* Find the interface with an address in AF_INET family */ | |
694 | sin.sin_addr = vifcp->vifc_lcl_addr; | |
695 | ifa = ifa_ifwithaddr((struct sockaddr *)&sin); | |
696 | if (ifa == 0) return EADDRNOTAVAIL; | |
697 | ifp = ifa->ifa_ifp; | |
698 | ifafree(ifa); | |
699 | ifa = NULL; | |
700 | ||
701 | if (vifcp->vifc_flags & VIFF_TUNNEL) { | |
702 | if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { | |
703 | /* | |
704 | * An encapsulating tunnel is wanted. Tell ipip_input() to | |
705 | * start paying attention to encapsulated packets. | |
706 | */ | |
707 | if (have_encap_tunnel == 0) { | |
708 | have_encap_tunnel = 1; | |
709 | for (s = 0; s < CONFIG_MAXVIFS; ++s) { | |
710 | multicast_decap_if[s].if_name = "mdecap"; | |
711 | multicast_decap_if[s].if_unit = s; | |
712 | multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP; | |
713 | } | |
714 | } | |
715 | /* | |
716 | * Set interface to fake encapsulator interface | |
717 | */ | |
718 | ifp = &multicast_decap_if[vifcp->vifc_vifi]; | |
719 | /* | |
720 | * Prepare cached route entry | |
721 | */ | |
722 | bzero(&vifp->v_route, sizeof(vifp->v_route)); | |
723 | } else { | |
724 | log(LOG_ERR, "source routed tunnels not supported\n"); | |
725 | return EOPNOTSUPP; | |
726 | } | |
727 | } else { | |
728 | /* Make sure the interface supports multicast */ | |
729 | if ((ifp->if_flags & IFF_MULTICAST) == 0) | |
730 | return EOPNOTSUPP; | |
731 | ||
732 | /* Enable promiscuous reception of all IP multicasts from the if */ | |
733 | error = if_allmulti(ifp, 1); | |
734 | if (error) | |
735 | return error; | |
736 | } | |
737 | ||
738 | /* define parameters for the tbf structure */ | |
739 | vifp->v_tbf = v_tbf; | |
740 | GET_TIME(vifp->v_tbf->tbf_last_pkt_t); | |
741 | vifp->v_tbf->tbf_n_tok = 0; | |
742 | vifp->v_tbf->tbf_q_len = 0; | |
743 | vifp->v_tbf->tbf_max_q_len = MAXQSIZE; | |
744 | vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; | |
745 | ||
746 | vifp->v_flags = vifcp->vifc_flags; | |
747 | vifp->v_threshold = vifcp->vifc_threshold; | |
748 | vifp->v_lcl_addr = vifcp->vifc_lcl_addr; | |
749 | vifp->v_rmt_addr = vifcp->vifc_rmt_addr; | |
750 | vifp->v_ifp = ifp; | |
751 | /* scaling up here allows division by 1024 in critical code */ | |
752 | vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; | |
753 | vifp->v_rsvp_on = 0; | |
754 | vifp->v_rsvpd = NULL; | |
755 | /* initialize per vif pkt counters */ | |
756 | vifp->v_pkt_in = 0; | |
757 | vifp->v_pkt_out = 0; | |
758 | vifp->v_bytes_in = 0; | |
759 | vifp->v_bytes_out = 0; | |
760 | ||
761 | /* Adjust numvifs up if the vifi is higher than numvifs */ | |
762 | if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; | |
763 | ||
764 | if (mrtdebug) | |
765 | log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", | |
766 | vifcp->vifc_vifi, | |
767 | (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr), | |
768 | (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", | |
769 | (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr), | |
770 | vifcp->vifc_threshold, | |
771 | vifcp->vifc_rate_limit); | |
772 | ||
773 | return 0; | |
774 | } | |
775 | ||
776 | /* | |
777 | * Delete a vif from the vif table | |
778 | */ | |
779 | static int | |
780 | del_vif(vifi_t vifi) | |
781 | { | |
782 | struct vif *vifp = &viftable[vifi]; | |
783 | struct mbuf *m; | |
784 | struct ifnet *ifp; | |
785 | struct ifreq ifr; | |
786 | ||
787 | if (vifi >= numvifs) return EINVAL; | |
788 | if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; | |
789 | ||
790 | if (!(vifp->v_flags & VIFF_TUNNEL)) { | |
791 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
792 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; | |
793 | ifp = vifp->v_ifp; | |
794 | if_allmulti(ifp, 0); | |
795 | } | |
796 | ||
797 | if (vifp == last_encap_vif) { | |
798 | last_encap_vif = 0; | |
799 | last_encap_src = 0; | |
800 | } | |
801 | ||
802 | /* | |
803 | * Free packets queued at the interface | |
804 | */ | |
805 | while (vifp->v_tbf->tbf_q) { | |
806 | m = vifp->v_tbf->tbf_q; | |
807 | vifp->v_tbf->tbf_q = m->m_act; | |
808 | m_freem(m); | |
809 | } | |
810 | ||
811 | bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); | |
812 | bzero((caddr_t)vifp, sizeof (*vifp)); | |
813 | ||
814 | if (mrtdebug) | |
815 | log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); | |
816 | ||
817 | /* Adjust numvifs down */ | |
818 | for (vifi = numvifs; vifi > 0; vifi--) | |
819 | if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; | |
820 | numvifs = vifi; | |
821 | ||
822 | return 0; | |
823 | } | |
824 | ||
825 | /* | |
826 | * Add an mfc entry | |
827 | */ | |
828 | static int | |
829 | add_mfc(struct mfcctl *mfccp) | |
830 | { | |
831 | struct mfc *rt; | |
832 | u_int32_t hash; | |
833 | struct rtdetq *rte; | |
834 | u_short nstl; | |
835 | int i; | |
836 | ||
837 | MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); | |
838 | ||
839 | /* If an entry already exists, just update the fields */ | |
840 | if (rt) { | |
841 | if (mrtdebug & DEBUG_MFC) | |
842 | log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", | |
843 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
844 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
845 | mfccp->mfcc_parent); | |
846 | ||
847 | rt->mfc_parent = mfccp->mfcc_parent; | |
848 | for (i = 0; i < numvifs; i++) | |
849 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
850 | return 0; | |
851 | } | |
852 | ||
853 | /* | |
854 | * Find the entry for which the upcall was made and update | |
855 | */ | |
856 | hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); | |
857 | for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { | |
858 | ||
859 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
860 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && | |
861 | (rt->mfc_stall != NULL)) { | |
862 | ||
863 | if (nstl++) | |
864 | log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", | |
865 | "multiple kernel entries", | |
866 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
867 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
868 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
869 | ||
870 | if (mrtdebug & DEBUG_MFC) | |
871 | log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", | |
872 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
873 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
874 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
875 | ||
876 | rt->mfc_origin = mfccp->mfcc_origin; | |
877 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
878 | rt->mfc_parent = mfccp->mfcc_parent; | |
879 | for (i = 0; i < numvifs; i++) | |
880 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
881 | /* initialize pkt counters per src-grp */ | |
882 | rt->mfc_pkt_cnt = 0; | |
883 | rt->mfc_byte_cnt = 0; | |
884 | rt->mfc_wrong_if = 0; | |
885 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
886 | ||
887 | rt->mfc_expire = 0; /* Don't clean this guy up */ | |
888 | nexpire[hash]--; | |
889 | ||
890 | /* free packets Qed at the end of this entry */ | |
891 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
892 | struct rtdetq *n = rte->next; | |
893 | ||
894 | ip_mdq(rte->m, rte->ifp, rt, -1); | |
895 | m_freem(rte->m); | |
896 | #if UPCALL_TIMING | |
897 | collate(&(rte->t)); | |
898 | #endif /* UPCALL_TIMING */ | |
899 | FREE(rte, M_MRTABLE); | |
900 | rte = n; | |
901 | } | |
902 | rt->mfc_stall = NULL; | |
903 | } | |
904 | } | |
905 | ||
906 | /* | |
907 | * It is possible that an entry is being inserted without an upcall | |
908 | */ | |
909 | if (nstl == 0) { | |
910 | if (mrtdebug & DEBUG_MFC) | |
911 | log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", | |
912 | hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
913 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
914 | mfccp->mfcc_parent); | |
915 | ||
916 | for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { | |
917 | ||
918 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
919 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { | |
920 | ||
921 | rt->mfc_origin = mfccp->mfcc_origin; | |
922 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
923 | rt->mfc_parent = mfccp->mfcc_parent; | |
924 | for (i = 0; i < numvifs; i++) | |
925 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
926 | /* initialize pkt counters per src-grp */ | |
927 | rt->mfc_pkt_cnt = 0; | |
928 | rt->mfc_byte_cnt = 0; | |
929 | rt->mfc_wrong_if = 0; | |
930 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
931 | if (rt->mfc_expire) | |
932 | nexpire[hash]--; | |
933 | rt->mfc_expire = 0; | |
934 | } | |
935 | } | |
936 | if (rt == NULL) { | |
937 | /* no upcall, so make a new entry */ | |
938 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
939 | if (rt == NULL) { | |
940 | return ENOBUFS; | |
941 | } | |
942 | ||
943 | /* insert new entry at head of hash chain */ | |
944 | rt->mfc_origin = mfccp->mfcc_origin; | |
945 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
946 | rt->mfc_parent = mfccp->mfcc_parent; | |
947 | for (i = 0; i < numvifs; i++) | |
948 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
949 | /* initialize pkt counters per src-grp */ | |
950 | rt->mfc_pkt_cnt = 0; | |
951 | rt->mfc_byte_cnt = 0; | |
952 | rt->mfc_wrong_if = 0; | |
953 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
954 | rt->mfc_expire = 0; | |
955 | rt->mfc_stall = NULL; | |
956 | ||
957 | /* link into table */ | |
958 | rt->mfc_next = mfctable[hash]; | |
959 | mfctable[hash] = rt; | |
960 | } | |
961 | } | |
962 | return 0; | |
963 | } | |
964 | ||
#if UPCALL_TIMING
/*
 * Upcall delay histogram.
 *
 * Measures the time between *t and the current time and bumps the
 * matching slot of upcall_data[].  The slot index is the delta shifted
 * right by 10 bits, clamped to 50.  Samples for which *t is not earlier
 * than the current time are ignored.
 */
static void
collate(struct timeval *t)
{
	struct timeval now;
	u_int32_t elapsed;
	u_int32_t slot;

	GET_TIME(now);

	/* Nothing to record unless the stamp lies in the past. */
	if (!(TV_LT(*t, now)))
		return;

	TV_DELTA(now, *t, elapsed);

	slot = elapsed >> 10;
	if (slot > 50)
		slot = 50;

	++upcall_data[slot];
}
#endif /* UPCALL_TIMING */
990 | ||
991 | /* | |
992 | * Delete an mfc entry | |
993 | */ | |
994 | static int | |
995 | del_mfc(struct mfcctl *mfccp) | |
996 | { | |
997 | struct in_addr origin; | |
998 | struct in_addr mcastgrp; | |
999 | struct mfc *rt; | |
1000 | struct mfc **nptr; | |
1001 | u_int32_t hash; | |
1002 | ||
1003 | origin = mfccp->mfcc_origin; | |
1004 | mcastgrp = mfccp->mfcc_mcastgrp; | |
1005 | hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); | |
1006 | ||
1007 | if (mrtdebug & DEBUG_MFC) | |
1008 | log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", | |
1009 | (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr)); | |
1010 | ||
1011 | nptr = &mfctable[hash]; | |
1012 | while ((rt = *nptr) != NULL) { | |
1013 | if (origin.s_addr == rt->mfc_origin.s_addr && | |
1014 | mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && | |
1015 | rt->mfc_stall == NULL) | |
1016 | break; | |
1017 | ||
1018 | nptr = &rt->mfc_next; | |
1019 | } | |
1020 | if (rt == NULL) { | |
1021 | return EADDRNOTAVAIL; | |
1022 | } | |
1023 | ||
1024 | *nptr = rt->mfc_next; | |
1025 | FREE(rt, M_MRTABLE); | |
1026 | ||
1027 | return 0; | |
1028 | } | |
1029 | ||
1030 | /* | |
1031 | * Send a message to mrouted on the multicast routing socket | |
1032 | */ | |
1033 | static int | |
1034 | socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) | |
1035 | { | |
1036 | socket_lock(s, 1); | |
1037 | if (s) { | |
1038 | if (sbappendaddr(&s->so_rcv, | |
1039 | (struct sockaddr *)src, | |
1040 | mm, (struct mbuf *)0, NULL) != 0) { | |
1041 | sorwakeup(s); | |
1042 | socket_unlock(s, 1); | |
1043 | return 0; | |
1044 | } | |
1045 | } | |
1046 | socket_unlock(s, 1); | |
1047 | m_freem(mm); | |
1048 | return -1; | |
1049 | } | |
1050 | ||
1051 | /* | |
1052 | * IP multicast forwarding function. This function assumes that the packet | |
1053 | * pointed to by "ip" has arrived on (or is about to be sent to) the interface | |
1054 | * pointed to by "ifp", and the packet is to be relayed to other networks | |
1055 | * that have members of the packet's destination IP multicast group. | |
1056 | * | |
1057 | * The packet is returned unscathed to the caller, unless it is | |
1058 | * erroneous, in which case a non-zero return value tells the caller to | |
1059 | * discard it. | |
1060 | */ | |
1061 | ||
1062 | #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ | |
1063 | #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ | |
1064 | ||
1065 | static int | |
1066 | X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, | |
1067 | struct ip_moptions *imo) | |
1068 | { | |
1069 | struct mfc *rt; | |
1070 | u_char *ipoptions; | |
1071 | static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET, | |
1072 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
1073 | static int srctun = 0; | |
1074 | struct mbuf *mm; | |
1075 | vifi_t vifi; | |
1076 | struct vif *vifp; | |
1077 | ||
1078 | if (mrtdebug & DEBUG_FORWARD) | |
1079 | log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", | |
1080 | (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr), | |
1081 | (void *)ifp); | |
1082 | ||
1083 | if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || | |
1084 | (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { | |
1085 | /* | |
1086 | * Packet arrived via a physical interface or | |
1087 | * an encapsulated tunnel. | |
1088 | */ | |
1089 | } else { | |
1090 | /* | |
1091 | * Packet arrived through a source-route tunnel. | |
1092 | * Source-route tunnels are no longer supported. | |
1093 | */ | |
1094 | if ((srctun++ % 1000) == 0) | |
1095 | log(LOG_ERR, | |
1096 | "ip_mforward: received source-routed packet from %lx\n", | |
1097 | (u_int32_t)ntohl(ip->ip_src.s_addr)); | |
1098 | ||
1099 | return 1; | |
1100 | } | |
1101 | ||
1102 | if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { | |
1103 | if (ip->ip_ttl < 255) | |
1104 | ip->ip_ttl++; /* compensate for -1 in *_send routines */ | |
1105 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1106 | vifp = viftable + vifi; | |
1107 | printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", | |
1108 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, | |
1109 | (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", | |
1110 | vifp->v_ifp->if_name, vifp->v_ifp->if_unit); | |
1111 | } | |
1112 | return (ip_mdq(m, ifp, NULL, vifi)); | |
1113 | } | |
1114 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1115 | printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", | |
1116 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); | |
1117 | if(!imo) | |
1118 | printf("In fact, no options were specified at all\n"); | |
1119 | } | |
1120 | ||
1121 | /* | |
1122 | * Don't forward a packet with time-to-live of zero or one, | |
1123 | * or a packet destined to a local-only group. | |
1124 | */ | |
1125 | if (ip->ip_ttl <= 1 || | |
1126 | ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) | |
1127 | return 0; | |
1128 | ||
1129 | /* | |
1130 | * Determine forwarding vifs from the forwarding cache table | |
1131 | */ | |
1132 | MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); | |
1133 | ||
1134 | /* Entry exists, so forward if necessary */ | |
1135 | if (rt != NULL) { | |
1136 | return (ip_mdq(m, ifp, rt, -1)); | |
1137 | } else { | |
1138 | /* | |
1139 | * If we don't have a route for packet's origin, | |
1140 | * Make a copy of the packet & | |
1141 | * send message to routing daemon | |
1142 | */ | |
1143 | ||
1144 | struct mbuf *mb0; | |
1145 | struct rtdetq *rte; | |
1146 | u_int32_t hash; | |
1147 | int hlen = ip->ip_hl << 2; | |
1148 | #if UPCALL_TIMING | |
1149 | struct timeval tp; | |
1150 | ||
1151 | GET_TIME(tp); | |
1152 | #endif | |
1153 | ||
1154 | mrtstat.mrts_no_route++; | |
1155 | if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) | |
1156 | log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", | |
1157 | (u_int32_t)ntohl(ip->ip_src.s_addr), | |
1158 | (u_int32_t)ntohl(ip->ip_dst.s_addr)); | |
1159 | ||
1160 | /* | |
1161 | * Allocate mbufs early so that we don't do extra work if we are | |
1162 | * just going to fail anyway. Make sure to pullup the header so | |
1163 | * that other people can't step on it. | |
1164 | */ | |
1165 | rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT); | |
1166 | if (rte == NULL) { | |
1167 | return ENOBUFS; | |
1168 | } | |
1169 | mb0 = m_copy(m, 0, M_COPYALL); | |
1170 | if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) | |
1171 | mb0 = m_pullup(mb0, hlen); | |
1172 | if (mb0 == NULL) { | |
1173 | FREE(rte, M_MRTABLE); | |
1174 | return ENOBUFS; | |
1175 | } | |
1176 | ||
1177 | /* is there an upcall waiting for this packet? */ | |
1178 | hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); | |
1179 | for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { | |
1180 | if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && | |
1181 | (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && | |
1182 | (rt->mfc_stall != NULL)) | |
1183 | break; | |
1184 | } | |
1185 | ||
1186 | if (rt == NULL) { | |
1187 | int i; | |
1188 | struct igmpmsg *im; | |
1189 | ||
1190 | /* no upcall, so make a new entry */ | |
1191 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
1192 | if (rt == NULL) { | |
1193 | FREE(rte, M_MRTABLE); | |
1194 | m_freem(mb0); | |
1195 | return ENOBUFS; | |
1196 | } | |
1197 | /* Make a copy of the header to send to the user level process */ | |
1198 | mm = m_copy(mb0, 0, hlen); | |
1199 | if (mm == NULL) { | |
1200 | FREE(rte, M_MRTABLE); | |
1201 | m_freem(mb0); | |
1202 | FREE(rt, M_MRTABLE); | |
1203 | return ENOBUFS; | |
1204 | } | |
1205 | ||
1206 | /* | |
1207 | * Send message to routing daemon to install | |
1208 | * a route into the kernel table | |
1209 | */ | |
1210 | k_igmpsrc.sin_addr = ip->ip_src; | |
1211 | ||
1212 | im = mtod(mm, struct igmpmsg *); | |
1213 | im->im_msgtype = IGMPMSG_NOCACHE; | |
1214 | im->im_mbz = 0; | |
1215 | ||
1216 | mrtstat.mrts_upcalls++; | |
1217 | ||
1218 | if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { | |
1219 | log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); | |
1220 | ++mrtstat.mrts_upq_sockfull; | |
1221 | FREE(rte, M_MRTABLE); | |
1222 | m_freem(mb0); | |
1223 | FREE(rt, M_MRTABLE); | |
1224 | return ENOBUFS; | |
1225 | } | |
1226 | ||
1227 | /* insert new entry at head of hash chain */ | |
1228 | rt->mfc_origin.s_addr = ip->ip_src.s_addr; | |
1229 | rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; | |
1230 | rt->mfc_expire = UPCALL_EXPIRE; | |
1231 | nexpire[hash]++; | |
1232 | for (i = 0; i < numvifs; i++) | |
1233 | rt->mfc_ttls[i] = 0; | |
1234 | rt->mfc_parent = -1; | |
1235 | ||
1236 | /* link into table */ | |
1237 | rt->mfc_next = mfctable[hash]; | |
1238 | mfctable[hash] = rt; | |
1239 | rt->mfc_stall = rte; | |
1240 | ||
1241 | } else { | |
1242 | /* determine if q has overflowed */ | |
1243 | int npkts = 0; | |
1244 | struct rtdetq **p; | |
1245 | ||
1246 | for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) | |
1247 | npkts++; | |
1248 | ||
1249 | if (npkts > MAX_UPQ) { | |
1250 | mrtstat.mrts_upq_ovflw++; | |
1251 | FREE(rte, M_MRTABLE); | |
1252 | m_freem(mb0); | |
1253 | return 0; | |
1254 | } | |
1255 | ||
1256 | /* Add this entry to the end of the queue */ | |
1257 | *p = rte; | |
1258 | } | |
1259 | ||
1260 | rte->m = mb0; | |
1261 | rte->ifp = ifp; | |
1262 | #if UPCALL_TIMING | |
1263 | rte->t = tp; | |
1264 | #endif | |
1265 | rte->next = NULL; | |
1266 | ||
1267 | return 0; | |
1268 | } | |
1269 | } | |
1270 | ||
1271 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1272 | int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, | |
1273 | struct ip_moptions *) = X_ip_mforward; | |
1274 | #endif | |
1275 | ||
1276 | /* | |
1277 | * Clean up the cache entry if upcall is not serviced | |
1278 | */ | |
1279 | static void | |
1280 | expire_upcalls(__unused void *unused) | |
1281 | { | |
1282 | struct rtdetq *rte; | |
1283 | struct mfc *mfc, **nptr; | |
1284 | int i; | |
1285 | ||
1286 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
1287 | if (nexpire[i] == 0) | |
1288 | continue; | |
1289 | nptr = &mfctable[i]; | |
1290 | for (mfc = *nptr; mfc != NULL; mfc = *nptr) { | |
1291 | /* | |
1292 | * Skip real cache entries | |
1293 | * Make sure it wasn't marked to not expire (shouldn't happen) | |
1294 | * If it expires now | |
1295 | */ | |
1296 | if (mfc->mfc_stall != NULL && | |
1297 | mfc->mfc_expire != 0 && | |
1298 | --mfc->mfc_expire == 0) { | |
1299 | if (mrtdebug & DEBUG_EXPIRE) | |
1300 | log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", | |
1301 | (u_int32_t)ntohl(mfc->mfc_origin.s_addr), | |
1302 | (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr)); | |
1303 | /* | |
1304 | * drop all the packets | |
1305 | * free the mbuf with the pkt, if, timing info | |
1306 | */ | |
1307 | for (rte = mfc->mfc_stall; rte; ) { | |
1308 | struct rtdetq *n = rte->next; | |
1309 | ||
1310 | m_freem(rte->m); | |
1311 | FREE(rte, M_MRTABLE); | |
1312 | rte = n; | |
1313 | } | |
1314 | ++mrtstat.mrts_cache_cleanups; | |
1315 | nexpire[i]--; | |
1316 | ||
1317 | *nptr = mfc->mfc_next; | |
1318 | FREE(mfc, M_MRTABLE); | |
1319 | } else { | |
1320 | nptr = &mfc->mfc_next; | |
1321 | } | |
1322 | } | |
1323 | } | |
1324 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
1325 | } | |
1326 | ||
1327 | /* | |
1328 | * Packet forwarding routine once entry in the cache is made | |
1329 | */ | |
1330 | static int | |
1331 | ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, | |
1332 | vifi_t xmt_vif) | |
1333 | { | |
1334 | struct ip *ip = mtod(m, struct ip *); | |
1335 | vifi_t vifi; | |
1336 | struct vif *vifp; | |
1337 | int plen = ip->ip_len; | |
1338 | ||
1339 | /* | |
1340 | * Macro to send packet on vif. Since RSVP packets don't get counted on | |
1341 | * input, they shouldn't get counted on output, so statistics keeping is | |
1342 | * seperate. | |
1343 | */ | |
1344 | #define MC_SEND(ip,vifp,m) { \ | |
1345 | if ((vifp)->v_flags & VIFF_TUNNEL) \ | |
1346 | encap_send((ip), (vifp), (m)); \ | |
1347 | else \ | |
1348 | phyint_send((ip), (vifp), (m)); \ | |
1349 | } | |
1350 | ||
1351 | /* | |
1352 | * If xmt_vif is not -1, send on only the requested vif. | |
1353 | * | |
1354 | * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) | |
1355 | */ | |
1356 | if (xmt_vif < numvifs) { | |
1357 | MC_SEND(ip, viftable + xmt_vif, m); | |
1358 | return 1; | |
1359 | } | |
1360 | ||
1361 | /* | |
1362 | * Don't forward if it didn't arrive from the parent vif for its origin. | |
1363 | */ | |
1364 | vifi = rt->mfc_parent; | |
1365 | if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { | |
1366 | /* came in the wrong interface */ | |
1367 | if (mrtdebug & DEBUG_FORWARD) | |
1368 | log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", | |
1369 | (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); | |
1370 | ++mrtstat.mrts_wrong_if; | |
1371 | ++rt->mfc_wrong_if; | |
1372 | /* | |
1373 | * If we are doing PIM assert processing, and we are forwarding | |
1374 | * packets on this interface, and it is a broadcast medium | |
1375 | * interface (and not a tunnel), send a message to the routing daemon. | |
1376 | */ | |
1377 | if (pim_assert && rt->mfc_ttls[vifi] && | |
1378 | (ifp->if_flags & IFF_BROADCAST) && | |
1379 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
1380 | struct sockaddr_in k_igmpsrc; | |
1381 | struct mbuf *mm; | |
1382 | struct igmpmsg *im; | |
1383 | int hlen = ip->ip_hl << 2; | |
1384 | struct timeval now; | |
1385 | u_int32_t delta; | |
1386 | ||
1387 | GET_TIME(now); | |
1388 | ||
1389 | TV_DELTA(rt->mfc_last_assert, now, delta); | |
1390 | ||
1391 | if (delta > ASSERT_MSG_TIME) { | |
1392 | mm = m_copy(m, 0, hlen); | |
1393 | if (mm && (M_HASCL(mm) || mm->m_len < hlen)) | |
1394 | mm = m_pullup(mm, hlen); | |
1395 | if (mm == NULL) { | |
1396 | return ENOBUFS; | |
1397 | } | |
1398 | ||
1399 | rt->mfc_last_assert = now; | |
1400 | ||
1401 | im = mtod(mm, struct igmpmsg *); | |
1402 | im->im_msgtype = IGMPMSG_WRONGVIF; | |
1403 | im->im_mbz = 0; | |
1404 | im->im_vif = vifi; | |
1405 | ||
1406 | k_igmpsrc.sin_addr = im->im_src; | |
1407 | ||
1408 | socket_send(ip_mrouter, mm, &k_igmpsrc); | |
1409 | } | |
1410 | } | |
1411 | return 0; | |
1412 | } | |
1413 | ||
1414 | /* If I sourced this packet, it counts as output, else it was input. */ | |
1415 | if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { | |
1416 | viftable[vifi].v_pkt_out++; | |
1417 | viftable[vifi].v_bytes_out += plen; | |
1418 | } else { | |
1419 | viftable[vifi].v_pkt_in++; | |
1420 | viftable[vifi].v_bytes_in += plen; | |
1421 | } | |
1422 | rt->mfc_pkt_cnt++; | |
1423 | rt->mfc_byte_cnt += plen; | |
1424 | ||
1425 | /* | |
1426 | * For each vif, decide if a copy of the packet should be forwarded. | |
1427 | * Forward if: | |
1428 | * - the ttl exceeds the vif's threshold | |
1429 | * - there are group members downstream on interface | |
1430 | */ | |
1431 | for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) | |
1432 | if ((rt->mfc_ttls[vifi] > 0) && | |
1433 | (ip->ip_ttl > rt->mfc_ttls[vifi])) { | |
1434 | vifp->v_pkt_out++; | |
1435 | vifp->v_bytes_out += plen; | |
1436 | MC_SEND(ip, vifp, m); | |
1437 | } | |
1438 | ||
1439 | return 0; | |
1440 | } | |
1441 | ||
1442 | /* | |
1443 | * check if a vif number is legal/ok. This is used by ip_output, to export | |
1444 | * numvifs there, | |
1445 | */ | |
1446 | static int | |
1447 | X_legal_vif_num(int vif) | |
1448 | { | |
1449 | if (vif >= 0 && vif < numvifs) | |
1450 | return(1); | |
1451 | else | |
1452 | return(0); | |
1453 | } | |
1454 | ||
1455 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1456 | int (*legal_vif_num)(int) = X_legal_vif_num; | |
1457 | #endif | |
1458 | ||
1459 | /* | |
1460 | * Return the local address used by this vif | |
1461 | */ | |
1462 | static u_int32_t | |
1463 | X_ip_mcast_src(int vifi) | |
1464 | { | |
1465 | if (vifi >= 0 && vifi < numvifs) | |
1466 | return viftable[vifi].v_lcl_addr.s_addr; | |
1467 | else | |
1468 | return INADDR_ANY; | |
1469 | } | |
1470 | ||
1471 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1472 | u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src; | |
1473 | #endif | |
1474 | ||
1475 | static void | |
1476 | phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1477 | { | |
1478 | struct mbuf *mb_copy; | |
1479 | int hlen = ip->ip_hl << 2; | |
1480 | ||
1481 | /* | |
1482 | * Make a new reference to the packet; make sure that | |
1483 | * the IP header is actually copied, not just referenced, | |
1484 | * so that ip_output() only scribbles on the copy. | |
1485 | */ | |
1486 | mb_copy = m_copy(m, 0, M_COPYALL); | |
1487 | if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) | |
1488 | mb_copy = m_pullup(mb_copy, hlen); | |
1489 | if (mb_copy == NULL) | |
1490 | return; | |
1491 | ||
1492 | if (vifp->v_rate_limit == 0) | |
1493 | tbf_send_packet(vifp, mb_copy); | |
1494 | else | |
1495 | tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); | |
1496 | } | |
1497 | ||
1498 | static void | |
1499 | encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1500 | { | |
1501 | struct mbuf *mb_copy; | |
1502 | struct ip *ip_copy; | |
1503 | int i, len = ip->ip_len; | |
1504 | ||
1505 | /* | |
1506 | * copy the old packet & pullup its IP header into the | |
1507 | * new mbuf so we can modify it. Try to fill the new | |
1508 | * mbuf since if we don't the ethernet driver will. | |
1509 | */ | |
1510 | MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); | |
1511 | if (mb_copy == NULL) | |
1512 | return; | |
1513 | #if CONFIG_MACF_NET | |
1514 | mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy); | |
1515 | #endif | |
1516 | mb_copy->m_data += max_linkhdr; | |
1517 | mb_copy->m_len = sizeof(multicast_encap_iphdr); | |
1518 | ||
1519 | if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { | |
1520 | m_freem(mb_copy); | |
1521 | return; | |
1522 | } | |
1523 | i = MHLEN - M_LEADINGSPACE(mb_copy); | |
1524 | if (i > len) | |
1525 | i = len; | |
1526 | mb_copy = m_pullup(mb_copy, i); | |
1527 | if (mb_copy == NULL) | |
1528 | return; | |
1529 | mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); | |
1530 | ||
1531 | /* | |
1532 | * fill in the encapsulating IP header. | |
1533 | */ | |
1534 | ip_copy = mtod(mb_copy, struct ip *); | |
1535 | *ip_copy = multicast_encap_iphdr; | |
1536 | #if RANDOM_IP_ID | |
1537 | ip_copy->ip_id = ip_randomid(); | |
1538 | #else | |
1539 | ip_copy->ip_id = htons(ip_id++); | |
1540 | #endif | |
1541 | ip_copy->ip_len += len; | |
1542 | ip_copy->ip_src = vifp->v_lcl_addr; | |
1543 | ip_copy->ip_dst = vifp->v_rmt_addr; | |
1544 | ||
1545 | /* | |
1546 | * turn the encapsulated IP header back into a valid one. | |
1547 | */ | |
1548 | ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); | |
1549 | --ip->ip_ttl; | |
1550 | ||
1551 | #if BYTE_ORDER != BIG_ENDIAN | |
1552 | HTONS(ip->ip_len); | |
1553 | HTONS(ip->ip_off); | |
1554 | #endif | |
1555 | ||
1556 | ip->ip_sum = 0; | |
1557 | mb_copy->m_data += sizeof(multicast_encap_iphdr); | |
1558 | ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); | |
1559 | mb_copy->m_data -= sizeof(multicast_encap_iphdr); | |
1560 | ||
1561 | if (vifp->v_rate_limit == 0) | |
1562 | tbf_send_packet(vifp, mb_copy); | |
1563 | else | |
1564 | tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); | |
1565 | } | |
1566 | ||
1567 | /* | |
1568 | * De-encapsulate a packet and feed it back through ip input (this | |
1569 | * routine is called whenever IP gets a packet with proto type | |
1570 | * ENCAP_PROTO and a local destination address). | |
1571 | */ | |
1572 | void | |
1573 | #if MROUTE_LKM | |
1574 | X_ipip_input(struct mbuf *m, int iphlen) | |
1575 | #else | |
1576 | ipip_input(struct mbuf *m, int iphlen) | |
1577 | #endif | |
1578 | { | |
1579 | struct ifnet *ifp = m->m_pkthdr.rcvif; | |
1580 | struct ip *ip = mtod(m, struct ip *); | |
1581 | int hlen = ip->ip_hl << 2; | |
1582 | struct vif *vifp; | |
1583 | ||
1584 | if (!have_encap_tunnel) { | |
1585 | rip_input(m, iphlen); | |
1586 | return; | |
1587 | } | |
1588 | /* | |
1589 | * dump the packet if it's not to a multicast destination or if | |
1590 | * we don't have an encapsulating tunnel with the source. | |
1591 | * Note: This code assumes that the remote site IP address | |
1592 | * uniquely identifies the tunnel (i.e., that this site has | |
1593 | * at most one tunnel with the remote site). | |
1594 | */ | |
1595 | if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { | |
1596 | ++mrtstat.mrts_bad_tunnel; | |
1597 | m_freem(m); | |
1598 | return; | |
1599 | } | |
1600 | if (ip->ip_src.s_addr != last_encap_src) { | |
1601 | struct vif *vife; | |
1602 | ||
1603 | vifp = viftable; | |
1604 | vife = vifp + numvifs; | |
1605 | last_encap_src = ip->ip_src.s_addr; | |
1606 | last_encap_vif = 0; | |
1607 | for ( ; vifp < vife; ++vifp) | |
1608 | if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { | |
1609 | if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) | |
1610 | == VIFF_TUNNEL) | |
1611 | last_encap_vif = vifp; | |
1612 | break; | |
1613 | } | |
1614 | } | |
1615 | if ((vifp = last_encap_vif) == 0) { | |
1616 | last_encap_src = 0; | |
1617 | mrtstat.mrts_cant_tunnel++; /*XXX*/ | |
1618 | m_freem(m); | |
1619 | if (mrtdebug) | |
1620 | log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", | |
1621 | (u_int32_t)ntohl(ip->ip_src.s_addr)); | |
1622 | return; | |
1623 | } | |
1624 | ifp = vifp->v_ifp; | |
1625 | ||
1626 | if (hlen > IP_HDR_LEN) | |
1627 | ip_stripoptions(m, (struct mbuf *) 0); | |
1628 | m->m_data += IP_HDR_LEN; | |
1629 | m->m_len -= IP_HDR_LEN; | |
1630 | m->m_pkthdr.len -= IP_HDR_LEN; | |
1631 | m->m_pkthdr.rcvif = ifp; | |
1632 | ||
1633 | proto_inject(PF_INET, m); | |
1634 | } | |
1635 | ||
1636 | /* | |
1637 | * Token bucket filter module | |
1638 | */ | |
1639 | ||
1640 | static void | |
1641 | tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, | |
1642 | u_int32_t p_len) | |
1643 | { | |
1644 | struct tbf *t = vifp->v_tbf; | |
1645 | ||
1646 | if (p_len > MAX_BKT_SIZE) { | |
1647 | /* drop if packet is too large */ | |
1648 | mrtstat.mrts_pkt2large++; | |
1649 | m_freem(m); | |
1650 | return; | |
1651 | } | |
1652 | ||
1653 | tbf_update_tokens(vifp); | |
1654 | ||
1655 | /* if there are enough tokens, | |
1656 | * and the queue is empty, | |
1657 | * send this packet out | |
1658 | */ | |
1659 | ||
1660 | if (t->tbf_q_len == 0) { | |
1661 | /* queue empty, send packet if enough tokens */ | |
1662 | if (p_len <= t->tbf_n_tok) { | |
1663 | t->tbf_n_tok -= p_len; | |
1664 | tbf_send_packet(vifp, m); | |
1665 | } else { | |
1666 | /* queue packet and timeout till later */ | |
1667 | tbf_queue(vifp, m); | |
1668 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1669 | } | |
1670 | } else if (t->tbf_q_len < t->tbf_max_q_len) { | |
1671 | /* finite queue length, so queue pkts and process queue */ | |
1672 | tbf_queue(vifp, m); | |
1673 | tbf_process_q(vifp); | |
1674 | } else { | |
1675 | /* queue length too much, try to dq and queue and process */ | |
1676 | if (!tbf_dq_sel(vifp, ip)) { | |
1677 | mrtstat.mrts_q_overflow++; | |
1678 | m_freem(m); | |
1679 | return; | |
1680 | } else { | |
1681 | tbf_queue(vifp, m); | |
1682 | tbf_process_q(vifp); | |
1683 | } | |
1684 | } | |
1685 | return; | |
1686 | } | |
1687 | ||
1688 | /* | |
1689 | * adds a packet to the queue at the interface | |
1690 | */ | |
1691 | static void | |
1692 | tbf_queue(struct vif *vifp, struct mbuf *m) | |
1693 | { | |
1694 | struct tbf *t = vifp->v_tbf; | |
1695 | ||
1696 | if (t->tbf_t == NULL) { | |
1697 | /* Queue was empty */ | |
1698 | t->tbf_q = m; | |
1699 | } else { | |
1700 | /* Insert at tail */ | |
1701 | t->tbf_t->m_act = m; | |
1702 | } | |
1703 | ||
1704 | /* Set new tail pointer */ | |
1705 | t->tbf_t = m; | |
1706 | ||
1707 | #if DIAGNOSTIC | |
1708 | /* Make sure we didn't get fed a bogus mbuf */ | |
1709 | if (m->m_act) | |
1710 | panic("tbf_queue: m_act"); | |
1711 | #endif | |
1712 | m->m_act = NULL; | |
1713 | ||
1714 | t->tbf_q_len++; | |
1715 | } | |
1716 | ||
1717 | ||
1718 | /* | |
1719 | * processes the queue at the interface | |
1720 | */ | |
1721 | static void | |
1722 | tbf_process_q(struct vif *vifp) | |
1723 | { | |
1724 | struct mbuf *m; | |
1725 | int len; | |
1726 | struct tbf *t = vifp->v_tbf; | |
1727 | ||
1728 | /* loop through the queue at the interface and send as many packets | |
1729 | * as possible | |
1730 | */ | |
1731 | while (t->tbf_q_len > 0) { | |
1732 | m = t->tbf_q; | |
1733 | ||
1734 | len = mtod(m, struct ip *)->ip_len; | |
1735 | ||
1736 | /* determine if the packet can be sent */ | |
1737 | if (len <= t->tbf_n_tok) { | |
1738 | /* if so, | |
1739 | * reduce no of tokens, dequeue the packet, | |
1740 | * send the packet. | |
1741 | */ | |
1742 | t->tbf_n_tok -= len; | |
1743 | ||
1744 | t->tbf_q = m->m_act; | |
1745 | if (--t->tbf_q_len == 0) | |
1746 | t->tbf_t = NULL; | |
1747 | ||
1748 | m->m_act = NULL; | |
1749 | tbf_send_packet(vifp, m); | |
1750 | ||
1751 | } else break; | |
1752 | } | |
1753 | } | |
1754 | ||
1755 | static void | |
1756 | tbf_reprocess_q(void *xvifp) | |
1757 | { | |
1758 | struct vif *vifp = xvifp; | |
1759 | ||
1760 | if (ip_mrouter == NULL) { | |
1761 | return; | |
1762 | } | |
1763 | ||
1764 | tbf_update_tokens(vifp); | |
1765 | ||
1766 | tbf_process_q(vifp); | |
1767 | ||
1768 | if (vifp->v_tbf->tbf_q_len) | |
1769 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1770 | } | |
1771 | ||
1772 | /* function that will selectively discard a member of the queue | |
1773 | * based on the precedence value and the priority | |
1774 | */ | |
1775 | static int | |
1776 | tbf_dq_sel(struct vif *vifp, struct ip *ip) | |
1777 | { | |
1778 | u_int p; | |
1779 | struct mbuf *m, *last; | |
1780 | struct mbuf **np; | |
1781 | struct tbf *t = vifp->v_tbf; | |
1782 | ||
1783 | p = priority(vifp, ip); | |
1784 | ||
1785 | np = &t->tbf_q; | |
1786 | last = NULL; | |
1787 | while ((m = *np) != NULL) { | |
1788 | if (p > priority(vifp, mtod(m, struct ip *))) { | |
1789 | *np = m->m_act; | |
1790 | /* If we're removing the last packet, fix the tail pointer */ | |
1791 | if (m == t->tbf_t) | |
1792 | t->tbf_t = last; | |
1793 | m_freem(m); | |
1794 | /* it's impossible for the queue to be empty, but | |
1795 | * we check anyway. */ | |
1796 | if (--t->tbf_q_len == 0) | |
1797 | t->tbf_t = NULL; | |
1798 | mrtstat.mrts_drop_sel++; | |
1799 | return(1); | |
1800 | } | |
1801 | np = &m->m_act; | |
1802 | last = m; | |
1803 | } | |
1804 | return(0); | |
1805 | } | |
1806 | ||
1807 | static void | |
1808 | tbf_send_packet(struct vif *vifp, struct mbuf *m) | |
1809 | { | |
1810 | struct ip_moptions imo; | |
1811 | int error; | |
1812 | static struct route ro; | |
1813 | ||
1814 | if (vifp->v_flags & VIFF_TUNNEL) { | |
1815 | /* If tunnel options */ | |
1816 | ip_output(m, (struct mbuf *)0, &vifp->v_route, | |
1817 | IP_FORWARDING, (struct ip_moptions *)0, NULL); | |
1818 | } else { | |
1819 | imo.imo_multicast_ifp = vifp->v_ifp; | |
1820 | imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; | |
1821 | imo.imo_multicast_loop = 1; | |
1822 | imo.imo_multicast_vif = -1; | |
1823 | ||
1824 | /* | |
1825 | * Re-entrancy should not be a problem here, because | |
1826 | * the packets that we send out and are looped back at us | |
1827 | * should get rejected because they appear to come from | |
1828 | * the loopback interface, thus preventing looping. | |
1829 | */ | |
1830 | error = ip_output(m, (struct mbuf *)0, &ro, | |
1831 | IP_FORWARDING, &imo, NULL); | |
1832 | ||
1833 | if (mrtdebug & DEBUG_XMIT) | |
1834 | log(LOG_DEBUG, "phyint_send on vif %d err %d\n", | |
1835 | vifp - viftable, error); | |
1836 | } | |
1837 | } | |
1838 | ||
1839 | /* determine the current time and then | |
1840 | * the elapsed time (between the last time and time now) | |
1841 | * in milliseconds & update the no. of tokens in the bucket | |
1842 | */ | |
1843 | static void | |
1844 | tbf_update_tokens(struct vif *vifp) | |
1845 | { | |
1846 | struct timeval tp; | |
1847 | u_int32_t tm; | |
1848 | struct tbf *t = vifp->v_tbf; | |
1849 | ||
1850 | GET_TIME(tp); | |
1851 | ||
1852 | TV_DELTA(tp, t->tbf_last_pkt_t, tm); | |
1853 | ||
1854 | /* | |
1855 | * This formula is actually | |
1856 | * "time in seconds" * "bytes/second". | |
1857 | * | |
1858 | * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) | |
1859 | * | |
1860 | * The (1000/1024) was introduced in add_vif to optimize | |
1861 | * this divide into a shift. | |
1862 | */ | |
1863 | t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; | |
1864 | t->tbf_last_pkt_t = tp; | |
1865 | ||
1866 | if (t->tbf_n_tok > MAX_BKT_SIZE) | |
1867 | t->tbf_n_tok = MAX_BKT_SIZE; | |
1868 | } | |
1869 | ||
1870 | static int | |
1871 | priority(__unused struct vif *vifp, struct ip *ip) | |
1872 | { | |
1873 | int prio; | |
1874 | ||
1875 | /* temporary hack; may add general packet classifier some day */ | |
1876 | ||
1877 | /* | |
1878 | * The UDP port space is divided up into four priority ranges: | |
1879 | * [0, 16384) : unclassified - lowest priority | |
1880 | * [16384, 32768) : audio - highest priority | |
1881 | * [32768, 49152) : whiteboard - medium priority | |
1882 | * [49152, 65536) : video - low priority | |
1883 | */ | |
1884 | if (ip->ip_p == IPPROTO_UDP) { | |
1885 | struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); | |
1886 | switch (ntohs(udp->uh_dport) & 0xc000) { | |
1887 | case 0x4000: | |
1888 | prio = 70; | |
1889 | break; | |
1890 | case 0x8000: | |
1891 | prio = 60; | |
1892 | break; | |
1893 | case 0xc000: | |
1894 | prio = 55; | |
1895 | break; | |
1896 | default: | |
1897 | prio = 50; | |
1898 | break; | |
1899 | } | |
1900 | if (tbfdebug > 1) | |
1901 | log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio); | |
1902 | } else { | |
1903 | prio = 50; | |
1904 | } | |
1905 | return prio; | |
1906 | } | |
1907 | ||
1908 | /* | |
1909 | * End of token bucket filter modifications | |
1910 | */ | |
1911 | ||
1912 | int | |
1913 | ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt) | |
1914 | { | |
1915 | int error, i; | |
1916 | ||
1917 | if (rsvpdebug) | |
1918 | printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", | |
1919 | so->so_type, so->so_proto->pr_protocol); | |
1920 | ||
1921 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1922 | return EOPNOTSUPP; | |
1923 | ||
1924 | /* Check mbuf. */ | |
1925 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1926 | if (error) | |
1927 | return (error); | |
1928 | ||
1929 | if (rsvpdebug) | |
1930 | printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on); | |
1931 | ||
1932 | /* Check vif. */ | |
1933 | if (!legal_vif_num(i)) { | |
1934 | return EADDRNOTAVAIL; | |
1935 | } | |
1936 | ||
1937 | /* Check if socket is available. */ | |
1938 | if (viftable[i].v_rsvpd != NULL) { | |
1939 | return EADDRINUSE; | |
1940 | } | |
1941 | ||
1942 | viftable[i].v_rsvpd = so; | |
1943 | /* This may seem silly, but we need to be sure we don't over-increment | |
1944 | * the RSVP counter, in case something slips up. | |
1945 | */ | |
1946 | if (!viftable[i].v_rsvp_on) { | |
1947 | viftable[i].v_rsvp_on = 1; | |
1948 | rsvp_on++; | |
1949 | } | |
1950 | ||
1951 | return 0; | |
1952 | } | |
1953 | ||
1954 | int | |
1955 | ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt) | |
1956 | { | |
1957 | int error, i; | |
1958 | ||
1959 | if (rsvpdebug) | |
1960 | printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", | |
1961 | so->so_type, so->so_proto->pr_protocol); | |
1962 | ||
1963 | if (so->so_type != SOCK_RAW || | |
1964 | so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1965 | return EOPNOTSUPP; | |
1966 | ||
1967 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1968 | if (error) | |
1969 | return (error); | |
1970 | ||
1971 | /* Check vif. */ | |
1972 | if (!legal_vif_num(i)) { | |
1973 | return EADDRNOTAVAIL; | |
1974 | } | |
1975 | ||
1976 | if (rsvpdebug) | |
1977 | printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n", | |
1978 | viftable[i].v_rsvpd, so); | |
1979 | ||
1980 | viftable[i].v_rsvpd = NULL; | |
1981 | /* | |
1982 | * This may seem silly, but we need to be sure we don't over-decrement | |
1983 | * the RSVP counter, in case something slips up. | |
1984 | */ | |
1985 | if (viftable[i].v_rsvp_on) { | |
1986 | viftable[i].v_rsvp_on = 0; | |
1987 | rsvp_on--; | |
1988 | } | |
1989 | ||
1990 | return 0; | |
1991 | } | |
1992 | ||
1993 | void | |
1994 | ip_rsvp_force_done(struct socket *so) | |
1995 | { | |
1996 | int vifi; | |
1997 | ||
1998 | /* Don't bother if it is not the right type of socket. */ | |
1999 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
2000 | return; | |
2001 | ||
2002 | /* The socket may be attached to more than one vif...this | |
2003 | * is perfectly legal. | |
2004 | */ | |
2005 | for (vifi = 0; vifi < numvifs; vifi++) { | |
2006 | if (viftable[vifi].v_rsvpd == so) { | |
2007 | viftable[vifi].v_rsvpd = NULL; | |
2008 | /* This may seem silly, but we need to be sure we don't | |
2009 | * over-decrement the RSVP counter, in case something slips up. | |
2010 | */ | |
2011 | if (viftable[vifi].v_rsvp_on) { | |
2012 | viftable[vifi].v_rsvp_on = 0; | |
2013 | rsvp_on--; | |
2014 | } | |
2015 | } | |
2016 | } | |
2017 | ||
2018 | return; | |
2019 | } | |
2020 | ||
2021 | void | |
2022 | rsvp_input(struct mbuf *m, int iphlen) | |
2023 | { | |
2024 | int vifi; | |
2025 | struct ip *ip = mtod(m, struct ip *); | |
2026 | static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET, | |
2027 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
2028 | struct ifnet *ifp; | |
2029 | ||
2030 | if (rsvpdebug) | |
2031 | printf("rsvp_input: rsvp_on %d\n",rsvp_on); | |
2032 | ||
2033 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
2034 | * of the group to which the RSVP packet is addressed. But in this | |
2035 | * case we want to throw the packet away. | |
2036 | */ | |
2037 | if (!rsvp_on) { | |
2038 | m_freem(m); | |
2039 | return; | |
2040 | } | |
2041 | ||
2042 | if (rsvpdebug) | |
2043 | printf("rsvp_input: check vifs\n"); | |
2044 | ||
2045 | #if DIAGNOSTIC | |
2046 | if (!(m->m_flags & M_PKTHDR)) | |
2047 | panic("rsvp_input no hdr"); | |
2048 | #endif | |
2049 | ||
2050 | ifp = m->m_pkthdr.rcvif; | |
2051 | /* Find which vif the packet arrived on. */ | |
2052 | for (vifi = 0; vifi < numvifs; vifi++) | |
2053 | if (viftable[vifi].v_ifp == ifp) | |
2054 | break; | |
2055 | ||
2056 | if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { | |
2057 | /* | |
2058 | * If the old-style non-vif-associated socket is set, | |
2059 | * then use it. Otherwise, drop packet since there | |
2060 | * is no specific socket for this vif. | |
2061 | */ | |
2062 | if (ip_rsvpd != NULL) { | |
2063 | if (rsvpdebug) | |
2064 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
2065 | rip_input(m, iphlen); /* xxx */ | |
2066 | } else { | |
2067 | if (rsvpdebug && vifi == numvifs) | |
2068 | printf("rsvp_input: Can't find vif for packet.\n"); | |
2069 | else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) | |
2070 | printf("rsvp_input: No socket defined for vif %d\n",vifi); | |
2071 | m_freem(m); | |
2072 | } | |
2073 | return; | |
2074 | } | |
2075 | rsvp_src.sin_addr = ip->ip_src; | |
2076 | ||
2077 | if (rsvpdebug && m) | |
2078 | printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", | |
2079 | m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); | |
2080 | ||
2081 | if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { | |
2082 | if (rsvpdebug) | |
2083 | printf("rsvp_input: Failed to append to socket\n"); | |
2084 | } else { | |
2085 | if (rsvpdebug) | |
2086 | printf("rsvp_input: send packet up\n"); | |
2087 | } | |
2088 | ||
2089 | } | |
2090 | ||
2091 | #if MROUTE_LKM | |
2092 | #include <sys/conf.h> | |
2093 | #include <sys/exec.h> | |
2094 | #include <sys/sysent.h> | |
2095 | #include <sys/lkm.h> | |
2096 | ||
2097 | MOD_MISC("ip_mroute_mod") | |
2098 | ||
2099 | static int | |
2100 | ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) | |
2101 | { | |
2102 | int i; | |
2103 | struct lkm_misc *args = lkmtp->private.lkm_misc; | |
2104 | int err = 0; | |
2105 | ||
2106 | switch(cmd) { | |
2107 | static int (*old_ip_mrouter_cmd)(); | |
2108 | static int (*old_ip_mrouter_done)(); | |
2109 | static int (*old_ip_mforward)(); | |
2110 | static int (*old_mrt_ioctl)(); | |
2111 | static void (*old_proto4_input)(); | |
2112 | static int (*old_legal_vif_num)(); | |
2113 | extern struct protosw inetsw[]; | |
2114 | ||
2115 | case LKM_E_LOAD: | |
2116 | if(lkmexists(lkmtp) || ip_mrtproto) | |
2117 | return(EEXIST); | |
2118 | old_ip_mrouter_cmd = ip_mrouter_cmd; | |
2119 | ip_mrouter_cmd = X_ip_mrouter_cmd; | |
2120 | old_ip_mrouter_done = ip_mrouter_done; | |
2121 | ip_mrouter_done = X_ip_mrouter_done; | |
2122 | old_ip_mforward = ip_mforward; | |
2123 | ip_mforward = X_ip_mforward; | |
2124 | old_mrt_ioctl = mrt_ioctl; | |
2125 | mrt_ioctl = X_mrt_ioctl; | |
2126 | old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input; | |
2127 | ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input; | |
2128 | old_legal_vif_num = legal_vif_num; | |
2129 | legal_vif_num = X_legal_vif_num; | |
2130 | ip_mrtproto = IGMP_DVMRP; | |
2131 | ||
2132 | printf("\nIP multicast routing loaded\n"); | |
2133 | break; | |
2134 | ||
2135 | case LKM_E_UNLOAD: | |
2136 | if (ip_mrouter) | |
2137 | return EINVAL; | |
2138 | ||
2139 | ip_mrouter_cmd = old_ip_mrouter_cmd; | |
2140 | ip_mrouter_done = old_ip_mrouter_done; | |
2141 | ip_mforward = old_ip_mforward; | |
2142 | mrt_ioctl = old_mrt_ioctl; | |
2143 | ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input; | |
2144 | legal_vif_num = old_legal_vif_num; | |
2145 | ip_mrtproto = 0; | |
2146 | break; | |
2147 | ||
2148 | default: | |
2149 | err = EINVAL; | |
2150 | break; | |
2151 | } | |
2152 | ||
2153 | return(err); | |
2154 | } | |
2155 | ||
2156 | int | |
2157 | ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { | |
2158 | DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, | |
2159 | nosys); | |
2160 | } | |
2161 | ||
2162 | #endif /* MROUTE_LKM */ | |
2163 | #endif /* MROUTING */ |