]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2007 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
30 | * support for mandatory and extensible security protections. This notice | |
31 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
32 | * Version 2.0. | |
33 | */ | |
34 | /* | |
35 | * IP multicast forwarding procedures | |
36 | * | |
37 | * Written by David Waitzman, BBN Labs, August 1988. | |
38 | * Modified by Steve Deering, Stanford, February 1989. | |
39 | * Modified by Mark J. Steiglitz, Stanford, May, 1991 | |
40 | * Modified by Van Jacobson, LBL, January 1993 | |
41 | * Modified by Ajit Thyagarajan, PARC, August 1993 | |
42 | * Modified by Bill Fenner, PARC, April 1995 | |
43 | * | |
44 | * MROUTING Revision: 3.5 | |
45 | * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $ | |
46 | */ | |
47 | ||
48 | ||
49 | #include <sys/param.h> | |
50 | #include <sys/systm.h> | |
51 | #include <sys/malloc.h> | |
52 | #include <sys/mbuf.h> | |
53 | #include <sys/socket.h> | |
54 | #include <sys/socketvar.h> | |
55 | #include <sys/protosw.h> | |
56 | #include <sys/time.h> | |
57 | #include <sys/kernel.h> | |
58 | #include <sys/sockio.h> | |
59 | #include <sys/syslog.h> | |
60 | #include <net/if.h> | |
61 | #include <net/route.h> | |
62 | #include <net/kpi_protocol.h> | |
63 | #include <netinet/in.h> | |
64 | #include <netinet/in_systm.h> | |
65 | #include <netinet/ip.h> | |
66 | #include <netinet/ip_var.h> | |
67 | #include <netinet/in_var.h> | |
68 | #include <netinet/igmp.h> | |
69 | #include <netinet/ip_mroute.h> | |
70 | #include <netinet/udp.h> | |
71 | ||
72 | #if CONFIG_MACF_NET | |
73 | #include <security/mac_framework.h> | |
74 | #endif | |
75 | ||
76 | #ifndef NTOHL | |
77 | #if BYTE_ORDER != BIG_ENDIAN | |
78 | #define NTOHL(d) ((d) = ntohl((d))) | |
79 | #define NTOHS(d) ((d) = ntohs((u_short)(d))) | |
80 | #define HTONL(d) ((d) = htonl((d))) | |
81 | #define HTONS(d) ((d) = htons((u_short)(d))) | |
82 | #else | |
83 | #define NTOHL(d) | |
84 | #define NTOHS(d) | |
85 | #define HTONL(d) | |
86 | #define HTONS(d) | |
87 | #endif | |
88 | #endif | |
89 | ||
90 | #ifndef MROUTING | |
91 | extern u_long _ip_mcast_src(int vifi); | |
92 | extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, | |
93 | struct mbuf *m, struct ip_moptions *imo); | |
94 | extern int _ip_mrouter_done(void); | |
95 | extern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); | |
96 | extern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); | |
97 | extern int _mrt_ioctl(int req, caddr_t data, struct proc *p); | |
98 | ||
99 | /* | |
100 | * Dummy routines and globals used when multicast routing is not compiled in. | |
101 | */ | |
102 | ||
103 | struct socket *ip_mrouter = NULL; | |
104 | u_int rsvpdebug = 0; | |
105 | ||
106 | int | |
107 | _ip_mrouter_set(__unused struct socket *so, | |
108 | __unused struct sockopt *sopt) | |
109 | { | |
110 | return(EOPNOTSUPP); | |
111 | } | |
112 | ||
113 | int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set; | |
114 | ||
115 | ||
116 | int | |
117 | _ip_mrouter_get(__unused struct socket *so, | |
118 | __unused sockopt *sopt) | |
119 | { | |
120 | return(EOPNOTSUPP); | |
121 | } | |
122 | ||
123 | int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get; | |
124 | ||
/*
 * Stand-in shutdown routine used when multicast routing is not
 * compiled in.  There is no state to tear down, so it always
 * reports success (0).
 */
int
_ip_mrouter_done(void)
{
	return 0;
}

/* Dispatch hook; in this build it is bound to the stub above. */
int (*ip_mrouter_done)(void) = _ip_mrouter_done;
132 | ||
133 | int | |
134 | _ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp, | |
135 | __unused struct mbuf *m, __unused ip_moptions *imo) | |
136 | { | |
137 | return(0); | |
138 | } | |
139 | ||
140 | int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, | |
141 | struct ip_moptions *) = _ip_mforward; | |
142 | ||
143 | int | |
144 | _mrt_ioctl(__unused int req, __unused caddr_t data, __unused struct proc *p) | |
145 | { | |
146 | return EOPNOTSUPP; | |
147 | } | |
148 | ||
149 | int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl; | |
150 | ||
151 | void | |
152 | rsvp_input(struct mbuf *m, int iphlen) /* XXX must fixup manually */ | |
153 | { | |
154 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
155 | * of the group to which the RSVP packet is addressed. But in this | |
156 | * case we want to throw the packet away. | |
157 | */ | |
158 | if (!rsvp_on) { | |
159 | m_freem(m); | |
160 | return; | |
161 | } | |
162 | ||
163 | if (ip_rsvpd != NULL) { | |
164 | if (rsvpdebug) | |
165 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
166 | rip_input(m, iphlen); | |
167 | return; | |
168 | } | |
169 | /* Drop the packet */ | |
170 | m_freem(m); | |
171 | } | |
172 | ||
/*
 * IP-in-IP input for the build without multicast routing: the packet
 * is simply passed on to rip_input() with the same header length.
 */
void
ipip_input(struct mbuf *m, int iphlen)	/* XXX must fixup manually */
{
	rip_input(m, iphlen);
}
176 | ||
177 | int (*legal_vif_num)(int) = 0; | |
178 | ||
179 | /* | |
180 | * This should never be called, since IP_MULTICAST_VIF should fail, but | |
181 | * just in case it does get called, the code a little lower in ip_output | |
182 | * will assign the packet a local address. | |
183 | */ | |
184 | u_long | |
185 | _ip_mcast_src(int vifi) { return INADDR_ANY; } | |
186 | u_long (*ip_mcast_src)(int) = _ip_mcast_src; | |
187 | ||
188 | int | |
189 | ip_rsvp_vif_init(so, sopt) | |
190 | struct socket *so; | |
191 | struct sockopt *sopt; | |
192 | { | |
193 | return(EINVAL); | |
194 | } | |
195 | ||
196 | int | |
197 | ip_rsvp_vif_done(so, sopt) | |
198 | struct socket *so; | |
199 | struct sockopt *sopt; | |
200 | { | |
201 | return(EINVAL); | |
202 | } | |
203 | ||
204 | void | |
205 | ip_rsvp_force_done(so) | |
206 | struct socket *so; | |
207 | { | |
208 | return; | |
209 | } | |
210 | ||
211 | #else /* MROUTING */ | |
212 | ||
213 | #define M_HASCL(m) ((m)->m_flags & M_EXT) | |
214 | ||
215 | #define INSIZ sizeof(struct in_addr) | |
216 | #define same(a1, a2) \ | |
217 | (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) | |
218 | ||
219 | ||
220 | /* | |
221 | * Globals. All but ip_mrouter and ip_mrtproto could be static, | |
222 | * except for netstat or debugging purposes. | |
223 | */ | |
224 | #ifndef MROUTE_LKM | |
225 | struct socket *ip_mrouter = NULL; | |
226 | static struct mrtstat mrtstat; | |
227 | #else /* MROUTE_LKM */ | |
228 | extern void X_ipip_input(struct mbuf *m, int iphlen); | |
229 | extern struct mrtstat mrtstat; | |
230 | static int ip_mrtproto; | |
231 | #endif | |
232 | ||
233 | #define NO_RTE_FOUND 0x1 | |
234 | #define RTE_FOUND 0x2 | |
235 | ||
236 | static struct mfc *mfctable[CONFIG_MFCTBLSIZ]; | |
237 | static u_char nexpire[CONFIG_MFCTBLSIZ]; | |
238 | static struct vif viftable[CONFIG_MAXVIFS]; | |
239 | static u_int mrtdebug = 0; /* debug level */ | |
240 | #define DEBUG_MFC 0x02 | |
241 | #define DEBUG_FORWARD 0x04 | |
242 | #define DEBUG_EXPIRE 0x08 | |
243 | #define DEBUG_XMIT 0x10 | |
244 | static u_int tbfdebug = 0; /* tbf debug level */ | |
245 | static u_int rsvpdebug = 0; /* rsvp debug level */ | |
246 | ||
247 | #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ | |
248 | #define UPCALL_EXPIRE 6 /* number of timeouts */ | |
249 | ||
250 | /* | |
251 | * Define the token bucket filter structures | |
252 | * tbftable -> each vif has one of these for storing info | |
253 | */ | |
254 | ||
255 | static struct tbf tbftable[CONFIG_MAXVIFS]; | |
256 | #define TBF_REPROCESS (hz / 100) /* 100x / second */ | |
257 | ||
258 | /* | |
259 | * 'Interfaces' associated with decapsulator (so we can tell | |
260 | * packets that went through it from ones that get reflected | |
261 | * by a broken gateway). These interfaces are never linked into | |
262 | * the system ifnet list & no routes point to them. I.e., packets | |
263 | * can't be sent this way. They only exist as a placeholder for | |
264 | * multicast source verification. | |
265 | */ | |
266 | static struct ifnet multicast_decap_if[CONFIG_MAXVIFS]; | |
267 | ||
268 | #define ENCAP_TTL 64 | |
269 | #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ | |
270 | ||
271 | /* prototype IP hdr for encapsulated packets */ | |
272 | static struct ip multicast_encap_iphdr = { | |
273 | #if BYTE_ORDER == LITTLE_ENDIAN | |
274 | sizeof(struct ip) >> 2, IPVERSION, | |
275 | #else | |
276 | IPVERSION, sizeof(struct ip) >> 2, | |
277 | #endif | |
278 | 0, /* tos */ | |
279 | sizeof(struct ip), /* total length */ | |
280 | 0, /* id */ | |
281 | 0, /* frag offset */ | |
282 | ENCAP_TTL, ENCAP_PROTO, | |
283 | 0, /* checksum */ | |
284 | { 0 }, { 0 } | |
285 | }; | |
286 | ||
287 | /* | |
288 | * Private variables. | |
289 | */ | |
290 | static vifi_t numvifs = 0; | |
291 | static int have_encap_tunnel = 0; | |
292 | ||
293 | /* | |
294 | * one-back cache used by ipip_input to locate a tunnel's vif | |
295 | * given a datagram's src ip address. | |
296 | */ | |
297 | static u_long last_encap_src; | |
298 | static struct vif *last_encap_vif; | |
299 | ||
300 | static u_long X_ip_mcast_src(int vifi); | |
301 | static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); | |
302 | static int X_ip_mrouter_done(void); | |
303 | static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); | |
304 | static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); | |
305 | static int X_legal_vif_num(int vif); | |
306 | static int X_mrt_ioctl(int cmd, caddr_t data); | |
307 | ||
308 | static int get_sg_cnt(struct sioc_sg_req *); | |
309 | static int get_vif_cnt(struct sioc_vif_req *); | |
310 | static int ip_mrouter_init(struct socket *, int); | |
311 | static int add_vif(struct vifctl *); | |
312 | static int del_vif(vifi_t); | |
313 | static int add_mfc(struct mfcctl *); | |
314 | static int del_mfc(struct mfcctl *); | |
315 | static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); | |
316 | static int set_assert(int); | |
317 | static void expire_upcalls(void *); | |
318 | static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, | |
319 | vifi_t); | |
320 | static void phyint_send(struct ip *, struct vif *, struct mbuf *); | |
321 | static void encap_send(struct ip *, struct vif *, struct mbuf *); | |
322 | static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); | |
323 | static void tbf_queue(struct vif *, struct mbuf *); | |
324 | static void tbf_process_q(struct vif *); | |
325 | static void tbf_reprocess_q(void *); | |
326 | static int tbf_dq_sel(struct vif *, struct ip *); | |
327 | static void tbf_send_packet(struct vif *, struct mbuf *); | |
328 | static void tbf_update_tokens(struct vif *); | |
329 | static int priority(struct vif *, struct ip *); | |
330 | void multiencap_decap(struct mbuf *); | |
331 | ||
332 | /* | |
333 | * whether or not special PIM assert processing is enabled. | |
334 | */ | |
335 | static int pim_assert; | |
336 | /* | |
337 | * Rate limit for assert notification messages, in usec | |
338 | */ | |
339 | #define ASSERT_MSG_TIME 3000000 | |
340 | ||
/*
 * Hash function for a source, group entry.
 *
 * Each address is XOR-folded with copies of itself shifted right by
 * 10 and by 20 bits, the two folded values are XORed together, and
 * MFCHASHMOD (defined elsewhere) reduces the result to a table index.
 */
#define MFCHASH(a, g)							\
	MFCHASHMOD((a) ^ ((a) >> 10) ^ ((a) >> 20) ^			\
		   (g) ^ ((g) >> 10) ^ ((g) >> 20))
346 | ||
/*
 * Find a route for a given origin IP address and Multicast group address
 * Type of service parameter to be added in the future!!!
 *
 * Walks the mfctable[] hash chain selected by MFCHASH(o, g) and stores
 * in 'rt' the first entry whose origin and group match and whose
 * mfc_stall list is empty, or NULL when no such entry exists.
 * mrtstat.mrts_mfc_lookups is bumped on every use and
 * mrtstat.mrts_mfc_misses on every failed lookup.
 *
 * 'o' and 'g' are evaluated more than once, so pass expressions
 * without side effects.  'rt' must be an assignable struct mfc pointer.
 * The expansion is a brace block, not a single expression.
 */

#define MFCFIND(o, g, rt) {						\
	struct mfc *_rt = mfctable[MFCHASH((o), (g))];			\
	(rt) = NULL;							\
	++mrtstat.mrts_mfc_lookups;					\
	while (_rt) {							\
		if ((_rt->mfc_origin.s_addr == (o)) &&			\
		    (_rt->mfc_mcastgrp.s_addr == (g)) &&		\
		    (_rt->mfc_stall == NULL)) {				\
			(rt) = _rt;					\
			break;						\
		}							\
		_rt = _rt->mfc_next;					\
	}								\
	if ((rt) == NULL) {						\
		++mrtstat.mrts_mfc_misses;				\
	}								\
}
369 | ||
370 | ||
/*
 * Macros to compute elapsed time efficiently
 * Borrowed from Van Jacobson's scheduling code
 *
 * TV_DELTA(a, b, delta) stores in 'delta' the difference a - b of two
 * timeval-like values, expressed in microseconds.  Differences of one
 * or two whole seconds are handled by plain additions; any other
 * non-zero second count is multiplied out.  The expansion is a brace
 * block that declares a local 'xxs'.
 */
#define TV_DELTA(a, b, delta) {						\
	int xxs;							\
									\
	delta = (a).tv_usec - (b).tv_usec;				\
	xxs = (a).tv_sec - (b).tv_sec;					\
	if (xxs == 1) {							\
		delta += 1000000;					\
	} else if (xxs == 2) {						\
		delta += 1000000;					\
		delta += 1000000;					\
	} else if (xxs != 0) {						\
		delta += (1000000 * xxs);				\
	}								\
}
392 | ||
/*
 * TV_LT(a, b) is non-zero when timeval-like value 'a' is strictly
 * earlier than 'b': either its seconds are smaller, or the seconds
 * are equal and its microseconds are smaller.
 */
#define TV_LT(a, b)							\
	((a).tv_sec < (b).tv_sec ||					\
	 ((a).tv_sec == (b).tv_sec && (a).tv_usec < (b).tv_usec))
395 | ||
396 | #if UPCALL_TIMING | |
397 | u_long upcall_data[51]; | |
398 | static void collate(struct timeval *); | |
399 | #endif /* UPCALL_TIMING */ | |
400 | ||
401 | ||
402 | /* | |
403 | * Handle MRT setsockopt commands to modify the multicast routing tables. | |
404 | */ | |
405 | static int | |
406 | X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) | |
407 | { | |
408 | int error, optval; | |
409 | vifi_t vifi; | |
410 | struct vifctl vifc; | |
411 | struct mfcctl mfc; | |
412 | ||
413 | if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) | |
414 | return (EPERM); | |
415 | ||
416 | error = 0; | |
417 | switch (sopt->sopt_name) { | |
418 | case MRT_INIT: | |
419 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
420 | sizeof optval); | |
421 | if (error) | |
422 | break; | |
423 | error = ip_mrouter_init(so, optval); | |
424 | break; | |
425 | ||
426 | case MRT_DONE: | |
427 | error = ip_mrouter_done(); | |
428 | break; | |
429 | ||
430 | case MRT_ADD_VIF: | |
431 | error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); | |
432 | if (error) | |
433 | break; | |
434 | error = add_vif(&vifc); | |
435 | break; | |
436 | ||
437 | case MRT_DEL_VIF: | |
438 | error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); | |
439 | if (error) | |
440 | break; | |
441 | error = del_vif(vifi); | |
442 | break; | |
443 | ||
444 | case MRT_ADD_MFC: | |
445 | case MRT_DEL_MFC: | |
446 | error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); | |
447 | if (error) | |
448 | break; | |
449 | if (sopt->sopt_name == MRT_ADD_MFC) | |
450 | error = add_mfc(&mfc); | |
451 | else | |
452 | error = del_mfc(&mfc); | |
453 | break; | |
454 | ||
455 | case MRT_ASSERT: | |
456 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
457 | sizeof optval); | |
458 | if (error) | |
459 | break; | |
460 | set_assert(optval); | |
461 | break; | |
462 | ||
463 | default: | |
464 | error = EOPNOTSUPP; | |
465 | break; | |
466 | } | |
467 | return (error); | |
468 | } | |
469 | ||
470 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
471 | int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set; | |
472 | #endif | |
473 | ||
474 | /* | |
475 | * Handle MRT getsockopt commands | |
476 | */ | |
477 | static int | |
478 | X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt) | |
479 | { | |
480 | int error; | |
481 | static int vers = 0x0305; /* !!! why is this here? XXX */ | |
482 | ||
483 | switch (sopt->sopt_name) { | |
484 | case MRT_VERSION: | |
485 | error = sooptcopyout(sopt, &vers, sizeof vers); | |
486 | break; | |
487 | ||
488 | case MRT_ASSERT: | |
489 | error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); | |
490 | break; | |
491 | default: | |
492 | error = EOPNOTSUPP; | |
493 | break; | |
494 | } | |
495 | return (error); | |
496 | } | |
497 | ||
498 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
499 | int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get; | |
500 | #endif | |
501 | ||
502 | /* | |
503 | * Handle ioctl commands to obtain information from the cache | |
504 | */ | |
505 | static int | |
506 | X_mrt_ioctl(int cmd, caddr_t data) | |
507 | { | |
508 | int error = 0; | |
509 | ||
510 | switch (cmd) { | |
511 | case (SIOCGETVIFCNT): | |
512 | return (get_vif_cnt((struct sioc_vif_req *)data)); | |
513 | break; | |
514 | case (SIOCGETSGCNT): | |
515 | return (get_sg_cnt((struct sioc_sg_req *)data)); | |
516 | break; | |
517 | default: | |
518 | return (EINVAL); | |
519 | break; | |
520 | } | |
521 | return error; | |
522 | } | |
523 | ||
524 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
525 | int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl; | |
526 | #endif | |
527 | ||
528 | /* | |
529 | * returns the packet, byte, rpf-failure count for the source group provided | |
530 | */ | |
531 | static int | |
532 | get_sg_cnt(struct sioc_sg_req *req) | |
533 | { | |
534 | struct mfc *rt; | |
535 | ||
536 | MFCFIND(req->src.s_addr, req->grp.s_addr, rt); | |
537 | if (rt != NULL) { | |
538 | req->pktcnt = rt->mfc_pkt_cnt; | |
539 | req->bytecnt = rt->mfc_byte_cnt; | |
540 | req->wrong_if = rt->mfc_wrong_if; | |
541 | } else | |
542 | req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; | |
543 | ||
544 | return 0; | |
545 | } | |
546 | ||
547 | /* | |
548 | * returns the input and output packet and byte counts on the vif provided | |
549 | */ | |
550 | static int | |
551 | get_vif_cnt(struct sioc_vif_req *req) | |
552 | { | |
553 | vifi_t vifi = req->vifi; | |
554 | ||
555 | if (vifi >= numvifs) return EINVAL; | |
556 | ||
557 | req->icount = viftable[vifi].v_pkt_in; | |
558 | req->ocount = viftable[vifi].v_pkt_out; | |
559 | req->ibytes = viftable[vifi].v_bytes_in; | |
560 | req->obytes = viftable[vifi].v_bytes_out; | |
561 | ||
562 | return 0; | |
563 | } | |
564 | ||
565 | /* | |
566 | * Enable multicast routing | |
567 | */ | |
568 | static int | |
569 | ip_mrouter_init(struct socket *so, int vers) | |
570 | { | |
571 | if (mrtdebug) | |
572 | log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n", | |
573 | so->so_type, so->so_proto->pr_protocol); | |
574 | ||
575 | if (so->so_type != SOCK_RAW || | |
576 | so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; | |
577 | ||
578 | if (vers != 1) | |
579 | return ENOPROTOOPT; | |
580 | ||
581 | if (ip_mrouter != NULL) return EADDRINUSE; | |
582 | ||
583 | ip_mrouter = so; | |
584 | ||
585 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
586 | bzero((caddr_t)nexpire, sizeof(nexpire)); | |
587 | ||
588 | pim_assert = 0; | |
589 | ||
590 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
591 | ||
592 | if (mrtdebug) | |
593 | log(LOG_DEBUG, "ip_mrouter_init\n"); | |
594 | ||
595 | return 0; | |
596 | } | |
597 | ||
598 | /* | |
599 | * Disable multicast routing | |
600 | */ | |
601 | static int | |
602 | X_ip_mrouter_done(void) | |
603 | { | |
604 | vifi_t vifi; | |
605 | int i; | |
606 | struct ifnet *ifp; | |
607 | struct ifreq ifr; | |
608 | struct mfc *rt; | |
609 | struct rtdetq *rte; | |
610 | ||
611 | /* | |
612 | * For each phyint in use, disable promiscuous reception of all IP | |
613 | * multicasts. | |
614 | */ | |
615 | for (vifi = 0; vifi < numvifs; vifi++) { | |
616 | if (viftable[vifi].v_lcl_addr.s_addr != 0 && | |
617 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
618 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
619 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr | |
620 | = INADDR_ANY; | |
621 | ifp = viftable[vifi].v_ifp; | |
622 | if_allmulti(ifp, 0); | |
623 | } | |
624 | } | |
625 | bzero((caddr_t)tbftable, sizeof(tbftable)); | |
626 | bzero((caddr_t)viftable, sizeof(viftable)); | |
627 | numvifs = 0; | |
628 | pim_assert = 0; | |
629 | ||
630 | untimeout(expire_upcalls, (caddr_t)NULL); | |
631 | ||
632 | /* | |
633 | * Free all multicast forwarding cache entries. | |
634 | */ | |
635 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
636 | for (rt = mfctable[i]; rt != NULL; ) { | |
637 | struct mfc *nr = rt->mfc_next; | |
638 | ||
639 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
640 | struct rtdetq *n = rte->next; | |
641 | ||
642 | m_freem(rte->m); | |
643 | FREE(rte, M_MRTABLE); | |
644 | rte = n; | |
645 | } | |
646 | FREE(rt, M_MRTABLE); | |
647 | rt = nr; | |
648 | } | |
649 | } | |
650 | ||
651 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
652 | ||
653 | /* | |
654 | * Reset de-encapsulation cache | |
655 | */ | |
656 | last_encap_src = 0; | |
657 | last_encap_vif = NULL; | |
658 | have_encap_tunnel = 0; | |
659 | ||
660 | ip_mrouter = NULL; | |
661 | ||
662 | if (mrtdebug) | |
663 | log(LOG_DEBUG, "ip_mrouter_done\n"); | |
664 | ||
665 | return 0; | |
666 | } | |
667 | ||
668 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
669 | int (*ip_mrouter_done)(void) = X_ip_mrouter_done; | |
670 | #endif | |
671 | ||
672 | /* | |
673 | * Set PIM assert processing global | |
674 | */ | |
675 | static int | |
676 | set_assert(int i) | |
677 | { | |
678 | if ((i != 1) && (i != 0)) | |
679 | return EINVAL; | |
680 | ||
681 | pim_assert = i; | |
682 | ||
683 | return 0; | |
684 | } | |
685 | ||
686 | /* | |
687 | * Add a vif to the vif table | |
688 | */ | |
689 | static int | |
690 | add_vif(struct vifctl *vifcp) | |
691 | { | |
692 | struct vif *vifp = viftable + vifcp->vifc_vifi; | |
693 | static struct sockaddr_in sin = { sizeof sin, AF_INET, | |
694 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
695 | struct ifaddr *ifa; | |
696 | struct ifnet *ifp; | |
697 | int error, s; | |
698 | struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; | |
699 | ||
700 | if (vifcp->vifc_vifi >= CONFIG_MAXVIFS) return EINVAL; | |
701 | if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; | |
702 | ||
703 | /* Find the interface with an address in AF_INET family */ | |
704 | sin.sin_addr = vifcp->vifc_lcl_addr; | |
705 | ifa = ifa_ifwithaddr((struct sockaddr *)&sin); | |
706 | if (ifa == 0) return EADDRNOTAVAIL; | |
707 | ifp = ifa->ifa_ifp; | |
708 | ifafree(ifa); | |
709 | ifa = NULL; | |
710 | ||
711 | if (vifcp->vifc_flags & VIFF_TUNNEL) { | |
712 | if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { | |
713 | /* | |
714 | * An encapsulating tunnel is wanted. Tell ipip_input() to | |
715 | * start paying attention to encapsulated packets. | |
716 | */ | |
717 | if (have_encap_tunnel == 0) { | |
718 | have_encap_tunnel = 1; | |
719 | for (s = 0; s < CONFIG_MAXVIFS; ++s) { | |
720 | multicast_decap_if[s].if_name = "mdecap"; | |
721 | multicast_decap_if[s].if_unit = s; | |
722 | multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP; | |
723 | } | |
724 | } | |
725 | /* | |
726 | * Set interface to fake encapsulator interface | |
727 | */ | |
728 | ifp = &multicast_decap_if[vifcp->vifc_vifi]; | |
729 | /* | |
730 | * Prepare cached route entry | |
731 | */ | |
732 | bzero(&vifp->v_route, sizeof(vifp->v_route)); | |
733 | } else { | |
734 | log(LOG_ERR, "source routed tunnels not supported\n"); | |
735 | return EOPNOTSUPP; | |
736 | } | |
737 | } else { | |
738 | /* Make sure the interface supports multicast */ | |
739 | if ((ifp->if_flags & IFF_MULTICAST) == 0) | |
740 | return EOPNOTSUPP; | |
741 | ||
742 | /* Enable promiscuous reception of all IP multicasts from the if */ | |
743 | error = if_allmulti(ifp, 1); | |
744 | if (error) | |
745 | return error; | |
746 | } | |
747 | ||
748 | /* define parameters for the tbf structure */ | |
749 | vifp->v_tbf = v_tbf; | |
750 | GET_TIME(vifp->v_tbf->tbf_last_pkt_t); | |
751 | vifp->v_tbf->tbf_n_tok = 0; | |
752 | vifp->v_tbf->tbf_q_len = 0; | |
753 | vifp->v_tbf->tbf_max_q_len = MAXQSIZE; | |
754 | vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; | |
755 | ||
756 | vifp->v_flags = vifcp->vifc_flags; | |
757 | vifp->v_threshold = vifcp->vifc_threshold; | |
758 | vifp->v_lcl_addr = vifcp->vifc_lcl_addr; | |
759 | vifp->v_rmt_addr = vifcp->vifc_rmt_addr; | |
760 | vifp->v_ifp = ifp; | |
761 | /* scaling up here allows division by 1024 in critical code */ | |
762 | vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; | |
763 | vifp->v_rsvp_on = 0; | |
764 | vifp->v_rsvpd = NULL; | |
765 | /* initialize per vif pkt counters */ | |
766 | vifp->v_pkt_in = 0; | |
767 | vifp->v_pkt_out = 0; | |
768 | vifp->v_bytes_in = 0; | |
769 | vifp->v_bytes_out = 0; | |
770 | ||
771 | /* Adjust numvifs up if the vifi is higher than numvifs */ | |
772 | if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; | |
773 | ||
774 | if (mrtdebug) | |
775 | log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", | |
776 | vifcp->vifc_vifi, | |
777 | (u_long)ntohl(vifcp->vifc_lcl_addr.s_addr), | |
778 | (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", | |
779 | (u_long)ntohl(vifcp->vifc_rmt_addr.s_addr), | |
780 | vifcp->vifc_threshold, | |
781 | vifcp->vifc_rate_limit); | |
782 | ||
783 | return 0; | |
784 | } | |
785 | ||
786 | /* | |
787 | * Delete a vif from the vif table | |
788 | */ | |
789 | static int | |
790 | del_vif(vifi_t vifi) | |
791 | { | |
792 | struct vif *vifp = &viftable[vifi]; | |
793 | struct mbuf *m; | |
794 | struct ifnet *ifp; | |
795 | struct ifreq ifr; | |
796 | ||
797 | if (vifi >= numvifs) return EINVAL; | |
798 | if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; | |
799 | ||
800 | if (!(vifp->v_flags & VIFF_TUNNEL)) { | |
801 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
802 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; | |
803 | ifp = vifp->v_ifp; | |
804 | if_allmulti(ifp, 0); | |
805 | } | |
806 | ||
807 | if (vifp == last_encap_vif) { | |
808 | last_encap_vif = 0; | |
809 | last_encap_src = 0; | |
810 | } | |
811 | ||
812 | /* | |
813 | * Free packets queued at the interface | |
814 | */ | |
815 | while (vifp->v_tbf->tbf_q) { | |
816 | m = vifp->v_tbf->tbf_q; | |
817 | vifp->v_tbf->tbf_q = m->m_act; | |
818 | m_freem(m); | |
819 | } | |
820 | ||
821 | bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); | |
822 | bzero((caddr_t)vifp, sizeof (*vifp)); | |
823 | ||
824 | if (mrtdebug) | |
825 | log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); | |
826 | ||
827 | /* Adjust numvifs down */ | |
828 | for (vifi = numvifs; vifi > 0; vifi--) | |
829 | if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; | |
830 | numvifs = vifi; | |
831 | ||
832 | return 0; | |
833 | } | |
834 | ||
835 | /* | |
836 | * Add an mfc entry | |
837 | */ | |
838 | static int | |
839 | add_mfc(struct mfcctl *mfccp) | |
840 | { | |
841 | struct mfc *rt; | |
842 | u_long hash; | |
843 | struct rtdetq *rte; | |
844 | u_short nstl; | |
845 | int i; | |
846 | ||
847 | MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); | |
848 | ||
849 | /* If an entry already exists, just update the fields */ | |
850 | if (rt) { | |
851 | if (mrtdebug & DEBUG_MFC) | |
852 | log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", | |
853 | (u_long)ntohl(mfccp->mfcc_origin.s_addr), | |
854 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
855 | mfccp->mfcc_parent); | |
856 | ||
857 | rt->mfc_parent = mfccp->mfcc_parent; | |
858 | for (i = 0; i < numvifs; i++) | |
859 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
860 | return 0; | |
861 | } | |
862 | ||
863 | /* | |
864 | * Find the entry for which the upcall was made and update | |
865 | */ | |
866 | hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); | |
867 | for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { | |
868 | ||
869 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
870 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && | |
871 | (rt->mfc_stall != NULL)) { | |
872 | ||
873 | if (nstl++) | |
874 | log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", | |
875 | "multiple kernel entries", | |
876 | (u_long)ntohl(mfccp->mfcc_origin.s_addr), | |
877 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
878 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
879 | ||
880 | if (mrtdebug & DEBUG_MFC) | |
881 | log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", | |
882 | (u_long)ntohl(mfccp->mfcc_origin.s_addr), | |
883 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
884 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
885 | ||
886 | rt->mfc_origin = mfccp->mfcc_origin; | |
887 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
888 | rt->mfc_parent = mfccp->mfcc_parent; | |
889 | for (i = 0; i < numvifs; i++) | |
890 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
891 | /* initialize pkt counters per src-grp */ | |
892 | rt->mfc_pkt_cnt = 0; | |
893 | rt->mfc_byte_cnt = 0; | |
894 | rt->mfc_wrong_if = 0; | |
895 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
896 | ||
897 | rt->mfc_expire = 0; /* Don't clean this guy up */ | |
898 | nexpire[hash]--; | |
899 | ||
900 | /* free packets Qed at the end of this entry */ | |
901 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
902 | struct rtdetq *n = rte->next; | |
903 | ||
904 | ip_mdq(rte->m, rte->ifp, rt, -1); | |
905 | m_freem(rte->m); | |
906 | #if UPCALL_TIMING | |
907 | collate(&(rte->t)); | |
908 | #endif /* UPCALL_TIMING */ | |
909 | FREE(rte, M_MRTABLE); | |
910 | rte = n; | |
911 | } | |
912 | rt->mfc_stall = NULL; | |
913 | } | |
914 | } | |
915 | ||
916 | /* | |
917 | * It is possible that an entry is being inserted without an upcall | |
918 | */ | |
919 | if (nstl == 0) { | |
920 | if (mrtdebug & DEBUG_MFC) | |
921 | log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", | |
922 | hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), | |
923 | (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
924 | mfccp->mfcc_parent); | |
925 | ||
926 | for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { | |
927 | ||
928 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
929 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { | |
930 | ||
931 | rt->mfc_origin = mfccp->mfcc_origin; | |
932 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
933 | rt->mfc_parent = mfccp->mfcc_parent; | |
934 | for (i = 0; i < numvifs; i++) | |
935 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
936 | /* initialize pkt counters per src-grp */ | |
937 | rt->mfc_pkt_cnt = 0; | |
938 | rt->mfc_byte_cnt = 0; | |
939 | rt->mfc_wrong_if = 0; | |
940 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
941 | if (rt->mfc_expire) | |
942 | nexpire[hash]--; | |
943 | rt->mfc_expire = 0; | |
944 | } | |
945 | } | |
946 | if (rt == NULL) { | |
947 | /* no upcall, so make a new entry */ | |
948 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
949 | if (rt == NULL) { | |
950 | return ENOBUFS; | |
951 | } | |
952 | ||
953 | /* insert new entry at head of hash chain */ | |
954 | rt->mfc_origin = mfccp->mfcc_origin; | |
955 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
956 | rt->mfc_parent = mfccp->mfcc_parent; | |
957 | for (i = 0; i < numvifs; i++) | |
958 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
959 | /* initialize pkt counters per src-grp */ | |
960 | rt->mfc_pkt_cnt = 0; | |
961 | rt->mfc_byte_cnt = 0; | |
962 | rt->mfc_wrong_if = 0; | |
963 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
964 | rt->mfc_expire = 0; | |
965 | rt->mfc_stall = NULL; | |
966 | ||
967 | /* link into table */ | |
968 | rt->mfc_next = mfctable[hash]; | |
969 | mfctable[hash] = rt; | |
970 | } | |
971 | } | |
972 | return 0; | |
973 | } | |
974 | ||
#if UPCALL_TIMING
/*
 * Collect delay statistics on the upcalls.
 *
 * Takes the timestamp stored with a queued packet, measures the time
 * elapsed up to now, and bumps the matching slot of upcall_data[].
 * Slots are 1024 delta units wide; slot 50 absorbs everything larger.
 */
static void
collate(struct timeval *t)
{
	struct timeval now;
	u_long elapsed;
	u_long slot;

	GET_TIME(now);

	/* Only timestamps that lie before "now" are counted. */
	if (!(TV_LT(*t, now)))
		return;

	TV_DELTA(now, *t, elapsed);

	slot = elapsed >> 10;
	++upcall_data[slot > 50 ? 50 : slot];
}
#endif /* UPCALL_TIMING */
1000 | ||
1001 | /* | |
1002 | * Delete an mfc entry | |
1003 | */ | |
1004 | static int | |
1005 | del_mfc(struct mfcctl *mfccp) | |
1006 | { | |
1007 | struct in_addr origin; | |
1008 | struct in_addr mcastgrp; | |
1009 | struct mfc *rt; | |
1010 | struct mfc **nptr; | |
1011 | u_long hash; | |
1012 | ||
1013 | origin = mfccp->mfcc_origin; | |
1014 | mcastgrp = mfccp->mfcc_mcastgrp; | |
1015 | hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); | |
1016 | ||
1017 | if (mrtdebug & DEBUG_MFC) | |
1018 | log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", | |
1019 | (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); | |
1020 | ||
1021 | nptr = &mfctable[hash]; | |
1022 | while ((rt = *nptr) != NULL) { | |
1023 | if (origin.s_addr == rt->mfc_origin.s_addr && | |
1024 | mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && | |
1025 | rt->mfc_stall == NULL) | |
1026 | break; | |
1027 | ||
1028 | nptr = &rt->mfc_next; | |
1029 | } | |
1030 | if (rt == NULL) { | |
1031 | return EADDRNOTAVAIL; | |
1032 | } | |
1033 | ||
1034 | *nptr = rt->mfc_next; | |
1035 | FREE(rt, M_MRTABLE); | |
1036 | ||
1037 | return 0; | |
1038 | } | |
1039 | ||
1040 | /* | |
1041 | * Send a message to mrouted on the multicast routing socket | |
1042 | */ | |
1043 | static int | |
1044 | socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) | |
1045 | { | |
1046 | socket_lock(s, 1); | |
1047 | if (s) { | |
1048 | if (sbappendaddr(&s->so_rcv, | |
1049 | (struct sockaddr *)src, | |
1050 | mm, (struct mbuf *)0, NULL) != 0) { | |
1051 | sorwakeup(s); | |
1052 | socket_unlock(s, 1); | |
1053 | return 0; | |
1054 | } | |
1055 | } | |
1056 | socket_unlock(s, 1); | |
1057 | m_freem(mm); | |
1058 | return -1; | |
1059 | } | |
1060 | ||
1061 | /* | |
1062 | * IP multicast forwarding function. This function assumes that the packet | |
1063 | * pointed to by "ip" has arrived on (or is about to be sent to) the interface | |
1064 | * pointed to by "ifp", and the packet is to be relayed to other networks | |
1065 | * that have members of the packet's destination IP multicast group. | |
1066 | * | |
1067 | * The packet is returned unscathed to the caller, unless it is | |
1068 | * erroneous, in which case a non-zero return value tells the caller to | |
1069 | * discard it. | |
1070 | */ | |
1071 | ||
1072 | #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ | |
1073 | #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ | |
1074 | ||
1075 | static int | |
1076 | X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, | |
1077 | struct ip_moptions *imo) | |
1078 | { | |
1079 | struct mfc *rt; | |
1080 | u_char *ipoptions; | |
1081 | static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET, | |
1082 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
1083 | static int srctun = 0; | |
1084 | struct mbuf *mm; | |
1085 | vifi_t vifi; | |
1086 | struct vif *vifp; | |
1087 | ||
1088 | if (mrtdebug & DEBUG_FORWARD) | |
1089 | log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", | |
1090 | (u_long)ntohl(ip->ip_src.s_addr), (u_long)ntohl(ip->ip_dst.s_addr), | |
1091 | (void *)ifp); | |
1092 | ||
1093 | if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || | |
1094 | (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { | |
1095 | /* | |
1096 | * Packet arrived via a physical interface or | |
1097 | * an encapsulated tunnel. | |
1098 | */ | |
1099 | } else { | |
1100 | /* | |
1101 | * Packet arrived through a source-route tunnel. | |
1102 | * Source-route tunnels are no longer supported. | |
1103 | */ | |
1104 | if ((srctun++ % 1000) == 0) | |
1105 | log(LOG_ERR, | |
1106 | "ip_mforward: received source-routed packet from %lx\n", | |
1107 | (u_long)ntohl(ip->ip_src.s_addr)); | |
1108 | ||
1109 | return 1; | |
1110 | } | |
1111 | ||
1112 | if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { | |
1113 | if (ip->ip_ttl < 255) | |
1114 | ip->ip_ttl++; /* compensate for -1 in *_send routines */ | |
1115 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1116 | vifp = viftable + vifi; | |
1117 | printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", | |
1118 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, | |
1119 | (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", | |
1120 | vifp->v_ifp->if_name, vifp->v_ifp->if_unit); | |
1121 | } | |
1122 | return (ip_mdq(m, ifp, NULL, vifi)); | |
1123 | } | |
1124 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1125 | printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", | |
1126 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); | |
1127 | if(!imo) | |
1128 | printf("In fact, no options were specified at all\n"); | |
1129 | } | |
1130 | ||
1131 | /* | |
1132 | * Don't forward a packet with time-to-live of zero or one, | |
1133 | * or a packet destined to a local-only group. | |
1134 | */ | |
1135 | if (ip->ip_ttl <= 1 || | |
1136 | ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) | |
1137 | return 0; | |
1138 | ||
1139 | /* | |
1140 | * Determine forwarding vifs from the forwarding cache table | |
1141 | */ | |
1142 | MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); | |
1143 | ||
1144 | /* Entry exists, so forward if necessary */ | |
1145 | if (rt != NULL) { | |
1146 | return (ip_mdq(m, ifp, rt, -1)); | |
1147 | } else { | |
1148 | /* | |
1149 | * If we don't have a route for packet's origin, | |
1150 | * Make a copy of the packet & | |
1151 | * send message to routing daemon | |
1152 | */ | |
1153 | ||
1154 | struct mbuf *mb0; | |
1155 | struct rtdetq *rte; | |
1156 | u_long hash; | |
1157 | int hlen = ip->ip_hl << 2; | |
1158 | #if UPCALL_TIMING | |
1159 | struct timeval tp; | |
1160 | ||
1161 | GET_TIME(tp); | |
1162 | #endif | |
1163 | ||
1164 | mrtstat.mrts_no_route++; | |
1165 | if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) | |
1166 | log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", | |
1167 | (u_long)ntohl(ip->ip_src.s_addr), | |
1168 | (u_long)ntohl(ip->ip_dst.s_addr)); | |
1169 | ||
1170 | /* | |
1171 | * Allocate mbufs early so that we don't do extra work if we are | |
1172 | * just going to fail anyway. Make sure to pullup the header so | |
1173 | * that other people can't step on it. | |
1174 | */ | |
1175 | rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT); | |
1176 | if (rte == NULL) { | |
1177 | return ENOBUFS; | |
1178 | } | |
1179 | mb0 = m_copy(m, 0, M_COPYALL); | |
1180 | if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) | |
1181 | mb0 = m_pullup(mb0, hlen); | |
1182 | if (mb0 == NULL) { | |
1183 | FREE(rte, M_MRTABLE); | |
1184 | return ENOBUFS; | |
1185 | } | |
1186 | ||
1187 | /* is there an upcall waiting for this packet? */ | |
1188 | hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); | |
1189 | for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { | |
1190 | if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && | |
1191 | (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && | |
1192 | (rt->mfc_stall != NULL)) | |
1193 | break; | |
1194 | } | |
1195 | ||
1196 | if (rt == NULL) { | |
1197 | int i; | |
1198 | struct igmpmsg *im; | |
1199 | ||
1200 | /* no upcall, so make a new entry */ | |
1201 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
1202 | if (rt == NULL) { | |
1203 | FREE(rte, M_MRTABLE); | |
1204 | m_freem(mb0); | |
1205 | return ENOBUFS; | |
1206 | } | |
1207 | /* Make a copy of the header to send to the user level process */ | |
1208 | mm = m_copy(mb0, 0, hlen); | |
1209 | if (mm == NULL) { | |
1210 | FREE(rte, M_MRTABLE); | |
1211 | m_freem(mb0); | |
1212 | FREE(rt, M_MRTABLE); | |
1213 | return ENOBUFS; | |
1214 | } | |
1215 | ||
1216 | /* | |
1217 | * Send message to routing daemon to install | |
1218 | * a route into the kernel table | |
1219 | */ | |
1220 | k_igmpsrc.sin_addr = ip->ip_src; | |
1221 | ||
1222 | im = mtod(mm, struct igmpmsg *); | |
1223 | im->im_msgtype = IGMPMSG_NOCACHE; | |
1224 | im->im_mbz = 0; | |
1225 | ||
1226 | mrtstat.mrts_upcalls++; | |
1227 | ||
1228 | if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { | |
1229 | log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); | |
1230 | ++mrtstat.mrts_upq_sockfull; | |
1231 | FREE(rte, M_MRTABLE); | |
1232 | m_freem(mb0); | |
1233 | FREE(rt, M_MRTABLE); | |
1234 | return ENOBUFS; | |
1235 | } | |
1236 | ||
1237 | /* insert new entry at head of hash chain */ | |
1238 | rt->mfc_origin.s_addr = ip->ip_src.s_addr; | |
1239 | rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; | |
1240 | rt->mfc_expire = UPCALL_EXPIRE; | |
1241 | nexpire[hash]++; | |
1242 | for (i = 0; i < numvifs; i++) | |
1243 | rt->mfc_ttls[i] = 0; | |
1244 | rt->mfc_parent = -1; | |
1245 | ||
1246 | /* link into table */ | |
1247 | rt->mfc_next = mfctable[hash]; | |
1248 | mfctable[hash] = rt; | |
1249 | rt->mfc_stall = rte; | |
1250 | ||
1251 | } else { | |
1252 | /* determine if q has overflowed */ | |
1253 | int npkts = 0; | |
1254 | struct rtdetq **p; | |
1255 | ||
1256 | for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) | |
1257 | npkts++; | |
1258 | ||
1259 | if (npkts > MAX_UPQ) { | |
1260 | mrtstat.mrts_upq_ovflw++; | |
1261 | FREE(rte, M_MRTABLE); | |
1262 | m_freem(mb0); | |
1263 | return 0; | |
1264 | } | |
1265 | ||
1266 | /* Add this entry to the end of the queue */ | |
1267 | *p = rte; | |
1268 | } | |
1269 | ||
1270 | rte->m = mb0; | |
1271 | rte->ifp = ifp; | |
1272 | #if UPCALL_TIMING | |
1273 | rte->t = tp; | |
1274 | #endif | |
1275 | rte->next = NULL; | |
1276 | ||
1277 | return 0; | |
1278 | } | |
1279 | } | |
1280 | ||
1281 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1282 | int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, | |
1283 | struct ip_moptions *) = X_ip_mforward; | |
1284 | #endif | |
1285 | ||
1286 | /* | |
1287 | * Clean up the cache entry if upcall is not serviced | |
1288 | */ | |
1289 | static void | |
1290 | expire_upcalls(__unused void *unused) | |
1291 | { | |
1292 | struct rtdetq *rte; | |
1293 | struct mfc *mfc, **nptr; | |
1294 | int i; | |
1295 | ||
1296 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
1297 | if (nexpire[i] == 0) | |
1298 | continue; | |
1299 | nptr = &mfctable[i]; | |
1300 | for (mfc = *nptr; mfc != NULL; mfc = *nptr) { | |
1301 | /* | |
1302 | * Skip real cache entries | |
1303 | * Make sure it wasn't marked to not expire (shouldn't happen) | |
1304 | * If it expires now | |
1305 | */ | |
1306 | if (mfc->mfc_stall != NULL && | |
1307 | mfc->mfc_expire != 0 && | |
1308 | --mfc->mfc_expire == 0) { | |
1309 | if (mrtdebug & DEBUG_EXPIRE) | |
1310 | log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", | |
1311 | (u_long)ntohl(mfc->mfc_origin.s_addr), | |
1312 | (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); | |
1313 | /* | |
1314 | * drop all the packets | |
1315 | * free the mbuf with the pkt, if, timing info | |
1316 | */ | |
1317 | for (rte = mfc->mfc_stall; rte; ) { | |
1318 | struct rtdetq *n = rte->next; | |
1319 | ||
1320 | m_freem(rte->m); | |
1321 | FREE(rte, M_MRTABLE); | |
1322 | rte = n; | |
1323 | } | |
1324 | ++mrtstat.mrts_cache_cleanups; | |
1325 | nexpire[i]--; | |
1326 | ||
1327 | *nptr = mfc->mfc_next; | |
1328 | FREE(mfc, M_MRTABLE); | |
1329 | } else { | |
1330 | nptr = &mfc->mfc_next; | |
1331 | } | |
1332 | } | |
1333 | } | |
1334 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
1335 | } | |
1336 | ||
1337 | /* | |
1338 | * Packet forwarding routine once entry in the cache is made | |
1339 | */ | |
1340 | static int | |
1341 | ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, | |
1342 | vifi_t xmt_vif) | |
1343 | { | |
1344 | struct ip *ip = mtod(m, struct ip *); | |
1345 | vifi_t vifi; | |
1346 | struct vif *vifp; | |
1347 | int plen = ip->ip_len; | |
1348 | ||
1349 | /* | |
1350 | * Macro to send packet on vif. Since RSVP packets don't get counted on | |
1351 | * input, they shouldn't get counted on output, so statistics keeping is | |
1352 | * seperate. | |
1353 | */ | |
1354 | #define MC_SEND(ip,vifp,m) { \ | |
1355 | if ((vifp)->v_flags & VIFF_TUNNEL) \ | |
1356 | encap_send((ip), (vifp), (m)); \ | |
1357 | else \ | |
1358 | phyint_send((ip), (vifp), (m)); \ | |
1359 | } | |
1360 | ||
1361 | /* | |
1362 | * If xmt_vif is not -1, send on only the requested vif. | |
1363 | * | |
1364 | * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) | |
1365 | */ | |
1366 | if (xmt_vif < numvifs) { | |
1367 | MC_SEND(ip, viftable + xmt_vif, m); | |
1368 | return 1; | |
1369 | } | |
1370 | ||
1371 | /* | |
1372 | * Don't forward if it didn't arrive from the parent vif for its origin. | |
1373 | */ | |
1374 | vifi = rt->mfc_parent; | |
1375 | if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { | |
1376 | /* came in the wrong interface */ | |
1377 | if (mrtdebug & DEBUG_FORWARD) | |
1378 | log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", | |
1379 | (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); | |
1380 | ++mrtstat.mrts_wrong_if; | |
1381 | ++rt->mfc_wrong_if; | |
1382 | /* | |
1383 | * If we are doing PIM assert processing, and we are forwarding | |
1384 | * packets on this interface, and it is a broadcast medium | |
1385 | * interface (and not a tunnel), send a message to the routing daemon. | |
1386 | */ | |
1387 | if (pim_assert && rt->mfc_ttls[vifi] && | |
1388 | (ifp->if_flags & IFF_BROADCAST) && | |
1389 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
1390 | struct sockaddr_in k_igmpsrc; | |
1391 | struct mbuf *mm; | |
1392 | struct igmpmsg *im; | |
1393 | int hlen = ip->ip_hl << 2; | |
1394 | struct timeval now; | |
1395 | u_long delta; | |
1396 | ||
1397 | GET_TIME(now); | |
1398 | ||
1399 | TV_DELTA(rt->mfc_last_assert, now, delta); | |
1400 | ||
1401 | if (delta > ASSERT_MSG_TIME) { | |
1402 | mm = m_copy(m, 0, hlen); | |
1403 | if (mm && (M_HASCL(mm) || mm->m_len < hlen)) | |
1404 | mm = m_pullup(mm, hlen); | |
1405 | if (mm == NULL) { | |
1406 | return ENOBUFS; | |
1407 | } | |
1408 | ||
1409 | rt->mfc_last_assert = now; | |
1410 | ||
1411 | im = mtod(mm, struct igmpmsg *); | |
1412 | im->im_msgtype = IGMPMSG_WRONGVIF; | |
1413 | im->im_mbz = 0; | |
1414 | im->im_vif = vifi; | |
1415 | ||
1416 | k_igmpsrc.sin_addr = im->im_src; | |
1417 | ||
1418 | socket_send(ip_mrouter, mm, &k_igmpsrc); | |
1419 | } | |
1420 | } | |
1421 | return 0; | |
1422 | } | |
1423 | ||
1424 | /* If I sourced this packet, it counts as output, else it was input. */ | |
1425 | if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { | |
1426 | viftable[vifi].v_pkt_out++; | |
1427 | viftable[vifi].v_bytes_out += plen; | |
1428 | } else { | |
1429 | viftable[vifi].v_pkt_in++; | |
1430 | viftable[vifi].v_bytes_in += plen; | |
1431 | } | |
1432 | rt->mfc_pkt_cnt++; | |
1433 | rt->mfc_byte_cnt += plen; | |
1434 | ||
1435 | /* | |
1436 | * For each vif, decide if a copy of the packet should be forwarded. | |
1437 | * Forward if: | |
1438 | * - the ttl exceeds the vif's threshold | |
1439 | * - there are group members downstream on interface | |
1440 | */ | |
1441 | for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) | |
1442 | if ((rt->mfc_ttls[vifi] > 0) && | |
1443 | (ip->ip_ttl > rt->mfc_ttls[vifi])) { | |
1444 | vifp->v_pkt_out++; | |
1445 | vifp->v_bytes_out += plen; | |
1446 | MC_SEND(ip, vifp, m); | |
1447 | } | |
1448 | ||
1449 | return 0; | |
1450 | } | |
1451 | ||
1452 | /* | |
1453 | * check if a vif number is legal/ok. This is used by ip_output, to export | |
1454 | * numvifs there, | |
1455 | */ | |
1456 | static int | |
1457 | X_legal_vif_num(int vif) | |
1458 | { | |
1459 | if (vif >= 0 && vif < numvifs) | |
1460 | return(1); | |
1461 | else | |
1462 | return(0); | |
1463 | } | |
1464 | ||
1465 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1466 | int (*legal_vif_num)(int) = X_legal_vif_num; | |
1467 | #endif | |
1468 | ||
1469 | /* | |
1470 | * Return the local address used by this vif | |
1471 | */ | |
1472 | static u_long | |
1473 | X_ip_mcast_src(int vifi) | |
1474 | { | |
1475 | if (vifi >= 0 && vifi < numvifs) | |
1476 | return viftable[vifi].v_lcl_addr.s_addr; | |
1477 | else | |
1478 | return INADDR_ANY; | |
1479 | } | |
1480 | ||
1481 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1482 | u_long (*ip_mcast_src)(int) = X_ip_mcast_src; | |
1483 | #endif | |
1484 | ||
1485 | static void | |
1486 | phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1487 | { | |
1488 | struct mbuf *mb_copy; | |
1489 | int hlen = ip->ip_hl << 2; | |
1490 | ||
1491 | /* | |
1492 | * Make a new reference to the packet; make sure that | |
1493 | * the IP header is actually copied, not just referenced, | |
1494 | * so that ip_output() only scribbles on the copy. | |
1495 | */ | |
1496 | mb_copy = m_copy(m, 0, M_COPYALL); | |
1497 | if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) | |
1498 | mb_copy = m_pullup(mb_copy, hlen); | |
1499 | if (mb_copy == NULL) | |
1500 | return; | |
1501 | ||
1502 | if (vifp->v_rate_limit == 0) | |
1503 | tbf_send_packet(vifp, mb_copy); | |
1504 | else | |
1505 | tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); | |
1506 | } | |
1507 | ||
1508 | static void | |
1509 | encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1510 | { | |
1511 | struct mbuf *mb_copy; | |
1512 | struct ip *ip_copy; | |
1513 | int i, len = ip->ip_len; | |
1514 | ||
1515 | /* | |
1516 | * copy the old packet & pullup its IP header into the | |
1517 | * new mbuf so we can modify it. Try to fill the new | |
1518 | * mbuf since if we don't the ethernet driver will. | |
1519 | */ | |
1520 | MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); | |
1521 | if (mb_copy == NULL) | |
1522 | return; | |
1523 | #if CONFIG_MACF_NET | |
1524 | mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy); | |
1525 | #endif | |
1526 | mb_copy->m_data += max_linkhdr; | |
1527 | mb_copy->m_len = sizeof(multicast_encap_iphdr); | |
1528 | ||
1529 | if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { | |
1530 | m_freem(mb_copy); | |
1531 | return; | |
1532 | } | |
1533 | i = MHLEN - M_LEADINGSPACE(mb_copy); | |
1534 | if (i > len) | |
1535 | i = len; | |
1536 | mb_copy = m_pullup(mb_copy, i); | |
1537 | if (mb_copy == NULL) | |
1538 | return; | |
1539 | mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); | |
1540 | ||
1541 | /* | |
1542 | * fill in the encapsulating IP header. | |
1543 | */ | |
1544 | ip_copy = mtod(mb_copy, struct ip *); | |
1545 | *ip_copy = multicast_encap_iphdr; | |
1546 | #if RANDOM_IP_ID | |
1547 | ip_copy->ip_id = ip_randomid(); | |
1548 | #else | |
1549 | ip_copy->ip_id = htons(ip_id++); | |
1550 | #endif | |
1551 | ip_copy->ip_len += len; | |
1552 | ip_copy->ip_src = vifp->v_lcl_addr; | |
1553 | ip_copy->ip_dst = vifp->v_rmt_addr; | |
1554 | ||
1555 | /* | |
1556 | * turn the encapsulated IP header back into a valid one. | |
1557 | */ | |
1558 | ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); | |
1559 | --ip->ip_ttl; | |
1560 | HTONS(ip->ip_len); | |
1561 | HTONS(ip->ip_off); | |
1562 | ip->ip_sum = 0; | |
1563 | mb_copy->m_data += sizeof(multicast_encap_iphdr); | |
1564 | ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); | |
1565 | mb_copy->m_data -= sizeof(multicast_encap_iphdr); | |
1566 | ||
1567 | if (vifp->v_rate_limit == 0) | |
1568 | tbf_send_packet(vifp, mb_copy); | |
1569 | else | |
1570 | tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); | |
1571 | } | |
1572 | ||
1573 | /* | |
1574 | * De-encapsulate a packet and feed it back through ip input (this | |
1575 | * routine is called whenever IP gets a packet with proto type | |
1576 | * ENCAP_PROTO and a local destination address). | |
1577 | */ | |
1578 | void | |
1579 | #if MROUTE_LKM | |
1580 | X_ipip_input(struct mbuf *m, int iphlen) | |
1581 | #else | |
1582 | ipip_input(struct mbuf *m, int iphlen) | |
1583 | #endif | |
1584 | { | |
1585 | struct ifnet *ifp = m->m_pkthdr.rcvif; | |
1586 | struct ip *ip = mtod(m, struct ip *); | |
1587 | int hlen = ip->ip_hl << 2; | |
1588 | struct vif *vifp; | |
1589 | ||
1590 | if (!have_encap_tunnel) { | |
1591 | rip_input(m, iphlen); | |
1592 | return; | |
1593 | } | |
1594 | /* | |
1595 | * dump the packet if it's not to a multicast destination or if | |
1596 | * we don't have an encapsulating tunnel with the source. | |
1597 | * Note: This code assumes that the remote site IP address | |
1598 | * uniquely identifies the tunnel (i.e., that this site has | |
1599 | * at most one tunnel with the remote site). | |
1600 | */ | |
1601 | if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { | |
1602 | ++mrtstat.mrts_bad_tunnel; | |
1603 | m_freem(m); | |
1604 | return; | |
1605 | } | |
1606 | if (ip->ip_src.s_addr != last_encap_src) { | |
1607 | struct vif *vife; | |
1608 | ||
1609 | vifp = viftable; | |
1610 | vife = vifp + numvifs; | |
1611 | last_encap_src = ip->ip_src.s_addr; | |
1612 | last_encap_vif = 0; | |
1613 | for ( ; vifp < vife; ++vifp) | |
1614 | if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { | |
1615 | if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) | |
1616 | == VIFF_TUNNEL) | |
1617 | last_encap_vif = vifp; | |
1618 | break; | |
1619 | } | |
1620 | } | |
1621 | if ((vifp = last_encap_vif) == 0) { | |
1622 | last_encap_src = 0; | |
1623 | mrtstat.mrts_cant_tunnel++; /*XXX*/ | |
1624 | m_freem(m); | |
1625 | if (mrtdebug) | |
1626 | log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", | |
1627 | (u_long)ntohl(ip->ip_src.s_addr)); | |
1628 | return; | |
1629 | } | |
1630 | ifp = vifp->v_ifp; | |
1631 | ||
1632 | if (hlen > IP_HDR_LEN) | |
1633 | ip_stripoptions(m, (struct mbuf *) 0); | |
1634 | m->m_data += IP_HDR_LEN; | |
1635 | m->m_len -= IP_HDR_LEN; | |
1636 | m->m_pkthdr.len -= IP_HDR_LEN; | |
1637 | m->m_pkthdr.rcvif = ifp; | |
1638 | ||
1639 | proto_inject(PF_INET, m); | |
1640 | } | |
1641 | ||
1642 | /* | |
1643 | * Token bucket filter module | |
1644 | */ | |
1645 | ||
1646 | static void | |
1647 | tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, | |
1648 | u_long p_len) | |
1649 | { | |
1650 | struct tbf *t = vifp->v_tbf; | |
1651 | ||
1652 | if (p_len > MAX_BKT_SIZE) { | |
1653 | /* drop if packet is too large */ | |
1654 | mrtstat.mrts_pkt2large++; | |
1655 | m_freem(m); | |
1656 | return; | |
1657 | } | |
1658 | ||
1659 | tbf_update_tokens(vifp); | |
1660 | ||
1661 | /* if there are enough tokens, | |
1662 | * and the queue is empty, | |
1663 | * send this packet out | |
1664 | */ | |
1665 | ||
1666 | if (t->tbf_q_len == 0) { | |
1667 | /* queue empty, send packet if enough tokens */ | |
1668 | if (p_len <= t->tbf_n_tok) { | |
1669 | t->tbf_n_tok -= p_len; | |
1670 | tbf_send_packet(vifp, m); | |
1671 | } else { | |
1672 | /* queue packet and timeout till later */ | |
1673 | tbf_queue(vifp, m); | |
1674 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1675 | } | |
1676 | } else if (t->tbf_q_len < t->tbf_max_q_len) { | |
1677 | /* finite queue length, so queue pkts and process queue */ | |
1678 | tbf_queue(vifp, m); | |
1679 | tbf_process_q(vifp); | |
1680 | } else { | |
1681 | /* queue length too much, try to dq and queue and process */ | |
1682 | if (!tbf_dq_sel(vifp, ip)) { | |
1683 | mrtstat.mrts_q_overflow++; | |
1684 | m_freem(m); | |
1685 | return; | |
1686 | } else { | |
1687 | tbf_queue(vifp, m); | |
1688 | tbf_process_q(vifp); | |
1689 | } | |
1690 | } | |
1691 | return; | |
1692 | } | |
1693 | ||
1694 | /* | |
1695 | * adds a packet to the queue at the interface | |
1696 | */ | |
1697 | static void | |
1698 | tbf_queue(struct vif *vifp, struct mbuf *m) | |
1699 | { | |
1700 | struct tbf *t = vifp->v_tbf; | |
1701 | ||
1702 | if (t->tbf_t == NULL) { | |
1703 | /* Queue was empty */ | |
1704 | t->tbf_q = m; | |
1705 | } else { | |
1706 | /* Insert at tail */ | |
1707 | t->tbf_t->m_act = m; | |
1708 | } | |
1709 | ||
1710 | /* Set new tail pointer */ | |
1711 | t->tbf_t = m; | |
1712 | ||
1713 | #if DIAGNOSTIC | |
1714 | /* Make sure we didn't get fed a bogus mbuf */ | |
1715 | if (m->m_act) | |
1716 | panic("tbf_queue: m_act"); | |
1717 | #endif | |
1718 | m->m_act = NULL; | |
1719 | ||
1720 | t->tbf_q_len++; | |
1721 | } | |
1722 | ||
1723 | ||
1724 | /* | |
1725 | * processes the queue at the interface | |
1726 | */ | |
1727 | static void | |
1728 | tbf_process_q(struct vif *vifp) | |
1729 | { | |
1730 | struct mbuf *m; | |
1731 | int len; | |
1732 | struct tbf *t = vifp->v_tbf; | |
1733 | ||
1734 | /* loop through the queue at the interface and send as many packets | |
1735 | * as possible | |
1736 | */ | |
1737 | while (t->tbf_q_len > 0) { | |
1738 | m = t->tbf_q; | |
1739 | ||
1740 | len = mtod(m, struct ip *)->ip_len; | |
1741 | ||
1742 | /* determine if the packet can be sent */ | |
1743 | if (len <= t->tbf_n_tok) { | |
1744 | /* if so, | |
1745 | * reduce no of tokens, dequeue the packet, | |
1746 | * send the packet. | |
1747 | */ | |
1748 | t->tbf_n_tok -= len; | |
1749 | ||
1750 | t->tbf_q = m->m_act; | |
1751 | if (--t->tbf_q_len == 0) | |
1752 | t->tbf_t = NULL; | |
1753 | ||
1754 | m->m_act = NULL; | |
1755 | tbf_send_packet(vifp, m); | |
1756 | ||
1757 | } else break; | |
1758 | } | |
1759 | } | |
1760 | ||
1761 | static void | |
1762 | tbf_reprocess_q(void *xvifp) | |
1763 | { | |
1764 | struct vif *vifp = xvifp; | |
1765 | ||
1766 | if (ip_mrouter == NULL) { | |
1767 | return; | |
1768 | } | |
1769 | ||
1770 | tbf_update_tokens(vifp); | |
1771 | ||
1772 | tbf_process_q(vifp); | |
1773 | ||
1774 | if (vifp->v_tbf->tbf_q_len) | |
1775 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1776 | } | |
1777 | ||
1778 | /* function that will selectively discard a member of the queue | |
1779 | * based on the precedence value and the priority | |
1780 | */ | |
1781 | static int | |
1782 | tbf_dq_sel(struct vif *vifp, struct ip *ip) | |
1783 | { | |
1784 | u_int p; | |
1785 | struct mbuf *m, *last; | |
1786 | struct mbuf **np; | |
1787 | struct tbf *t = vifp->v_tbf; | |
1788 | ||
1789 | p = priority(vifp, ip); | |
1790 | ||
1791 | np = &t->tbf_q; | |
1792 | last = NULL; | |
1793 | while ((m = *np) != NULL) { | |
1794 | if (p > priority(vifp, mtod(m, struct ip *))) { | |
1795 | *np = m->m_act; | |
1796 | /* If we're removing the last packet, fix the tail pointer */ | |
1797 | if (m == t->tbf_t) | |
1798 | t->tbf_t = last; | |
1799 | m_freem(m); | |
1800 | /* it's impossible for the queue to be empty, but | |
1801 | * we check anyway. */ | |
1802 | if (--t->tbf_q_len == 0) | |
1803 | t->tbf_t = NULL; | |
1804 | mrtstat.mrts_drop_sel++; | |
1805 | return(1); | |
1806 | } | |
1807 | np = &m->m_act; | |
1808 | last = m; | |
1809 | } | |
1810 | return(0); | |
1811 | } | |
1812 | ||
1813 | static void | |
1814 | tbf_send_packet(struct vif *vifp, struct mbuf *m) | |
1815 | { | |
1816 | struct ip_moptions imo; | |
1817 | int error; | |
1818 | static struct route ro; | |
1819 | ||
1820 | if (vifp->v_flags & VIFF_TUNNEL) { | |
1821 | /* If tunnel options */ | |
1822 | ip_output(m, (struct mbuf *)0, &vifp->v_route, | |
1823 | IP_FORWARDING, (struct ip_moptions *)0, NULL); | |
1824 | } else { | |
1825 | imo.imo_multicast_ifp = vifp->v_ifp; | |
1826 | imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; | |
1827 | imo.imo_multicast_loop = 1; | |
1828 | imo.imo_multicast_vif = -1; | |
1829 | ||
1830 | /* | |
1831 | * Re-entrancy should not be a problem here, because | |
1832 | * the packets that we send out and are looped back at us | |
1833 | * should get rejected because they appear to come from | |
1834 | * the loopback interface, thus preventing looping. | |
1835 | */ | |
1836 | error = ip_output(m, (struct mbuf *)0, &ro, | |
1837 | IP_FORWARDING, &imo, NULL); | |
1838 | ||
1839 | if (mrtdebug & DEBUG_XMIT) | |
1840 | log(LOG_DEBUG, "phyint_send on vif %d err %d\n", | |
1841 | vifp - viftable, error); | |
1842 | } | |
1843 | } | |
1844 | ||
1845 | /* determine the current time and then | |
1846 | * the elapsed time (between the last time and time now) | |
1847 | * in milliseconds & update the no. of tokens in the bucket | |
1848 | */ | |
1849 | static void | |
1850 | tbf_update_tokens(struct vif *vifp) | |
1851 | { | |
1852 | struct timeval tp; | |
1853 | u_long tm; | |
1854 | struct tbf *t = vifp->v_tbf; | |
1855 | ||
1856 | GET_TIME(tp); | |
1857 | ||
1858 | TV_DELTA(tp, t->tbf_last_pkt_t, tm); | |
1859 | ||
1860 | /* | |
1861 | * This formula is actually | |
1862 | * "time in seconds" * "bytes/second". | |
1863 | * | |
1864 | * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) | |
1865 | * | |
1866 | * The (1000/1024) was introduced in add_vif to optimize | |
1867 | * this divide into a shift. | |
1868 | */ | |
1869 | t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; | |
1870 | t->tbf_last_pkt_t = tp; | |
1871 | ||
1872 | if (t->tbf_n_tok > MAX_BKT_SIZE) | |
1873 | t->tbf_n_tok = MAX_BKT_SIZE; | |
1874 | } | |
1875 | ||
1876 | static int | |
1877 | priority(__unused struct vif *vifp, struct ip *ip) | |
1878 | { | |
1879 | int prio; | |
1880 | ||
1881 | /* temporary hack; may add general packet classifier some day */ | |
1882 | ||
1883 | /* | |
1884 | * The UDP port space is divided up into four priority ranges: | |
1885 | * [0, 16384) : unclassified - lowest priority | |
1886 | * [16384, 32768) : audio - highest priority | |
1887 | * [32768, 49152) : whiteboard - medium priority | |
1888 | * [49152, 65536) : video - low priority | |
1889 | */ | |
1890 | if (ip->ip_p == IPPROTO_UDP) { | |
1891 | struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); | |
1892 | switch (ntohs(udp->uh_dport) & 0xc000) { | |
1893 | case 0x4000: | |
1894 | prio = 70; | |
1895 | break; | |
1896 | case 0x8000: | |
1897 | prio = 60; | |
1898 | break; | |
1899 | case 0xc000: | |
1900 | prio = 55; | |
1901 | break; | |
1902 | default: | |
1903 | prio = 50; | |
1904 | break; | |
1905 | } | |
1906 | if (tbfdebug > 1) | |
1907 | log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio); | |
1908 | } else { | |
1909 | prio = 50; | |
1910 | } | |
1911 | return prio; | |
1912 | } | |
1913 | ||
1914 | /* | |
1915 | * End of token bucket filter modifications | |
1916 | */ | |
1917 | ||
1918 | int | |
1919 | ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt) | |
1920 | { | |
1921 | int error, i; | |
1922 | ||
1923 | if (rsvpdebug) | |
1924 | printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", | |
1925 | so->so_type, so->so_proto->pr_protocol); | |
1926 | ||
1927 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1928 | return EOPNOTSUPP; | |
1929 | ||
1930 | /* Check mbuf. */ | |
1931 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1932 | if (error) | |
1933 | return (error); | |
1934 | ||
1935 | if (rsvpdebug) | |
1936 | printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on); | |
1937 | ||
1938 | /* Check vif. */ | |
1939 | if (!legal_vif_num(i)) { | |
1940 | return EADDRNOTAVAIL; | |
1941 | } | |
1942 | ||
1943 | /* Check if socket is available. */ | |
1944 | if (viftable[i].v_rsvpd != NULL) { | |
1945 | return EADDRINUSE; | |
1946 | } | |
1947 | ||
1948 | viftable[i].v_rsvpd = so; | |
1949 | /* This may seem silly, but we need to be sure we don't over-increment | |
1950 | * the RSVP counter, in case something slips up. | |
1951 | */ | |
1952 | if (!viftable[i].v_rsvp_on) { | |
1953 | viftable[i].v_rsvp_on = 1; | |
1954 | rsvp_on++; | |
1955 | } | |
1956 | ||
1957 | return 0; | |
1958 | } | |
1959 | ||
1960 | int | |
1961 | ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt) | |
1962 | { | |
1963 | int error, i; | |
1964 | ||
1965 | if (rsvpdebug) | |
1966 | printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", | |
1967 | so->so_type, so->so_proto->pr_protocol); | |
1968 | ||
1969 | if (so->so_type != SOCK_RAW || | |
1970 | so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1971 | return EOPNOTSUPP; | |
1972 | ||
1973 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1974 | if (error) | |
1975 | return (error); | |
1976 | ||
1977 | /* Check vif. */ | |
1978 | if (!legal_vif_num(i)) { | |
1979 | return EADDRNOTAVAIL; | |
1980 | } | |
1981 | ||
1982 | if (rsvpdebug) | |
1983 | printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n", | |
1984 | viftable[i].v_rsvpd, so); | |
1985 | ||
1986 | viftable[i].v_rsvpd = NULL; | |
1987 | /* | |
1988 | * This may seem silly, but we need to be sure we don't over-decrement | |
1989 | * the RSVP counter, in case something slips up. | |
1990 | */ | |
1991 | if (viftable[i].v_rsvp_on) { | |
1992 | viftable[i].v_rsvp_on = 0; | |
1993 | rsvp_on--; | |
1994 | } | |
1995 | ||
1996 | return 0; | |
1997 | } | |
1998 | ||
1999 | void | |
2000 | ip_rsvp_force_done(struct socket *so) | |
2001 | { | |
2002 | int vifi; | |
2003 | ||
2004 | /* Don't bother if it is not the right type of socket. */ | |
2005 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
2006 | return; | |
2007 | ||
2008 | /* The socket may be attached to more than one vif...this | |
2009 | * is perfectly legal. | |
2010 | */ | |
2011 | for (vifi = 0; vifi < numvifs; vifi++) { | |
2012 | if (viftable[vifi].v_rsvpd == so) { | |
2013 | viftable[vifi].v_rsvpd = NULL; | |
2014 | /* This may seem silly, but we need to be sure we don't | |
2015 | * over-decrement the RSVP counter, in case something slips up. | |
2016 | */ | |
2017 | if (viftable[vifi].v_rsvp_on) { | |
2018 | viftable[vifi].v_rsvp_on = 0; | |
2019 | rsvp_on--; | |
2020 | } | |
2021 | } | |
2022 | } | |
2023 | ||
2024 | return; | |
2025 | } | |
2026 | ||
2027 | void | |
2028 | rsvp_input(struct mbuf *m, int iphlen) | |
2029 | { | |
2030 | int vifi; | |
2031 | struct ip *ip = mtod(m, struct ip *); | |
2032 | static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET, | |
2033 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
2034 | struct ifnet *ifp; | |
2035 | ||
2036 | if (rsvpdebug) | |
2037 | printf("rsvp_input: rsvp_on %d\n",rsvp_on); | |
2038 | ||
2039 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
2040 | * of the group to which the RSVP packet is addressed. But in this | |
2041 | * case we want to throw the packet away. | |
2042 | */ | |
2043 | if (!rsvp_on) { | |
2044 | m_freem(m); | |
2045 | return; | |
2046 | } | |
2047 | ||
2048 | if (rsvpdebug) | |
2049 | printf("rsvp_input: check vifs\n"); | |
2050 | ||
2051 | #if DIAGNOSTIC | |
2052 | if (!(m->m_flags & M_PKTHDR)) | |
2053 | panic("rsvp_input no hdr"); | |
2054 | #endif | |
2055 | ||
2056 | ifp = m->m_pkthdr.rcvif; | |
2057 | /* Find which vif the packet arrived on. */ | |
2058 | for (vifi = 0; vifi < numvifs; vifi++) | |
2059 | if (viftable[vifi].v_ifp == ifp) | |
2060 | break; | |
2061 | ||
2062 | if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { | |
2063 | /* | |
2064 | * If the old-style non-vif-associated socket is set, | |
2065 | * then use it. Otherwise, drop packet since there | |
2066 | * is no specific socket for this vif. | |
2067 | */ | |
2068 | if (ip_rsvpd != NULL) { | |
2069 | if (rsvpdebug) | |
2070 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
2071 | rip_input(m, iphlen); /* xxx */ | |
2072 | } else { | |
2073 | if (rsvpdebug && vifi == numvifs) | |
2074 | printf("rsvp_input: Can't find vif for packet.\n"); | |
2075 | else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) | |
2076 | printf("rsvp_input: No socket defined for vif %d\n",vifi); | |
2077 | m_freem(m); | |
2078 | } | |
2079 | return; | |
2080 | } | |
2081 | rsvp_src.sin_addr = ip->ip_src; | |
2082 | ||
2083 | if (rsvpdebug && m) | |
2084 | printf("rsvp_input: m->m_len = %ld, sbspace() = %ld\n", | |
2085 | m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); | |
2086 | ||
2087 | if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { | |
2088 | if (rsvpdebug) | |
2089 | printf("rsvp_input: Failed to append to socket\n"); | |
2090 | } else { | |
2091 | if (rsvpdebug) | |
2092 | printf("rsvp_input: send packet up\n"); | |
2093 | } | |
2094 | ||
2095 | } | |
2096 | ||
2097 | #if MROUTE_LKM | |
2098 | #include <sys/conf.h> | |
2099 | #include <sys/exec.h> | |
2100 | #include <sys/sysent.h> | |
2101 | #include <sys/lkm.h> | |
2102 | ||
2103 | MOD_MISC("ip_mroute_mod") | |
2104 | ||
2105 | static int | |
2106 | ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) | |
2107 | { | |
2108 | int i; | |
2109 | struct lkm_misc *args = lkmtp->private.lkm_misc; | |
2110 | int err = 0; | |
2111 | ||
2112 | switch(cmd) { | |
2113 | static int (*old_ip_mrouter_cmd)(); | |
2114 | static int (*old_ip_mrouter_done)(); | |
2115 | static int (*old_ip_mforward)(); | |
2116 | static int (*old_mrt_ioctl)(); | |
2117 | static void (*old_proto4_input)(); | |
2118 | static int (*old_legal_vif_num)(); | |
2119 | extern struct protosw inetsw[]; | |
2120 | ||
2121 | case LKM_E_LOAD: | |
2122 | if(lkmexists(lkmtp) || ip_mrtproto) | |
2123 | return(EEXIST); | |
2124 | old_ip_mrouter_cmd = ip_mrouter_cmd; | |
2125 | ip_mrouter_cmd = X_ip_mrouter_cmd; | |
2126 | old_ip_mrouter_done = ip_mrouter_done; | |
2127 | ip_mrouter_done = X_ip_mrouter_done; | |
2128 | old_ip_mforward = ip_mforward; | |
2129 | ip_mforward = X_ip_mforward; | |
2130 | old_mrt_ioctl = mrt_ioctl; | |
2131 | mrt_ioctl = X_mrt_ioctl; | |
2132 | old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input; | |
2133 | ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input; | |
2134 | old_legal_vif_num = legal_vif_num; | |
2135 | legal_vif_num = X_legal_vif_num; | |
2136 | ip_mrtproto = IGMP_DVMRP; | |
2137 | ||
2138 | printf("\nIP multicast routing loaded\n"); | |
2139 | break; | |
2140 | ||
2141 | case LKM_E_UNLOAD: | |
2142 | if (ip_mrouter) | |
2143 | return EINVAL; | |
2144 | ||
2145 | ip_mrouter_cmd = old_ip_mrouter_cmd; | |
2146 | ip_mrouter_done = old_ip_mrouter_done; | |
2147 | ip_mforward = old_ip_mforward; | |
2148 | mrt_ioctl = old_mrt_ioctl; | |
2149 | ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input; | |
2150 | legal_vif_num = old_legal_vif_num; | |
2151 | ip_mrtproto = 0; | |
2152 | break; | |
2153 | ||
2154 | default: | |
2155 | err = EINVAL; | |
2156 | break; | |
2157 | } | |
2158 | ||
2159 | return(err); | |
2160 | } | |
2161 | ||
/*
 * Entry point of the multicast routing loadable kernel module.
 *
 * Forwards lkmtp, cmd and ver to the DISPATCH macro, naming
 * ip_mroute_mod_handle as both the first and the second handler and nosys
 * as the third.
 * NOTE(review): these are presumably the load, unload and stat handlers,
 * and DISPATCH presumably supplies the return value -- confirm against the
 * macro definition in <sys/lkm.h>.
 */
int
ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
	DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
		nosys);
}
2167 | ||
2168 | #endif /* MROUTE_LKM */ | |
2169 | #endif /* MROUTING */ |