]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2000-2010 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* | |
29 | * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce | |
30 | * support for mandatory and extensible security protections. This notice | |
31 | * is included in support of clause 2.2 (b) of the Apple Public License, | |
32 | * Version 2.0. | |
33 | */ | |
34 | /* | |
35 | * IP multicast forwarding procedures | |
36 | * | |
37 | * Written by David Waitzman, BBN Labs, August 1988. | |
38 | * Modified by Steve Deering, Stanford, February 1989. | |
39 | * Modified by Mark J. Steiglitz, Stanford, May, 1991 | |
40 | * Modified by Van Jacobson, LBL, January 1993 | |
41 | * Modified by Ajit Thyagarajan, PARC, August 1993 | |
42 | * Modified by Bill Fenner, PARC, April 1995 | |
43 | * | |
44 | * MROUTING Revision: 3.5 | |
45 | * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $ | |
46 | */ | |
47 | ||
48 | ||
49 | #include <sys/param.h> | |
50 | #include <sys/systm.h> | |
51 | #include <sys/malloc.h> | |
52 | #include <sys/mbuf.h> | |
53 | #include <sys/socket.h> | |
54 | #include <sys/socketvar.h> | |
55 | #include <sys/protosw.h> | |
56 | #include <sys/time.h> | |
57 | #include <sys/kernel.h> | |
58 | #include <sys/sockio.h> | |
59 | #include <sys/syslog.h> | |
60 | ||
61 | #include <machine/endian.h> | |
62 | ||
63 | #include <net/if.h> | |
64 | #include <net/route.h> | |
65 | #include <net/kpi_protocol.h> | |
66 | #include <netinet/in.h> | |
67 | #include <netinet/in_systm.h> | |
68 | #include <netinet/ip.h> | |
69 | #include <netinet/ip_var.h> | |
70 | #include <netinet/in_var.h> | |
71 | #include <netinet/igmp.h> | |
72 | #include <netinet/ip_mroute.h> | |
73 | #include <netinet/udp.h> | |
74 | ||
75 | #if CONFIG_MACF_NET | |
76 | #include <security/mac_framework.h> | |
77 | #endif | |
78 | ||
79 | ||
80 | #if !MROUTING | |
81 | extern u_int32_t _ip_mcast_src(int vifi); | |
82 | extern int _ip_mforward(struct ip *ip, struct ifnet *ifp, | |
83 | struct mbuf *m, struct ip_moptions *imo); | |
84 | extern int _ip_mrouter_done(void); | |
85 | extern int _ip_mrouter_get(struct socket *so, struct sockopt *sopt); | |
86 | extern int _ip_mrouter_set(struct socket *so, struct sockopt *sopt); | |
87 | extern int _mrt_ioctl(int req, caddr_t data, struct proc *p); | |
88 | ||
89 | /* | |
90 | * Dummy routines and globals used when multicast routing is not compiled in. | |
91 | */ | |
92 | ||
93 | struct socket *ip_mrouter = NULL; | |
94 | u_int rsvpdebug = 0; | |
95 | ||
96 | int | |
97 | _ip_mrouter_set(__unused struct socket *so, | |
98 | __unused struct sockopt *sopt) | |
99 | { | |
100 | return(EOPNOTSUPP); | |
101 | } | |
102 | ||
103 | int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set; | |
104 | ||
105 | ||
106 | int | |
107 | _ip_mrouter_get(__unused struct socket *so, | |
108 | __unused sockopt *sopt) | |
109 | { | |
110 | return(EOPNOTSUPP); | |
111 | } | |
112 | ||
113 | int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get; | |
114 | ||
/*
 * Stub shutdown handler: there is no router state to tear down in this
 * configuration, so it simply reports success.
 */
int
_ip_mrouter_done(void)
{
	return 0;
}

/* Exported hook; points at the stub above. */
int (*ip_mrouter_done)(void) = _ip_mrouter_done;
122 | ||
123 | int | |
124 | _ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp, | |
125 | __unused struct mbuf *m, __unused ip_moptions *imo) | |
126 | { | |
127 | return(0); | |
128 | } | |
129 | ||
130 | int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, | |
131 | struct ip_moptions *) = _ip_mforward; | |
132 | ||
133 | int | |
134 | _mrt_ioctl(__unused u_long req, __unused caddr_t data, __unused struct proc *p) | |
135 | { | |
136 | return EOPNOTSUPP; | |
137 | } | |
138 | ||
139 | int (*mrt_ioctl)(u_long, caddr_t, struct proc *) = _mrt_ioctl; | |
140 | ||
141 | void | |
142 | rsvp_input(struct mbuf *m, int iphlen) /* XXX must fixup manually */ | |
143 | { | |
144 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
145 | * of the group to which the RSVP packet is addressed. But in this | |
146 | * case we want to throw the packet away. | |
147 | */ | |
148 | if (!rsvp_on) { | |
149 | m_freem(m); | |
150 | return; | |
151 | } | |
152 | ||
153 | if (ip_rsvpd != NULL) { | |
154 | if (rsvpdebug) | |
155 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
156 | rip_input(m, iphlen); | |
157 | return; | |
158 | } | |
159 | /* Drop the packet */ | |
160 | m_freem(m); | |
161 | } | |
162 | ||
/*
 * IP-in-IP input when multicast routing is not compiled in: the packet is
 * passed straight to rip_input().
 */
void
ipip_input(struct mbuf *m, int iphlen)	/* XXX must fixup manually */
{
	rip_input(m, iphlen);
}
166 | ||
167 | int (*legal_vif_num)(int) = 0; | |
168 | ||
169 | /* | |
170 | * This should never be called, since IP_MULTICAST_VIF should fail, but | |
171 | * just in case it does get called, the code a little lower in ip_output | |
172 | * will assign the packet a local address. | |
173 | */ | |
174 | u_int32_t | |
175 | _ip_mcast_src(int vifi) { return INADDR_ANY; } | |
176 | u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src; | |
177 | ||
178 | int | |
179 | ip_rsvp_vif_init(so, sopt) | |
180 | struct socket *so; | |
181 | struct sockopt *sopt; | |
182 | { | |
183 | return(EINVAL); | |
184 | } | |
185 | ||
186 | int | |
187 | ip_rsvp_vif_done(so, sopt) | |
188 | struct socket *so; | |
189 | struct sockopt *sopt; | |
190 | { | |
191 | return(EINVAL); | |
192 | } | |
193 | ||
194 | void | |
195 | ip_rsvp_force_done(so) | |
196 | struct socket *so; | |
197 | { | |
198 | return; | |
199 | } | |
200 | ||
201 | #else /* MROUTING */ | |
202 | ||
/* Non-zero when M_EXT is set in the mbuf's m_flags. */
#define	M_HASCL(m)	((m)->m_flags & M_EXT)

/* Byte-wise equality of two objects of size sizeof(struct in_addr). */
#define	INSIZ		sizeof(struct in_addr)
#define	same(a1, a2)	\
	(bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
208 | ||
209 | ||
210 | /* | |
211 | * Globals. All but ip_mrouter and ip_mrtproto could be static, | |
212 | * except for netstat or debugging purposes. | |
213 | */ | |
214 | #ifndef MROUTE_LKM | |
215 | struct socket *ip_mrouter = NULL; | |
216 | static struct mrtstat mrtstat; | |
217 | #else /* MROUTE_LKM */ | |
218 | extern void X_ipip_input(struct mbuf *m, int iphlen); | |
219 | extern struct mrtstat mrtstat; | |
220 | static int ip_mrtproto; | |
221 | #endif | |
222 | ||
223 | #define NO_RTE_FOUND 0x1 | |
224 | #define RTE_FOUND 0x2 | |
225 | ||
226 | static struct mfc *mfctable[CONFIG_MFCTBLSIZ]; | |
227 | static u_char nexpire[CONFIG_MFCTBLSIZ]; | |
228 | static struct vif viftable[CONFIG_MAXVIFS]; | |
229 | static u_int mrtdebug = 0; /* debug level */ | |
230 | #define DEBUG_MFC 0x02 | |
231 | #define DEBUG_FORWARD 0x04 | |
232 | #define DEBUG_EXPIRE 0x08 | |
233 | #define DEBUG_XMIT 0x10 | |
234 | static u_int tbfdebug = 0; /* tbf debug level */ | |
235 | static u_int rsvpdebug = 0; /* rsvp debug level */ | |
236 | ||
237 | #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ | |
238 | #define UPCALL_EXPIRE 6 /* number of timeouts */ | |
239 | ||
240 | /* | |
241 | * Define the token bucket filter structures | |
242 | * tbftable -> each vif has one of these for storing info | |
243 | */ | |
244 | ||
245 | static struct tbf tbftable[CONFIG_MAXVIFS]; | |
246 | #define TBF_REPROCESS (hz / 100) /* 100x / second */ | |
247 | ||
248 | /* | |
249 | * 'Interfaces' associated with decapsulator (so we can tell | |
250 | * packets that went through it from ones that get reflected | |
251 | * by a broken gateway). These interfaces are never linked into | |
252 | * the system ifnet list & no routes point to them. I.e., packets | |
253 | * can't be sent this way. They only exist as a placeholder for | |
254 | * multicast source verification. | |
255 | */ | |
256 | static struct ifnet multicast_decap_if[CONFIG_MAXVIFS]; | |
257 | ||
258 | #define ENCAP_TTL 64 | |
259 | #define ENCAP_PROTO IPPROTO_IPIP /* 4 */ | |
260 | ||
261 | /* prototype IP hdr for encapsulated packets */ | |
262 | static struct ip multicast_encap_iphdr = { | |
263 | #if BYTE_ORDER == LITTLE_ENDIAN | |
264 | sizeof(struct ip) >> 2, IPVERSION, | |
265 | #else | |
266 | IPVERSION, sizeof(struct ip) >> 2, | |
267 | #endif | |
268 | 0, /* tos */ | |
269 | sizeof(struct ip), /* total length */ | |
270 | 0, /* id */ | |
271 | 0, /* frag offset */ | |
272 | ENCAP_TTL, ENCAP_PROTO, | |
273 | 0, /* checksum */ | |
274 | { 0 }, { 0 } | |
275 | }; | |
276 | ||
277 | /* | |
278 | * Private variables. | |
279 | */ | |
280 | static vifi_t numvifs = 0; | |
281 | static int have_encap_tunnel = 0; | |
282 | ||
283 | /* | |
284 | * one-back cache used by ipip_input to locate a tunnel's vif | |
285 | * given a datagram's src ip address. | |
286 | */ | |
287 | static u_int32_t last_encap_src; | |
288 | static struct vif *last_encap_vif; | |
289 | ||
290 | static u_int32_t X_ip_mcast_src(int vifi); | |
291 | static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo); | |
292 | static int X_ip_mrouter_done(void); | |
293 | static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); | |
294 | static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); | |
295 | static int X_legal_vif_num(int vif); | |
296 | static int X_mrt_ioctl(u_long cmd, caddr_t data); | |
297 | ||
298 | static int get_sg_cnt(struct sioc_sg_req *); | |
299 | static int get_vif_cnt(struct sioc_vif_req *); | |
300 | static int ip_mrouter_init(struct socket *, int); | |
301 | static int add_vif(struct vifctl *); | |
302 | static int del_vif(vifi_t); | |
303 | static int add_mfc(struct mfcctl *); | |
304 | static int del_mfc(struct mfcctl *); | |
305 | static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); | |
306 | static int set_assert(int); | |
307 | static void expire_upcalls(void *); | |
308 | static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, | |
309 | vifi_t); | |
310 | static void phyint_send(struct ip *, struct vif *, struct mbuf *); | |
311 | static void encap_send(struct ip *, struct vif *, struct mbuf *); | |
312 | static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t); | |
313 | static void tbf_queue(struct vif *, struct mbuf *); | |
314 | static void tbf_process_q(struct vif *); | |
315 | static void tbf_reprocess_q(void *); | |
316 | static int tbf_dq_sel(struct vif *, struct ip *); | |
317 | static void tbf_send_packet(struct vif *, struct mbuf *); | |
318 | static void tbf_update_tokens(struct vif *); | |
319 | static int priority(struct vif *, struct ip *); | |
320 | void multiencap_decap(struct mbuf *); | |
321 | ||
322 | /* | |
323 | * whether or not special PIM assert processing is enabled. | |
324 | */ | |
static int	pim_assert;	/* 0 or 1; written by set_assert() */
/*
 * Rate limit for assert notification messages, in usec
 */
#define	ASSERT_MSG_TIME	3000000
330 | ||
331 | /* | |
332 | * Hash function for a source, group entry | |
333 | */ | |
/*
 * Folds the origin (a) and group (g) addresses together by XORing each
 * value with itself shifted right by 10 and by 20 bits, then reduces the
 * result with MFCHASHMOD.
 */
#define	MFCHASH(a, g)						\
	MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^		\
	    ((g) >> 20) ^ ((g) >> 10) ^ (g))
336 | ||
337 | /* | |
338 | * Find a route for a given origin IP address and Multicast group address | |
339 | * Type of service parameter to be added in the future!!! | |
340 | */ | |
341 | ||
/*
 * MFCFIND(o, g, rt): walk the hash chain for origin o / group g and store
 * in rt the first entry that matches both addresses and has no stalled
 * packets queued (mfc_stall == NULL); rt is set to NULL when there is none.
 * Bumps mrts_mfc_lookups on every call and mrts_mfc_misses on a miss.
 * All arguments are parenthesised in the expansion; o and g are still
 * evaluated more than once, so pass side-effect-free expressions.
 */
#define MFCFIND(o, g, rt) { \
	struct mfc *_rt = mfctable[MFCHASH((o), (g))]; \
	(rt) = NULL; \
	++mrtstat.mrts_mfc_lookups; \
	while (_rt) { \
		if ((_rt->mfc_origin.s_addr == (o)) && \
		    (_rt->mfc_mcastgrp.s_addr == (g)) && \
		    (_rt->mfc_stall == NULL)) { \
			(rt) = _rt; \
			break; \
		} \
		_rt = _rt->mfc_next; \
	} \
	if ((rt) == NULL) { \
		++mrtstat.mrts_mfc_misses; \
	} \
}
359 | ||
360 | ||
361 | /* | |
362 | * Macros to compute elapsed time efficiently | |
363 | * Borrowed from Van Jacobson's scheduling code | |
364 | */ | |
/*
 * TV_DELTA(a, b, delta): store in delta the difference a - b in
 * microseconds.  Differences of one or two whole seconds are handled by
 * additions; any other non-zero second difference uses a multiply.
 * The delta argument is parenthesised in the expansion.
 */
#define TV_DELTA(a, b, delta) { \
	int xxs; \
	\
	(delta) = (a).tv_usec - (b).tv_usec; \
	if ((xxs = (a).tv_sec - (b).tv_sec)) { \
		switch (xxs) { \
		case 2: \
			(delta) += 1000000; \
			/* fall through */ \
		case 1: \
			(delta) += 1000000; \
			break; \
		default: \
			(delta) += (1000000 * xxs); \
		} \
	} \
}

/* True when timeval a is strictly earlier than timeval b. */
#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
	(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
385 | ||
#if UPCALL_TIMING
/*
 * Histogram filled in by collate(): slot index is the upcall delay in
 * microseconds shifted right by 10, clamped to the last slot (50).
 */
u_int32_t	upcall_data[51];
static void	collate(struct timeval *);
#endif /* UPCALL_TIMING */
390 | ||
391 | ||
392 | /* | |
393 | * Handle MRT setsockopt commands to modify the multicast routing tables. | |
394 | */ | |
395 | static int | |
396 | X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) | |
397 | { | |
398 | int error, optval; | |
399 | vifi_t vifi; | |
400 | struct vifctl vifc; | |
401 | struct mfcctl mfc; | |
402 | ||
403 | if (so != ip_mrouter && sopt->sopt_name != MRT_INIT) | |
404 | return (EPERM); | |
405 | ||
406 | error = 0; | |
407 | switch (sopt->sopt_name) { | |
408 | case MRT_INIT: | |
409 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
410 | sizeof optval); | |
411 | if (error) | |
412 | break; | |
413 | error = ip_mrouter_init(so, optval); | |
414 | break; | |
415 | ||
416 | case MRT_DONE: | |
417 | error = ip_mrouter_done(); | |
418 | break; | |
419 | ||
420 | case MRT_ADD_VIF: | |
421 | error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc); | |
422 | if (error) | |
423 | break; | |
424 | error = add_vif(&vifc); | |
425 | break; | |
426 | ||
427 | case MRT_DEL_VIF: | |
428 | error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); | |
429 | if (error) | |
430 | break; | |
431 | error = del_vif(vifi); | |
432 | break; | |
433 | ||
434 | case MRT_ADD_MFC: | |
435 | case MRT_DEL_MFC: | |
436 | error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc); | |
437 | if (error) | |
438 | break; | |
439 | if (sopt->sopt_name == MRT_ADD_MFC) | |
440 | error = add_mfc(&mfc); | |
441 | else | |
442 | error = del_mfc(&mfc); | |
443 | break; | |
444 | ||
445 | case MRT_ASSERT: | |
446 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
447 | sizeof optval); | |
448 | if (error) | |
449 | break; | |
450 | set_assert(optval); | |
451 | break; | |
452 | ||
453 | default: | |
454 | error = EOPNOTSUPP; | |
455 | break; | |
456 | } | |
457 | return (error); | |
458 | } | |
459 | ||
460 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
461 | int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set; | |
462 | #endif | |
463 | ||
464 | /* | |
465 | * Handle MRT getsockopt commands | |
466 | */ | |
467 | static int | |
468 | X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt) | |
469 | { | |
470 | int error; | |
471 | static int vers = 0x0305; /* !!! why is this here? XXX */ | |
472 | ||
473 | switch (sopt->sopt_name) { | |
474 | case MRT_VERSION: | |
475 | error = sooptcopyout(sopt, &vers, sizeof vers); | |
476 | break; | |
477 | ||
478 | case MRT_ASSERT: | |
479 | error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); | |
480 | break; | |
481 | default: | |
482 | error = EOPNOTSUPP; | |
483 | break; | |
484 | } | |
485 | return (error); | |
486 | } | |
487 | ||
488 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
489 | int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get; | |
490 | #endif | |
491 | ||
492 | /* | |
493 | * Handle ioctl commands to obtain information from the cache | |
494 | */ | |
495 | static int | |
496 | X_mrt_ioctl(u_long cmd, caddr_t data) | |
497 | { | |
498 | int error = 0; | |
499 | ||
500 | switch (cmd) { | |
501 | case (SIOCGETVIFCNT): | |
502 | return (get_vif_cnt((struct sioc_vif_req *)data)); | |
503 | break; | |
504 | case (SIOCGETSGCNT): | |
505 | return (get_sg_cnt((struct sioc_sg_req *)data)); | |
506 | break; | |
507 | default: | |
508 | return (EINVAL); | |
509 | break; | |
510 | } | |
511 | return error; | |
512 | } | |
513 | ||
514 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
515 | int (*mrt_ioctl)(u_long, caddr_t) = X_mrt_ioctl; | |
516 | #endif | |
517 | ||
518 | /* | |
519 | * returns the packet, byte, rpf-failure count for the source group provided | |
520 | */ | |
521 | static int | |
522 | get_sg_cnt(struct sioc_sg_req *req) | |
523 | { | |
524 | struct mfc *rt; | |
525 | ||
526 | MFCFIND(req->src.s_addr, req->grp.s_addr, rt); | |
527 | if (rt != NULL) { | |
528 | req->pktcnt = rt->mfc_pkt_cnt; | |
529 | req->bytecnt = rt->mfc_byte_cnt; | |
530 | req->wrong_if = rt->mfc_wrong_if; | |
531 | } else | |
532 | req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; | |
533 | ||
534 | return 0; | |
535 | } | |
536 | ||
537 | /* | |
538 | * returns the input and output packet and byte counts on the vif provided | |
539 | */ | |
540 | static int | |
541 | get_vif_cnt(struct sioc_vif_req *req) | |
542 | { | |
543 | vifi_t vifi = req->vifi; | |
544 | ||
545 | if (vifi >= numvifs) return EINVAL; | |
546 | ||
547 | req->icount = viftable[vifi].v_pkt_in; | |
548 | req->ocount = viftable[vifi].v_pkt_out; | |
549 | req->ibytes = viftable[vifi].v_bytes_in; | |
550 | req->obytes = viftable[vifi].v_bytes_out; | |
551 | ||
552 | return 0; | |
553 | } | |
554 | ||
555 | /* | |
556 | * Enable multicast routing | |
557 | */ | |
558 | static int | |
559 | ip_mrouter_init(struct socket *so, int vers) | |
560 | { | |
561 | if (mrtdebug) | |
562 | log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n", | |
563 | so->so_type, so->so_proto->pr_protocol); | |
564 | ||
565 | if (so->so_type != SOCK_RAW || | |
566 | so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; | |
567 | ||
568 | if (vers != 1) | |
569 | return ENOPROTOOPT; | |
570 | ||
571 | if (ip_mrouter != NULL) return EADDRINUSE; | |
572 | ||
573 | ip_mrouter = so; | |
574 | ||
575 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
576 | bzero((caddr_t)nexpire, sizeof(nexpire)); | |
577 | ||
578 | pim_assert = 0; | |
579 | ||
580 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
581 | ||
582 | if (mrtdebug) | |
583 | log(LOG_DEBUG, "ip_mrouter_init\n"); | |
584 | ||
585 | return 0; | |
586 | } | |
587 | ||
588 | /* | |
589 | * Disable multicast routing | |
590 | */ | |
591 | static int | |
592 | X_ip_mrouter_done(void) | |
593 | { | |
594 | vifi_t vifi; | |
595 | int i; | |
596 | struct ifnet *ifp; | |
597 | struct ifreq ifr; | |
598 | struct mfc *rt; | |
599 | struct rtdetq *rte; | |
600 | ||
601 | /* | |
602 | * For each phyint in use, disable promiscuous reception of all IP | |
603 | * multicasts. | |
604 | */ | |
605 | for (vifi = 0; vifi < numvifs; vifi++) { | |
606 | if (viftable[vifi].v_lcl_addr.s_addr != 0 && | |
607 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
608 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
609 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr | |
610 | = INADDR_ANY; | |
611 | ifp = viftable[vifi].v_ifp; | |
612 | if_allmulti(ifp, 0); | |
613 | } | |
614 | } | |
615 | bzero((caddr_t)tbftable, sizeof(tbftable)); | |
616 | bzero((caddr_t)viftable, sizeof(viftable)); | |
617 | numvifs = 0; | |
618 | pim_assert = 0; | |
619 | ||
620 | untimeout(expire_upcalls, (caddr_t)NULL); | |
621 | ||
622 | /* | |
623 | * Free all multicast forwarding cache entries. | |
624 | */ | |
625 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
626 | for (rt = mfctable[i]; rt != NULL; ) { | |
627 | struct mfc *nr = rt->mfc_next; | |
628 | ||
629 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
630 | struct rtdetq *n = rte->next; | |
631 | ||
632 | m_freem(rte->m); | |
633 | FREE(rte, M_MRTABLE); | |
634 | rte = n; | |
635 | } | |
636 | FREE(rt, M_MRTABLE); | |
637 | rt = nr; | |
638 | } | |
639 | } | |
640 | ||
641 | bzero((caddr_t)mfctable, sizeof(mfctable)); | |
642 | ||
643 | /* | |
644 | * Reset de-encapsulation cache | |
645 | */ | |
646 | last_encap_src = 0; | |
647 | last_encap_vif = NULL; | |
648 | have_encap_tunnel = 0; | |
649 | ||
650 | ip_mrouter = NULL; | |
651 | ||
652 | if (mrtdebug) | |
653 | log(LOG_DEBUG, "ip_mrouter_done\n"); | |
654 | ||
655 | return 0; | |
656 | } | |
657 | ||
658 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
659 | int (*ip_mrouter_done)(void) = X_ip_mrouter_done; | |
660 | #endif | |
661 | ||
662 | /* | |
663 | * Set PIM assert processing global | |
664 | */ | |
665 | static int | |
666 | set_assert(int i) | |
667 | { | |
668 | if ((i != 1) && (i != 0)) | |
669 | return EINVAL; | |
670 | ||
671 | pim_assert = i; | |
672 | ||
673 | return 0; | |
674 | } | |
675 | ||
676 | /* | |
677 | * Add a vif to the vif table | |
678 | */ | |
679 | static int | |
680 | add_vif(struct vifctl *vifcp) | |
681 | { | |
682 | struct vif *vifp = viftable + vifcp->vifc_vifi; | |
683 | static struct sockaddr_in sin = { sizeof sin, AF_INET, | |
684 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
685 | struct ifaddr *ifa; | |
686 | struct ifnet *ifp; | |
687 | int error, s; | |
688 | struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; | |
689 | ||
690 | if (vifcp->vifc_vifi >= CONFIG_MAXVIFS) return EINVAL; | |
691 | if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; | |
692 | ||
693 | /* Find the interface with an address in AF_INET family */ | |
694 | sin.sin_addr = vifcp->vifc_lcl_addr; | |
695 | ifa = ifa_ifwithaddr((struct sockaddr *)&sin); | |
696 | if (ifa == 0) return EADDRNOTAVAIL; | |
697 | ifp = ifa->ifa_ifp; | |
698 | IFA_REMREF(ifa); | |
699 | ifa = NULL; | |
700 | ||
701 | if (vifcp->vifc_flags & VIFF_TUNNEL) { | |
702 | if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { | |
703 | /* | |
704 | * An encapsulating tunnel is wanted. Tell ipip_input() to | |
705 | * start paying attention to encapsulated packets. | |
706 | */ | |
707 | if (have_encap_tunnel == 0) { | |
708 | have_encap_tunnel = 1; | |
709 | for (s = 0; s < CONFIG_MAXVIFS; ++s) { | |
710 | multicast_decap_if[s].if_name = "mdecap"; | |
711 | multicast_decap_if[s].if_unit = s; | |
712 | multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP; | |
713 | } | |
714 | } | |
715 | /* | |
716 | * Set interface to fake encapsulator interface | |
717 | */ | |
718 | ifp = &multicast_decap_if[vifcp->vifc_vifi]; | |
719 | /* | |
720 | * Prepare cached route entry | |
721 | */ | |
722 | bzero(&vifp->v_route, sizeof(vifp->v_route)); | |
723 | } else { | |
724 | log(LOG_ERR, "source routed tunnels not supported\n"); | |
725 | return EOPNOTSUPP; | |
726 | } | |
727 | } else { | |
728 | /* Make sure the interface supports multicast */ | |
729 | if ((ifp->if_flags & IFF_MULTICAST) == 0) | |
730 | return EOPNOTSUPP; | |
731 | ||
732 | /* Enable promiscuous reception of all IP multicasts from the if */ | |
733 | error = if_allmulti(ifp, 1); | |
734 | if (error) | |
735 | return error; | |
736 | } | |
737 | ||
738 | /* define parameters for the tbf structure */ | |
739 | vifp->v_tbf = v_tbf; | |
740 | GET_TIME(vifp->v_tbf->tbf_last_pkt_t); | |
741 | vifp->v_tbf->tbf_n_tok = 0; | |
742 | vifp->v_tbf->tbf_q_len = 0; | |
743 | vifp->v_tbf->tbf_max_q_len = MAXQSIZE; | |
744 | vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; | |
745 | ||
746 | vifp->v_flags = vifcp->vifc_flags; | |
747 | vifp->v_threshold = vifcp->vifc_threshold; | |
748 | vifp->v_lcl_addr = vifcp->vifc_lcl_addr; | |
749 | vifp->v_rmt_addr = vifcp->vifc_rmt_addr; | |
750 | vifp->v_ifp = ifp; | |
751 | /* scaling up here allows division by 1024 in critical code */ | |
752 | vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; | |
753 | vifp->v_rsvp_on = 0; | |
754 | vifp->v_rsvpd = NULL; | |
755 | /* initialize per vif pkt counters */ | |
756 | vifp->v_pkt_in = 0; | |
757 | vifp->v_pkt_out = 0; | |
758 | vifp->v_bytes_in = 0; | |
759 | vifp->v_bytes_out = 0; | |
760 | ||
761 | /* Adjust numvifs up if the vifi is higher than numvifs */ | |
762 | if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; | |
763 | ||
764 | if (mrtdebug) | |
765 | log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n", | |
766 | vifcp->vifc_vifi, | |
767 | (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr), | |
768 | (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", | |
769 | (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr), | |
770 | vifcp->vifc_threshold, | |
771 | vifcp->vifc_rate_limit); | |
772 | ||
773 | return 0; | |
774 | } | |
775 | ||
776 | /* | |
777 | * Delete a vif from the vif table | |
778 | */ | |
779 | static int | |
780 | del_vif(vifi_t vifi) | |
781 | { | |
782 | struct vif *vifp = &viftable[vifi]; | |
783 | struct mbuf *m; | |
784 | struct ifnet *ifp; | |
785 | struct ifreq ifr; | |
786 | ||
787 | if (vifi >= numvifs) return EINVAL; | |
788 | if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; | |
789 | ||
790 | if (!(vifp->v_flags & VIFF_TUNNEL)) { | |
791 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; | |
792 | ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; | |
793 | ifp = vifp->v_ifp; | |
794 | if_allmulti(ifp, 0); | |
795 | } | |
796 | ||
797 | if (vifp == last_encap_vif) { | |
798 | last_encap_vif = 0; | |
799 | last_encap_src = 0; | |
800 | } | |
801 | ||
802 | /* | |
803 | * Free packets queued at the interface | |
804 | */ | |
805 | while (vifp->v_tbf->tbf_q) { | |
806 | m = vifp->v_tbf->tbf_q; | |
807 | vifp->v_tbf->tbf_q = m->m_act; | |
808 | m_freem(m); | |
809 | } | |
810 | ||
811 | bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); | |
812 | bzero((caddr_t)vifp, sizeof (*vifp)); | |
813 | ||
814 | if (mrtdebug) | |
815 | log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs); | |
816 | ||
817 | /* Adjust numvifs down */ | |
818 | for (vifi = numvifs; vifi > 0; vifi--) | |
819 | if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; | |
820 | numvifs = vifi; | |
821 | ||
822 | return 0; | |
823 | } | |
824 | ||
825 | /* | |
826 | * Add an mfc entry | |
827 | */ | |
828 | static int | |
829 | add_mfc(struct mfcctl *mfccp) | |
830 | { | |
831 | struct mfc *rt; | |
832 | u_int32_t hash; | |
833 | struct rtdetq *rte; | |
834 | u_short nstl; | |
835 | int i; | |
836 | ||
837 | MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); | |
838 | ||
839 | /* If an entry already exists, just update the fields */ | |
840 | if (rt) { | |
841 | if (mrtdebug & DEBUG_MFC) | |
842 | log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n", | |
843 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
844 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
845 | mfccp->mfcc_parent); | |
846 | ||
847 | rt->mfc_parent = mfccp->mfcc_parent; | |
848 | for (i = 0; i < numvifs; i++) | |
849 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
850 | return 0; | |
851 | } | |
852 | ||
853 | /* | |
854 | * Find the entry for which the upcall was made and update | |
855 | */ | |
856 | hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); | |
857 | for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { | |
858 | ||
859 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
860 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && | |
861 | (rt->mfc_stall != NULL)) { | |
862 | ||
863 | if (nstl++) | |
864 | log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", | |
865 | "multiple kernel entries", | |
866 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
867 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
868 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
869 | ||
870 | if (mrtdebug & DEBUG_MFC) | |
871 | log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", | |
872 | (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
873 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
874 | mfccp->mfcc_parent, (void *)rt->mfc_stall); | |
875 | ||
876 | rt->mfc_origin = mfccp->mfcc_origin; | |
877 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
878 | rt->mfc_parent = mfccp->mfcc_parent; | |
879 | for (i = 0; i < numvifs; i++) | |
880 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
881 | /* initialize pkt counters per src-grp */ | |
882 | rt->mfc_pkt_cnt = 0; | |
883 | rt->mfc_byte_cnt = 0; | |
884 | rt->mfc_wrong_if = 0; | |
885 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
886 | ||
887 | rt->mfc_expire = 0; /* Don't clean this guy up */ | |
888 | nexpire[hash]--; | |
889 | ||
890 | /* free packets Qed at the end of this entry */ | |
891 | for (rte = rt->mfc_stall; rte != NULL; ) { | |
892 | struct rtdetq *n = rte->next; | |
893 | ||
894 | ip_mdq(rte->m, rte->ifp, rt, -1); | |
895 | m_freem(rte->m); | |
896 | #if UPCALL_TIMING | |
897 | collate(&(rte->t)); | |
898 | #endif /* UPCALL_TIMING */ | |
899 | FREE(rte, M_MRTABLE); | |
900 | rte = n; | |
901 | } | |
902 | rt->mfc_stall = NULL; | |
903 | } | |
904 | } | |
905 | ||
906 | /* | |
907 | * It is possible that an entry is being inserted without an upcall | |
908 | */ | |
909 | if (nstl == 0) { | |
910 | if (mrtdebug & DEBUG_MFC) | |
911 | log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", | |
912 | hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr), | |
913 | (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr), | |
914 | mfccp->mfcc_parent); | |
915 | ||
916 | for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { | |
917 | ||
918 | if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && | |
919 | (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { | |
920 | ||
921 | rt->mfc_origin = mfccp->mfcc_origin; | |
922 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
923 | rt->mfc_parent = mfccp->mfcc_parent; | |
924 | for (i = 0; i < numvifs; i++) | |
925 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
926 | /* initialize pkt counters per src-grp */ | |
927 | rt->mfc_pkt_cnt = 0; | |
928 | rt->mfc_byte_cnt = 0; | |
929 | rt->mfc_wrong_if = 0; | |
930 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
931 | if (rt->mfc_expire) | |
932 | nexpire[hash]--; | |
933 | rt->mfc_expire = 0; | |
934 | } | |
935 | } | |
936 | if (rt == NULL) { | |
937 | /* no upcall, so make a new entry */ | |
938 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
939 | if (rt == NULL) { | |
940 | return ENOBUFS; | |
941 | } | |
942 | ||
943 | /* insert new entry at head of hash chain */ | |
944 | rt->mfc_origin = mfccp->mfcc_origin; | |
945 | rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; | |
946 | rt->mfc_parent = mfccp->mfcc_parent; | |
947 | for (i = 0; i < numvifs; i++) | |
948 | rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; | |
949 | /* initialize pkt counters per src-grp */ | |
950 | rt->mfc_pkt_cnt = 0; | |
951 | rt->mfc_byte_cnt = 0; | |
952 | rt->mfc_wrong_if = 0; | |
953 | rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; | |
954 | rt->mfc_expire = 0; | |
955 | rt->mfc_stall = NULL; | |
956 | ||
957 | /* link into table */ | |
958 | rt->mfc_next = mfctable[hash]; | |
959 | mfctable[hash] = rt; | |
960 | } | |
961 | } | |
962 | return 0; | |
963 | } | |
964 | ||
965 | #if UPCALL_TIMING | |
966 | /* | |
967 | * collect delay statistics on the upcalls | |
968 | */ | |
969 | static void | |
970 | collate(struct timeval *t) | |
971 | { | |
972 | u_int32_t d; | |
973 | struct timeval tp; | |
974 | u_int32_t delta; | |
975 | ||
976 | GET_TIME(tp); | |
977 | ||
978 | if (TV_LT(*t, tp)) | |
979 | { | |
980 | TV_DELTA(tp, *t, delta); | |
981 | ||
982 | d = delta >> 10; | |
983 | if (d > 50) | |
984 | d = 50; | |
985 | ||
986 | ++upcall_data[d]; | |
987 | } | |
988 | } | |
989 | #endif /* UPCALL_TIMING */ | |
990 | ||
991 | /* | |
992 | * Delete an mfc entry | |
993 | */ | |
994 | static int | |
995 | del_mfc(struct mfcctl *mfccp) | |
996 | { | |
997 | struct in_addr origin; | |
998 | struct in_addr mcastgrp; | |
999 | struct mfc *rt; | |
1000 | struct mfc **nptr; | |
1001 | u_int32_t hash; | |
1002 | ||
1003 | origin = mfccp->mfcc_origin; | |
1004 | mcastgrp = mfccp->mfcc_mcastgrp; | |
1005 | hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); | |
1006 | ||
1007 | if (mrtdebug & DEBUG_MFC) | |
1008 | log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", | |
1009 | (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr)); | |
1010 | ||
1011 | nptr = &mfctable[hash]; | |
1012 | while ((rt = *nptr) != NULL) { | |
1013 | if (origin.s_addr == rt->mfc_origin.s_addr && | |
1014 | mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && | |
1015 | rt->mfc_stall == NULL) | |
1016 | break; | |
1017 | ||
1018 | nptr = &rt->mfc_next; | |
1019 | } | |
1020 | if (rt == NULL) { | |
1021 | return EADDRNOTAVAIL; | |
1022 | } | |
1023 | ||
1024 | *nptr = rt->mfc_next; | |
1025 | FREE(rt, M_MRTABLE); | |
1026 | ||
1027 | return 0; | |
1028 | } | |
1029 | ||
1030 | /* | |
1031 | * Send a message to mrouted on the multicast routing socket | |
1032 | */ | |
1033 | static int | |
1034 | socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) | |
1035 | { | |
1036 | socket_lock(s, 1); | |
1037 | if (s) { | |
1038 | if (sbappendaddr(&s->so_rcv, | |
1039 | (struct sockaddr *)src, | |
1040 | mm, (struct mbuf *)0, NULL) != 0) { | |
1041 | sorwakeup(s); | |
1042 | socket_unlock(s, 1); | |
1043 | return 0; | |
1044 | } | |
1045 | } | |
1046 | socket_unlock(s, 1); | |
1047 | m_freem(mm); | |
1048 | return -1; | |
1049 | } | |
1050 | ||
1051 | /* | |
1052 | * IP multicast forwarding function. This function assumes that the packet | |
1053 | * pointed to by "ip" has arrived on (or is about to be sent to) the interface | |
1054 | * pointed to by "ifp", and the packet is to be relayed to other networks | |
1055 | * that have members of the packet's destination IP multicast group. | |
1056 | * | |
1057 | * The packet is returned unscathed to the caller, unless it is | |
1058 | * erroneous, in which case a non-zero return value tells the caller to | |
1059 | * discard it. | |
1060 | */ | |
1061 | ||
1062 | #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ | |
1063 | #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ | |
1064 | ||
1065 | static int | |
1066 | X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, | |
1067 | struct ip_moptions *imo) | |
1068 | { | |
1069 | struct mfc *rt; | |
1070 | u_char *ipoptions; | |
1071 | static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET, | |
1072 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
1073 | static int srctun = 0; | |
1074 | struct mbuf *mm; | |
1075 | vifi_t vifi; | |
1076 | struct vif *vifp; | |
1077 | ||
1078 | if (mrtdebug & DEBUG_FORWARD) | |
1079 | log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n", | |
1080 | (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr), | |
1081 | (void *)ifp); | |
1082 | ||
1083 | if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || | |
1084 | (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { | |
1085 | /* | |
1086 | * Packet arrived via a physical interface or | |
1087 | * an encapsulated tunnel. | |
1088 | */ | |
1089 | } else { | |
1090 | /* | |
1091 | * Packet arrived through a source-route tunnel. | |
1092 | * Source-route tunnels are no longer supported. | |
1093 | */ | |
1094 | if ((srctun++ % 1000) == 0) | |
1095 | log(LOG_ERR, | |
1096 | "ip_mforward: received source-routed packet from %lx\n", | |
1097 | (u_int32_t)ntohl(ip->ip_src.s_addr)); | |
1098 | ||
1099 | return 1; | |
1100 | } | |
1101 | ||
1102 | if (imo != NULL) | |
1103 | IMO_LOCK(imo); | |
1104 | if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { | |
1105 | IMO_UNLOCK(imo); | |
1106 | if (ip->ip_ttl < 255) | |
1107 | ip->ip_ttl++; /* compensate for -1 in *_send routines */ | |
1108 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1109 | vifp = viftable + vifi; | |
1110 | printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", | |
1111 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, | |
1112 | (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", | |
1113 | vifp->v_ifp->if_name, vifp->v_ifp->if_unit); | |
1114 | } | |
1115 | return (ip_mdq(m, ifp, NULL, vifi)); | |
1116 | } else if (imo != NULL) { | |
1117 | IMO_UNLOCK(imo); | |
1118 | } | |
1119 | if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { | |
1120 | printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", | |
1121 | ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); | |
1122 | if(!imo) | |
1123 | printf("In fact, no options were specified at all\n"); | |
1124 | } | |
1125 | ||
1126 | /* | |
1127 | * Don't forward a packet with time-to-live of zero or one, | |
1128 | * or a packet destined to a local-only group. | |
1129 | */ | |
1130 | if (ip->ip_ttl <= 1 || | |
1131 | ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) | |
1132 | return 0; | |
1133 | ||
1134 | /* | |
1135 | * Determine forwarding vifs from the forwarding cache table | |
1136 | */ | |
1137 | MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); | |
1138 | ||
1139 | /* Entry exists, so forward if necessary */ | |
1140 | if (rt != NULL) { | |
1141 | return (ip_mdq(m, ifp, rt, -1)); | |
1142 | } else { | |
1143 | /* | |
1144 | * If we don't have a route for packet's origin, | |
1145 | * Make a copy of the packet & | |
1146 | * send message to routing daemon | |
1147 | */ | |
1148 | ||
1149 | struct mbuf *mb0; | |
1150 | struct rtdetq *rte; | |
1151 | u_int32_t hash; | |
1152 | int hlen = ip->ip_hl << 2; | |
1153 | #if UPCALL_TIMING | |
1154 | struct timeval tp; | |
1155 | ||
1156 | GET_TIME(tp); | |
1157 | #endif | |
1158 | ||
1159 | mrtstat.mrts_no_route++; | |
1160 | if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) | |
1161 | log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n", | |
1162 | (u_int32_t)ntohl(ip->ip_src.s_addr), | |
1163 | (u_int32_t)ntohl(ip->ip_dst.s_addr)); | |
1164 | ||
1165 | /* | |
1166 | * Allocate mbufs early so that we don't do extra work if we are | |
1167 | * just going to fail anyway. Make sure to pullup the header so | |
1168 | * that other people can't step on it. | |
1169 | */ | |
1170 | rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT); | |
1171 | if (rte == NULL) { | |
1172 | return ENOBUFS; | |
1173 | } | |
1174 | mb0 = m_copy(m, 0, M_COPYALL); | |
1175 | if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) | |
1176 | mb0 = m_pullup(mb0, hlen); | |
1177 | if (mb0 == NULL) { | |
1178 | FREE(rte, M_MRTABLE); | |
1179 | return ENOBUFS; | |
1180 | } | |
1181 | ||
1182 | /* is there an upcall waiting for this packet? */ | |
1183 | hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); | |
1184 | for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { | |
1185 | if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && | |
1186 | (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && | |
1187 | (rt->mfc_stall != NULL)) | |
1188 | break; | |
1189 | } | |
1190 | ||
1191 | if (rt == NULL) { | |
1192 | int i; | |
1193 | struct igmpmsg *im; | |
1194 | ||
1195 | /* no upcall, so make a new entry */ | |
1196 | rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT); | |
1197 | if (rt == NULL) { | |
1198 | FREE(rte, M_MRTABLE); | |
1199 | m_freem(mb0); | |
1200 | return ENOBUFS; | |
1201 | } | |
1202 | /* Make a copy of the header to send to the user level process */ | |
1203 | mm = m_copy(mb0, 0, hlen); | |
1204 | if (mm == NULL) { | |
1205 | FREE(rte, M_MRTABLE); | |
1206 | m_freem(mb0); | |
1207 | FREE(rt, M_MRTABLE); | |
1208 | return ENOBUFS; | |
1209 | } | |
1210 | ||
1211 | /* | |
1212 | * Send message to routing daemon to install | |
1213 | * a route into the kernel table | |
1214 | */ | |
1215 | k_igmpsrc.sin_addr = ip->ip_src; | |
1216 | ||
1217 | im = mtod(mm, struct igmpmsg *); | |
1218 | im->im_msgtype = IGMPMSG_NOCACHE; | |
1219 | im->im_mbz = 0; | |
1220 | ||
1221 | mrtstat.mrts_upcalls++; | |
1222 | ||
1223 | if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { | |
1224 | log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n"); | |
1225 | ++mrtstat.mrts_upq_sockfull; | |
1226 | FREE(rte, M_MRTABLE); | |
1227 | m_freem(mb0); | |
1228 | FREE(rt, M_MRTABLE); | |
1229 | return ENOBUFS; | |
1230 | } | |
1231 | ||
1232 | /* insert new entry at head of hash chain */ | |
1233 | rt->mfc_origin.s_addr = ip->ip_src.s_addr; | |
1234 | rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; | |
1235 | rt->mfc_expire = UPCALL_EXPIRE; | |
1236 | nexpire[hash]++; | |
1237 | for (i = 0; i < numvifs; i++) | |
1238 | rt->mfc_ttls[i] = 0; | |
1239 | rt->mfc_parent = -1; | |
1240 | ||
1241 | /* link into table */ | |
1242 | rt->mfc_next = mfctable[hash]; | |
1243 | mfctable[hash] = rt; | |
1244 | rt->mfc_stall = rte; | |
1245 | ||
1246 | } else { | |
1247 | /* determine if q has overflowed */ | |
1248 | int npkts = 0; | |
1249 | struct rtdetq **p; | |
1250 | ||
1251 | for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) | |
1252 | npkts++; | |
1253 | ||
1254 | if (npkts > MAX_UPQ) { | |
1255 | mrtstat.mrts_upq_ovflw++; | |
1256 | FREE(rte, M_MRTABLE); | |
1257 | m_freem(mb0); | |
1258 | return 0; | |
1259 | } | |
1260 | ||
1261 | /* Add this entry to the end of the queue */ | |
1262 | *p = rte; | |
1263 | } | |
1264 | ||
1265 | rte->m = mb0; | |
1266 | rte->ifp = ifp; | |
1267 | #if UPCALL_TIMING | |
1268 | rte->t = tp; | |
1269 | #endif | |
1270 | rte->next = NULL; | |
1271 | ||
1272 | return 0; | |
1273 | } | |
1274 | } | |
1275 | ||
1276 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1277 | int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, | |
1278 | struct ip_moptions *) = X_ip_mforward; | |
1279 | #endif | |
1280 | ||
1281 | /* | |
1282 | * Clean up the cache entry if upcall is not serviced | |
1283 | */ | |
1284 | static void | |
1285 | expire_upcalls(__unused void *unused) | |
1286 | { | |
1287 | struct rtdetq *rte; | |
1288 | struct mfc *mfc, **nptr; | |
1289 | int i; | |
1290 | ||
1291 | for (i = 0; i < CONFIG_MFCTBLSIZ; i++) { | |
1292 | if (nexpire[i] == 0) | |
1293 | continue; | |
1294 | nptr = &mfctable[i]; | |
1295 | for (mfc = *nptr; mfc != NULL; mfc = *nptr) { | |
1296 | /* | |
1297 | * Skip real cache entries | |
1298 | * Make sure it wasn't marked to not expire (shouldn't happen) | |
1299 | * If it expires now | |
1300 | */ | |
1301 | if (mfc->mfc_stall != NULL && | |
1302 | mfc->mfc_expire != 0 && | |
1303 | --mfc->mfc_expire == 0) { | |
1304 | if (mrtdebug & DEBUG_EXPIRE) | |
1305 | log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", | |
1306 | (u_int32_t)ntohl(mfc->mfc_origin.s_addr), | |
1307 | (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr)); | |
1308 | /* | |
1309 | * drop all the packets | |
1310 | * free the mbuf with the pkt, if, timing info | |
1311 | */ | |
1312 | for (rte = mfc->mfc_stall; rte; ) { | |
1313 | struct rtdetq *n = rte->next; | |
1314 | ||
1315 | m_freem(rte->m); | |
1316 | FREE(rte, M_MRTABLE); | |
1317 | rte = n; | |
1318 | } | |
1319 | ++mrtstat.mrts_cache_cleanups; | |
1320 | nexpire[i]--; | |
1321 | ||
1322 | *nptr = mfc->mfc_next; | |
1323 | FREE(mfc, M_MRTABLE); | |
1324 | } else { | |
1325 | nptr = &mfc->mfc_next; | |
1326 | } | |
1327 | } | |
1328 | } | |
1329 | timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); | |
1330 | } | |
1331 | ||
1332 | /* | |
1333 | * Packet forwarding routine once entry in the cache is made | |
1334 | */ | |
1335 | static int | |
1336 | ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, | |
1337 | vifi_t xmt_vif) | |
1338 | { | |
1339 | struct ip *ip = mtod(m, struct ip *); | |
1340 | vifi_t vifi; | |
1341 | struct vif *vifp; | |
1342 | int plen = ip->ip_len; | |
1343 | ||
1344 | /* | |
1345 | * Macro to send packet on vif. Since RSVP packets don't get counted on | |
1346 | * input, they shouldn't get counted on output, so statistics keeping is | |
1347 | * seperate. | |
1348 | */ | |
1349 | #define MC_SEND(ip,vifp,m) { \ | |
1350 | if ((vifp)->v_flags & VIFF_TUNNEL) \ | |
1351 | encap_send((ip), (vifp), (m)); \ | |
1352 | else \ | |
1353 | phyint_send((ip), (vifp), (m)); \ | |
1354 | } | |
1355 | ||
1356 | /* | |
1357 | * If xmt_vif is not -1, send on only the requested vif. | |
1358 | * | |
1359 | * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) | |
1360 | */ | |
1361 | if (xmt_vif < numvifs) { | |
1362 | MC_SEND(ip, viftable + xmt_vif, m); | |
1363 | return 1; | |
1364 | } | |
1365 | ||
1366 | /* | |
1367 | * Don't forward if it didn't arrive from the parent vif for its origin. | |
1368 | */ | |
1369 | vifi = rt->mfc_parent; | |
1370 | if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { | |
1371 | /* came in the wrong interface */ | |
1372 | if (mrtdebug & DEBUG_FORWARD) | |
1373 | log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n", | |
1374 | (void *)ifp, vifi, (void *)viftable[vifi].v_ifp); | |
1375 | ++mrtstat.mrts_wrong_if; | |
1376 | ++rt->mfc_wrong_if; | |
1377 | /* | |
1378 | * If we are doing PIM assert processing, and we are forwarding | |
1379 | * packets on this interface, and it is a broadcast medium | |
1380 | * interface (and not a tunnel), send a message to the routing daemon. | |
1381 | */ | |
1382 | if (pim_assert && rt->mfc_ttls[vifi] && | |
1383 | (ifp->if_flags & IFF_BROADCAST) && | |
1384 | !(viftable[vifi].v_flags & VIFF_TUNNEL)) { | |
1385 | struct sockaddr_in k_igmpsrc; | |
1386 | struct mbuf *mm; | |
1387 | struct igmpmsg *im; | |
1388 | int hlen = ip->ip_hl << 2; | |
1389 | struct timeval now; | |
1390 | u_int32_t delta; | |
1391 | ||
1392 | GET_TIME(now); | |
1393 | ||
1394 | TV_DELTA(rt->mfc_last_assert, now, delta); | |
1395 | ||
1396 | if (delta > ASSERT_MSG_TIME) { | |
1397 | mm = m_copy(m, 0, hlen); | |
1398 | if (mm && (M_HASCL(mm) || mm->m_len < hlen)) | |
1399 | mm = m_pullup(mm, hlen); | |
1400 | if (mm == NULL) { | |
1401 | return ENOBUFS; | |
1402 | } | |
1403 | ||
1404 | rt->mfc_last_assert = now; | |
1405 | ||
1406 | im = mtod(mm, struct igmpmsg *); | |
1407 | im->im_msgtype = IGMPMSG_WRONGVIF; | |
1408 | im->im_mbz = 0; | |
1409 | im->im_vif = vifi; | |
1410 | ||
1411 | k_igmpsrc.sin_addr = im->im_src; | |
1412 | ||
1413 | socket_send(ip_mrouter, mm, &k_igmpsrc); | |
1414 | } | |
1415 | } | |
1416 | return 0; | |
1417 | } | |
1418 | ||
1419 | /* If I sourced this packet, it counts as output, else it was input. */ | |
1420 | if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { | |
1421 | viftable[vifi].v_pkt_out++; | |
1422 | viftable[vifi].v_bytes_out += plen; | |
1423 | } else { | |
1424 | viftable[vifi].v_pkt_in++; | |
1425 | viftable[vifi].v_bytes_in += plen; | |
1426 | } | |
1427 | rt->mfc_pkt_cnt++; | |
1428 | rt->mfc_byte_cnt += plen; | |
1429 | ||
1430 | /* | |
1431 | * For each vif, decide if a copy of the packet should be forwarded. | |
1432 | * Forward if: | |
1433 | * - the ttl exceeds the vif's threshold | |
1434 | * - there are group members downstream on interface | |
1435 | */ | |
1436 | for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) | |
1437 | if ((rt->mfc_ttls[vifi] > 0) && | |
1438 | (ip->ip_ttl > rt->mfc_ttls[vifi])) { | |
1439 | vifp->v_pkt_out++; | |
1440 | vifp->v_bytes_out += plen; | |
1441 | MC_SEND(ip, vifp, m); | |
1442 | } | |
1443 | ||
1444 | return 0; | |
1445 | } | |
1446 | ||
1447 | /* | |
1448 | * check if a vif number is legal/ok. This is used by ip_output, to export | |
1449 | * numvifs there, | |
1450 | */ | |
1451 | static int | |
1452 | X_legal_vif_num(int vif) | |
1453 | { | |
1454 | if (vif >= 0 && vif < numvifs) | |
1455 | return(1); | |
1456 | else | |
1457 | return(0); | |
1458 | } | |
1459 | ||
1460 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1461 | int (*legal_vif_num)(int) = X_legal_vif_num; | |
1462 | #endif | |
1463 | ||
1464 | /* | |
1465 | * Return the local address used by this vif | |
1466 | */ | |
1467 | static u_int32_t | |
1468 | X_ip_mcast_src(int vifi) | |
1469 | { | |
1470 | if (vifi >= 0 && vifi < numvifs) | |
1471 | return viftable[vifi].v_lcl_addr.s_addr; | |
1472 | else | |
1473 | return INADDR_ANY; | |
1474 | } | |
1475 | ||
1476 | #if !defined(MROUTE_LKM) || !MROUTE_LKM | |
1477 | u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src; | |
1478 | #endif | |
1479 | ||
1480 | static void | |
1481 | phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1482 | { | |
1483 | struct mbuf *mb_copy; | |
1484 | int hlen = ip->ip_hl << 2; | |
1485 | ||
1486 | /* | |
1487 | * Make a new reference to the packet; make sure that | |
1488 | * the IP header is actually copied, not just referenced, | |
1489 | * so that ip_output() only scribbles on the copy. | |
1490 | */ | |
1491 | mb_copy = m_copy(m, 0, M_COPYALL); | |
1492 | if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) | |
1493 | mb_copy = m_pullup(mb_copy, hlen); | |
1494 | if (mb_copy == NULL) | |
1495 | return; | |
1496 | ||
1497 | if (vifp->v_rate_limit == 0) | |
1498 | tbf_send_packet(vifp, mb_copy); | |
1499 | else | |
1500 | tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); | |
1501 | } | |
1502 | ||
1503 | static void | |
1504 | encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m) | |
1505 | { | |
1506 | struct mbuf *mb_copy; | |
1507 | struct ip *ip_copy; | |
1508 | int i, len = ip->ip_len; | |
1509 | ||
1510 | /* | |
1511 | * copy the old packet & pullup its IP header into the | |
1512 | * new mbuf so we can modify it. Try to fill the new | |
1513 | * mbuf since if we don't the ethernet driver will. | |
1514 | */ | |
1515 | MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER); | |
1516 | if (mb_copy == NULL) | |
1517 | return; | |
1518 | #if CONFIG_MACF_NET | |
1519 | mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy); | |
1520 | #endif | |
1521 | mb_copy->m_data += max_linkhdr; | |
1522 | mb_copy->m_len = sizeof(multicast_encap_iphdr); | |
1523 | ||
1524 | if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { | |
1525 | m_freem(mb_copy); | |
1526 | return; | |
1527 | } | |
1528 | i = MHLEN - M_LEADINGSPACE(mb_copy); | |
1529 | if (i > len) | |
1530 | i = len; | |
1531 | mb_copy = m_pullup(mb_copy, i); | |
1532 | if (mb_copy == NULL) | |
1533 | return; | |
1534 | mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); | |
1535 | ||
1536 | /* | |
1537 | * fill in the encapsulating IP header. | |
1538 | */ | |
1539 | ip_copy = mtod(mb_copy, struct ip *); | |
1540 | *ip_copy = multicast_encap_iphdr; | |
1541 | #if RANDOM_IP_ID | |
1542 | ip_copy->ip_id = ip_randomid(); | |
1543 | #else | |
1544 | ip_copy->ip_id = htons(ip_id++); | |
1545 | #endif | |
1546 | ip_copy->ip_len += len; | |
1547 | ip_copy->ip_src = vifp->v_lcl_addr; | |
1548 | ip_copy->ip_dst = vifp->v_rmt_addr; | |
1549 | ||
1550 | /* | |
1551 | * turn the encapsulated IP header back into a valid one. | |
1552 | */ | |
1553 | ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); | |
1554 | --ip->ip_ttl; | |
1555 | ||
1556 | #if BYTE_ORDER != BIG_ENDIAN | |
1557 | HTONS(ip->ip_len); | |
1558 | HTONS(ip->ip_off); | |
1559 | #endif | |
1560 | ||
1561 | ip->ip_sum = 0; | |
1562 | mb_copy->m_data += sizeof(multicast_encap_iphdr); | |
1563 | ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); | |
1564 | mb_copy->m_data -= sizeof(multicast_encap_iphdr); | |
1565 | ||
1566 | if (vifp->v_rate_limit == 0) | |
1567 | tbf_send_packet(vifp, mb_copy); | |
1568 | else | |
1569 | tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); | |
1570 | } | |
1571 | ||
1572 | /* | |
1573 | * De-encapsulate a packet and feed it back through ip input (this | |
1574 | * routine is called whenever IP gets a packet with proto type | |
1575 | * ENCAP_PROTO and a local destination address). | |
1576 | */ | |
1577 | void | |
1578 | #if MROUTE_LKM | |
1579 | X_ipip_input(struct mbuf *m, int iphlen) | |
1580 | #else | |
1581 | ipip_input(struct mbuf *m, int iphlen) | |
1582 | #endif | |
1583 | { | |
1584 | struct ifnet *ifp = m->m_pkthdr.rcvif; | |
1585 | struct ip *ip = mtod(m, struct ip *); | |
1586 | int hlen = ip->ip_hl << 2; | |
1587 | struct vif *vifp; | |
1588 | ||
1589 | if (!have_encap_tunnel) { | |
1590 | rip_input(m, iphlen); | |
1591 | return; | |
1592 | } | |
1593 | /* | |
1594 | * dump the packet if it's not to a multicast destination or if | |
1595 | * we don't have an encapsulating tunnel with the source. | |
1596 | * Note: This code assumes that the remote site IP address | |
1597 | * uniquely identifies the tunnel (i.e., that this site has | |
1598 | * at most one tunnel with the remote site). | |
1599 | */ | |
1600 | if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { | |
1601 | ++mrtstat.mrts_bad_tunnel; | |
1602 | m_freem(m); | |
1603 | return; | |
1604 | } | |
1605 | if (ip->ip_src.s_addr != last_encap_src) { | |
1606 | struct vif *vife; | |
1607 | ||
1608 | vifp = viftable; | |
1609 | vife = vifp + numvifs; | |
1610 | last_encap_src = ip->ip_src.s_addr; | |
1611 | last_encap_vif = 0; | |
1612 | for ( ; vifp < vife; ++vifp) | |
1613 | if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { | |
1614 | if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) | |
1615 | == VIFF_TUNNEL) | |
1616 | last_encap_vif = vifp; | |
1617 | break; | |
1618 | } | |
1619 | } | |
1620 | if ((vifp = last_encap_vif) == 0) { | |
1621 | last_encap_src = 0; | |
1622 | mrtstat.mrts_cant_tunnel++; /*XXX*/ | |
1623 | m_freem(m); | |
1624 | if (mrtdebug) | |
1625 | log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n", | |
1626 | (u_int32_t)ntohl(ip->ip_src.s_addr)); | |
1627 | return; | |
1628 | } | |
1629 | ifp = vifp->v_ifp; | |
1630 | ||
1631 | if (hlen > IP_HDR_LEN) | |
1632 | ip_stripoptions(m, (struct mbuf *) 0); | |
1633 | m->m_data += IP_HDR_LEN; | |
1634 | m->m_len -= IP_HDR_LEN; | |
1635 | m->m_pkthdr.len -= IP_HDR_LEN; | |
1636 | m->m_pkthdr.rcvif = ifp; | |
1637 | ||
1638 | proto_inject(PF_INET, m); | |
1639 | } | |
1640 | ||
1641 | /* | |
1642 | * Token bucket filter module | |
1643 | */ | |
1644 | ||
1645 | static void | |
1646 | tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip, | |
1647 | u_int32_t p_len) | |
1648 | { | |
1649 | struct tbf *t = vifp->v_tbf; | |
1650 | ||
1651 | if (p_len > MAX_BKT_SIZE) { | |
1652 | /* drop if packet is too large */ | |
1653 | mrtstat.mrts_pkt2large++; | |
1654 | m_freem(m); | |
1655 | return; | |
1656 | } | |
1657 | ||
1658 | tbf_update_tokens(vifp); | |
1659 | ||
1660 | /* if there are enough tokens, | |
1661 | * and the queue is empty, | |
1662 | * send this packet out | |
1663 | */ | |
1664 | ||
1665 | if (t->tbf_q_len == 0) { | |
1666 | /* queue empty, send packet if enough tokens */ | |
1667 | if (p_len <= t->tbf_n_tok) { | |
1668 | t->tbf_n_tok -= p_len; | |
1669 | tbf_send_packet(vifp, m); | |
1670 | } else { | |
1671 | /* queue packet and timeout till later */ | |
1672 | tbf_queue(vifp, m); | |
1673 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1674 | } | |
1675 | } else if (t->tbf_q_len < t->tbf_max_q_len) { | |
1676 | /* finite queue length, so queue pkts and process queue */ | |
1677 | tbf_queue(vifp, m); | |
1678 | tbf_process_q(vifp); | |
1679 | } else { | |
1680 | /* queue length too much, try to dq and queue and process */ | |
1681 | if (!tbf_dq_sel(vifp, ip)) { | |
1682 | mrtstat.mrts_q_overflow++; | |
1683 | m_freem(m); | |
1684 | return; | |
1685 | } else { | |
1686 | tbf_queue(vifp, m); | |
1687 | tbf_process_q(vifp); | |
1688 | } | |
1689 | } | |
1690 | return; | |
1691 | } | |
1692 | ||
1693 | /* | |
1694 | * adds a packet to the queue at the interface | |
1695 | */ | |
1696 | static void | |
1697 | tbf_queue(struct vif *vifp, struct mbuf *m) | |
1698 | { | |
1699 | struct tbf *t = vifp->v_tbf; | |
1700 | ||
1701 | if (t->tbf_t == NULL) { | |
1702 | /* Queue was empty */ | |
1703 | t->tbf_q = m; | |
1704 | } else { | |
1705 | /* Insert at tail */ | |
1706 | t->tbf_t->m_act = m; | |
1707 | } | |
1708 | ||
1709 | /* Set new tail pointer */ | |
1710 | t->tbf_t = m; | |
1711 | ||
1712 | #if DIAGNOSTIC | |
1713 | /* Make sure we didn't get fed a bogus mbuf */ | |
1714 | if (m->m_act) | |
1715 | panic("tbf_queue: m_act"); | |
1716 | #endif | |
1717 | m->m_act = NULL; | |
1718 | ||
1719 | t->tbf_q_len++; | |
1720 | } | |
1721 | ||
1722 | ||
1723 | /* | |
1724 | * processes the queue at the interface | |
1725 | */ | |
1726 | static void | |
1727 | tbf_process_q(struct vif *vifp) | |
1728 | { | |
1729 | struct mbuf *m; | |
1730 | int len; | |
1731 | struct tbf *t = vifp->v_tbf; | |
1732 | ||
1733 | /* loop through the queue at the interface and send as many packets | |
1734 | * as possible | |
1735 | */ | |
1736 | while (t->tbf_q_len > 0) { | |
1737 | m = t->tbf_q; | |
1738 | ||
1739 | len = mtod(m, struct ip *)->ip_len; | |
1740 | ||
1741 | /* determine if the packet can be sent */ | |
1742 | if (len <= t->tbf_n_tok) { | |
1743 | /* if so, | |
1744 | * reduce no of tokens, dequeue the packet, | |
1745 | * send the packet. | |
1746 | */ | |
1747 | t->tbf_n_tok -= len; | |
1748 | ||
1749 | t->tbf_q = m->m_act; | |
1750 | if (--t->tbf_q_len == 0) | |
1751 | t->tbf_t = NULL; | |
1752 | ||
1753 | m->m_act = NULL; | |
1754 | tbf_send_packet(vifp, m); | |
1755 | ||
1756 | } else break; | |
1757 | } | |
1758 | } | |
1759 | ||
1760 | static void | |
1761 | tbf_reprocess_q(void *xvifp) | |
1762 | { | |
1763 | struct vif *vifp = xvifp; | |
1764 | ||
1765 | if (ip_mrouter == NULL) { | |
1766 | return; | |
1767 | } | |
1768 | ||
1769 | tbf_update_tokens(vifp); | |
1770 | ||
1771 | tbf_process_q(vifp); | |
1772 | ||
1773 | if (vifp->v_tbf->tbf_q_len) | |
1774 | timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); | |
1775 | } | |
1776 | ||
1777 | /* function that will selectively discard a member of the queue | |
1778 | * based on the precedence value and the priority | |
1779 | */ | |
1780 | static int | |
1781 | tbf_dq_sel(struct vif *vifp, struct ip *ip) | |
1782 | { | |
1783 | u_int p; | |
1784 | struct mbuf *m, *last; | |
1785 | struct mbuf **np; | |
1786 | struct tbf *t = vifp->v_tbf; | |
1787 | ||
1788 | p = priority(vifp, ip); | |
1789 | ||
1790 | np = &t->tbf_q; | |
1791 | last = NULL; | |
1792 | while ((m = *np) != NULL) { | |
1793 | if (p > priority(vifp, mtod(m, struct ip *))) { | |
1794 | *np = m->m_act; | |
1795 | /* If we're removing the last packet, fix the tail pointer */ | |
1796 | if (m == t->tbf_t) | |
1797 | t->tbf_t = last; | |
1798 | m_freem(m); | |
1799 | /* it's impossible for the queue to be empty, but | |
1800 | * we check anyway. */ | |
1801 | if (--t->tbf_q_len == 0) | |
1802 | t->tbf_t = NULL; | |
1803 | mrtstat.mrts_drop_sel++; | |
1804 | return(1); | |
1805 | } | |
1806 | np = &m->m_act; | |
1807 | last = m; | |
1808 | } | |
1809 | return(0); | |
1810 | } | |
1811 | ||
1812 | static void | |
1813 | tbf_send_packet(struct vif *vifp, struct mbuf *m) | |
1814 | { | |
1815 | int error; | |
1816 | static struct route ro; | |
1817 | ||
1818 | if (vifp->v_flags & VIFF_TUNNEL) { | |
1819 | /* If tunnel options */ | |
1820 | ip_output(m, (struct mbuf *)0, &vifp->v_route, | |
1821 | IP_FORWARDING, (struct ip_moptions *)0, NULL); | |
1822 | } else { | |
1823 | struct ip_moptions *imo; | |
1824 | ||
1825 | imo = ip_allocmoptions(M_DONTWAIT); | |
1826 | if (imo == NULL) { | |
1827 | error = ENOMEM; | |
1828 | goto done; | |
1829 | } | |
1830 | ||
1831 | imo->imo_multicast_ifp = vifp->v_ifp; | |
1832 | imo->imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; | |
1833 | imo->imo_multicast_loop = 1; | |
1834 | imo->imo_multicast_vif = -1; | |
1835 | ||
1836 | /* | |
1837 | * Re-entrancy should not be a problem here, because | |
1838 | * the packets that we send out and are looped back at us | |
1839 | * should get rejected because they appear to come from | |
1840 | * the loopback interface, thus preventing looping. | |
1841 | */ | |
1842 | error = ip_output(m, (struct mbuf *)0, &ro, | |
1843 | IP_FORWARDING, imo, NULL); | |
1844 | ||
1845 | IMO_REMREF(imo); | |
1846 | done: | |
1847 | if (mrtdebug & DEBUG_XMIT) | |
1848 | log(LOG_DEBUG, "phyint_send on vif %d err %d\n", | |
1849 | vifp - viftable, error); | |
1850 | } | |
1851 | } | |
1852 | ||
1853 | /* determine the current time and then | |
1854 | * the elapsed time (between the last time and time now) | |
1855 | * in milliseconds & update the no. of tokens in the bucket | |
1856 | */ | |
1857 | static void | |
1858 | tbf_update_tokens(struct vif *vifp) | |
1859 | { | |
1860 | struct timeval tp; | |
1861 | u_int32_t tm; | |
1862 | struct tbf *t = vifp->v_tbf; | |
1863 | ||
1864 | GET_TIME(tp); | |
1865 | ||
1866 | TV_DELTA(tp, t->tbf_last_pkt_t, tm); | |
1867 | ||
1868 | /* | |
1869 | * This formula is actually | |
1870 | * "time in seconds" * "bytes/second". | |
1871 | * | |
1872 | * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) | |
1873 | * | |
1874 | * The (1000/1024) was introduced in add_vif to optimize | |
1875 | * this divide into a shift. | |
1876 | */ | |
1877 | t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; | |
1878 | t->tbf_last_pkt_t = tp; | |
1879 | ||
1880 | if (t->tbf_n_tok > MAX_BKT_SIZE) | |
1881 | t->tbf_n_tok = MAX_BKT_SIZE; | |
1882 | } | |
1883 | ||
1884 | static int | |
1885 | priority(__unused struct vif *vifp, struct ip *ip) | |
1886 | { | |
1887 | int prio; | |
1888 | ||
1889 | /* temporary hack; may add general packet classifier some day */ | |
1890 | ||
1891 | /* | |
1892 | * The UDP port space is divided up into four priority ranges: | |
1893 | * [0, 16384) : unclassified - lowest priority | |
1894 | * [16384, 32768) : audio - highest priority | |
1895 | * [32768, 49152) : whiteboard - medium priority | |
1896 | * [49152, 65536) : video - low priority | |
1897 | */ | |
1898 | if (ip->ip_p == IPPROTO_UDP) { | |
1899 | struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); | |
1900 | switch (ntohs(udp->uh_dport) & 0xc000) { | |
1901 | case 0x4000: | |
1902 | prio = 70; | |
1903 | break; | |
1904 | case 0x8000: | |
1905 | prio = 60; | |
1906 | break; | |
1907 | case 0xc000: | |
1908 | prio = 55; | |
1909 | break; | |
1910 | default: | |
1911 | prio = 50; | |
1912 | break; | |
1913 | } | |
1914 | if (tbfdebug > 1) | |
1915 | log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio); | |
1916 | } else { | |
1917 | prio = 50; | |
1918 | } | |
1919 | return prio; | |
1920 | } | |
1921 | ||
1922 | /* | |
1923 | * End of token bucket filter modifications | |
1924 | */ | |
1925 | ||
1926 | int | |
1927 | ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt) | |
1928 | { | |
1929 | int error, i; | |
1930 | ||
1931 | if (rsvpdebug) | |
1932 | printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", | |
1933 | so->so_type, so->so_proto->pr_protocol); | |
1934 | ||
1935 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1936 | return EOPNOTSUPP; | |
1937 | ||
1938 | /* Check mbuf. */ | |
1939 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1940 | if (error) | |
1941 | return (error); | |
1942 | ||
1943 | if (rsvpdebug) | |
1944 | printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on); | |
1945 | ||
1946 | /* Check vif. */ | |
1947 | if (!legal_vif_num(i)) { | |
1948 | return EADDRNOTAVAIL; | |
1949 | } | |
1950 | ||
1951 | /* Check if socket is available. */ | |
1952 | if (viftable[i].v_rsvpd != NULL) { | |
1953 | return EADDRINUSE; | |
1954 | } | |
1955 | ||
1956 | viftable[i].v_rsvpd = so; | |
1957 | /* This may seem silly, but we need to be sure we don't over-increment | |
1958 | * the RSVP counter, in case something slips up. | |
1959 | */ | |
1960 | if (!viftable[i].v_rsvp_on) { | |
1961 | viftable[i].v_rsvp_on = 1; | |
1962 | rsvp_on++; | |
1963 | } | |
1964 | ||
1965 | return 0; | |
1966 | } | |
1967 | ||
1968 | int | |
1969 | ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt) | |
1970 | { | |
1971 | int error, i; | |
1972 | ||
1973 | if (rsvpdebug) | |
1974 | printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", | |
1975 | so->so_type, so->so_proto->pr_protocol); | |
1976 | ||
1977 | if (so->so_type != SOCK_RAW || | |
1978 | so->so_proto->pr_protocol != IPPROTO_RSVP) | |
1979 | return EOPNOTSUPP; | |
1980 | ||
1981 | error = sooptcopyin(sopt, &i, sizeof i, sizeof i); | |
1982 | if (error) | |
1983 | return (error); | |
1984 | ||
1985 | /* Check vif. */ | |
1986 | if (!legal_vif_num(i)) { | |
1987 | return EADDRNOTAVAIL; | |
1988 | } | |
1989 | ||
1990 | if (rsvpdebug) | |
1991 | printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n", | |
1992 | viftable[i].v_rsvpd, so); | |
1993 | ||
1994 | viftable[i].v_rsvpd = NULL; | |
1995 | /* | |
1996 | * This may seem silly, but we need to be sure we don't over-decrement | |
1997 | * the RSVP counter, in case something slips up. | |
1998 | */ | |
1999 | if (viftable[i].v_rsvp_on) { | |
2000 | viftable[i].v_rsvp_on = 0; | |
2001 | rsvp_on--; | |
2002 | } | |
2003 | ||
2004 | return 0; | |
2005 | } | |
2006 | ||
2007 | void | |
2008 | ip_rsvp_force_done(struct socket *so) | |
2009 | { | |
2010 | int vifi; | |
2011 | ||
2012 | /* Don't bother if it is not the right type of socket. */ | |
2013 | if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) | |
2014 | return; | |
2015 | ||
2016 | /* The socket may be attached to more than one vif...this | |
2017 | * is perfectly legal. | |
2018 | */ | |
2019 | for (vifi = 0; vifi < numvifs; vifi++) { | |
2020 | if (viftable[vifi].v_rsvpd == so) { | |
2021 | viftable[vifi].v_rsvpd = NULL; | |
2022 | /* This may seem silly, but we need to be sure we don't | |
2023 | * over-decrement the RSVP counter, in case something slips up. | |
2024 | */ | |
2025 | if (viftable[vifi].v_rsvp_on) { | |
2026 | viftable[vifi].v_rsvp_on = 0; | |
2027 | rsvp_on--; | |
2028 | } | |
2029 | } | |
2030 | } | |
2031 | ||
2032 | return; | |
2033 | } | |
2034 | ||
2035 | void | |
2036 | rsvp_input(struct mbuf *m, int iphlen) | |
2037 | { | |
2038 | int vifi; | |
2039 | struct ip *ip = mtod(m, struct ip *); | |
2040 | static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET, | |
2041 | 0 , {0}, {0,0,0,0,0,0,0,0,} }; | |
2042 | struct ifnet *ifp; | |
2043 | ||
2044 | if (rsvpdebug) | |
2045 | printf("rsvp_input: rsvp_on %d\n",rsvp_on); | |
2046 | ||
2047 | /* Can still get packets with rsvp_on = 0 if there is a local member | |
2048 | * of the group to which the RSVP packet is addressed. But in this | |
2049 | * case we want to throw the packet away. | |
2050 | */ | |
2051 | if (!rsvp_on) { | |
2052 | m_freem(m); | |
2053 | return; | |
2054 | } | |
2055 | ||
2056 | if (rsvpdebug) | |
2057 | printf("rsvp_input: check vifs\n"); | |
2058 | ||
2059 | #if DIAGNOSTIC | |
2060 | if (!(m->m_flags & M_PKTHDR)) | |
2061 | panic("rsvp_input no hdr"); | |
2062 | #endif | |
2063 | ||
2064 | ifp = m->m_pkthdr.rcvif; | |
2065 | /* Find which vif the packet arrived on. */ | |
2066 | for (vifi = 0; vifi < numvifs; vifi++) | |
2067 | if (viftable[vifi].v_ifp == ifp) | |
2068 | break; | |
2069 | ||
2070 | if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { | |
2071 | /* | |
2072 | * If the old-style non-vif-associated socket is set, | |
2073 | * then use it. Otherwise, drop packet since there | |
2074 | * is no specific socket for this vif. | |
2075 | */ | |
2076 | if (ip_rsvpd != NULL) { | |
2077 | if (rsvpdebug) | |
2078 | printf("rsvp_input: Sending packet up old-style socket\n"); | |
2079 | rip_input(m, iphlen); /* xxx */ | |
2080 | } else { | |
2081 | if (rsvpdebug && vifi == numvifs) | |
2082 | printf("rsvp_input: Can't find vif for packet.\n"); | |
2083 | else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) | |
2084 | printf("rsvp_input: No socket defined for vif %d\n",vifi); | |
2085 | m_freem(m); | |
2086 | } | |
2087 | return; | |
2088 | } | |
2089 | rsvp_src.sin_addr = ip->ip_src; | |
2090 | ||
2091 | if (rsvpdebug && m) | |
2092 | printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", | |
2093 | m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); | |
2094 | ||
2095 | if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { | |
2096 | if (rsvpdebug) | |
2097 | printf("rsvp_input: Failed to append to socket\n"); | |
2098 | } else { | |
2099 | if (rsvpdebug) | |
2100 | printf("rsvp_input: send packet up\n"); | |
2101 | } | |
2102 | ||
2103 | } | |
2104 | ||
2105 | #if MROUTE_LKM | |
2106 | #include <sys/conf.h> | |
2107 | #include <sys/exec.h> | |
2108 | #include <sys/sysent.h> | |
2109 | #include <sys/lkm.h> | |
2110 | ||
2111 | MOD_MISC("ip_mroute_mod") | |
2112 | ||
2113 | static int | |
2114 | ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) | |
2115 | { | |
2116 | int i; | |
2117 | struct lkm_misc *args = lkmtp->private.lkm_misc; | |
2118 | int err = 0; | |
2119 | ||
2120 | switch(cmd) { | |
2121 | static int (*old_ip_mrouter_cmd)(); | |
2122 | static int (*old_ip_mrouter_done)(); | |
2123 | static int (*old_ip_mforward)(); | |
2124 | static int (*old_mrt_ioctl)(); | |
2125 | static void (*old_proto4_input)(); | |
2126 | static int (*old_legal_vif_num)(); | |
2127 | extern struct protosw inetsw[]; | |
2128 | ||
2129 | case LKM_E_LOAD: | |
2130 | if(lkmexists(lkmtp) || ip_mrtproto) | |
2131 | return(EEXIST); | |
2132 | old_ip_mrouter_cmd = ip_mrouter_cmd; | |
2133 | ip_mrouter_cmd = X_ip_mrouter_cmd; | |
2134 | old_ip_mrouter_done = ip_mrouter_done; | |
2135 | ip_mrouter_done = X_ip_mrouter_done; | |
2136 | old_ip_mforward = ip_mforward; | |
2137 | ip_mforward = X_ip_mforward; | |
2138 | old_mrt_ioctl = mrt_ioctl; | |
2139 | mrt_ioctl = X_mrt_ioctl; | |
2140 | old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input; | |
2141 | ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input; | |
2142 | old_legal_vif_num = legal_vif_num; | |
2143 | legal_vif_num = X_legal_vif_num; | |
2144 | ip_mrtproto = IGMP_DVMRP; | |
2145 | ||
2146 | printf("\nIP multicast routing loaded\n"); | |
2147 | break; | |
2148 | ||
2149 | case LKM_E_UNLOAD: | |
2150 | if (ip_mrouter) | |
2151 | return EINVAL; | |
2152 | ||
2153 | ip_mrouter_cmd = old_ip_mrouter_cmd; | |
2154 | ip_mrouter_done = old_ip_mrouter_done; | |
2155 | ip_mforward = old_ip_mforward; | |
2156 | mrt_ioctl = old_mrt_ioctl; | |
2157 | ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input; | |
2158 | legal_vif_num = old_legal_vif_num; | |
2159 | ip_mrtproto = 0; | |
2160 | break; | |
2161 | ||
2162 | default: | |
2163 | err = EINVAL; | |
2164 | break; | |
2165 | } | |
2166 | ||
2167 | return(err); | |
2168 | } | |
2169 | ||
2170 | int | |
2171 | ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { | |
2172 | DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, | |
2173 | nosys); | |
2174 | } | |
2175 | ||
2176 | #endif /* MROUTE_LKM */ | |
2177 | #endif /* MROUTING */ |