bsd/netinet/ip_mroute.c

   1 /*
   2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  30  * support for mandatory and extensible security protections.  This notice
  31  * is included in support of clause 2.2 (b) of the Apple Public License,
  32  * Version 2.0.
  33  */
  34 /*
  35  * IP multicast forwarding procedures
  36  *
  37  * Written by David Waitzman, BBN Labs, August 1988.
  38  * Modified by Steve Deering, Stanford, February 1989.
  39  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  40  * Modified by Van Jacobson, LBL, January 1993
  41  * Modified by Ajit Thyagarajan, PARC, August 1993
  42  * Modified by Bill Fenner, PARC, April 1995
  43  *
  44  * MROUTING Revision: 3.5
  45  * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $
  46  */
  47
  48
  49 #include <sys/param.h>
  50 #include <sys/systm.h>
  51 #include <sys/malloc.h>
  52 #include <sys/mbuf.h>
  53 #include <sys/socket.h>
  54 #include <sys/socketvar.h>
  55 #include <sys/protosw.h>
  56 #include <sys/time.h>
  57 #include <sys/kernel.h>
  58 #include <sys/sockio.h>
  59 #include <sys/syslog.h>
  60
  61 #include <machine/endian.h>
  62
  63 #include <net/if.h>
  64 #include <net/route.h>
  65 #include <net/kpi_protocol.h>
  66 #include <netinet/in.h>
  67 #include <netinet/in_systm.h>
  68 #include <netinet/ip.h>
  69 #include <netinet/ip_var.h>
  70 #include <netinet/in_var.h>
  71 #include <netinet/igmp.h>
  72 #include <netinet/ip_mroute.h>
  73 #include <netinet/udp.h>
  74
  75 #if CONFIG_MACF_NET
  76 #include <security/mac_framework.h>
  77 #endif
  78
  79
  80 #if !MROUTING
  81 extern u_int32_t        _ip_mcast_src(int vifi);
  82 extern int      _ip_mforward(struct ip *ip, struct ifnet *ifp,
  83                                   struct mbuf *m, struct ip_moptions *imo);
  84 extern int      _ip_mrouter_done(void);
  85 extern int      _ip_mrouter_get(struct socket *so, struct sockopt *sopt);
  86 extern int      _ip_mrouter_set(struct socket *so, struct sockopt *sopt);
  87 extern int      _mrt_ioctl(int req, caddr_t data, struct proc *p);
  88
  89 /*
  90  * Dummy routines and globals used when multicast routing is not compiled in.
  91  */
  92
  93 struct socket  *ip_mrouter  = NULL;
  94 u_int           rsvpdebug = 0;
  95
  96 int
  97 _ip_mrouter_set(__unused struct socket *so,
  98                 __unused struct sockopt *sopt)
  99 {
 100         return(EOPNOTSUPP);
 101 }
 102
 103 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
 104
 105
 106 int
 107 _ip_mrouter_get(__unused struct socket *so,
 108                 __unused sockopt *sopt)
 109 {
 110         return(EOPNOTSUPP);
 111 }
 112
 113 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
 114
 115 int
 116 _ip_mrouter_done(void)
 117 {
 118         return(0);
 119 }
 120
 121 int (*ip_mrouter_done)(void) = _ip_mrouter_done;
 122
 123 int
 124 _ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp,
 125              __unused struct mbuf *m, __unused ip_moptions *imo)
 126 {
 127         return(0);
 128 }
 129
 130 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 131                    struct ip_moptions *) = _ip_mforward;
 132
 133 int
 134 _mrt_ioctl(__unused u_long req, __unused caddr_t data, __unused struct proc *p)
 135 {
 136         return EOPNOTSUPP;
 137 }
 138
 139 int (*mrt_ioctl)(u_long, caddr_t, struct proc *) = _mrt_ioctl;
 140
 141 void
 142 rsvp_input(struct mbuf *m, int iphlen)          /* XXX must fixup manually */
 143 {
 144     /* Can still get packets with rsvp_on = 0 if there is a local member
 145      * of the group to which the RSVP packet is addressed.  But in this
 146      * case we want to throw the packet away.
 147      */
 148     if (!rsvp_on) {
 149         m_freem(m);
 150         return;
 151     }
 152
 153     if (ip_rsvpd != NULL) {
 154         if (rsvpdebug)
 155             printf("rsvp_input: Sending packet up old-style socket\n");
 156         rip_input(m, iphlen);
 157         return;
 158     }
 159     /* Drop the packet */
 160     m_freem(m);
 161 }
 162
 163 void ipip_input(struct mbuf *m, int iphlen) { /* XXX must fixup manually */
 164         rip_input(m, iphlen);
 165 }
 166
 167 int (*legal_vif_num)(int) = 0;
 168
 169 /*
 170  * This should never be called, since IP_MULTICAST_VIF should fail, but
 171  * just in case it does get called, the code a little lower in ip_output
 172  * will assign the packet a local address.
 173  */
 174 u_int32_t
 175 _ip_mcast_src(int vifi) { return INADDR_ANY; }
 176 u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src;
 177
 178 int
 179 ip_rsvp_vif_init(so, sopt)
 180     struct socket *so;
 181     struct sockopt *sopt;
 182 {
 183     return(EINVAL);
 184 }
 185
 186 int
 187 ip_rsvp_vif_done(so, sopt)
 188     struct socket *so;
 189     struct sockopt *sopt;
 190 {
 191     return(EINVAL);
 192 }
 193
 194 void
 195 ip_rsvp_force_done(so)
 196     struct socket *so;
 197 {
 198     return;
 199 }
 200
 201 #else /* MROUTING */
 202
 203 #define M_HASCL(m)      ((m)->m_flags & M_EXT)
 204
 205 #define INSIZ           sizeof(struct in_addr)
 206 #define same(a1, a2) \
 207         (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
 208
 209
 210 /*
 211  * Globals.  All but ip_mrouter and ip_mrtproto could be static,
 212  * except for netstat or debugging purposes.
 213  */
 214 #ifndef MROUTE_LKM
 215 struct socket  *ip_mrouter  = NULL;
 216 static struct mrtstat   mrtstat;
 217 #else /* MROUTE_LKM */
 218 extern void     X_ipip_input(struct mbuf *m, int iphlen);
 219 extern struct mrtstat mrtstat;
 220 static int ip_mrtproto;
 221 #endif
 222
 223 #define NO_RTE_FOUND    0x1
 224 #define RTE_FOUND       0x2
 225
 226 static struct mfc       *mfctable[CONFIG_MFCTBLSIZ];
 227 static u_char           nexpire[CONFIG_MFCTBLSIZ];
 228 static struct vif       viftable[CONFIG_MAXVIFS];
 229 static u_int    mrtdebug = 0;     /* debug level        */
 230 #define         DEBUG_MFC       0x02
 231 #define         DEBUG_FORWARD   0x04
 232 #define         DEBUG_EXPIRE    0x08
 233 #define         DEBUG_XMIT      0x10
 234 static u_int    tbfdebug = 0;     /* tbf debug level    */
 235 static u_int    rsvpdebug = 0;    /* rsvp debug level   */
 236
 237 #define         EXPIRE_TIMEOUT  (hz / 4)        /* 4x / second          */
 238 #define         UPCALL_EXPIRE   6               /* number of timeouts   */
 239
 240 /*
 241  * Define the token bucket filter structures
 242  * tbftable -> each vif has one of these for storing info
 243  */
 244
 245 static struct tbf tbftable[CONFIG_MAXVIFS];
 246 #define         TBF_REPROCESS   (hz / 100)      /* 100x / second */
 247
 248 /*
 249  * 'Interfaces' associated with decapsulator (so we can tell
 250  * packets that went through it from ones that get reflected
 251  * by a broken gateway).  These interfaces are never linked into
 252  * the system ifnet list & no routes point to them.  I.e., packets
 253  * can't be sent this way.  They only exist as a placeholder for
 254  * multicast source verification.
 255  */
 256 static struct ifnet multicast_decap_if[CONFIG_MAXVIFS];
 257
 258 #define ENCAP_TTL 64
 259 #define ENCAP_PROTO IPPROTO_IPIP        /* 4 */
 260
 261 /* prototype IP hdr for encapsulated packets */
 262 static struct ip multicast_encap_iphdr = {
 263 #if BYTE_ORDER == LITTLE_ENDIAN
 264         sizeof(struct ip) >> 2, IPVERSION,
 265 #else
 266         IPVERSION, sizeof(struct ip) >> 2,
 267 #endif
 268         0,                              /* tos */
 269         sizeof(struct ip),              /* total length */
 270         0,                              /* id */
 271         0,                              /* frag offset */
 272         ENCAP_TTL, ENCAP_PROTO,
 273         0,                              /* checksum */
 274         { 0 }, { 0 }
 275 };
 276
 277 /*
 278  * Private variables.
 279  */
 280 static vifi_t      numvifs = 0;
 281 static int have_encap_tunnel = 0;
 282
 283 /*
 284  * one-back cache used by ipip_input to locate a tunnel's vif
 285  * given a datagram's src ip address.
 286  */
 287 static u_int32_t last_encap_src;
 288 static struct vif *last_encap_vif;
 289
 290 static u_int32_t        X_ip_mcast_src(int vifi);
 291 static int      X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo);
 292 static int      X_ip_mrouter_done(void);
 293 static int      X_ip_mrouter_get(struct socket *so, struct sockopt *m);
 294 static int      X_ip_mrouter_set(struct socket *so, struct sockopt *m);
 295 static int      X_legal_vif_num(int vif);
 296 static int      X_mrt_ioctl(u_long cmd, caddr_t data);
 297
 298 static int get_sg_cnt(struct sioc_sg_req *);
 299 static int get_vif_cnt(struct sioc_vif_req *);
 300 static int ip_mrouter_init(struct socket *, int);
 301 static int add_vif(struct vifctl *);
 302 static int del_vif(vifi_t);
 303 static int add_mfc(struct mfcctl *);
 304 static int del_mfc(struct mfcctl *);
 305 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
 306 static int set_assert(int);
 307 static void expire_upcalls(void *);
 308 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
 309                   vifi_t);
 310 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
 311 static void encap_send(struct ip *, struct vif *, struct mbuf *);
 312 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t);
 313 static void tbf_queue(struct vif *, struct mbuf *);
 314 static void tbf_process_q(struct vif *);
 315 static void tbf_reprocess_q(void *);
 316 static int tbf_dq_sel(struct vif *, struct ip *);
 317 static void tbf_send_packet(struct vif *, struct mbuf *);
 318 static void tbf_update_tokens(struct vif *);
 319 static int priority(struct vif *, struct ip *);
 320 void multiencap_decap(struct mbuf *);
 321
 322 /*
 323  * whether or not special PIM assert processing is enabled.
 324  */
 325 static int pim_assert;
 326 /*
 327  * Rate limit for assert notification messages, in usec
 328  */
 329 #define ASSERT_MSG_TIME         3000000
 330
 331 /*
 332  * Hash function for a source, group entry
 333  */
 334 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
 335                         ((g) >> 20) ^ ((g) >> 10) ^ (g))
 336
 337 /*
 338  * Find a route for a given origin IP address and Multicast group address
 339  * Type of service parameter to be added in the future!!!
 340  */
 341
 342 #define MFCFIND(o, g, rt) { \
 343         struct mfc *_rt = mfctable[MFCHASH(o,g)]; \
 344         rt = NULL; \
 345         ++mrtstat.mrts_mfc_lookups; \
 346         while (_rt) { \
 347                 if ((_rt->mfc_origin.s_addr == o) && \
 348                     (_rt->mfc_mcastgrp.s_addr == g) && \
 349                     (_rt->mfc_stall == NULL)) { \
 350                         rt = _rt; \
 351                         break; \
 352                 } \
 353                 _rt = _rt->mfc_next; \
 354         } \
 355         if (rt == NULL) { \
 356                 ++mrtstat.mrts_mfc_misses; \
 357         } \
 358 }
 359
 360
 361 /*
 362  * Macros to compute elapsed time efficiently
 363  * Borrowed from Van Jacobson's scheduling code
 364  */
 365 #define TV_DELTA(a, b, delta) { \
 366             int xxs; \
 367                 \
 368             delta = (a).tv_usec - (b).tv_usec; \
 369             if ((xxs = (a).tv_sec - (b).tv_sec)) { \
 370                switch (xxs) { \
 371                       case 2: \
 372                           delta += 1000000; \
 373                               /* fall through */ \
 374                       case 1: \
 375                           delta += 1000000; \
 376                           break; \
 377                       default: \
 378                           delta += (1000000 * xxs); \
 379                } \
 380             } \
 381 }
 382
 383 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 384               (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 385
 386 #if UPCALL_TIMING
 387 u_int32_t upcall_data[51];
 388 static void collate(struct timeval *);
 389 #endif /* UPCALL_TIMING */
 390
 391
 392 /*
 393  * Handle MRT setsockopt commands to modify the multicast routing tables.
 394  */
 395 static int
 396 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 397 {
 398         int     error, optval;
 399         vifi_t  vifi;
 400         struct  vifctl vifc;
 401         struct  mfcctl mfc;
 402
 403         if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
 404                 return (EPERM);
 405
 406         error = 0;
 407         switch (sopt->sopt_name) {
 408         case MRT_INIT:
 409                 error = sooptcopyin(sopt, &optval, sizeof optval,
 410                                     sizeof optval);
 411                 if (error)
 412                         break;
 413                 error = ip_mrouter_init(so, optval);
 414                 break;
 415
 416         case MRT_DONE:
 417                 error = ip_mrouter_done();
 418                 break;
 419
 420         case MRT_ADD_VIF:
 421                 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 422                 if (error)
 423                         break;
 424                 error = add_vif(&vifc);
 425                 break;
 426
 427         case MRT_DEL_VIF:
 428                 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 429                 if (error)
 430                         break;
 431                 error = del_vif(vifi);
 432                 break;
 433
 434         case MRT_ADD_MFC:
 435         case MRT_DEL_MFC:
 436                 error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
 437                 if (error)
 438                         break;
 439                 if (sopt->sopt_name == MRT_ADD_MFC)
 440                         error = add_mfc(&mfc);
 441                 else
 442                         error = del_mfc(&mfc);
 443                 break;
 444
 445         case MRT_ASSERT:
 446                 error = sooptcopyin(sopt, &optval, sizeof optval,
 447                                     sizeof optval);
 448                 if (error)
 449                         break;
 450                 set_assert(optval);
 451                 break;
 452
 453         default:
 454                 error = EOPNOTSUPP;
 455                 break;
 456         }
 457         return (error);
 458 }
 459
 460 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 461 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set;
 462 #endif
 463
 464 /*
 465  * Handle MRT getsockopt commands
 466  */
 467 static int
 468 X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt)
 469 {
 470         int error;
 471         static int vers = 0x0305; /* !!! why is this here? XXX */
 472
 473         switch (sopt->sopt_name) {
 474         case MRT_VERSION:
 475                 error = sooptcopyout(sopt, &vers, sizeof vers);
 476                 break;
 477
 478         case MRT_ASSERT:
 479                 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
 480                 break;
 481         default:
 482                 error = EOPNOTSUPP;
 483                 break;
 484         }
 485         return (error);
 486 }
 487
 488 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 489 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
 490 #endif
 491
 492 /*
 493  * Handle ioctl commands to obtain information from the cache
 494  */
 495 static int
 496 X_mrt_ioctl(u_long cmd, caddr_t data)
 497 {
 498     int error = 0;
 499
 500     switch (cmd) {
 501         case (SIOCGETVIFCNT):
 502             return (get_vif_cnt((struct sioc_vif_req *)data));
 503             break;
 504         case (SIOCGETSGCNT):
 505             return (get_sg_cnt((struct sioc_sg_req *)data));
 506             break;
 507         default:
 508             return (EINVAL);
 509             break;
 510     }
 511     return error;
 512 }
 513
 514 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 515 int (*mrt_ioctl)(u_long, caddr_t) = X_mrt_ioctl;
 516 #endif
 517
 518 /*
 519  * returns the packet, byte, rpf-failure count for the source group provided
 520  */
 521 static int
 522 get_sg_cnt(struct sioc_sg_req *req)
 523 {
 524     struct mfc *rt;
 525
 526     MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
 527     if (rt != NULL) {
 528         req->pktcnt = rt->mfc_pkt_cnt;
 529         req->bytecnt = rt->mfc_byte_cnt;
 530         req->wrong_if = rt->mfc_wrong_if;
 531     } else
 532         req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 533
 534     return 0;
 535 }
 536
 537 /*
 538  * returns the input and output packet and byte counts on the vif provided
 539  */
 540 static int
 541 get_vif_cnt(struct sioc_vif_req *req)
 542 {
 543     vifi_t vifi = req->vifi;
 544
 545     if (vifi >= numvifs) return EINVAL;
 546
 547     req->icount = viftable[vifi].v_pkt_in;
 548     req->ocount = viftable[vifi].v_pkt_out;
 549     req->ibytes = viftable[vifi].v_bytes_in;
 550     req->obytes = viftable[vifi].v_bytes_out;
 551
 552     return 0;
 553 }
 554
 555 /*
 556  * Enable multicast routing
 557  */
 558 static int
 559 ip_mrouter_init(struct socket *so, int vers)
 560 {
 561     if (mrtdebug)
 562         log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
 563                 so->so_type, so->so_proto->pr_protocol);
 564
 565     if (so->so_type != SOCK_RAW ||
 566         so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
 567
 568     if (vers != 1)
 569         return ENOPROTOOPT;
 570
 571     if (ip_mrouter != NULL) return EADDRINUSE;
 572
 573     ip_mrouter = so;
 574
 575     bzero((caddr_t)mfctable, sizeof(mfctable));
 576     bzero((caddr_t)nexpire, sizeof(nexpire));
 577
 578     pim_assert = 0;
 579
 580     timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
 581
 582     if (mrtdebug)
 583         log(LOG_DEBUG, "ip_mrouter_init\n");
 584
 585     return 0;
 586 }
 587
 588 /*
 589  * Disable multicast routing
 590  */
 591 static int
 592 X_ip_mrouter_done(void)
 593 {
 594     vifi_t vifi;
 595     int i;
 596     struct ifnet *ifp;
 597     struct ifreq ifr;
 598     struct mfc *rt;
 599     struct rtdetq *rte;
 600
 601     /*
 602      * For each phyint in use, disable promiscuous reception of all IP
 603      * multicasts.
 604      */
 605     for (vifi = 0; vifi < numvifs; vifi++) {
 606         if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
 607             !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
 608             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 609             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
 610                                                                 = INADDR_ANY;
 611             ifp = viftable[vifi].v_ifp;
 612             if_allmulti(ifp, 0);
 613         }
 614     }
 615     bzero((caddr_t)tbftable, sizeof(tbftable));
 616     bzero((caddr_t)viftable, sizeof(viftable));
 617     numvifs = 0;
 618     pim_assert = 0;
 619
 620     untimeout(expire_upcalls, (caddr_t)NULL);
 621
 622     /*
 623      * Free all multicast forwarding cache entries.
 624      */
 625     for (i = 0; i < CONFIG_MFCTBLSIZ; i++) {
 626         for (rt = mfctable[i]; rt != NULL; ) {
 627             struct mfc *nr = rt->mfc_next;
 628
 629             for (rte = rt->mfc_stall; rte != NULL; ) {
 630                 struct rtdetq *n = rte->next;
 631
 632                 m_freem(rte->m);
 633                 FREE(rte, M_MRTABLE);
 634                 rte = n;
 635             }
 636             FREE(rt, M_MRTABLE);
 637             rt = nr;
 638         }
 639     }
 640
 641     bzero((caddr_t)mfctable, sizeof(mfctable));
 642
 643     /*
 644      * Reset de-encapsulation cache
 645      */
 646     last_encap_src = 0;
 647     last_encap_vif = NULL;
 648     have_encap_tunnel = 0;
 649
 650     ip_mrouter = NULL;
 651
 652     if (mrtdebug)
 653         log(LOG_DEBUG, "ip_mrouter_done\n");
 654
 655     return 0;
 656 }
 657
 658 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 659 int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
 660 #endif
 661
 662 /*
 663  * Set PIM assert processing global
 664  */
 665 static int
 666 set_assert(int i)
 667 {
 668     if ((i != 1) && (i != 0))
 669         return EINVAL;
 670
 671     pim_assert = i;
 672
 673     return 0;
 674 }
 675
 676 /*
 677  * Add a vif to the vif table
 678  */
 679 static int
 680 add_vif(struct vifctl *vifcp)
 681 {
 682     struct vif *vifp = viftable + vifcp->vifc_vifi;
 683     static struct sockaddr_in sin = { sizeof sin, AF_INET,
 684                                                                                 0 , {0}, {0,0,0,0,0,0,0,0,} };
 685     struct ifaddr *ifa;
 686     struct ifnet *ifp;
 687     int error, s;
 688     struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
 689
 690     if (vifcp->vifc_vifi >= CONFIG_MAXVIFS)  return EINVAL;
 691     if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
 692
 693     /* Find the interface with an address in AF_INET family */
 694     sin.sin_addr = vifcp->vifc_lcl_addr;
 695     ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 696     if (ifa == 0) return EADDRNOTAVAIL;
 697     ifp = ifa->ifa_ifp;
 698     IFA_REMREF(ifa);
 699     ifa = NULL;
 700
 701     if (vifcp->vifc_flags & VIFF_TUNNEL) {
 702         if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
 703                 /*
 704                  * An encapsulating tunnel is wanted.  Tell ipip_input() to
 705                  * start paying attention to encapsulated packets.
 706                  */
 707                 if (have_encap_tunnel == 0) {
 708                         have_encap_tunnel = 1;
 709                         for (s = 0; s < CONFIG_MAXVIFS; ++s) {
 710                                 multicast_decap_if[s].if_name = "mdecap";
 711                                 multicast_decap_if[s].if_unit = s;
 712                                 multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP;
 713                         }
 714                 }
 715                 /*
 716                  * Set interface to fake encapsulator interface
 717                  */
 718                 ifp = &multicast_decap_if[vifcp->vifc_vifi];
 719                 /*
 720                  * Prepare cached route entry
 721                  */
 722                 bzero(&vifp->v_route, sizeof(vifp->v_route));
 723         } else {
 724             log(LOG_ERR, "source routed tunnels not supported\n");
 725             return EOPNOTSUPP;
 726         }
 727     } else {
 728         /* Make sure the interface supports multicast */
 729         if ((ifp->if_flags & IFF_MULTICAST) == 0)
 730             return EOPNOTSUPP;
 731
 732         /* Enable promiscuous reception of all IP multicasts from the if */
 733         error = if_allmulti(ifp, 1);
 734         if (error)
 735             return error;
 736     }
 737
 738     /* define parameters for the tbf structure */
 739     vifp->v_tbf = v_tbf;
 740     GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
 741     vifp->v_tbf->tbf_n_tok = 0;
 742     vifp->v_tbf->tbf_q_len = 0;
 743     vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
 744     vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
 745
 746     vifp->v_flags     = vifcp->vifc_flags;
 747     vifp->v_threshold = vifcp->vifc_threshold;
 748     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
 749     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
 750     vifp->v_ifp       = ifp;
 751     /* scaling up here allows division by 1024 in critical code */
 752     vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
 753     vifp->v_rsvp_on   = 0;
 754     vifp->v_rsvpd     = NULL;
 755     /* initialize per vif pkt counters */
 756     vifp->v_pkt_in    = 0;
 757     vifp->v_pkt_out   = 0;
 758     vifp->v_bytes_in  = 0;
 759     vifp->v_bytes_out = 0;
 760
 761     /* Adjust numvifs up if the vifi is higher than numvifs */
 762     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
 763
 764     if (mrtdebug)
 765         log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
 766             vifcp->vifc_vifi,
 767             (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr),
 768             (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
 769             (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr),
 770             vifcp->vifc_threshold,
 771             vifcp->vifc_rate_limit);
 772
 773     return 0;
 774 }
 775
 776 /*
 777  * Delete a vif from the vif table
 778  */
 779 static int
 780 del_vif(vifi_t vifi)
 781 {
 782     struct vif *vifp = &viftable[vifi];
 783     struct mbuf *m;
 784     struct ifnet *ifp;
 785     struct ifreq ifr;
 786
 787     if (vifi >= numvifs) return EINVAL;
 788     if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
 789
 790     if (!(vifp->v_flags & VIFF_TUNNEL)) {
 791         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 792         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
 793         ifp = vifp->v_ifp;
 794         if_allmulti(ifp, 0);
 795     }
 796
 797     if (vifp == last_encap_vif) {
 798         last_encap_vif = 0;
 799         last_encap_src = 0;
 800     }
 801
 802     /*
 803      * Free packets queued at the interface
 804      */
 805     while (vifp->v_tbf->tbf_q) {
 806         m = vifp->v_tbf->tbf_q;
 807         vifp->v_tbf->tbf_q = m->m_act;
 808         m_freem(m);
 809     }
 810
 811     bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
 812     bzero((caddr_t)vifp, sizeof (*vifp));
 813
 814     if (mrtdebug)
 815       log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
 816
 817     /* Adjust numvifs down */
 818     for (vifi = numvifs; vifi > 0; vifi--)
 819         if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
 820     numvifs = vifi;
 821
 822     return 0;
 823 }
 824
 825 /*
 826  * Add an mfc entry
 827  */
 828 static int
 829 add_mfc(struct mfcctl *mfccp)
 830 {
 831     struct mfc *rt;
 832     u_int32_t hash;
 833     struct rtdetq *rte;
 834     u_short nstl;
 835     int i;
 836
 837     MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
 838
 839     /* If an entry already exists, just update the fields */
 840     if (rt) {
 841         if (mrtdebug & DEBUG_MFC)
 842             log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
 843                 (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 844                 (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 845                 mfccp->mfcc_parent);
 846
 847         rt->mfc_parent = mfccp->mfcc_parent;
 848         for (i = 0; i < numvifs; i++)
 849             rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 850         return 0;
 851     }
 852
 853     /*
 854      * Find the entry for which the upcall was made and update
 855      */
 856     hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
 857     for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
 858
 859         if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 860             (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
 861             (rt->mfc_stall != NULL)) {
 862
 863             if (nstl++)
 864                 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
 865                     "multiple kernel entries",
 866                     (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 867                     (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 868                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 869
 870             if (mrtdebug & DEBUG_MFC)
 871                 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
 872                     (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 873                     (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 874                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 875
 876             rt->mfc_origin     = mfccp->mfcc_origin;
 877             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 878             rt->mfc_parent     = mfccp->mfcc_parent;
 879             for (i = 0; i < numvifs; i++)
 880                 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 881             /* initialize pkt counters per src-grp */
 882             rt->mfc_pkt_cnt    = 0;
 883             rt->mfc_byte_cnt   = 0;
 884             rt->mfc_wrong_if   = 0;
 885             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 886
 887             rt->mfc_expire = 0; /* Don't clean this guy up */
 888             nexpire[hash]--;
 889
 890             /* free packets Qed at the end of this entry */
 891             for (rte = rt->mfc_stall; rte != NULL; ) {
 892                 struct rtdetq *n = rte->next;
 893
 894                 ip_mdq(rte->m, rte->ifp, rt, -1);
 895                 m_freem(rte->m);
 896 #if UPCALL_TIMING
 897                 collate(&(rte->t));
 898 #endif /* UPCALL_TIMING */
 899                 FREE(rte, M_MRTABLE);
 900                 rte = n;
 901             }
 902             rt->mfc_stall = NULL;
 903         }
 904     }
 905
 906     /*
 907      * It is possible that an entry is being inserted without an upcall
 908      */
 909     if (nstl == 0) {
 910         if (mrtdebug & DEBUG_MFC)
 911             log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
 912                 hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 913                 (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 914                 mfccp->mfcc_parent);
 915
 916         for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
 917
 918             if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 919                 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
 920
 921                 rt->mfc_origin     = mfccp->mfcc_origin;
 922                 rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 923                 rt->mfc_parent     = mfccp->mfcc_parent;
 924                 for (i = 0; i < numvifs; i++)
 925                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 926                 /* initialize pkt counters per src-grp */
 927                 rt->mfc_pkt_cnt    = 0;
 928                 rt->mfc_byte_cnt   = 0;
 929                 rt->mfc_wrong_if   = 0;
 930                 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 931                 if (rt->mfc_expire)
 932                     nexpire[hash]--;
 933                 rt->mfc_expire     = 0;
 934             }
 935         }
 936         if (rt == NULL) {
 937             /* no upcall, so make a new entry */
 938             rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 939             if (rt == NULL) {
 940                 return ENOBUFS;
 941             }
 942
 943             /* insert new entry at head of hash chain */
 944             rt->mfc_origin     = mfccp->mfcc_origin;
 945             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 946             rt->mfc_parent     = mfccp->mfcc_parent;
 947             for (i = 0; i < numvifs; i++)
 948                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 949             /* initialize pkt counters per src-grp */
 950             rt->mfc_pkt_cnt    = 0;
 951             rt->mfc_byte_cnt   = 0;
 952             rt->mfc_wrong_if   = 0;
 953             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 954             rt->mfc_expire     = 0;
 955             rt->mfc_stall      = NULL;
 956
 957             /* link into table */
 958             rt->mfc_next = mfctable[hash];
 959             mfctable[hash] = rt;
 960         }
 961     }
 962     return 0;
 963 }
 964
 965 #if UPCALL_TIMING
 966 /*
 967  * collect delay statistics on the upcalls
 968  */
 969 static void
 970 collate(struct timeval *t)
 971 {
 972     u_int32_t d;
 973     struct timeval tp;
 974     u_int32_t delta;
 975
 976     GET_TIME(tp);
 977
 978     if (TV_LT(*t, tp))
 979     {
 980         TV_DELTA(tp, *t, delta);
 981
 982         d = delta >> 10;
 983         if (d > 50)
 984             d = 50;
 985
 986         ++upcall_data[d];
 987     }
 988 }
 989 #endif /* UPCALL_TIMING */
 990
 991 /*
 992  * Delete an mfc entry
 993  */
 994 static int
 995 del_mfc(struct mfcctl *mfccp)
 996 {
 997     struct in_addr      origin;
 998     struct in_addr      mcastgrp;
 999     struct mfc          *rt;
1000     struct mfc          **nptr;
1001     u_int32_t           hash;
1002
1003     origin = mfccp->mfcc_origin;
1004     mcastgrp = mfccp->mfcc_mcastgrp;
1005     hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1006
1007     if (mrtdebug & DEBUG_MFC)
1008         log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
1009             (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr));
1010
1011     nptr = &mfctable[hash];
1012     while ((rt = *nptr) != NULL) {
1013         if (origin.s_addr == rt->mfc_origin.s_addr &&
1014             mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1015             rt->mfc_stall == NULL)
1016             break;
1017
1018         nptr = &rt->mfc_next;
1019     }
1020     if (rt == NULL) {
1021         return EADDRNOTAVAIL;
1022     }
1023
1024     *nptr = rt->mfc_next;
1025     FREE(rt, M_MRTABLE);
1026
1027     return 0;
1028 }
1029
1030 /*
1031  * Send a message to mrouted on the multicast routing socket
1032  */
1033 static int
1034 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
1035 {
1036         socket_lock(s, 1);
1037         if (s) {
1038                 if (sbappendaddr(&s->so_rcv,
1039                                  (struct sockaddr *)src,
1040                                  mm, (struct mbuf *)0, NULL) != 0) {
1041                         sorwakeup(s);
1042                         socket_unlock(s, 1);
1043                         return 0;
1044                 }
1045         }
1046         socket_unlock(s, 1);
1047         m_freem(mm);
1048         return -1;
1049 }
1050
1051 /*
1052  * IP multicast forwarding function. This function assumes that the packet
1053  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1054  * pointed to by "ifp", and the packet is to be relayed to other networks
1055  * that have members of the packet's destination IP multicast group.
1056  *
1057  * The packet is returned unscathed to the caller, unless it is
1058  * erroneous, in which case a non-zero return value tells the caller to
1059  * discard it.
1060  */
1061
1062 #define IP_HDR_LEN  20  /* # bytes of fixed IP header (excluding options) */
1063 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1064
1065 static int
1066 X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
1067               struct ip_moptions *imo)
1068 {
1069     struct mfc *rt;
1070     u_char *ipoptions;
1071     static struct sockaddr_in   k_igmpsrc = { sizeof k_igmpsrc, AF_INET,
1072                                                                                         0 , {0}, {0,0,0,0,0,0,0,0,} };
1073     static int srctun = 0;
1074     struct mbuf *mm;
1075     vifi_t vifi;
1076     struct vif *vifp;
1077
1078     if (mrtdebug & DEBUG_FORWARD)
1079         log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
1080             (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr),
1081             (void *)ifp);
1082
1083     if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
1084         (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
1085         /*
1086          * Packet arrived via a physical interface or
1087          * an encapsulated tunnel.
1088          */
1089     } else {
1090         /*
1091          * Packet arrived through a source-route tunnel.
1092          * Source-route tunnels are no longer supported.
1093          */
1094         if ((srctun++ % 1000) == 0)
1095             log(LOG_ERR,
1096                 "ip_mforward: received source-routed packet from %lx\n",
1097                 (u_int32_t)ntohl(ip->ip_src.s_addr));
1098
1099         return 1;
1100     }
1101
1102     if (imo != NULL)
1103         IMO_LOCK(imo);
1104     if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
1105         IMO_UNLOCK(imo);
1106         if (ip->ip_ttl < 255)
1107                 ip->ip_ttl++;   /* compensate for -1 in *_send routines */
1108         if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1109             vifp = viftable + vifi;
1110             printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n",
1111                 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
1112                 (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
1113                 vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
1114         }
1115         return (ip_mdq(m, ifp, NULL, vifi));
1116     } else if (imo != NULL) {
1117         IMO_UNLOCK(imo);
1118     }
1119     if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
1120         printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
1121             ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
1122         if(!imo)
1123                 printf("In fact, no options were specified at all\n");
1124     }
1125
1126     /*
1127      * Don't forward a packet with time-to-live of zero or one,
1128      * or a packet destined to a local-only group.
1129      */
1130     if (ip->ip_ttl <= 1 ||
1131         ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
1132         return 0;
1133
1134     /*
1135      * Determine forwarding vifs from the forwarding cache table
1136      */
1137     MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);
1138
1139     /* Entry exists, so forward if necessary */
1140     if (rt != NULL) {
1141         return (ip_mdq(m, ifp, rt, -1));
1142     } else {
1143         /*
1144          * If we don't have a route for packet's origin,
1145          * Make a copy of the packet &
1146          * send message to routing daemon
1147          */
1148
1149         struct mbuf *mb0;
1150         struct rtdetq *rte;
1151         u_int32_t hash;
1152         int hlen = ip->ip_hl << 2;
1153 #if UPCALL_TIMING
1154         struct timeval tp;
1155
1156         GET_TIME(tp);
1157 #endif
1158
1159         mrtstat.mrts_no_route++;
1160         if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
1161             log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
1162                 (u_int32_t)ntohl(ip->ip_src.s_addr),
1163                 (u_int32_t)ntohl(ip->ip_dst.s_addr));
1164
1165         /*
1166          * Allocate mbufs early so that we don't do extra work if we are
1167          * just going to fail anyway.  Make sure to pullup the header so
1168          * that other people can't step on it.
1169          */
1170         rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT);
1171         if (rte == NULL) {
1172             return ENOBUFS;
1173         }
1174         mb0 = m_copy(m, 0, M_COPYALL);
1175         if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
1176             mb0 = m_pullup(mb0, hlen);
1177         if (mb0 == NULL) {
1178             FREE(rte, M_MRTABLE);
1179             return ENOBUFS;
1180         }
1181
1182         /* is there an upcall waiting for this packet? */
1183         hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
1184         for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
1185             if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
1186                 (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
1187                 (rt->mfc_stall != NULL))
1188                 break;
1189         }
1190
1191         if (rt == NULL) {
1192             int i;
1193             struct igmpmsg *im;
1194
1195             /* no upcall, so make a new entry */
1196             rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT);
1197             if (rt == NULL) {
1198                 FREE(rte, M_MRTABLE);
1199                 m_freem(mb0);
1200                 return ENOBUFS;
1201             }
1202             /* Make a copy of the header to send to the user level process */
1203             mm = m_copy(mb0, 0, hlen);
1204             if (mm == NULL) {
1205                 FREE(rte, M_MRTABLE);
1206                 m_freem(mb0);
1207                 FREE(rt, M_MRTABLE);
1208                 return ENOBUFS;
1209             }
1210
1211             /*
1212              * Send message to routing daemon to install
1213              * a route into the kernel table
1214              */
1215             k_igmpsrc.sin_addr = ip->ip_src;
1216
1217             im = mtod(mm, struct igmpmsg *);
1218             im->im_msgtype      = IGMPMSG_NOCACHE;
1219             im->im_mbz          = 0;
1220
1221             mrtstat.mrts_upcalls++;
1222
1223             if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
1224                 log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
1225                 ++mrtstat.mrts_upq_sockfull;
1226                 FREE(rte, M_MRTABLE);
1227                 m_freem(mb0);
1228                 FREE(rt, M_MRTABLE);
1229                 return ENOBUFS;
1230             }
1231
1232             /* insert new entry at head of hash chain */
1233             rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
1234             rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
1235             rt->mfc_expire            = UPCALL_EXPIRE;
1236             nexpire[hash]++;
1237             for (i = 0; i < numvifs; i++)
1238                 rt->mfc_ttls[i] = 0;
1239             rt->mfc_parent = -1;
1240
1241             /* link into table */
1242             rt->mfc_next   = mfctable[hash];
1243             mfctable[hash] = rt;
1244             rt->mfc_stall = rte;
1245
1246         } else {
1247             /* determine if q has overflowed */
1248             int npkts = 0;
1249             struct rtdetq **p;
1250
1251             for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
1252                 npkts++;
1253
1254             if (npkts > MAX_UPQ) {
1255                 mrtstat.mrts_upq_ovflw++;
1256                 FREE(rte, M_MRTABLE);
1257                 m_freem(mb0);
1258                 return 0;
1259             }
1260
1261             /* Add this entry to the end of the queue */
1262             *p = rte;
1263         }
1264
1265         rte->m                  = mb0;
1266         rte->ifp                = ifp;
1267 #if UPCALL_TIMING
1268         rte->t                  = tp;
1269 #endif
1270         rte->next               = NULL;
1271
1272         return 0;
1273     }
1274 }
1275
1276 #if !defined(MROUTE_LKM) || !MROUTE_LKM
1277 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
1278                    struct ip_moptions *) = X_ip_mforward;
1279 #endif
1280
1281 /*
1282  * Clean up the cache entry if upcall is not serviced
1283  */
1284 static void
1285 expire_upcalls(__unused void *unused)
1286 {
1287     struct rtdetq *rte;
1288     struct mfc *mfc, **nptr;
1289     int i;
1290
1291     for (i = 0; i < CONFIG_MFCTBLSIZ; i++) {
1292         if (nexpire[i] == 0)
1293             continue;
1294         nptr = &mfctable[i];
1295         for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
1296             /*
1297              * Skip real cache entries
1298              * Make sure it wasn't marked to not expire (shouldn't happen)
1299              * If it expires now
1300              */
1301             if (mfc->mfc_stall != NULL &&
1302                 mfc->mfc_expire != 0 &&
1303                 --mfc->mfc_expire == 0) {
1304                 if (mrtdebug & DEBUG_EXPIRE)
1305                     log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
1306                         (u_int32_t)ntohl(mfc->mfc_origin.s_addr),
1307                         (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr));
1308                 /*
1309                  * drop all the packets
1310                  * free the mbuf with the pkt, if, timing info
1311                  */
1312                 for (rte = mfc->mfc_stall; rte; ) {
1313                     struct rtdetq *n = rte->next;
1314
1315                     m_freem(rte->m);
1316                     FREE(rte, M_MRTABLE);
1317                     rte = n;
1318                 }
1319                 ++mrtstat.mrts_cache_cleanups;
1320                 nexpire[i]--;
1321
1322                 *nptr = mfc->mfc_next;
1323                 FREE(mfc, M_MRTABLE);
1324             } else {
1325                 nptr = &mfc->mfc_next;
1326             }
1327         }
1328     }
1329     timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1330 }
1331
1332 /*
1333  * Packet forwarding routine once entry in the cache is made
1334  */
1335 static int
1336 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt,
1337        vifi_t xmt_vif)
1338 {
1339     struct ip  *ip = mtod(m, struct ip *);
1340     vifi_t vifi;
1341     struct vif *vifp;
1342     int plen = ip->ip_len;
1343
1344 /*
1345  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1346  * input, they shouldn't get counted on output, so statistics keeping is
1347  * seperate.
1348  */
1349 #define MC_SEND(ip,vifp,m) {                             \
1350                 if ((vifp)->v_flags & VIFF_TUNNEL)       \
1351                     encap_send((ip), (vifp), (m));       \
1352                 else                                     \
1353                     phyint_send((ip), (vifp), (m));      \
1354 }
1355
1356     /*
1357      * If xmt_vif is not -1, send on only the requested vif.
1358      *
1359      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1360      */
1361     if (xmt_vif < numvifs) {
1362         MC_SEND(ip, viftable + xmt_vif, m);
1363         return 1;
1364     }
1365
1366     /*
1367      * Don't forward if it didn't arrive from the parent vif for its origin.
1368      */
1369     vifi = rt->mfc_parent;
1370     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1371         /* came in the wrong interface */
1372         if (mrtdebug & DEBUG_FORWARD)
1373             log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1374                 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp);
1375         ++mrtstat.mrts_wrong_if;
1376         ++rt->mfc_wrong_if;
1377         /*
1378          * If we are doing PIM assert processing, and we are forwarding
1379          * packets on this interface, and it is a broadcast medium
1380          * interface (and not a tunnel), send a message to the routing daemon.
1381          */
1382         if (pim_assert && rt->mfc_ttls[vifi] &&
1383                 (ifp->if_flags & IFF_BROADCAST) &&
1384                 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1385             struct sockaddr_in k_igmpsrc;
1386             struct mbuf *mm;
1387             struct igmpmsg *im;
1388             int hlen = ip->ip_hl << 2;
1389             struct timeval now;
1390             u_int32_t delta;
1391
1392             GET_TIME(now);
1393
1394             TV_DELTA(rt->mfc_last_assert, now, delta);
1395
1396             if (delta > ASSERT_MSG_TIME) {
1397                 mm = m_copy(m, 0, hlen);
1398                 if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1399                     mm = m_pullup(mm, hlen);
1400                 if (mm == NULL) {
1401                     return ENOBUFS;
1402                 }
1403
1404                 rt->mfc_last_assert = now;
1405
1406                 im = mtod(mm, struct igmpmsg *);
1407                 im->im_msgtype  = IGMPMSG_WRONGVIF;
1408                 im->im_mbz              = 0;
1409                 im->im_vif              = vifi;
1410
1411                 k_igmpsrc.sin_addr = im->im_src;
1412
1413                 socket_send(ip_mrouter, mm, &k_igmpsrc);
1414             }
1415         }
1416         return 0;
1417     }
1418
1419     /* If I sourced this packet, it counts as output, else it was input. */
1420     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1421         viftable[vifi].v_pkt_out++;
1422         viftable[vifi].v_bytes_out += plen;
1423     } else {
1424         viftable[vifi].v_pkt_in++;
1425         viftable[vifi].v_bytes_in += plen;
1426     }
1427     rt->mfc_pkt_cnt++;
1428     rt->mfc_byte_cnt += plen;
1429
1430     /*
1431      * For each vif, decide if a copy of the packet should be forwarded.
1432      * Forward if:
1433      *          - the ttl exceeds the vif's threshold
1434      *          - there are group members downstream on interface
1435      */
1436     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1437         if ((rt->mfc_ttls[vifi] > 0) &&
1438             (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1439             vifp->v_pkt_out++;
1440             vifp->v_bytes_out += plen;
1441             MC_SEND(ip, vifp, m);
1442         }
1443
1444     return 0;
1445 }
1446
1447 /*
1448  * check if a vif number is legal/ok. This is used by ip_output, to export
1449  * numvifs there,
1450  */
1451 static int
1452 X_legal_vif_num(int vif)
1453 {
1454     if (vif >= 0 && vif < numvifs)
1455        return(1);
1456     else
1457        return(0);
1458 }
1459
1460 #if !defined(MROUTE_LKM) || !MROUTE_LKM
1461 int (*legal_vif_num)(int) = X_legal_vif_num;
1462 #endif
1463
1464 /*
1465  * Return the local address used by this vif
1466  */
1467 static u_int32_t
1468 X_ip_mcast_src(int vifi)
1469 {
1470     if (vifi >= 0 && vifi < numvifs)
1471         return viftable[vifi].v_lcl_addr.s_addr;
1472     else
1473         return INADDR_ANY;
1474 }
1475
1476 #if !defined(MROUTE_LKM) || !MROUTE_LKM
1477 u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src;
1478 #endif
1479
1480 static void
1481 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
1482 {
1483     struct mbuf *mb_copy;
1484     int hlen = ip->ip_hl << 2;
1485
1486     /*
1487      * Make a new reference to the packet; make sure that
1488      * the IP header is actually copied, not just referenced,
1489      * so that ip_output() only scribbles on the copy.
1490      */
1491     mb_copy = m_copy(m, 0, M_COPYALL);
1492     if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1493         mb_copy = m_pullup(mb_copy, hlen);
1494     if (mb_copy == NULL)
1495         return;
1496
1497     if (vifp->v_rate_limit == 0)
1498         tbf_send_packet(vifp, mb_copy);
1499     else
1500         tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1501 }
1502
1503 static void
1504 encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
1505 {
1506     struct mbuf *mb_copy;
1507     struct ip *ip_copy;
1508     int i, len = ip->ip_len;
1509
1510     /*
1511      * copy the old packet & pullup its IP header into the
1512      * new mbuf so we can modify it.  Try to fill the new
1513      * mbuf since if we don't the ethernet driver will.
1514      */
1515     MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
1516     if (mb_copy == NULL)
1517         return;
1518 #if CONFIG_MACF_NET
1519     mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy);
1520 #endif
1521     mb_copy->m_data += max_linkhdr;
1522     mb_copy->m_len = sizeof(multicast_encap_iphdr);
1523
1524     if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
1525         m_freem(mb_copy);
1526         return;
1527     }
1528     i = MHLEN - M_LEADINGSPACE(mb_copy);
1529     if (i > len)
1530         i = len;
1531     mb_copy = m_pullup(mb_copy, i);
1532     if (mb_copy == NULL)
1533         return;
1534     mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);
1535
1536     /*
1537      * fill in the encapsulating IP header.
1538      */
1539     ip_copy = mtod(mb_copy, struct ip *);
1540     *ip_copy = multicast_encap_iphdr;
1541 #if RANDOM_IP_ID
1542     ip_copy->ip_id = ip_randomid();
1543 #else
1544     ip_copy->ip_id = htons(ip_id++);
1545 #endif
1546     ip_copy->ip_len += len;
1547     ip_copy->ip_src = vifp->v_lcl_addr;
1548     ip_copy->ip_dst = vifp->v_rmt_addr;
1549
1550     /*
1551      * turn the encapsulated IP header back into a valid one.
1552      */
1553     ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
1554     --ip->ip_ttl;
1555
1556 #if BYTE_ORDER != BIG_ENDIAN
1557     HTONS(ip->ip_len);
1558     HTONS(ip->ip_off);
1559 #endif
1560
1561     ip->ip_sum = 0;
1562     mb_copy->m_data += sizeof(multicast_encap_iphdr);
1563     ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
1564     mb_copy->m_data -= sizeof(multicast_encap_iphdr);
1565
1566     if (vifp->v_rate_limit == 0)
1567         tbf_send_packet(vifp, mb_copy);
1568     else
1569         tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
1570 }
1571
1572 /*
1573  * De-encapsulate a packet and feed it back through ip input (this
1574  * routine is called whenever IP gets a packet with proto type
1575  * ENCAP_PROTO and a local destination address).
1576  */
1577 void
1578 #if MROUTE_LKM
1579 X_ipip_input(struct mbuf *m, int iphlen)
1580 #else
1581 ipip_input(struct mbuf *m, int iphlen)
1582 #endif
1583 {
1584     struct ifnet *ifp = m->m_pkthdr.rcvif;
1585     struct ip *ip = mtod(m, struct ip *);
1586     int hlen = ip->ip_hl << 2;
1587     struct vif *vifp;
1588
1589     if (!have_encap_tunnel) {
1590             rip_input(m, iphlen);
1591             return;
1592     }
1593     /*
1594      * dump the packet if it's not to a multicast destination or if
1595      * we don't have an encapsulating tunnel with the source.
1596      * Note:  This code assumes that the remote site IP address
1597      * uniquely identifies the tunnel (i.e., that this site has
1598      * at most one tunnel with the remote site).
1599      */
1600     if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
1601         ++mrtstat.mrts_bad_tunnel;
1602         m_freem(m);
1603         return;
1604     }
1605     if (ip->ip_src.s_addr != last_encap_src) {
1606         struct vif *vife;
1607
1608         vifp = viftable;
1609         vife = vifp + numvifs;
1610         last_encap_src = ip->ip_src.s_addr;
1611         last_encap_vif = 0;
1612         for ( ; vifp < vife; ++vifp)
1613             if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
1614                 if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
1615                     == VIFF_TUNNEL)
1616                     last_encap_vif = vifp;
1617                 break;
1618             }
1619     }
1620     if ((vifp = last_encap_vif) == 0) {
1621         last_encap_src = 0;
1622         mrtstat.mrts_cant_tunnel++; /*XXX*/
1623         m_freem(m);
1624         if (mrtdebug)
1625           log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n",
1626                 (u_int32_t)ntohl(ip->ip_src.s_addr));
1627         return;
1628     }
1629     ifp = vifp->v_ifp;
1630
1631     if (hlen > IP_HDR_LEN)
1632       ip_stripoptions(m, (struct mbuf *) 0);
1633     m->m_data += IP_HDR_LEN;
1634     m->m_len -= IP_HDR_LEN;
1635     m->m_pkthdr.len -= IP_HDR_LEN;
1636     m->m_pkthdr.rcvif = ifp;
1637
1638     proto_inject(PF_INET, m);
1639 }
1640
1641 /*
1642  * Token bucket filter module
1643  */
1644
1645 static void
1646 tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip,
1647             u_int32_t p_len)
1648 {
1649     struct tbf *t = vifp->v_tbf;
1650
1651     if (p_len > MAX_BKT_SIZE) {
1652         /* drop if packet is too large */
1653         mrtstat.mrts_pkt2large++;
1654         m_freem(m);
1655         return;
1656     }
1657
1658     tbf_update_tokens(vifp);
1659
1660     /* if there are enough tokens,
1661      * and the queue is empty,
1662      * send this packet out
1663      */
1664
1665     if (t->tbf_q_len == 0) {
1666         /* queue empty, send packet if enough tokens */
1667         if (p_len <= t->tbf_n_tok) {
1668             t->tbf_n_tok -= p_len;
1669             tbf_send_packet(vifp, m);
1670         } else {
1671             /* queue packet and timeout till later */
1672             tbf_queue(vifp, m);
1673             timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1674         }
1675     } else if (t->tbf_q_len < t->tbf_max_q_len) {
1676         /* finite queue length, so queue pkts and process queue */
1677         tbf_queue(vifp, m);
1678         tbf_process_q(vifp);
1679     } else {
1680         /* queue length too much, try to dq and queue and process */
1681         if (!tbf_dq_sel(vifp, ip)) {
1682             mrtstat.mrts_q_overflow++;
1683             m_freem(m);
1684             return;
1685         } else {
1686             tbf_queue(vifp, m);
1687             tbf_process_q(vifp);
1688         }
1689     }
1690     return;
1691 }
1692
1693 /*
1694  * adds a packet to the queue at the interface
1695  */
1696 static void
1697 tbf_queue(struct vif *vifp, struct mbuf *m)
1698 {
1699     struct tbf *t = vifp->v_tbf;
1700
1701     if (t->tbf_t == NULL) {
1702         /* Queue was empty */
1703         t->tbf_q = m;
1704     } else {
1705         /* Insert at tail */
1706         t->tbf_t->m_act = m;
1707     }
1708
1709     /* Set new tail pointer */
1710     t->tbf_t = m;
1711
1712 #if DIAGNOSTIC
1713     /* Make sure we didn't get fed a bogus mbuf */
1714     if (m->m_act)
1715         panic("tbf_queue: m_act");
1716 #endif
1717     m->m_act = NULL;
1718
1719     t->tbf_q_len++;
1720 }
1721
1722
1723 /*
1724  * processes the queue at the interface
1725  */
1726 static void
1727 tbf_process_q(struct vif *vifp)
1728 {
1729     struct mbuf *m;
1730     int len;
1731     struct tbf *t = vifp->v_tbf;
1732
1733     /* loop through the queue at the interface and send as many packets
1734      * as possible
1735      */
1736     while (t->tbf_q_len > 0) {
1737         m = t->tbf_q;
1738
1739         len = mtod(m, struct ip *)->ip_len;
1740
1741         /* determine if the packet can be sent */
1742         if (len <= t->tbf_n_tok) {
1743             /* if so,
1744              * reduce no of tokens, dequeue the packet,
1745              * send the packet.
1746              */
1747             t->tbf_n_tok -= len;
1748
1749             t->tbf_q = m->m_act;
1750             if (--t->tbf_q_len == 0)
1751                 t->tbf_t = NULL;
1752
1753             m->m_act = NULL;
1754             tbf_send_packet(vifp, m);
1755
1756         } else break;
1757     }
1758 }
1759
1760 static void
1761 tbf_reprocess_q(void *xvifp)
1762 {
1763     struct vif *vifp = xvifp;
1764
1765     if (ip_mrouter == NULL)  {
1766         return;
1767      }
1768
1769     tbf_update_tokens(vifp);
1770
1771     tbf_process_q(vifp);
1772
1773     if (vifp->v_tbf->tbf_q_len)
1774         timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1775 }
1776
1777 /* function that will selectively discard a member of the queue
1778  * based on the precedence value and the priority
1779  */
1780 static int
1781 tbf_dq_sel(struct vif *vifp, struct ip *ip)
1782 {
1783     u_int p;
1784     struct mbuf *m, *last;
1785     struct mbuf **np;
1786     struct tbf *t = vifp->v_tbf;
1787
1788     p = priority(vifp, ip);
1789
1790     np = &t->tbf_q;
1791     last = NULL;
1792     while ((m = *np) != NULL) {
1793         if (p > priority(vifp, mtod(m, struct ip *))) {
1794             *np = m->m_act;
1795             /* If we're removing the last packet, fix the tail pointer */
1796             if (m == t->tbf_t)
1797                 t->tbf_t = last;
1798             m_freem(m);
1799             /* it's impossible for the queue to be empty, but
1800              * we check anyway. */
1801             if (--t->tbf_q_len == 0)
1802                 t->tbf_t = NULL;
1803             mrtstat.mrts_drop_sel++;
1804             return(1);
1805         }
1806         np = &m->m_act;
1807         last = m;
1808     }
1809     return(0);
1810 }
1811
1812 static void
1813 tbf_send_packet(struct vif *vifp, struct mbuf *m)
1814 {
1815     int error;
1816     static struct route ro;
1817
1818     if (vifp->v_flags & VIFF_TUNNEL) {
1819         /* If tunnel options */
1820         ip_output(m, (struct mbuf *)0, &vifp->v_route,
1821                   IP_FORWARDING, (struct ip_moptions *)0, NULL);
1822     } else {
1823         struct ip_moptions *imo;
1824
1825         imo = ip_allocmoptions(M_DONTWAIT);
1826         if (imo == NULL) {
1827                 error = ENOMEM;
1828                 goto done;
1829         }
1830
1831         imo->imo_multicast_ifp  = vifp->v_ifp;
1832         imo->imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
1833         imo->imo_multicast_loop = 1;
1834         imo->imo_multicast_vif  = -1;
1835
1836         /*
1837          * Re-entrancy should not be a problem here, because
1838          * the packets that we send out and are looped back at us
1839          * should get rejected because they appear to come from
1840          * the loopback interface, thus preventing looping.
1841          */
1842         error = ip_output(m, (struct mbuf *)0, &ro,
1843                           IP_FORWARDING, imo, NULL);
1844
1845         IMO_REMREF(imo);
1846 done:
1847         if (mrtdebug & DEBUG_XMIT)
1848             log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
1849                 vifp - viftable, error);
1850     }
1851 }
1852
1853 /* determine the current time and then
1854  * the elapsed time (between the last time and time now)
1855  * in milliseconds & update the no. of tokens in the bucket
1856  */
1857 static void
1858 tbf_update_tokens(struct vif *vifp)
1859 {
1860     struct timeval tp;
1861     u_int32_t tm;
1862     struct tbf *t = vifp->v_tbf;
1863
1864     GET_TIME(tp);
1865
1866     TV_DELTA(tp, t->tbf_last_pkt_t, tm);
1867
1868     /*
1869      * This formula is actually
1870      * "time in seconds" * "bytes/second".
1871      *
1872      * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1873      *
1874      * The (1000/1024) was introduced in add_vif to optimize
1875      * this divide into a shift.
1876      */
1877     t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
1878     t->tbf_last_pkt_t = tp;
1879
1880     if (t->tbf_n_tok > MAX_BKT_SIZE)
1881         t->tbf_n_tok = MAX_BKT_SIZE;
1882 }
1883
1884 static int
1885 priority(__unused struct vif *vifp, struct ip *ip)
1886 {
1887     int prio;
1888
1889     /* temporary hack; may add general packet classifier some day */
1890
1891     /*
1892      * The UDP port space is divided up into four priority ranges:
1893      * [0, 16384)     : unclassified - lowest priority
1894      * [16384, 32768) : audio - highest priority
1895      * [32768, 49152) : whiteboard - medium priority
1896      * [49152, 65536) : video - low priority
1897      */
1898     if (ip->ip_p == IPPROTO_UDP) {
1899         struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1900         switch (ntohs(udp->uh_dport) & 0xc000) {
1901             case 0x4000:
1902                 prio = 70;
1903                 break;
1904             case 0x8000:
1905                 prio = 60;
1906                 break;
1907             case 0xc000:
1908                 prio = 55;
1909                 break;
1910             default:
1911                 prio = 50;
1912                 break;
1913         }
1914         if (tbfdebug > 1)
1915                 log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio);
1916     } else {
1917             prio = 50;
1918     }
1919     return prio;
1920 }
1921
1922 /*
1923  * End of token bucket filter modifications
1924  */
1925
1926 int
1927 ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt)
1928 {
1929     int error, i;
1930
1931     if (rsvpdebug)
1932         printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1933                so->so_type, so->so_proto->pr_protocol);
1934
1935     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1936         return EOPNOTSUPP;
1937
1938     /* Check mbuf. */
1939     error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1940     if (error)
1941             return (error);
1942
1943     if (rsvpdebug)
1944         printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);
1945
1946     /* Check vif. */
1947     if (!legal_vif_num(i)) {
1948         return EADDRNOTAVAIL;
1949     }
1950
1951     /* Check if socket is available. */
1952     if (viftable[i].v_rsvpd != NULL) {
1953         return EADDRINUSE;
1954     }
1955
1956     viftable[i].v_rsvpd = so;
1957     /* This may seem silly, but we need to be sure we don't over-increment
1958      * the RSVP counter, in case something slips up.
1959      */
1960     if (!viftable[i].v_rsvp_on) {
1961         viftable[i].v_rsvp_on = 1;
1962         rsvp_on++;
1963     }
1964
1965     return 0;
1966 }
1967
1968 int
1969 ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt)
1970 {
1971         int error, i;
1972
1973         if (rsvpdebug)
1974                 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1975                        so->so_type, so->so_proto->pr_protocol);
1976
1977         if (so->so_type != SOCK_RAW ||
1978             so->so_proto->pr_protocol != IPPROTO_RSVP)
1979                 return EOPNOTSUPP;
1980
1981         error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1982         if (error)
1983                 return (error);
1984
1985         /* Check vif. */
1986         if (!legal_vif_num(i)) {
1987                 return EADDRNOTAVAIL;
1988         }
1989
1990         if (rsvpdebug)
1991                 printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
1992                        viftable[i].v_rsvpd, so);
1993
1994         viftable[i].v_rsvpd = NULL;
1995         /*
1996          * This may seem silly, but we need to be sure we don't over-decrement
1997          * the RSVP counter, in case something slips up.
1998          */
1999         if (viftable[i].v_rsvp_on) {
2000                 viftable[i].v_rsvp_on = 0;
2001                 rsvp_on--;
2002         }
2003
2004         return 0;
2005 }
2006
2007 void
2008 ip_rsvp_force_done(struct socket *so)
2009 {
2010     int vifi;
2011
2012     /* Don't bother if it is not the right type of socket. */
2013     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2014         return;
2015
2016     /* The socket may be attached to more than one vif...this
2017      * is perfectly legal.
2018      */
2019     for (vifi = 0; vifi < numvifs; vifi++) {
2020         if (viftable[vifi].v_rsvpd == so) {
2021             viftable[vifi].v_rsvpd = NULL;
2022             /* This may seem silly, but we need to be sure we don't
2023              * over-decrement the RSVP counter, in case something slips up.
2024              */
2025             if (viftable[vifi].v_rsvp_on) {
2026                 viftable[vifi].v_rsvp_on = 0;
2027                 rsvp_on--;
2028             }
2029         }
2030     }
2031
2032     return;
2033 }
2034
2035 void
2036 rsvp_input(struct mbuf *m, int iphlen)
2037 {
2038     int vifi;
2039     struct ip *ip = mtod(m, struct ip *);
2040     static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET,
2041                                                                                 0 , {0}, {0,0,0,0,0,0,0,0,} };
2042     struct ifnet *ifp;
2043
2044     if (rsvpdebug)
2045         printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2046
2047     /* Can still get packets with rsvp_on = 0 if there is a local member
2048      * of the group to which the RSVP packet is addressed.  But in this
2049      * case we want to throw the packet away.
2050      */
2051     if (!rsvp_on) {
2052         m_freem(m);
2053         return;
2054     }
2055
2056     if (rsvpdebug)
2057         printf("rsvp_input: check vifs\n");
2058
2059 #if DIAGNOSTIC
2060     if (!(m->m_flags & M_PKTHDR))
2061             panic("rsvp_input no hdr");
2062 #endif
2063
2064     ifp = m->m_pkthdr.rcvif;
2065     /* Find which vif the packet arrived on. */
2066     for (vifi = 0; vifi < numvifs; vifi++)
2067         if (viftable[vifi].v_ifp == ifp)
2068             break;
2069
2070     if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
2071         /*
2072          * If the old-style non-vif-associated socket is set,
2073          * then use it.  Otherwise, drop packet since there
2074          * is no specific socket for this vif.
2075          */
2076         if (ip_rsvpd != NULL) {
2077             if (rsvpdebug)
2078                 printf("rsvp_input: Sending packet up old-style socket\n");
2079             rip_input(m, iphlen);  /* xxx */
2080         } else {
2081             if (rsvpdebug && vifi == numvifs)
2082                 printf("rsvp_input: Can't find vif for packet.\n");
2083             else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
2084                 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2085             m_freem(m);
2086         }
2087         return;
2088     }
2089     rsvp_src.sin_addr = ip->ip_src;
2090
2091     if (rsvpdebug && m)
2092         printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2093                m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2094
2095     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
2096         if (rsvpdebug)
2097             printf("rsvp_input: Failed to append to socket\n");
2098     } else {
2099         if (rsvpdebug)
2100             printf("rsvp_input: send packet up\n");
2101     }
2102
2103 }
2104
2105 #if MROUTE_LKM
2106 #include <sys/conf.h>
2107 #include <sys/exec.h>
2108 #include <sys/sysent.h>
2109 #include <sys/lkm.h>
2110
2111 MOD_MISC("ip_mroute_mod")
2112
2113 static int
2114 ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
2115 {
2116         int i;
2117         struct lkm_misc *args = lkmtp->private.lkm_misc;
2118         int err = 0;
2119
2120         switch(cmd) {
2121                 static int (*old_ip_mrouter_cmd)();
2122                 static int (*old_ip_mrouter_done)();
2123                 static int (*old_ip_mforward)();
2124                 static int (*old_mrt_ioctl)();
2125                 static void (*old_proto4_input)();
2126                 static int (*old_legal_vif_num)();
2127                 extern struct protosw inetsw[];
2128
2129         case LKM_E_LOAD:
2130                 if(lkmexists(lkmtp) || ip_mrtproto)
2131                   return(EEXIST);
2132                 old_ip_mrouter_cmd = ip_mrouter_cmd;
2133                 ip_mrouter_cmd = X_ip_mrouter_cmd;
2134                 old_ip_mrouter_done = ip_mrouter_done;
2135                 ip_mrouter_done = X_ip_mrouter_done;
2136                 old_ip_mforward = ip_mforward;
2137                 ip_mforward = X_ip_mforward;
2138                 old_mrt_ioctl = mrt_ioctl;
2139                 mrt_ioctl = X_mrt_ioctl;
2140               old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input;
2141               ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input;
2142                 old_legal_vif_num = legal_vif_num;
2143                 legal_vif_num = X_legal_vif_num;
2144                 ip_mrtproto = IGMP_DVMRP;
2145
2146                 printf("\nIP multicast routing loaded\n");
2147                 break;
2148
2149         case LKM_E_UNLOAD:
2150                 if (ip_mrouter)
2151                   return EINVAL;
2152
2153                 ip_mrouter_cmd = old_ip_mrouter_cmd;
2154                 ip_mrouter_done = old_ip_mrouter_done;
2155                 ip_mforward = old_ip_mforward;
2156                 mrt_ioctl = old_mrt_ioctl;
2157               ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input;
2158                 legal_vif_num = old_legal_vif_num;
2159                 ip_mrtproto = 0;
2160                 break;
2161
2162         default:
2163                 err = EINVAL;
2164                 break;
2165         }
2166
2167         return(err);
2168 }
2169
2170 int
2171 ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
2172         DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
2173                  nosys);
2174 }
2175
2176 #endif /* MROUTE_LKM */
2177 #endif /* MROUTING */