bsd/netinet/ip_mroute.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  30  * support for mandatory and extensible security protections.  This notice
  31  * is included in support of clause 2.2 (b) of the Apple Public License,
  32  * Version 2.0.
  33  */
  34 /*
  35  * IP multicast forwarding procedures
  36  *
  37  * Written by David Waitzman, BBN Labs, August 1988.
  38  * Modified by Steve Deering, Stanford, February 1989.
  39  * Modified by Mark J. Steiglitz, Stanford, May, 1991
  40  * Modified by Van Jacobson, LBL, January 1993
  41  * Modified by Ajit Thyagarajan, PARC, August 1993
  42  * Modified by Bill Fenner, PARC, April 1995
  43  *
  44  * MROUTING Revision: 3.5
  45  * $FreeBSD: src/sys/netinet/ip_mroute.c,v 1.56.2.2 2001/07/19 06:37:26 kris Exp $
  46  */
  47
  48
  49 #include <sys/param.h>
  50 #include <sys/systm.h>
  51 #include <sys/malloc.h>
  52 #include <sys/mbuf.h>
  53 #include <sys/socket.h>
  54 #include <sys/socketvar.h>
  55 #include <sys/protosw.h>
  56 #include <sys/time.h>
  57 #include <sys/kernel.h>
  58 #include <sys/sockio.h>
  59 #include <sys/syslog.h>
  60
  61 #include <machine/endian.h>
  62
  63 #include <net/if.h>
  64 #include <net/route.h>
  65 #include <net/kpi_protocol.h>
  66 #include <netinet/in.h>
  67 #include <netinet/in_systm.h>
  68 #include <netinet/ip.h>
  69 #include <netinet/ip_var.h>
  70 #include <netinet/in_var.h>
  71 #include <netinet/igmp.h>
  72 #include <netinet/ip_mroute.h>
  73 #include <netinet/udp.h>
  74
  75 #if CONFIG_MACF_NET
  76 #include <security/mac_framework.h>
  77 #endif
  78
  79
  80 #if !MROUTING
  81 extern u_int32_t        _ip_mcast_src(int vifi);
  82 extern int      _ip_mforward(struct ip *ip, struct ifnet *ifp,
  83                                   struct mbuf *m, struct ip_moptions *imo);
  84 extern int      _ip_mrouter_done(void);
  85 extern int      _ip_mrouter_get(struct socket *so, struct sockopt *sopt);
  86 extern int      _ip_mrouter_set(struct socket *so, struct sockopt *sopt);
  87 extern int      _mrt_ioctl(int req, caddr_t data, struct proc *p);
  88
  89 /*
  90  * Dummy routines and globals used when multicast routing is not compiled in.
  91  */
  92
  93 struct socket  *ip_mrouter  = NULL;
  94 u_int           rsvpdebug = 0;
  95
  96 int
  97 _ip_mrouter_set(__unused struct socket *so,
  98                 __unused struct sockopt *sopt)
  99 {
 100         return(EOPNOTSUPP);
 101 }
 102
 103 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = _ip_mrouter_set;
 104
 105
 106 int
 107 _ip_mrouter_get(__unused struct socket *so,
 108                 __unused sockopt *sopt)
 109 {
 110         return(EOPNOTSUPP);
 111 }
 112
 113 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = _ip_mrouter_get;
 114
 115 int
 116 _ip_mrouter_done(void)
 117 {
 118         return(0);
 119 }
 120
 121 int (*ip_mrouter_done)(void) = _ip_mrouter_done;
 122
 123 int
 124 _ip_mforward(__unused struct ip *ip, __unused struct ifnet *ifp,
 125              __unused struct mbuf *m, __unused ip_moptions *imo)
 126 {
 127         return(0);
 128 }
 129
 130 int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
 131                    struct ip_moptions *) = _ip_mforward;
 132
 133 int
 134 _mrt_ioctl(__unused int req, __unused caddr_t data, __unused struct proc *p)
 135 {
 136         return EOPNOTSUPP;
 137 }
 138
 139 int (*mrt_ioctl)(int, caddr_t, struct proc *) = _mrt_ioctl;
 140
 141 void
 142 rsvp_input(struct mbuf *m, int iphlen)          /* XXX must fixup manually */
 143 {
 144     /* Can still get packets with rsvp_on = 0 if there is a local member
 145      * of the group to which the RSVP packet is addressed.  But in this
 146      * case we want to throw the packet away.
 147      */
 148     if (!rsvp_on) {
 149         m_freem(m);
 150         return;
 151     }
 152
 153     if (ip_rsvpd != NULL) {
 154         if (rsvpdebug)
 155             printf("rsvp_input: Sending packet up old-style socket\n");
 156         rip_input(m, iphlen);
 157         return;
 158     }
 159     /* Drop the packet */
 160     m_freem(m);
 161 }
 162
 163 void ipip_input(struct mbuf *m, int iphlen) { /* XXX must fixup manually */
 164         rip_input(m, iphlen);
 165 }
 166
 167 int (*legal_vif_num)(int) = 0;
 168
 169 /*
 170  * This should never be called, since IP_MULTICAST_VIF should fail, but
 171  * just in case it does get called, the code a little lower in ip_output
 172  * will assign the packet a local address.
 173  */
 174 u_int32_t
 175 _ip_mcast_src(int vifi) { return INADDR_ANY; }
 176 u_int32_t (*ip_mcast_src)(int) = _ip_mcast_src;
 177
 178 int
 179 ip_rsvp_vif_init(so, sopt)
 180     struct socket *so;
 181     struct sockopt *sopt;
 182 {
 183     return(EINVAL);
 184 }
 185
 186 int
 187 ip_rsvp_vif_done(so, sopt)
 188     struct socket *so;
 189     struct sockopt *sopt;
 190 {
 191     return(EINVAL);
 192 }
 193
 194 void
 195 ip_rsvp_force_done(so)
 196     struct socket *so;
 197 {
 198     return;
 199 }
 200
 201 #else /* MROUTING */
 202
 203 #define M_HASCL(m)      ((m)->m_flags & M_EXT)
 204
 205 #define INSIZ           sizeof(struct in_addr)
 206 #define same(a1, a2) \
 207         (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0)
 208
 209
 210 /*
 211  * Globals.  All but ip_mrouter and ip_mrtproto could be static,
 212  * except for netstat or debugging purposes.
 213  */
 214 #ifndef MROUTE_LKM
 215 struct socket  *ip_mrouter  = NULL;
 216 static struct mrtstat   mrtstat;
 217 #else /* MROUTE_LKM */
 218 extern void     X_ipip_input(struct mbuf *m, int iphlen);
 219 extern struct mrtstat mrtstat;
 220 static int ip_mrtproto;
 221 #endif
 222
 223 #define NO_RTE_FOUND    0x1
 224 #define RTE_FOUND       0x2
 225
 226 static struct mfc       *mfctable[CONFIG_MFCTBLSIZ];
 227 static u_char           nexpire[CONFIG_MFCTBLSIZ];
 228 static struct vif       viftable[CONFIG_MAXVIFS];
 229 static u_int    mrtdebug = 0;     /* debug level        */
 230 #define         DEBUG_MFC       0x02
 231 #define         DEBUG_FORWARD   0x04
 232 #define         DEBUG_EXPIRE    0x08
 233 #define         DEBUG_XMIT      0x10
 234 static u_int    tbfdebug = 0;     /* tbf debug level    */
 235 static u_int    rsvpdebug = 0;    /* rsvp debug level   */
 236
 237 #define         EXPIRE_TIMEOUT  (hz / 4)        /* 4x / second          */
 238 #define         UPCALL_EXPIRE   6               /* number of timeouts   */
 239
 240 /*
 241  * Define the token bucket filter structures
 242  * tbftable -> each vif has one of these for storing info
 243  */
 244
 245 static struct tbf tbftable[CONFIG_MAXVIFS];
 246 #define         TBF_REPROCESS   (hz / 100)      /* 100x / second */
 247
 248 /*
 249  * 'Interfaces' associated with decapsulator (so we can tell
 250  * packets that went through it from ones that get reflected
 251  * by a broken gateway).  These interfaces are never linked into
 252  * the system ifnet list & no routes point to them.  I.e., packets
 253  * can't be sent this way.  They only exist as a placeholder for
 254  * multicast source verification.
 255  */
 256 static struct ifnet multicast_decap_if[CONFIG_MAXVIFS];
 257
 258 #define ENCAP_TTL 64
 259 #define ENCAP_PROTO IPPROTO_IPIP        /* 4 */
 260
 261 /* prototype IP hdr for encapsulated packets */
 262 static struct ip multicast_encap_iphdr = {
 263 #if BYTE_ORDER == LITTLE_ENDIAN
 264         sizeof(struct ip) >> 2, IPVERSION,
 265 #else
 266         IPVERSION, sizeof(struct ip) >> 2,
 267 #endif
 268         0,                              /* tos */
 269         sizeof(struct ip),              /* total length */
 270         0,                              /* id */
 271         0,                              /* frag offset */
 272         ENCAP_TTL, ENCAP_PROTO,
 273         0,                              /* checksum */
 274         { 0 }, { 0 }
 275 };
 276
 277 /*
 278  * Private variables.
 279  */
 280 static vifi_t      numvifs = 0;
 281 static int have_encap_tunnel = 0;
 282
 283 /*
 284  * one-back cache used by ipip_input to locate a tunnel's vif
 285  * given a datagram's src ip address.
 286  */
 287 static u_int32_t last_encap_src;
 288 static struct vif *last_encap_vif;
 289
 290 static u_int32_t        X_ip_mcast_src(int vifi);
 291 static int      X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, struct ip_moptions *imo);
 292 static int      X_ip_mrouter_done(void);
 293 static int      X_ip_mrouter_get(struct socket *so, struct sockopt *m);
 294 static int      X_ip_mrouter_set(struct socket *so, struct sockopt *m);
 295 static int      X_legal_vif_num(int vif);
 296 static int      X_mrt_ioctl(int cmd, caddr_t data);
 297
 298 static int get_sg_cnt(struct sioc_sg_req *);
 299 static int get_vif_cnt(struct sioc_vif_req *);
 300 static int ip_mrouter_init(struct socket *, int);
 301 static int add_vif(struct vifctl *);
 302 static int del_vif(vifi_t);
 303 static int add_mfc(struct mfcctl *);
 304 static int del_mfc(struct mfcctl *);
 305 static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *);
 306 static int set_assert(int);
 307 static void expire_upcalls(void *);
 308 static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *,
 309                   vifi_t);
 310 static void phyint_send(struct ip *, struct vif *, struct mbuf *);
 311 static void encap_send(struct ip *, struct vif *, struct mbuf *);
 312 static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_int32_t);
 313 static void tbf_queue(struct vif *, struct mbuf *);
 314 static void tbf_process_q(struct vif *);
 315 static void tbf_reprocess_q(void *);
 316 static int tbf_dq_sel(struct vif *, struct ip *);
 317 static void tbf_send_packet(struct vif *, struct mbuf *);
 318 static void tbf_update_tokens(struct vif *);
 319 static int priority(struct vif *, struct ip *);
 320 void multiencap_decap(struct mbuf *);
 321
 322 /*
 323  * whether or not special PIM assert processing is enabled.
 324  */
 325 static int pim_assert;
 326 /*
 327  * Rate limit for assert notification messages, in usec
 328  */
 329 #define ASSERT_MSG_TIME         3000000
 330
 331 /*
 332  * Hash function for a source, group entry
 333  */
 334 #define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \
 335                         ((g) >> 20) ^ ((g) >> 10) ^ (g))
 336
 337 /*
 338  * Find a route for a given origin IP address and Multicast group address
 339  * Type of service parameter to be added in the future!!!
 340  */
 341
 342 #define MFCFIND(o, g, rt) { \
 343         struct mfc *_rt = mfctable[MFCHASH(o,g)]; \
 344         rt = NULL; \
 345         ++mrtstat.mrts_mfc_lookups; \
 346         while (_rt) { \
 347                 if ((_rt->mfc_origin.s_addr == o) && \
 348                     (_rt->mfc_mcastgrp.s_addr == g) && \
 349                     (_rt->mfc_stall == NULL)) { \
 350                         rt = _rt; \
 351                         break; \
 352                 } \
 353                 _rt = _rt->mfc_next; \
 354         } \
 355         if (rt == NULL) { \
 356                 ++mrtstat.mrts_mfc_misses; \
 357         } \
 358 }
 359
 360
 361 /*
 362  * Macros to compute elapsed time efficiently
 363  * Borrowed from Van Jacobson's scheduling code
 364  */
 365 #define TV_DELTA(a, b, delta) { \
 366             int xxs; \
 367                 \
 368             delta = (a).tv_usec - (b).tv_usec; \
 369             if ((xxs = (a).tv_sec - (b).tv_sec)) { \
 370                switch (xxs) { \
 371                       case 2: \
 372                           delta += 1000000; \
 373                               /* fall through */ \
 374                       case 1: \
 375                           delta += 1000000; \
 376                           break; \
 377                       default: \
 378                           delta += (1000000 * xxs); \
 379                } \
 380             } \
 381 }
 382
 383 #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
 384               (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
 385
 386 #if UPCALL_TIMING
 387 u_int32_t upcall_data[51];
 388 static void collate(struct timeval *);
 389 #endif /* UPCALL_TIMING */
 390
 391
 392 /*
 393  * Handle MRT setsockopt commands to modify the multicast routing tables.
 394  */
 395 static int
 396 X_ip_mrouter_set(struct socket *so, struct sockopt *sopt)
 397 {
 398         int     error, optval;
 399         vifi_t  vifi;
 400         struct  vifctl vifc;
 401         struct  mfcctl mfc;
 402
 403         if (so != ip_mrouter && sopt->sopt_name != MRT_INIT)
 404                 return (EPERM);
 405
 406         error = 0;
 407         switch (sopt->sopt_name) {
 408         case MRT_INIT:
 409                 error = sooptcopyin(sopt, &optval, sizeof optval,
 410                                     sizeof optval);
 411                 if (error)
 412                         break;
 413                 error = ip_mrouter_init(so, optval);
 414                 break;
 415
 416         case MRT_DONE:
 417                 error = ip_mrouter_done();
 418                 break;
 419
 420         case MRT_ADD_VIF:
 421                 error = sooptcopyin(sopt, &vifc, sizeof vifc, sizeof vifc);
 422                 if (error)
 423                         break;
 424                 error = add_vif(&vifc);
 425                 break;
 426
 427         case MRT_DEL_VIF:
 428                 error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi);
 429                 if (error)
 430                         break;
 431                 error = del_vif(vifi);
 432                 break;
 433
 434         case MRT_ADD_MFC:
 435         case MRT_DEL_MFC:
 436                 error = sooptcopyin(sopt, &mfc, sizeof mfc, sizeof mfc);
 437                 if (error)
 438                         break;
 439                 if (sopt->sopt_name == MRT_ADD_MFC)
 440                         error = add_mfc(&mfc);
 441                 else
 442                         error = del_mfc(&mfc);
 443                 break;
 444
 445         case MRT_ASSERT:
 446                 error = sooptcopyin(sopt, &optval, sizeof optval,
 447                                     sizeof optval);
 448                 if (error)
 449                         break;
 450                 set_assert(optval);
 451                 break;
 452
 453         default:
 454                 error = EOPNOTSUPP;
 455                 break;
 456         }
 457         return (error);
 458 }
 459
 460 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 461 int (*ip_mrouter_set)(struct socket *, struct sockopt *) = X_ip_mrouter_set;
 462 #endif
 463
 464 /*
 465  * Handle MRT getsockopt commands
 466  */
 467 static int
 468 X_ip_mrouter_get(__unused struct socket *so, struct sockopt *sopt)
 469 {
 470         int error;
 471         static int vers = 0x0305; /* !!! why is this here? XXX */
 472
 473         switch (sopt->sopt_name) {
 474         case MRT_VERSION:
 475                 error = sooptcopyout(sopt, &vers, sizeof vers);
 476                 break;
 477
 478         case MRT_ASSERT:
 479                 error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert);
 480                 break;
 481         default:
 482                 error = EOPNOTSUPP;
 483                 break;
 484         }
 485         return (error);
 486 }
 487
 488 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 489 int (*ip_mrouter_get)(struct socket *, struct sockopt *) = X_ip_mrouter_get;
 490 #endif
 491
 492 /*
 493  * Handle ioctl commands to obtain information from the cache
 494  */
 495 static int
 496 X_mrt_ioctl(int cmd, caddr_t data)
 497 {
 498     int error = 0;
 499
 500     switch (cmd) {
 501         case (SIOCGETVIFCNT):
 502             return (get_vif_cnt((struct sioc_vif_req *)data));
 503             break;
 504         case (SIOCGETSGCNT):
 505             return (get_sg_cnt((struct sioc_sg_req *)data));
 506             break;
 507         default:
 508             return (EINVAL);
 509             break;
 510     }
 511     return error;
 512 }
 513
 514 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 515 int (*mrt_ioctl)(int, caddr_t) = X_mrt_ioctl;
 516 #endif
 517
 518 /*
 519  * returns the packet, byte, rpf-failure count for the source group provided
 520  */
 521 static int
 522 get_sg_cnt(struct sioc_sg_req *req)
 523 {
 524     struct mfc *rt;
 525
 526     MFCFIND(req->src.s_addr, req->grp.s_addr, rt);
 527     if (rt != NULL) {
 528         req->pktcnt = rt->mfc_pkt_cnt;
 529         req->bytecnt = rt->mfc_byte_cnt;
 530         req->wrong_if = rt->mfc_wrong_if;
 531     } else
 532         req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff;
 533
 534     return 0;
 535 }
 536
 537 /*
 538  * returns the input and output packet and byte counts on the vif provided
 539  */
 540 static int
 541 get_vif_cnt(struct sioc_vif_req *req)
 542 {
 543     vifi_t vifi = req->vifi;
 544
 545     if (vifi >= numvifs) return EINVAL;
 546
 547     req->icount = viftable[vifi].v_pkt_in;
 548     req->ocount = viftable[vifi].v_pkt_out;
 549     req->ibytes = viftable[vifi].v_bytes_in;
 550     req->obytes = viftable[vifi].v_bytes_out;
 551
 552     return 0;
 553 }
 554
 555 /*
 556  * Enable multicast routing
 557  */
 558 static int
 559 ip_mrouter_init(struct socket *so, int vers)
 560 {
 561     if (mrtdebug)
 562         log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d\n",
 563                 so->so_type, so->so_proto->pr_protocol);
 564
 565     if (so->so_type != SOCK_RAW ||
 566         so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP;
 567
 568     if (vers != 1)
 569         return ENOPROTOOPT;
 570
 571     if (ip_mrouter != NULL) return EADDRINUSE;
 572
 573     ip_mrouter = so;
 574
 575     bzero((caddr_t)mfctable, sizeof(mfctable));
 576     bzero((caddr_t)nexpire, sizeof(nexpire));
 577
 578     pim_assert = 0;
 579
 580     timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
 581
 582     if (mrtdebug)
 583         log(LOG_DEBUG, "ip_mrouter_init\n");
 584
 585     return 0;
 586 }
 587
 588 /*
 589  * Disable multicast routing
 590  */
 591 static int
 592 X_ip_mrouter_done(void)
 593 {
 594     vifi_t vifi;
 595     int i;
 596     struct ifnet *ifp;
 597     struct ifreq ifr;
 598     struct mfc *rt;
 599     struct rtdetq *rte;
 600
 601     /*
 602      * For each phyint in use, disable promiscuous reception of all IP
 603      * multicasts.
 604      */
 605     for (vifi = 0; vifi < numvifs; vifi++) {
 606         if (viftable[vifi].v_lcl_addr.s_addr != 0 &&
 607             !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
 608             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 609             ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr
 610                                                                 = INADDR_ANY;
 611             ifp = viftable[vifi].v_ifp;
 612             if_allmulti(ifp, 0);
 613         }
 614     }
 615     bzero((caddr_t)tbftable, sizeof(tbftable));
 616     bzero((caddr_t)viftable, sizeof(viftable));
 617     numvifs = 0;
 618     pim_assert = 0;
 619
 620     untimeout(expire_upcalls, (caddr_t)NULL);
 621
 622     /*
 623      * Free all multicast forwarding cache entries.
 624      */
 625     for (i = 0; i < CONFIG_MFCTBLSIZ; i++) {
 626         for (rt = mfctable[i]; rt != NULL; ) {
 627             struct mfc *nr = rt->mfc_next;
 628
 629             for (rte = rt->mfc_stall; rte != NULL; ) {
 630                 struct rtdetq *n = rte->next;
 631
 632                 m_freem(rte->m);
 633                 FREE(rte, M_MRTABLE);
 634                 rte = n;
 635             }
 636             FREE(rt, M_MRTABLE);
 637             rt = nr;
 638         }
 639     }
 640
 641     bzero((caddr_t)mfctable, sizeof(mfctable));
 642
 643     /*
 644      * Reset de-encapsulation cache
 645      */
 646     last_encap_src = 0;
 647     last_encap_vif = NULL;
 648     have_encap_tunnel = 0;
 649
 650     ip_mrouter = NULL;
 651
 652     if (mrtdebug)
 653         log(LOG_DEBUG, "ip_mrouter_done\n");
 654
 655     return 0;
 656 }
 657
 658 #if !defined(MROUTE_LKM) || !MROUTE_LKM
 659 int (*ip_mrouter_done)(void) = X_ip_mrouter_done;
 660 #endif
 661
 662 /*
 663  * Set PIM assert processing global
 664  */
 665 static int
 666 set_assert(int i)
 667 {
 668     if ((i != 1) && (i != 0))
 669         return EINVAL;
 670
 671     pim_assert = i;
 672
 673     return 0;
 674 }
 675
 676 /*
 677  * Add a vif to the vif table
 678  */
 679 static int
 680 add_vif(struct vifctl *vifcp)
 681 {
 682     struct vif *vifp = viftable + vifcp->vifc_vifi;
 683     static struct sockaddr_in sin = { sizeof sin, AF_INET,
 684                                                                                 0 , {0}, {0,0,0,0,0,0,0,0,} };
 685     struct ifaddr *ifa;
 686     struct ifnet *ifp;
 687     int error, s;
 688     struct tbf *v_tbf = tbftable + vifcp->vifc_vifi;
 689
 690     if (vifcp->vifc_vifi >= CONFIG_MAXVIFS)  return EINVAL;
 691     if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE;
 692
 693     /* Find the interface with an address in AF_INET family */
 694     sin.sin_addr = vifcp->vifc_lcl_addr;
 695     ifa = ifa_ifwithaddr((struct sockaddr *)&sin);
 696     if (ifa == 0) return EADDRNOTAVAIL;
 697     ifp = ifa->ifa_ifp;
 698     ifafree(ifa);
 699     ifa = NULL;
 700
 701     if (vifcp->vifc_flags & VIFF_TUNNEL) {
 702         if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) {
 703                 /*
 704                  * An encapsulating tunnel is wanted.  Tell ipip_input() to
 705                  * start paying attention to encapsulated packets.
 706                  */
 707                 if (have_encap_tunnel == 0) {
 708                         have_encap_tunnel = 1;
 709                         for (s = 0; s < CONFIG_MAXVIFS; ++s) {
 710                                 multicast_decap_if[s].if_name = "mdecap";
 711                                 multicast_decap_if[s].if_unit = s;
 712                                 multicast_decap_if[s].if_family = APPLE_IF_FAM_MDECAP;
 713                         }
 714                 }
 715                 /*
 716                  * Set interface to fake encapsulator interface
 717                  */
 718                 ifp = &multicast_decap_if[vifcp->vifc_vifi];
 719                 /*
 720                  * Prepare cached route entry
 721                  */
 722                 bzero(&vifp->v_route, sizeof(vifp->v_route));
 723         } else {
 724             log(LOG_ERR, "source routed tunnels not supported\n");
 725             return EOPNOTSUPP;
 726         }
 727     } else {
 728         /* Make sure the interface supports multicast */
 729         if ((ifp->if_flags & IFF_MULTICAST) == 0)
 730             return EOPNOTSUPP;
 731
 732         /* Enable promiscuous reception of all IP multicasts from the if */
 733         error = if_allmulti(ifp, 1);
 734         if (error)
 735             return error;
 736     }
 737
 738     /* define parameters for the tbf structure */
 739     vifp->v_tbf = v_tbf;
 740     GET_TIME(vifp->v_tbf->tbf_last_pkt_t);
 741     vifp->v_tbf->tbf_n_tok = 0;
 742     vifp->v_tbf->tbf_q_len = 0;
 743     vifp->v_tbf->tbf_max_q_len = MAXQSIZE;
 744     vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL;
 745
 746     vifp->v_flags     = vifcp->vifc_flags;
 747     vifp->v_threshold = vifcp->vifc_threshold;
 748     vifp->v_lcl_addr  = vifcp->vifc_lcl_addr;
 749     vifp->v_rmt_addr  = vifcp->vifc_rmt_addr;
 750     vifp->v_ifp       = ifp;
 751     /* scaling up here allows division by 1024 in critical code */
 752     vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000;
 753     vifp->v_rsvp_on   = 0;
 754     vifp->v_rsvpd     = NULL;
 755     /* initialize per vif pkt counters */
 756     vifp->v_pkt_in    = 0;
 757     vifp->v_pkt_out   = 0;
 758     vifp->v_bytes_in  = 0;
 759     vifp->v_bytes_out = 0;
 760
 761     /* Adjust numvifs up if the vifi is higher than numvifs */
 762     if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1;
 763
 764     if (mrtdebug)
 765         log(LOG_DEBUG, "add_vif #%d, lcladdr %lx, %s %lx, thresh %x, rate %d\n",
 766             vifcp->vifc_vifi,
 767             (u_int32_t)ntohl(vifcp->vifc_lcl_addr.s_addr),
 768             (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask",
 769             (u_int32_t)ntohl(vifcp->vifc_rmt_addr.s_addr),
 770             vifcp->vifc_threshold,
 771             vifcp->vifc_rate_limit);
 772
 773     return 0;
 774 }
 775
 776 /*
 777  * Delete a vif from the vif table
 778  */
 779 static int
 780 del_vif(vifi_t vifi)
 781 {
 782     struct vif *vifp = &viftable[vifi];
 783     struct mbuf *m;
 784     struct ifnet *ifp;
 785     struct ifreq ifr;
 786
 787     if (vifi >= numvifs) return EINVAL;
 788     if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL;
 789
 790     if (!(vifp->v_flags & VIFF_TUNNEL)) {
 791         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET;
 792         ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY;
 793         ifp = vifp->v_ifp;
 794         if_allmulti(ifp, 0);
 795     }
 796
 797     if (vifp == last_encap_vif) {
 798         last_encap_vif = 0;
 799         last_encap_src = 0;
 800     }
 801
 802     /*
 803      * Free packets queued at the interface
 804      */
 805     while (vifp->v_tbf->tbf_q) {
 806         m = vifp->v_tbf->tbf_q;
 807         vifp->v_tbf->tbf_q = m->m_act;
 808         m_freem(m);
 809     }
 810
 811     bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf)));
 812     bzero((caddr_t)vifp, sizeof (*vifp));
 813
 814     if (mrtdebug)
 815       log(LOG_DEBUG, "del_vif %d, numvifs %d\n", vifi, numvifs);
 816
 817     /* Adjust numvifs down */
 818     for (vifi = numvifs; vifi > 0; vifi--)
 819         if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break;
 820     numvifs = vifi;
 821
 822     return 0;
 823 }
 824
 825 /*
 826  * Add an mfc entry
 827  */
 828 static int
 829 add_mfc(struct mfcctl *mfccp)
 830 {
 831     struct mfc *rt;
 832     u_int32_t hash;
 833     struct rtdetq *rte;
 834     u_short nstl;
 835     int i;
 836
 837     MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt);
 838
 839     /* If an entry already exists, just update the fields */
 840     if (rt) {
 841         if (mrtdebug & DEBUG_MFC)
 842             log(LOG_DEBUG,"add_mfc update o %lx g %lx p %x\n",
 843                 (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 844                 (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 845                 mfccp->mfcc_parent);
 846
 847         rt->mfc_parent = mfccp->mfcc_parent;
 848         for (i = 0; i < numvifs; i++)
 849             rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 850         return 0;
 851     }
 852
 853     /*
 854      * Find the entry for which the upcall was made and update
 855      */
 856     hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr);
 857     for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) {
 858
 859         if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 860             (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) &&
 861             (rt->mfc_stall != NULL)) {
 862
 863             if (nstl++)
 864                 log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n",
 865                     "multiple kernel entries",
 866                     (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 867                     (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 868                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 869
 870             if (mrtdebug & DEBUG_MFC)
 871                 log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n",
 872                     (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 873                     (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 874                     mfccp->mfcc_parent, (void *)rt->mfc_stall);
 875
 876             rt->mfc_origin     = mfccp->mfcc_origin;
 877             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 878             rt->mfc_parent     = mfccp->mfcc_parent;
 879             for (i = 0; i < numvifs; i++)
 880                 rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 881             /* initialize pkt counters per src-grp */
 882             rt->mfc_pkt_cnt    = 0;
 883             rt->mfc_byte_cnt   = 0;
 884             rt->mfc_wrong_if   = 0;
 885             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 886
 887             rt->mfc_expire = 0; /* Don't clean this guy up */
 888             nexpire[hash]--;
 889
 890             /* free packets Qed at the end of this entry */
 891             for (rte = rt->mfc_stall; rte != NULL; ) {
 892                 struct rtdetq *n = rte->next;
 893
 894                 ip_mdq(rte->m, rte->ifp, rt, -1);
 895                 m_freem(rte->m);
 896 #if UPCALL_TIMING
 897                 collate(&(rte->t));
 898 #endif /* UPCALL_TIMING */
 899                 FREE(rte, M_MRTABLE);
 900                 rte = n;
 901             }
 902             rt->mfc_stall = NULL;
 903         }
 904     }
 905
 906     /*
 907      * It is possible that an entry is being inserted without an upcall
 908      */
 909     if (nstl == 0) {
 910         if (mrtdebug & DEBUG_MFC)
 911             log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n",
 912                 hash, (u_int32_t)ntohl(mfccp->mfcc_origin.s_addr),
 913                 (u_int32_t)ntohl(mfccp->mfcc_mcastgrp.s_addr),
 914                 mfccp->mfcc_parent);
 915
 916         for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) {
 917
 918             if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) &&
 919                 (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) {
 920
 921                 rt->mfc_origin     = mfccp->mfcc_origin;
 922                 rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 923                 rt->mfc_parent     = mfccp->mfcc_parent;
 924                 for (i = 0; i < numvifs; i++)
 925                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 926                 /* initialize pkt counters per src-grp */
 927                 rt->mfc_pkt_cnt    = 0;
 928                 rt->mfc_byte_cnt   = 0;
 929                 rt->mfc_wrong_if   = 0;
 930                 rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 931                 if (rt->mfc_expire)
 932                     nexpire[hash]--;
 933                 rt->mfc_expire     = 0;
 934             }
 935         }
 936         if (rt == NULL) {
 937             /* no upcall, so make a new entry */
 938             rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT);
 939             if (rt == NULL) {
 940                 return ENOBUFS;
 941             }
 942
 943             /* insert new entry at head of hash chain */
 944             rt->mfc_origin     = mfccp->mfcc_origin;
 945             rt->mfc_mcastgrp   = mfccp->mfcc_mcastgrp;
 946             rt->mfc_parent     = mfccp->mfcc_parent;
 947             for (i = 0; i < numvifs; i++)
 948                     rt->mfc_ttls[i] = mfccp->mfcc_ttls[i];
 949             /* initialize pkt counters per src-grp */
 950             rt->mfc_pkt_cnt    = 0;
 951             rt->mfc_byte_cnt   = 0;
 952             rt->mfc_wrong_if   = 0;
 953             rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0;
 954             rt->mfc_expire     = 0;
 955             rt->mfc_stall      = NULL;
 956
 957             /* link into table */
 958             rt->mfc_next = mfctable[hash];
 959             mfctable[hash] = rt;
 960         }
 961     }
 962     return 0;
 963 }
 964
 965 #if UPCALL_TIMING
 966 /*
 967  * collect delay statistics on the upcalls
 968  */
 969 static void
 970 collate(struct timeval *t)
 971 {
 972     u_int32_t d;
 973     struct timeval tp;
 974     u_int32_t delta;
 975
 976     GET_TIME(tp);
 977
 978     if (TV_LT(*t, tp))
 979     {
 980         TV_DELTA(tp, *t, delta);
 981
 982         d = delta >> 10;
 983         if (d > 50)
 984             d = 50;
 985
 986         ++upcall_data[d];
 987     }
 988 }
 989 #endif /* UPCALL_TIMING */
 990
 991 /*
 992  * Delete an mfc entry
 993  */
 994 static int
 995 del_mfc(struct mfcctl *mfccp)
 996 {
 997     struct in_addr      origin;
 998     struct in_addr      mcastgrp;
 999     struct mfc          *rt;
1000     struct mfc          **nptr;
1001     u_int32_t           hash;
1002
1003     origin = mfccp->mfcc_origin;
1004     mcastgrp = mfccp->mfcc_mcastgrp;
1005     hash = MFCHASH(origin.s_addr, mcastgrp.s_addr);
1006
1007     if (mrtdebug & DEBUG_MFC)
1008         log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n",
1009             (u_int32_t)ntohl(origin.s_addr), (u_int32_t)ntohl(mcastgrp.s_addr));
1010
1011     nptr = &mfctable[hash];
1012     while ((rt = *nptr) != NULL) {
1013         if (origin.s_addr == rt->mfc_origin.s_addr &&
1014             mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr &&
1015             rt->mfc_stall == NULL)
1016             break;
1017
1018         nptr = &rt->mfc_next;
1019     }
1020     if (rt == NULL) {
1021         return EADDRNOTAVAIL;
1022     }
1023
1024     *nptr = rt->mfc_next;
1025     FREE(rt, M_MRTABLE);
1026
1027     return 0;
1028 }
1029
1030 /*
1031  * Send a message to mrouted on the multicast routing socket
1032  */
1033 static int
1034 socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src)
1035 {
1036         socket_lock(s, 1);
1037         if (s) {
1038                 if (sbappendaddr(&s->so_rcv,
1039                                  (struct sockaddr *)src,
1040                                  mm, (struct mbuf *)0, NULL) != 0) {
1041                         sorwakeup(s);
1042                         socket_unlock(s, 1);
1043                         return 0;
1044                 }
1045         }
1046         socket_unlock(s, 1);
1047         m_freem(mm);
1048         return -1;
1049 }
1050
1051 /*
1052  * IP multicast forwarding function. This function assumes that the packet
1053  * pointed to by "ip" has arrived on (or is about to be sent to) the interface
1054  * pointed to by "ifp", and the packet is to be relayed to other networks
1055  * that have members of the packet's destination IP multicast group.
1056  *
1057  * The packet is returned unscathed to the caller, unless it is
1058  * erroneous, in which case a non-zero return value tells the caller to
1059  * discard it.
1060  */
1061
1062 #define IP_HDR_LEN  20  /* # bytes of fixed IP header (excluding options) */
1063 #define TUNNEL_LEN  12  /* # bytes of IP option for tunnel encapsulation  */
1064
/*
 * Decide how to forward one multicast packet.
 *
 * ip  - IP header of the packet.
 * ifp - interface passed on to ip_mdq() / recorded with a stalled packet.
 * m   - mbuf chain holding the packet; it is copied, never freed, here.
 * imo - multicast options of the sender, or NULL; when it names a valid
 *       vif the packet is sent on that vif only.
 *
 * Returns 1 for source-routed tunnel packets, the result of ip_mdq() when
 * the packet is forwarded immediately, ENOBUFS when memory for queuing or
 * the upcall cannot be obtained (or the upcall cannot be delivered), and 0
 * when the packet is not forwarded, is queued awaiting a route, or is
 * dropped because the pending queue is full.
 */
static int
X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m,
              struct ip_moptions *imo)
{
    struct mfc *rt;
    u_char *ipoptions;
    /* Source address template for upcalls; sin_addr is rewritten per upcall. */
    static struct sockaddr_in   k_igmpsrc = { sizeof k_igmpsrc, AF_INET,
                                                                                        0 , {0}, {0,0,0,0,0,0,0,0,} };
    /* Counts source-routed packets so that only every 1000th is logged. */
    static int srctun = 0;
    struct mbuf *mm;
    vifi_t vifi;
    struct vif *vifp;

    if (mrtdebug & DEBUG_FORWARD)
        log(LOG_DEBUG, "ip_mforward: src %lx, dst %lx, ifp %p\n",
            (u_int32_t)ntohl(ip->ip_src.s_addr), (u_int32_t)ntohl(ip->ip_dst.s_addr),
            (void *)ifp);

    /*
     * The header is examined for a tunnel option only when it is long
     * enough to hold one; the byte at offset 1 past the fixed header is
     * then compared against IPOPT_LSRR.
     */
    if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 ||
        (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) {
        /*
         * Packet arrived via a physical interface or
         * an encapsulated tunnel.
         */
    } else {
        /*
         * Packet arrived through a source-route tunnel.
         * Source-route tunnels are no longer supported.
         */
        if ((srctun++ % 1000) == 0)
            log(LOG_ERR,
                "ip_mforward: received source-routed packet from %lx\n",
                (u_int32_t)ntohl(ip->ip_src.s_addr));

        return 1;
    }

    /* The sender asked for a specific, valid vif: send there and nowhere else. */
    if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) {
        if (ip->ip_ttl < 255)
                ip->ip_ttl++;   /* compensate for -1 in *_send routines */
        if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
            vifp = viftable + vifi;
            printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n",
                ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi,
                (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "",
                vifp->v_ifp->if_name, vifp->v_ifp->if_unit);
        }
        /* rt is NULL here; ip_mdq() returns before using it because vifi < numvifs. */
        return (ip_mdq(m, ifp, NULL, vifi));
    }
    if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) {
        printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n",
            ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr));
        if(!imo)
                printf("In fact, no options were specified at all\n");
    }

    /*
     * Don't forward a packet with time-to-live of zero or one,
     * or a packet destined to a local-only group.
     */
    if (ip->ip_ttl <= 1 ||
        ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP)
        return 0;

    /*
     * Determine forwarding vifs from the forwarding cache table
     *
     * NOTE(review): MFCFIND is defined outside this function; it is
     * presumably expected to leave rt NULL for entries that still have
     * stalled packets, since the code below searches for those itself.
     */
    MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt);

    /* Entry exists, so forward if necessary */
    if (rt != NULL) {
        return (ip_mdq(m, ifp, rt, -1));
    } else {
        /*
         * If we don't have a route for packet's origin,
         * Make a copy of the packet &
         * send message to routing daemon
         */

        struct mbuf *mb0;
        struct rtdetq *rte;
        u_int32_t hash;
        int hlen = ip->ip_hl << 2;      /* header length in bytes */
#if UPCALL_TIMING
        struct timeval tp;

        GET_TIME(tp);
#endif

        mrtstat.mrts_no_route++;
        if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC))
            log(LOG_DEBUG, "ip_mforward: no rte s %lx g %lx\n",
                (u_int32_t)ntohl(ip->ip_src.s_addr),
                (u_int32_t)ntohl(ip->ip_dst.s_addr));

        /*
         * Allocate mbufs early so that we don't do extra work if we are
         * just going to fail anyway.  Make sure to pullup the header so
         * that other people can't step on it.
         */
        rte = (struct rtdetq *) _MALLOC((sizeof *rte), M_MRTABLE, M_NOWAIT);
        if (rte == NULL) {
            return ENOBUFS;
        }
        mb0 = m_copy(m, 0, M_COPYALL);
        if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen))
            mb0 = m_pullup(mb0, hlen);
        if (mb0 == NULL) {
            FREE(rte, M_MRTABLE);
            return ENOBUFS;
        }

        /* is there an upcall waiting for this packet? */
        hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr);
        for (rt = mfctable[hash]; rt; rt = rt->mfc_next) {
            if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) &&
                (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) &&
                (rt->mfc_stall != NULL))
                break;
        }

        if (rt == NULL) {
            int i;
            struct igmpmsg *im;

            /* no upcall, so make a new entry */
            rt = (struct mfc *) _MALLOC(sizeof(*rt), M_MRTABLE, M_NOWAIT);
            if (rt == NULL) {
                FREE(rte, M_MRTABLE);
                m_freem(mb0);
                return ENOBUFS;
            }
            /* Make a copy of the header to send to the user level process */
            mm = m_copy(mb0, 0, hlen);
            if (mm == NULL) {
                FREE(rte, M_MRTABLE);
                m_freem(mb0);
                FREE(rt, M_MRTABLE);
                return ENOBUFS;
            }

            /*
             * Send message to routing daemon to install
             * a route into the kernel table
             */
            k_igmpsrc.sin_addr = ip->ip_src;

            /* The copied IP header is overlaid with the upcall message layout. */
            im = mtod(mm, struct igmpmsg *);
            im->im_msgtype      = IGMPMSG_NOCACHE;
            im->im_mbz          = 0;

            mrtstat.mrts_upcalls++;

            /* socket_send() disposes of mm itself when it fails. */
            if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) {
                log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full\n");
                ++mrtstat.mrts_upq_sockfull;
                FREE(rte, M_MRTABLE);
                m_freem(mb0);
                FREE(rt, M_MRTABLE);
                return ENOBUFS;
            }

            /*
             * insert new entry at head of hash chain
             *
             * Only the fields below are set; the packet/byte counters and
             * mfc_last_assert are not written here.
             * NOTE(review): presumably add_mfc fills those in when the
             * daemon resolves the entry -- confirm.
             */
            rt->mfc_origin.s_addr     = ip->ip_src.s_addr;
            rt->mfc_mcastgrp.s_addr   = ip->ip_dst.s_addr;
            rt->mfc_expire            = UPCALL_EXPIRE;
            nexpire[hash]++;
            for (i = 0; i < numvifs; i++)
                rt->mfc_ttls[i] = 0;
            rt->mfc_parent = -1;

            /* link into table */
            rt->mfc_next   = mfctable[hash];
            mfctable[hash] = rt;
            rt->mfc_stall = rte;

        } else {
            /* determine if q has overflowed */
            int npkts = 0;
            struct rtdetq **p;

            /* Count the queued packets; p ends up at the tail link. */
            for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next)
                npkts++;

            if (npkts > MAX_UPQ) {
                mrtstat.mrts_upq_ovflw++;
                FREE(rte, M_MRTABLE);
                m_freem(mb0);
                return 0;
            }

            /* Add this entry to the end of the queue */
            *p = rte;
        }

        /* Fill in the queue element; it is already linked in at this point. */
        rte->m                  = mb0;
        rte->ifp                = ifp;
#if UPCALL_TIMING
        rte->t                  = tp;
#endif
        rte->next               = NULL;

        return 0;
    }
}
1270
#if !defined(MROUTE_LKM) || !MROUTE_LKM
/*
 * Global function pointer bound to X_ip_mforward; only defined here when
 * MROUTE_LKM is undefined or zero.
 */
int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *,
                   struct ip_moptions *) = X_ip_mforward;
#endif
1275
1276 /*
1277  * Clean up the cache entry if upcall is not serviced
1278  */
1279 static void
1280 expire_upcalls(__unused void *unused)
1281 {
1282     struct rtdetq *rte;
1283     struct mfc *mfc, **nptr;
1284     int i;
1285
1286     for (i = 0; i < CONFIG_MFCTBLSIZ; i++) {
1287         if (nexpire[i] == 0)
1288             continue;
1289         nptr = &mfctable[i];
1290         for (mfc = *nptr; mfc != NULL; mfc = *nptr) {
1291             /*
1292              * Skip real cache entries
1293              * Make sure it wasn't marked to not expire (shouldn't happen)
1294              * If it expires now
1295              */
1296             if (mfc->mfc_stall != NULL &&
1297                 mfc->mfc_expire != 0 &&
1298                 --mfc->mfc_expire == 0) {
1299                 if (mrtdebug & DEBUG_EXPIRE)
1300                     log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n",
1301                         (u_int32_t)ntohl(mfc->mfc_origin.s_addr),
1302                         (u_int32_t)ntohl(mfc->mfc_mcastgrp.s_addr));
1303                 /*
1304                  * drop all the packets
1305                  * free the mbuf with the pkt, if, timing info
1306                  */
1307                 for (rte = mfc->mfc_stall; rte; ) {
1308                     struct rtdetq *n = rte->next;
1309
1310                     m_freem(rte->m);
1311                     FREE(rte, M_MRTABLE);
1312                     rte = n;
1313                 }
1314                 ++mrtstat.mrts_cache_cleanups;
1315                 nexpire[i]--;
1316
1317                 *nptr = mfc->mfc_next;
1318                 FREE(mfc, M_MRTABLE);
1319             } else {
1320                 nptr = &mfc->mfc_next;
1321             }
1322         }
1323     }
1324     timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT);
1325 }
1326
1327 /*
1328  * Packet forwarding routine once entry in the cache is made
1329  */
1330 static int
1331 ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt,
1332        vifi_t xmt_vif)
1333 {
1334     struct ip  *ip = mtod(m, struct ip *);
1335     vifi_t vifi;
1336     struct vif *vifp;
1337     int plen = ip->ip_len;
1338
1339 /*
1340  * Macro to send packet on vif.  Since RSVP packets don't get counted on
1341  * input, they shouldn't get counted on output, so statistics keeping is
1342  * seperate.
1343  */
1344 #define MC_SEND(ip,vifp,m) {                             \
1345                 if ((vifp)->v_flags & VIFF_TUNNEL)       \
1346                     encap_send((ip), (vifp), (m));       \
1347                 else                                     \
1348                     phyint_send((ip), (vifp), (m));      \
1349 }
1350
1351     /*
1352      * If xmt_vif is not -1, send on only the requested vif.
1353      *
1354      * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.)
1355      */
1356     if (xmt_vif < numvifs) {
1357         MC_SEND(ip, viftable + xmt_vif, m);
1358         return 1;
1359     }
1360
1361     /*
1362      * Don't forward if it didn't arrive from the parent vif for its origin.
1363      */
1364     vifi = rt->mfc_parent;
1365     if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) {
1366         /* came in the wrong interface */
1367         if (mrtdebug & DEBUG_FORWARD)
1368             log(LOG_DEBUG, "wrong if: ifp %p vifi %d vififp %p\n",
1369                 (void *)ifp, vifi, (void *)viftable[vifi].v_ifp);
1370         ++mrtstat.mrts_wrong_if;
1371         ++rt->mfc_wrong_if;
1372         /*
1373          * If we are doing PIM assert processing, and we are forwarding
1374          * packets on this interface, and it is a broadcast medium
1375          * interface (and not a tunnel), send a message to the routing daemon.
1376          */
1377         if (pim_assert && rt->mfc_ttls[vifi] &&
1378                 (ifp->if_flags & IFF_BROADCAST) &&
1379                 !(viftable[vifi].v_flags & VIFF_TUNNEL)) {
1380             struct sockaddr_in k_igmpsrc;
1381             struct mbuf *mm;
1382             struct igmpmsg *im;
1383             int hlen = ip->ip_hl << 2;
1384             struct timeval now;
1385             u_int32_t delta;
1386
1387             GET_TIME(now);
1388
1389             TV_DELTA(rt->mfc_last_assert, now, delta);
1390
1391             if (delta > ASSERT_MSG_TIME) {
1392                 mm = m_copy(m, 0, hlen);
1393                 if (mm && (M_HASCL(mm) || mm->m_len < hlen))
1394                     mm = m_pullup(mm, hlen);
1395                 if (mm == NULL) {
1396                     return ENOBUFS;
1397                 }
1398
1399                 rt->mfc_last_assert = now;
1400
1401                 im = mtod(mm, struct igmpmsg *);
1402                 im->im_msgtype  = IGMPMSG_WRONGVIF;
1403                 im->im_mbz              = 0;
1404                 im->im_vif              = vifi;
1405
1406                 k_igmpsrc.sin_addr = im->im_src;
1407
1408                 socket_send(ip_mrouter, mm, &k_igmpsrc);
1409             }
1410         }
1411         return 0;
1412     }
1413
1414     /* If I sourced this packet, it counts as output, else it was input. */
1415     if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) {
1416         viftable[vifi].v_pkt_out++;
1417         viftable[vifi].v_bytes_out += plen;
1418     } else {
1419         viftable[vifi].v_pkt_in++;
1420         viftable[vifi].v_bytes_in += plen;
1421     }
1422     rt->mfc_pkt_cnt++;
1423     rt->mfc_byte_cnt += plen;
1424
1425     /*
1426      * For each vif, decide if a copy of the packet should be forwarded.
1427      * Forward if:
1428      *          - the ttl exceeds the vif's threshold
1429      *          - there are group members downstream on interface
1430      */
1431     for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++)
1432         if ((rt->mfc_ttls[vifi] > 0) &&
1433             (ip->ip_ttl > rt->mfc_ttls[vifi])) {
1434             vifp->v_pkt_out++;
1435             vifp->v_bytes_out += plen;
1436             MC_SEND(ip, vifp, m);
1437         }
1438
1439     return 0;
1440 }
1441
/*
 * Check whether a vif number is legal.  ip_output uses this so that it
 * does not need direct access to numvifs.
 */
1446 static int
1447 X_legal_vif_num(int vif)
1448 {
1449     if (vif >= 0 && vif < numvifs)
1450        return(1);
1451     else
1452        return(0);
1453 }
1454
#if !defined(MROUTE_LKM) || !MROUTE_LKM
/*
 * Global function pointer bound to X_legal_vif_num; only defined here when
 * MROUTE_LKM is undefined or zero.
 */
int (*legal_vif_num)(int) = X_legal_vif_num;
#endif
1458
1459 /*
1460  * Return the local address used by this vif
1461  */
1462 static u_int32_t
1463 X_ip_mcast_src(int vifi)
1464 {
1465     if (vifi >= 0 && vifi < numvifs)
1466         return viftable[vifi].v_lcl_addr.s_addr;
1467     else
1468         return INADDR_ANY;
1469 }
1470
#if !defined(MROUTE_LKM) || !MROUTE_LKM
/*
 * Global function pointer bound to X_ip_mcast_src; only defined here when
 * MROUTE_LKM is undefined or zero.
 */
u_int32_t (*ip_mcast_src)(int) = X_ip_mcast_src;
#endif
1474
1475 static void
1476 phyint_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
1477 {
1478     struct mbuf *mb_copy;
1479     int hlen = ip->ip_hl << 2;
1480
1481     /*
1482      * Make a new reference to the packet; make sure that
1483      * the IP header is actually copied, not just referenced,
1484      * so that ip_output() only scribbles on the copy.
1485      */
1486     mb_copy = m_copy(m, 0, M_COPYALL);
1487     if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen))
1488         mb_copy = m_pullup(mb_copy, hlen);
1489     if (mb_copy == NULL)
1490         return;
1491
1492     if (vifp->v_rate_limit == 0)
1493         tbf_send_packet(vifp, mb_copy);
1494     else
1495         tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len);
1496 }
1497
/*
 * Send a copy of packet `m` through a tunnel vif by prepending an outer IP
 * header taken from multicast_encap_iphdr.
 *
 * ip   - IP header of the original packet.
 * vifp - tunnel vif; supplies the outer source and destination addresses.
 * m    - original packet; it is copied, not consumed.
 *
 * The result is sent directly when the vif has no rate limit, otherwise it
 * is passed through the token bucket filter.  Nothing is sent if an mbuf
 * cannot be obtained.
 */
static void
encap_send(struct ip *ip, struct vif *vifp, struct mbuf *m)
{
    struct mbuf *mb_copy;
    struct ip *ip_copy;
    int i, len = ip->ip_len;    /* len: length of the inner packet */

    /*
     * copy the old packet & pullup its IP header into the
     * new mbuf so we can modify it.  Try to fill the new
     * mbuf since if we don't the ethernet driver will.
     */
    MGETHDR(mb_copy, M_DONTWAIT, MT_HEADER);
    if (mb_copy == NULL)
        return;
#if CONFIG_MACF_NET
    mac_mbuf_label_associate_multicast_encap(m, vifp->v_ifp, mb_copy);
#endif
    /* Leave room in front for a link-level header, then reserve the outer header. */
    mb_copy->m_data += max_linkhdr;
    mb_copy->m_len = sizeof(multicast_encap_iphdr);

    if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
        m_freem(mb_copy);
        return;
    }
    /* Pull up as much as fits in the header mbuf, but no more than the inner packet. */
    i = MHLEN - M_LEADINGSPACE(mb_copy);
    if (i > len)
        i = len;
    mb_copy = m_pullup(mb_copy, i);
    if (mb_copy == NULL)
        return;
    mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr);

    /*
     * fill in the encapsulating IP header.
     */
    ip_copy = mtod(mb_copy, struct ip *);
    *ip_copy = multicast_encap_iphdr;
#if RANDOM_IP_ID
    ip_copy->ip_id = ip_randomid();
#else
    ip_copy->ip_id = htons(ip_id++);
#endif
    ip_copy->ip_len += len;
    ip_copy->ip_src = vifp->v_lcl_addr;
    ip_copy->ip_dst = vifp->v_rmt_addr;

    /*
     * turn the encapsulated IP header back into a valid one.
     * From here on `ip` refers to the inner header inside the copy,
     * not to the caller's packet.
     */
    ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr));
    --ip->ip_ttl;

#if BYTE_ORDER != BIG_ENDIAN
    HTONS(ip->ip_len);
    HTONS(ip->ip_off);
#endif

    /*
     * Checksum the inner header: m_data is advanced past the outer header
     * for the in_cksum() call and restored afterwards.
     */
    ip->ip_sum = 0;
    mb_copy->m_data += sizeof(multicast_encap_iphdr);
    ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2);
    mb_copy->m_data -= sizeof(multicast_encap_iphdr);

    if (vifp->v_rate_limit == 0)
        tbf_send_packet(vifp, mb_copy);
    else
        tbf_control(vifp, mb_copy, ip, ip_copy->ip_len);
}
1566
1567 /*
1568  * De-encapsulate a packet and feed it back through ip input (this
1569  * routine is called whenever IP gets a packet with proto type
1570  * ENCAP_PROTO and a local destination address).
1571  */
/*
 * m      - received packet, starting at the outer IP header.
 * iphlen - passed through to rip_input() when no encapsulating tunnel is
 *          configured; not used otherwise.
 *
 * The packet is either handed to rip_input(), freed (bad or unknown
 * tunnel), or stripped of its outer header and re-injected with
 * proto_inject().  The function is named X_ipip_input when MROUTE_LKM is
 * set.
 */
void
#if MROUTE_LKM
X_ipip_input(struct mbuf *m, int iphlen)
#else
ipip_input(struct mbuf *m, int iphlen)
#endif
{
    struct ifnet *ifp = m->m_pkthdr.rcvif;
    struct ip *ip = mtod(m, struct ip *);
    int hlen = ip->ip_hl << 2;          /* outer header length in bytes */
    struct vif *vifp;

    if (!have_encap_tunnel) {
            rip_input(m, iphlen);
            return;
    }
    /*
     * dump the packet if it's not to a multicast destination or if
     * we don't have an encapsulating tunnel with the source.
     * Note:  This code assumes that the remote site IP address
     * uniquely identifies the tunnel (i.e., that this site has
     * at most one tunnel with the remote site).
     */
    /* The destination tested here is that of the inner header at ip + hlen. */
    if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) {
        ++mrtstat.mrts_bad_tunnel;
        m_freem(m);
        return;
    }
    /*
     * last_encap_src / last_encap_vif cache the most recent lookup; the
     * vif table is searched again only when the outer source changes.
     */
    if (ip->ip_src.s_addr != last_encap_src) {
        struct vif *vife;

        vifp = viftable;
        vife = vifp + numvifs;
        last_encap_src = ip->ip_src.s_addr;
        last_encap_vif = 0;
        /* Stop at the first vif with this remote address; accept it only if it is a non-source-routed tunnel. */
        for ( ; vifp < vife; ++vifp)
            if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) {
                if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT))
                    == VIFF_TUNNEL)
                    last_encap_vif = vifp;
                break;
            }
    }
    if ((vifp = last_encap_vif) == 0) {
        /* No tunnel: clear the cached source so the next packet searches again. */
        last_encap_src = 0;
        mrtstat.mrts_cant_tunnel++; /*XXX*/
        m_freem(m);
        if (mrtdebug)
          log(LOG_DEBUG, "ip_mforward: no tunnel with %lx\n",
                (u_int32_t)ntohl(ip->ip_src.s_addr));
        return;
    }
    ifp = vifp->v_ifp;

    /* Drop any outer IP options, then step over the fixed-size outer header. */
    if (hlen > IP_HDR_LEN)
      ip_stripoptions(m, (struct mbuf *) 0);
    m->m_data += IP_HDR_LEN;
    m->m_len -= IP_HDR_LEN;
    m->m_pkthdr.len -= IP_HDR_LEN;
    /* Make the inner packet appear to have arrived on the tunnel's interface. */
    m->m_pkthdr.rcvif = ifp;

    proto_inject(PF_INET, m);
}
1635
1636 /*
1637  * Token bucket filter module
1638  */
1639
1640 static void
1641 tbf_control(struct vif *vifp, struct mbuf *m, struct ip *ip,
1642             u_int32_t p_len)
1643 {
1644     struct tbf *t = vifp->v_tbf;
1645
1646     if (p_len > MAX_BKT_SIZE) {
1647         /* drop if packet is too large */
1648         mrtstat.mrts_pkt2large++;
1649         m_freem(m);
1650         return;
1651     }
1652
1653     tbf_update_tokens(vifp);
1654
1655     /* if there are enough tokens,
1656      * and the queue is empty,
1657      * send this packet out
1658      */
1659
1660     if (t->tbf_q_len == 0) {
1661         /* queue empty, send packet if enough tokens */
1662         if (p_len <= t->tbf_n_tok) {
1663             t->tbf_n_tok -= p_len;
1664             tbf_send_packet(vifp, m);
1665         } else {
1666             /* queue packet and timeout till later */
1667             tbf_queue(vifp, m);
1668             timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1669         }
1670     } else if (t->tbf_q_len < t->tbf_max_q_len) {
1671         /* finite queue length, so queue pkts and process queue */
1672         tbf_queue(vifp, m);
1673         tbf_process_q(vifp);
1674     } else {
1675         /* queue length too much, try to dq and queue and process */
1676         if (!tbf_dq_sel(vifp, ip)) {
1677             mrtstat.mrts_q_overflow++;
1678             m_freem(m);
1679             return;
1680         } else {
1681             tbf_queue(vifp, m);
1682             tbf_process_q(vifp);
1683         }
1684     }
1685     return;
1686 }
1687
1688 /*
1689  * adds a packet to the queue at the interface
1690  */
1691 static void
1692 tbf_queue(struct vif *vifp, struct mbuf *m)
1693 {
1694     struct tbf *t = vifp->v_tbf;
1695
1696     if (t->tbf_t == NULL) {
1697         /* Queue was empty */
1698         t->tbf_q = m;
1699     } else {
1700         /* Insert at tail */
1701         t->tbf_t->m_act = m;
1702     }
1703
1704     /* Set new tail pointer */
1705     t->tbf_t = m;
1706
1707 #if DIAGNOSTIC
1708     /* Make sure we didn't get fed a bogus mbuf */
1709     if (m->m_act)
1710         panic("tbf_queue: m_act");
1711 #endif
1712     m->m_act = NULL;
1713
1714     t->tbf_q_len++;
1715 }
1716
1717
1718 /*
1719  * processes the queue at the interface
1720  */
1721 static void
1722 tbf_process_q(struct vif *vifp)
1723 {
1724     struct mbuf *m;
1725     int len;
1726     struct tbf *t = vifp->v_tbf;
1727
1728     /* loop through the queue at the interface and send as many packets
1729      * as possible
1730      */
1731     while (t->tbf_q_len > 0) {
1732         m = t->tbf_q;
1733
1734         len = mtod(m, struct ip *)->ip_len;
1735
1736         /* determine if the packet can be sent */
1737         if (len <= t->tbf_n_tok) {
1738             /* if so,
1739              * reduce no of tokens, dequeue the packet,
1740              * send the packet.
1741              */
1742             t->tbf_n_tok -= len;
1743
1744             t->tbf_q = m->m_act;
1745             if (--t->tbf_q_len == 0)
1746                 t->tbf_t = NULL;
1747
1748             m->m_act = NULL;
1749             tbf_send_packet(vifp, m);
1750
1751         } else break;
1752     }
1753 }
1754
1755 static void
1756 tbf_reprocess_q(void *xvifp)
1757 {
1758     struct vif *vifp = xvifp;
1759
1760     if (ip_mrouter == NULL)  {
1761         return;
1762      }
1763
1764     tbf_update_tokens(vifp);
1765
1766     tbf_process_q(vifp);
1767
1768     if (vifp->v_tbf->tbf_q_len)
1769         timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS);
1770 }
1771
1772 /* function that will selectively discard a member of the queue
1773  * based on the precedence value and the priority
1774  */
1775 static int
1776 tbf_dq_sel(struct vif *vifp, struct ip *ip)
1777 {
1778     u_int p;
1779     struct mbuf *m, *last;
1780     struct mbuf **np;
1781     struct tbf *t = vifp->v_tbf;
1782
1783     p = priority(vifp, ip);
1784
1785     np = &t->tbf_q;
1786     last = NULL;
1787     while ((m = *np) != NULL) {
1788         if (p > priority(vifp, mtod(m, struct ip *))) {
1789             *np = m->m_act;
1790             /* If we're removing the last packet, fix the tail pointer */
1791             if (m == t->tbf_t)
1792                 t->tbf_t = last;
1793             m_freem(m);
1794             /* it's impossible for the queue to be empty, but
1795              * we check anyway. */
1796             if (--t->tbf_q_len == 0)
1797                 t->tbf_t = NULL;
1798             mrtstat.mrts_drop_sel++;
1799             return(1);
1800         }
1801         np = &m->m_act;
1802         last = m;
1803     }
1804     return(0);
1805 }
1806
1807 static void
1808 tbf_send_packet(struct vif *vifp, struct mbuf *m)
1809 {
1810     struct ip_moptions imo;
1811     int error;
1812     static struct route ro;
1813
1814     if (vifp->v_flags & VIFF_TUNNEL) {
1815         /* If tunnel options */
1816         ip_output(m, (struct mbuf *)0, &vifp->v_route,
1817                   IP_FORWARDING, (struct ip_moptions *)0, NULL);
1818     } else {
1819         imo.imo_multicast_ifp  = vifp->v_ifp;
1820         imo.imo_multicast_ttl  = mtod(m, struct ip *)->ip_ttl - 1;
1821         imo.imo_multicast_loop = 1;
1822         imo.imo_multicast_vif  = -1;
1823
1824         /*
1825          * Re-entrancy should not be a problem here, because
1826          * the packets that we send out and are looped back at us
1827          * should get rejected because they appear to come from
1828          * the loopback interface, thus preventing looping.
1829          */
1830         error = ip_output(m, (struct mbuf *)0, &ro,
1831                           IP_FORWARDING, &imo, NULL);
1832
1833         if (mrtdebug & DEBUG_XMIT)
1834             log(LOG_DEBUG, "phyint_send on vif %d err %d\n",
1835                 vifp - viftable, error);
1836     }
1837 }
1838
1839 /* determine the current time and then
1840  * the elapsed time (between the last time and time now)
1841  * in milliseconds & update the no. of tokens in the bucket
1842  */
1843 static void
1844 tbf_update_tokens(struct vif *vifp)
1845 {
1846     struct timeval tp;
1847     u_int32_t tm;
1848     struct tbf *t = vifp->v_tbf;
1849
1850     GET_TIME(tp);
1851
1852     TV_DELTA(tp, t->tbf_last_pkt_t, tm);
1853
1854     /*
1855      * This formula is actually
1856      * "time in seconds" * "bytes/second".
1857      *
1858      * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8)
1859      *
1860      * The (1000/1024) was introduced in add_vif to optimize
1861      * this divide into a shift.
1862      */
1863     t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8;
1864     t->tbf_last_pkt_t = tp;
1865
1866     if (t->tbf_n_tok > MAX_BKT_SIZE)
1867         t->tbf_n_tok = MAX_BKT_SIZE;
1868 }
1869
1870 static int
1871 priority(__unused struct vif *vifp, struct ip *ip)
1872 {
1873     int prio;
1874
1875     /* temporary hack; may add general packet classifier some day */
1876
1877     /*
1878      * The UDP port space is divided up into four priority ranges:
1879      * [0, 16384)     : unclassified - lowest priority
1880      * [16384, 32768) : audio - highest priority
1881      * [32768, 49152) : whiteboard - medium priority
1882      * [49152, 65536) : video - low priority
1883      */
1884     if (ip->ip_p == IPPROTO_UDP) {
1885         struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2));
1886         switch (ntohs(udp->uh_dport) & 0xc000) {
1887             case 0x4000:
1888                 prio = 70;
1889                 break;
1890             case 0x8000:
1891                 prio = 60;
1892                 break;
1893             case 0xc000:
1894                 prio = 55;
1895                 break;
1896             default:
1897                 prio = 50;
1898                 break;
1899         }
1900         if (tbfdebug > 1)
1901                 log(LOG_DEBUG, "port %x prio%d\n", ntohs(udp->uh_dport), prio);
1902     } else {
1903             prio = 50;
1904     }
1905     return prio;
1906 }
1907
1908 /*
1909  * End of token bucket filter modifications
1910  */
1911
1912 int
1913 ip_rsvp_vif_init(struct socket *so, struct sockopt *sopt)
1914 {
1915     int error, i;
1916
1917     if (rsvpdebug)
1918         printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n",
1919                so->so_type, so->so_proto->pr_protocol);
1920
1921     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
1922         return EOPNOTSUPP;
1923
1924     /* Check mbuf. */
1925     error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1926     if (error)
1927             return (error);
1928
1929     if (rsvpdebug)
1930         printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n", i, rsvp_on);
1931
1932     /* Check vif. */
1933     if (!legal_vif_num(i)) {
1934         return EADDRNOTAVAIL;
1935     }
1936
1937     /* Check if socket is available. */
1938     if (viftable[i].v_rsvpd != NULL) {
1939         return EADDRINUSE;
1940     }
1941
1942     viftable[i].v_rsvpd = so;
1943     /* This may seem silly, but we need to be sure we don't over-increment
1944      * the RSVP counter, in case something slips up.
1945      */
1946     if (!viftable[i].v_rsvp_on) {
1947         viftable[i].v_rsvp_on = 1;
1948         rsvp_on++;
1949     }
1950
1951     return 0;
1952 }
1953
1954 int
1955 ip_rsvp_vif_done(struct socket *so, struct sockopt *sopt)
1956 {
1957         int error, i;
1958
1959         if (rsvpdebug)
1960                 printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n",
1961                        so->so_type, so->so_proto->pr_protocol);
1962
1963         if (so->so_type != SOCK_RAW ||
1964             so->so_proto->pr_protocol != IPPROTO_RSVP)
1965                 return EOPNOTSUPP;
1966
1967         error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1968         if (error)
1969                 return (error);
1970
1971         /* Check vif. */
1972         if (!legal_vif_num(i)) {
1973                 return EADDRNOTAVAIL;
1974         }
1975
1976         if (rsvpdebug)
1977                 printf("ip_rsvp_vif_done: v_rsvpd = %p so = %p\n",
1978                        viftable[i].v_rsvpd, so);
1979
1980         viftable[i].v_rsvpd = NULL;
1981         /*
1982          * This may seem silly, but we need to be sure we don't over-decrement
1983          * the RSVP counter, in case something slips up.
1984          */
1985         if (viftable[i].v_rsvp_on) {
1986                 viftable[i].v_rsvp_on = 0;
1987                 rsvp_on--;
1988         }
1989
1990         return 0;
1991 }
1992
1993 void
1994 ip_rsvp_force_done(struct socket *so)
1995 {
1996     int vifi;
1997
1998     /* Don't bother if it is not the right type of socket. */
1999     if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP)
2000         return;
2001
2002     /* The socket may be attached to more than one vif...this
2003      * is perfectly legal.
2004      */
2005     for (vifi = 0; vifi < numvifs; vifi++) {
2006         if (viftable[vifi].v_rsvpd == so) {
2007             viftable[vifi].v_rsvpd = NULL;
2008             /* This may seem silly, but we need to be sure we don't
2009              * over-decrement the RSVP counter, in case something slips up.
2010              */
2011             if (viftable[vifi].v_rsvp_on) {
2012                 viftable[vifi].v_rsvp_on = 0;
2013                 rsvp_on--;
2014             }
2015         }
2016     }
2017
2018     return;
2019 }
2020
2021 void
2022 rsvp_input(struct mbuf *m, int iphlen)
2023 {
2024     int vifi;
2025     struct ip *ip = mtod(m, struct ip *);
2026     static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET,
2027                                                                                 0 , {0}, {0,0,0,0,0,0,0,0,} };
2028     struct ifnet *ifp;
2029
2030     if (rsvpdebug)
2031         printf("rsvp_input: rsvp_on %d\n",rsvp_on);
2032
2033     /* Can still get packets with rsvp_on = 0 if there is a local member
2034      * of the group to which the RSVP packet is addressed.  But in this
2035      * case we want to throw the packet away.
2036      */
2037     if (!rsvp_on) {
2038         m_freem(m);
2039         return;
2040     }
2041
2042     if (rsvpdebug)
2043         printf("rsvp_input: check vifs\n");
2044
2045 #if DIAGNOSTIC
2046     if (!(m->m_flags & M_PKTHDR))
2047             panic("rsvp_input no hdr");
2048 #endif
2049
2050     ifp = m->m_pkthdr.rcvif;
2051     /* Find which vif the packet arrived on. */
2052     for (vifi = 0; vifi < numvifs; vifi++)
2053         if (viftable[vifi].v_ifp == ifp)
2054             break;
2055
2056     if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) {
2057         /*
2058          * If the old-style non-vif-associated socket is set,
2059          * then use it.  Otherwise, drop packet since there
2060          * is no specific socket for this vif.
2061          */
2062         if (ip_rsvpd != NULL) {
2063             if (rsvpdebug)
2064                 printf("rsvp_input: Sending packet up old-style socket\n");
2065             rip_input(m, iphlen);  /* xxx */
2066         } else {
2067             if (rsvpdebug && vifi == numvifs)
2068                 printf("rsvp_input: Can't find vif for packet.\n");
2069             else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL)
2070                 printf("rsvp_input: No socket defined for vif %d\n",vifi);
2071             m_freem(m);
2072         }
2073         return;
2074     }
2075     rsvp_src.sin_addr = ip->ip_src;
2076
2077     if (rsvpdebug && m)
2078         printf("rsvp_input: m->m_len = %d, sbspace() = %d\n",
2079                m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv)));
2080
2081     if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) {
2082         if (rsvpdebug)
2083             printf("rsvp_input: Failed to append to socket\n");
2084     } else {
2085         if (rsvpdebug)
2086             printf("rsvp_input: send packet up\n");
2087     }
2088
2089 }
2090
2091 #if MROUTE_LKM
2092 #include <sys/conf.h>
2093 #include <sys/exec.h>
2094 #include <sys/sysent.h>
2095 #include <sys/lkm.h>
2096
2097 MOD_MISC("ip_mroute_mod")
2098
2099 static int
2100 ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd)
2101 {
2102         int i;
2103         struct lkm_misc *args = lkmtp->private.lkm_misc;
2104         int err = 0;
2105
2106         switch(cmd) {
2107                 static int (*old_ip_mrouter_cmd)();
2108                 static int (*old_ip_mrouter_done)();
2109                 static int (*old_ip_mforward)();
2110                 static int (*old_mrt_ioctl)();
2111                 static void (*old_proto4_input)();
2112                 static int (*old_legal_vif_num)();
2113                 extern struct protosw inetsw[];
2114
2115         case LKM_E_LOAD:
2116                 if(lkmexists(lkmtp) || ip_mrtproto)
2117                   return(EEXIST);
2118                 old_ip_mrouter_cmd = ip_mrouter_cmd;
2119                 ip_mrouter_cmd = X_ip_mrouter_cmd;
2120                 old_ip_mrouter_done = ip_mrouter_done;
2121                 ip_mrouter_done = X_ip_mrouter_done;
2122                 old_ip_mforward = ip_mforward;
2123                 ip_mforward = X_ip_mforward;
2124                 old_mrt_ioctl = mrt_ioctl;
2125                 mrt_ioctl = X_mrt_ioctl;
2126               old_proto4_input = ip_protox[ENCAP_PROTO]->pr_input;
2127               ip_protox[ENCAP_PROTO]->pr_input = X_ipip_input;
2128                 old_legal_vif_num = legal_vif_num;
2129                 legal_vif_num = X_legal_vif_num;
2130                 ip_mrtproto = IGMP_DVMRP;
2131
2132                 printf("\nIP multicast routing loaded\n");
2133                 break;
2134
2135         case LKM_E_UNLOAD:
2136                 if (ip_mrouter)
2137                   return EINVAL;
2138
2139                 ip_mrouter_cmd = old_ip_mrouter_cmd;
2140                 ip_mrouter_done = old_ip_mrouter_done;
2141                 ip_mforward = old_ip_mforward;
2142                 mrt_ioctl = old_mrt_ioctl;
2143               ip_protox[ENCAP_PROTO]->pr_input = old_proto4_input;
2144                 legal_vif_num = old_legal_vif_num;
2145                 ip_mrtproto = 0;
2146                 break;
2147
2148         default:
2149                 err = EINVAL;
2150                 break;
2151         }
2152
2153         return(err);
2154 }
2155
2156 int
2157 ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) {
2158         DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle,
2159                  nosys);
2160 }
2161
2162 #endif /* MROUTE_LKM */
2163 #endif /* MROUTING */