apple/xnu.git: bsd/netinet6/mld6.c (blob 11f02db1e2209a8598e75ec4ee1c4b6673450d7b)
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2009 Bruce Simpson.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. The name of the author may not be used to endorse or promote
40 * products derived from this software without specific prior written
41 * permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56 /*
57 * Copyright (c) 1988 Stephen Deering.
58 * Copyright (c) 1992, 1993
59 * The Regents of the University of California. All rights reserved.
60 *
61 * This code is derived from software contributed to Berkeley by
62 * Stephen Deering of Stanford University.
63 *
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
66 * are met:
67 * 1. Redistributions of source code must retain the above copyright
68 * notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 * notice, this list of conditions and the following disclaimer in the
71 * documentation and/or other materials provided with the distribution.
72 * 3. All advertising materials mentioning features or use of this software
73 * must display the following acknowledgement:
74 * This product includes software developed by the University of
75 * California, Berkeley and its contributors.
76 * 4. Neither the name of the University nor the names of its contributors
77 * may be used to endorse or promote products derived from this software
78 * without specific prior written permission.
79 *
80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
83 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * SUCH DAMAGE.
91 *
92 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
93 */
94 /*
95 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
96 * support for mandatory and extensible security protections. This notice
97 * is included in support of clause 2.2 (b) of the Apple Public License,
98 * Version 2.0.
99 */
100
101 #include <sys/cdefs.h>
102
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/mbuf.h>
106 #include <sys/socket.h>
107 #include <sys/protosw.h>
108 #include <sys/sysctl.h>
109 #include <sys/kernel.h>
110 #include <sys/malloc.h>
111 #include <sys/mcache.h>
112
113 #include <kern/zalloc.h>
114
115 #include <net/if.h>
116 #include <net/route.h>
117
118 #include <netinet/in.h>
119 #include <netinet/in_var.h>
120 #include <netinet6/in6_var.h>
121 #include <netinet/ip6.h>
122 #include <netinet6/ip6_var.h>
123 #include <netinet6/scope6_var.h>
124 #include <netinet/icmp6.h>
125 #include <netinet6/mld6.h>
126 #include <netinet6/mld6_var.h>
127
128 /* Lock group and attribute for mld6_mtx */
129 static lck_attr_t *mld_mtx_attr;
130 static lck_grp_t *mld_mtx_grp;
131 static lck_grp_attr_t *mld_mtx_grp_attr;
132
133 /*
134 * Locking and reference counting:
135 *
136 * mld_mtx mainly protects mli_head. In cases where both mld_mtx and
137 * in6_multihead_lock must be held, the former must be acquired first in order
138 * to maintain lock ordering. It is not a requirement that mld_mtx be
139 * acquired first before in6_multihead_lock, but in case both must be acquired
140 * in succession, the correct lock ordering must be followed.
141 *
142 * Instead of walking the if_multiaddrs list at the interface and returning
143 * the ifma_protospec value of a matching entry, we search the global list
144 * of in6_multi records and find it that way; this is done with in6_multihead
145 * lock held. Doing so avoids the race condition issues that many other BSDs
146 * suffer from (therefore in our implementation, ifma_protospec will never be
147 * NULL for as long as the in6_multi is valid.)
148 *
149 * The above creates a requirement for the in6_multi to stay in in6_multihead
150 * list even after the final MLD leave (in MLDv2 mode) until it no longer needs
151 * to be retransmitted (this is not required for MLDv1.)  In order to handle
152 * this, the request and reference counts of the in6_multi are bumped up when
153 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
154 * handler. Each in6_multi holds a reference to the underlying mld_ifinfo.
155 *
156 * Thus, the permitted lock order is:
157 *
158 * mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
159 *
160 * Any may be taken independently, but if any are held at the same time,
161 * the above lock order must be followed.
162 */
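/*
 * Illustrative sketch of the ordering above (not an exhaustive pattern):
 * code that needs both the global MLD lock and a per-interface mld_ifinfo
 * lock acquires them as
 *
 *	MLD_LOCK();
 *	MLI_LOCK(mli);
 *	...
 *	MLI_UNLOCK(mli);
 *	MLD_UNLOCK();
 *
 * and never in the reverse order.
 */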
163 static decl_lck_mtx_data(, mld_mtx);
164
165 SLIST_HEAD(mld_in6m_relhead, in6_multi);
166
167 static void mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
168 static struct mld_ifinfo *mli_alloc(int);
169 static void mli_free(struct mld_ifinfo *);
170 static void mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
171 static void mld_dispatch_packet(struct mbuf *);
172 static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
173 static int mld_handle_state_change(struct in6_multi *,
174 struct mld_ifinfo *);
175 static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
176 const int);
177 #ifdef MLD_DEBUG
178 static const char * mld_rec_type_to_str(const int);
179 #endif
180 static void mld_set_version(struct mld_ifinfo *, const int);
181 static void mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
182 static void mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
183 static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
184 /*const*/ struct mld_hdr *);
185 static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
186 /*const*/ struct mld_hdr *);
187 static void mld_v1_process_group_timer(struct in6_multi *, const int);
188 static void mld_v1_process_querier_timers(struct mld_ifinfo *);
189 static int mld_v1_transmit_report(struct in6_multi *, const int);
190 static void mld_v1_update_group(struct in6_multi *, const int);
191 static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
192 static void mld_v2_dispatch_general_query(struct mld_ifinfo *);
193 static struct mbuf *
194 mld_v2_encap_report(struct ifnet *, struct mbuf *);
195 static int mld_v2_enqueue_filter_change(struct ifqueue *,
196 struct in6_multi *);
197 static int mld_v2_enqueue_group_record(struct ifqueue *,
198 struct in6_multi *, const int, const int, const int,
199 const int);
200 static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
201 struct mbuf *, const int, const int);
202 static int mld_v2_merge_state_changes(struct in6_multi *,
203 struct ifqueue *);
204 static void mld_v2_process_group_timers(struct mld_ifinfo *,
205 struct ifqueue *, struct ifqueue *,
206 struct in6_multi *, const int);
207 static int mld_v2_process_group_query(struct in6_multi *,
208 int, struct mbuf *, const int);
209 static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
210 static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
211
212 /*
213 * Normative references: RFC 2710, RFC 3590, RFC 3810.
214 *
215 * XXX LOR PREVENTION
216 * A special case for IPv6 is the in6_setscope() routine. ip6_output()
217 * will not accept an ifp; it wants an embedded scope ID, unlike
218 * ip_output(), which happily takes the ifp given to it. The embedded
219 * scope ID is only used by MLD to select the outgoing interface.
220 *
221 * As such, we exploit the fact that the scope ID is just the interface
222 * index, and embed it in the IPv6 destination address accordingly.
223 * This is potentially NOT VALID for MLDv1 reports, as they
224 * are always sent to the multicast group itself; as MLDv2
225 * reports are always sent to ff02::16, this is not an issue
226 * when MLDv2 is in use.
227 */
228
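/*
 * KAME-style scope embedding: stash the zone (interface) index in the
 * second 16-bit word of the scoped address, as discussed above.
 */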
229 #define MLD_EMBEDSCOPE(pin6, zoneid) \
230 (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)
231
232 static struct timeval mld_gsrdelay = {10, 0};
233 static LIST_HEAD(, mld_ifinfo) mli_head;
234
235 static int interface_timers_running6;
236 static int state_change_timers_running6;
237 static int current_state_timers_running6;
238
239 static decl_lck_mtx_data(, mld6_mtx);
240
241 #define MLD_LOCK() \
242 lck_mtx_lock(&mld6_mtx)
243 #define MLD_LOCK_ASSERT_HELD() \
244 lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_OWNED)
245 #define MLD_LOCK_ASSERT_NOTHELD() \
246 lck_mtx_assert(&mld6_mtx, LCK_MTX_ASSERT_NOTOWNED)
247 #define MLD_UNLOCK() \
248 lck_mtx_unlock(&mld6_mtx)
249
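/*
 * Detached in6_multi records are collected on a caller-provided SLIST so
 * that their final references can be dropped only after all MLD locks
 * have been released (see the lock-ordering notes above).
 */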
250 #define MLD_ADD_DETACHED_IN6M(_head, _in6m) { \
251 SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle); \
252 }
253
254 #define MLD_REMOVE_DETACHED_IN6M(_head) { \
255 struct in6_multi *_in6m, *_inm_tmp; \
256 SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) { \
257 SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle); \
258 IN6M_REMREF(_in6m); \
259 } \
260 VERIFY(SLIST_EMPTY(_head)); \
261 }
262
263 #define MLI_ZONE_MAX 64 /* maximum elements in zone */
264 #define MLI_ZONE_NAME "mld_ifinfo" /* zone name */
265
266 static unsigned int mli_size; /* size of zone element */
267 static struct zone *mli_zone; /* zone for mld_ifinfo */
268
269 SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */
270
271 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
272 "IPv6 Multicast Listener Discovery");
273 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
274 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
275 &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
276 "Rate limit for MLDv2 Group-and-Source queries in seconds");
277
278 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
279 sysctl_mld_ifinfo, "Per-interface MLDv2 state");
280
281 static int mld_v1enable = 1;
282 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
283 &mld_v1enable, 0, "Enable fallback to MLDv1");
284
285 static int mld_use_allow = 1;
286 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
287 &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
288
289 #ifdef MLD_DEBUG
290 int mld_debug = 0;
291 SYSCTL_INT(_net_inet6_mld, OID_AUTO,
292 debug, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_debug, 0, "");
293 #endif
294 /*
295 * Packed Router Alert option structure declaration.
296 */
297 struct mld_raopt {
298 struct ip6_hbh hbh;
299 struct ip6_opt pad;
300 struct ip6_opt_router ra;
301 } __packed;
302
303 /*
304 * Router Alert hop-by-hop option header.
305 */
306 static struct mld_raopt mld_ra = {
307 .hbh = { 0, 0 },
308 .pad = { .ip6o_type = IP6OPT_PADN, 0 },
309 .ra = {
310 .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
311 .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
312 .ip6or_value = {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
313 (IP6OPT_RTALERT_MLD & 0xFF) }
314 }
315 };
316 static struct ip6_pktopts mld_po;
317
318 /*
319 * Retrieve or set threshold between group-source queries in seconds.
320 */
321 static int
322 sysctl_mld_gsr SYSCTL_HANDLER_ARGS
323 {
324 #pragma unused(arg1, arg2)
325 int error;
326 int i;
327
328 MLD_LOCK();
329
330 i = mld_gsrdelay.tv_sec;
331
332 error = sysctl_handle_int(oidp, &i, 0, req);
333 if (error || !req->newptr)
334 goto out_locked;
335
336 if (i < -1 || i >= 60) {
337 error = EINVAL;
338 goto out_locked;
339 }
340
341 mld_gsrdelay.tv_sec = i;
342
343 out_locked:
344 MLD_UNLOCK();
345 return (error);
346 }
347 /*
348 * Expose struct mld_ifinfo to userland, keyed by ifindex.
349 * For use by ifmcstat(8).
350 *
351 */
352 static int
353 sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
354 {
355 #pragma unused(oidp)
356 int *name;
357 int error;
358 u_int namelen;
359 struct ifnet *ifp;
360 struct mld_ifinfo *mli;
361 struct mld_ifinfo_u mli_u;
362
363 name = (int *)arg1;
364 namelen = arg2;
365
366 if (req->newptr != USER_ADDR_NULL)
367 return (EPERM);
368
369 if (namelen != 1)
370 return (EINVAL);
371
372 MLD_LOCK();
373
374 if (name[0] <= 0 || name[0] > (u_int)if_index) {
375 error = ENOENT;
376 goto out_locked;
377 }
378
379 error = ENOENT;
380
381 ifnet_head_lock_shared();
382 ifp = ifindex2ifnet[name[0]];
383 ifnet_head_done();
384 if (ifp == NULL)
385 goto out_locked;
386
387 bzero(&mli_u, sizeof (mli_u));
388
389 LIST_FOREACH(mli, &mli_head, mli_link) {
390 MLI_LOCK(mli);
391 if (ifp != mli->mli_ifp) {
392 MLI_UNLOCK(mli);
393 continue;
394 }
395
396 mli_u.mli_ifindex = mli->mli_ifp->if_index;
397 mli_u.mli_version = mli->mli_version;
398 mli_u.mli_v1_timer = mli->mli_v1_timer;
399 mli_u.mli_v2_timer = mli->mli_v2_timer;
400 mli_u.mli_flags = mli->mli_flags;
401 mli_u.mli_rv = mli->mli_rv;
402 mli_u.mli_qi = mli->mli_qi;
403 mli_u.mli_qri = mli->mli_qri;
404 mli_u.mli_uri = mli->mli_uri;
405 MLI_UNLOCK(mli);
406
407 error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
408 break;
409 }
410
411 out_locked:
412 MLD_UNLOCK();
413 return (error);
414 }
415
416 /*
417 * Dispatch an entire queue of pending packet chains.
418 *
419 * Must not be called with in6m_lock held.
420 */
421 static void
422 mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
423 {
424 struct mbuf *m;
425
426 if (mli != NULL)
427 MLI_LOCK_ASSERT_HELD(mli);
428
429 for (;;) {
430 IF_DEQUEUE(ifq, m);
431 if (m == NULL)
432 break;
433 MLD_PRINTF(("%s: dispatch %p from %p\n", __func__, m, ifq));
434 if (mli != NULL)
435 MLI_UNLOCK(mli);
436 mld_dispatch_packet(m);
437 if (mli != NULL)
438 MLI_LOCK(mli);
439 if (--limit == 0)
440 break;
441 }
442
443 if (mli != NULL)
444 MLI_LOCK_ASSERT_HELD(mli);
445 }
446
447 /*
448 * Filter outgoing MLD report state by group.
449 *
450 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
451 * and node-local addresses. However, kernel and socket consumers
452 * always embed the KAME scope ID in the address provided, so strip it
453 * when performing comparison.
454 * Note: This is not the same as the *multicast* scope.
455 *
456 * Return zero if the given group is one for which MLD reports
457 * should be suppressed, or non-zero if reports should be issued.
458 */
459 static __inline__ int
460 mld_is_addr_reported(const struct in6_addr *addr)
461 {
462
463 VERIFY(IN6_IS_ADDR_MULTICAST(addr));
464
465 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
466 return (0);
467
468 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
469 struct in6_addr tmp = *addr;
470 in6_clearscope(&tmp);
471 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
472 return (0);
473 }
474
475 return (1);
476 }
477
478 /*
479 * Attach MLD when PF_INET6 is attached to an interface.
480 */
481 struct mld_ifinfo *
482 mld_domifattach(struct ifnet *ifp, int how)
483 {
484 struct mld_ifinfo *mli;
485
486 MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
487 __func__, ifp, ifp->if_name, ifp->if_unit));
488
489 mli = mli_alloc(how);
490 if (mli == NULL)
491 return (NULL);
492
493 MLD_LOCK();
494
495 MLI_LOCK(mli);
496 mli_initvar(mli, ifp, 0);
497 mli->mli_debug |= IFD_ATTACHED;
498 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
499 MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
500 MLI_UNLOCK(mli);
501
502 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
503
504 MLD_UNLOCK();
505
506 MLD_PRINTF(("allocate mld_ifinfo for ifp %p(%s%d)\n",
507 ifp, ifp->if_name, ifp->if_unit));
508
509 return (mli);
510 }
511
512 /*
513 * Attach MLD when PF_INET6 is reattached to an interface. Caller is
514 * expected to have an outstanding reference to the mli.
515 */
516 void
517 mld_domifreattach(struct mld_ifinfo *mli)
518 {
519 struct ifnet *ifp;
520
521 MLD_LOCK();
522
523 MLI_LOCK(mli);
524 VERIFY(!(mli->mli_debug & IFD_ATTACHED));
525 ifp = mli->mli_ifp;
526 VERIFY(ifp != NULL);
527 mli_initvar(mli, ifp, 1);
528 mli->mli_debug |= IFD_ATTACHED;
529 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
530 MLI_UNLOCK(mli);
531
532 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
533
534 MLD_UNLOCK();
535
536 MLD_PRINTF(("reattached mld_ifinfo for ifp %p(%s%d)\n",
537 ifp, ifp->if_name, ifp->if_unit));
538 }
539
540 /*
541 * Hook for domifdetach.
542 */
543 void
544 mld_domifdetach(struct ifnet *ifp)
545 {
546 SLIST_HEAD(, in6_multi) in6m_dthead;
547
548 SLIST_INIT(&in6m_dthead);
549
550 MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
551 __func__, ifp, ifp->if_name, ifp->if_unit));
552
553 MLD_LOCK();
554 mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
555 MLD_UNLOCK();
556
557 /* Now that we've dropped all locks, release detached records */
558 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
559 }
560
561 /*
562 * Called at interface detach time. Note that we only flush all deferred
563 * responses and record releases; all remaining inm records and their source
564 * entries related to this interface are left intact, in order to handle
565 * the reattach case.
566 */
567 static void
568 mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
569 {
570 struct mld_ifinfo *mli, *tmli;
571
572 MLD_LOCK_ASSERT_HELD();
573
574 LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
575 MLI_LOCK(mli);
576 if (mli->mli_ifp == ifp) {
577 /*
578 * Free deferred General Query responses.
579 */
580 IF_DRAIN(&mli->mli_gq);
581 IF_DRAIN(&mli->mli_v1q);
582 mld_flush_relq(mli, in6m_dthead);
583 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
584 mli->mli_debug &= ~IFD_ATTACHED;
585 MLI_UNLOCK(mli);
586
587 LIST_REMOVE(mli, mli_link);
588 MLI_REMREF(mli); /* release mli_head reference */
589 return;
590 }
591 MLI_UNLOCK(mli);
592 }
593 panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp);
594 }
595
596 static void
597 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
598 {
599 MLI_LOCK_ASSERT_HELD(mli);
600
601 mli->mli_ifp = ifp;
602 mli->mli_version = MLD_VERSION_2;
603 mli->mli_flags = 0;
604 mli->mli_rv = MLD_RV_INIT;
605 mli->mli_qi = MLD_QI_INIT;
606 mli->mli_qri = MLD_QRI_INIT;
607 mli->mli_uri = MLD_URI_INIT;
608
609 /* ifnet is not yet attached; no need to hold ifnet lock */
610 if (!(ifp->if_flags & IFF_MULTICAST))
611 mli->mli_flags |= MLIF_SILENT;
612 if (mld_use_allow)
613 mli->mli_flags |= MLIF_USEALLOW;
614 if (!reattach)
615 SLIST_INIT(&mli->mli_relinmhead);
616
617 /*
618 * Responses to general queries are subject to bounds.
619 */
620 mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
621 mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
622 }
623
624 static struct mld_ifinfo *
625 mli_alloc(int how)
626 {
627 struct mld_ifinfo *mli;
628
629 mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
630 if (mli != NULL) {
631 bzero(mli, mli_size);
632 lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
633 mli->mli_debug |= IFD_ALLOC;
634 }
635 return (mli);
636 }
637
638 static void
639 mli_free(struct mld_ifinfo *mli)
640 {
641 MLI_LOCK(mli);
642 if (mli->mli_debug & IFD_ATTACHED) {
643 panic("%s: attached mli=%p is being freed", __func__, mli);
644 /* NOTREACHED */
645 } else if (mli->mli_ifp != NULL) {
646 panic("%s: ifp not NULL for mli=%p", __func__, mli);
647 /* NOTREACHED */
648 } else if (!(mli->mli_debug & IFD_ALLOC)) {
649 panic("%s: mli %p cannot be freed", __func__, mli);
650 /* NOTREACHED */
651 } else if (mli->mli_refcnt != 0) {
652 panic("%s: non-zero refcnt mli=%p", __func__, mli);
653 /* NOTREACHED */
654 }
655 mli->mli_debug &= ~IFD_ALLOC;
656 MLI_UNLOCK(mli);
657
658 lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
659 zfree(mli_zone, mli);
660 }
661
662 void
663 mli_addref(struct mld_ifinfo *mli, int locked)
664 {
665 if (!locked)
666 MLI_LOCK_SPIN(mli);
667 else
668 MLI_LOCK_ASSERT_HELD(mli);
669
670 if (++mli->mli_refcnt == 0) {
671 panic("%s: mli=%p wraparound refcnt", __func__, mli);
672 /* NOTREACHED */
673 }
674 if (!locked)
675 MLI_UNLOCK(mli);
676 }
677
678 void
679 mli_remref(struct mld_ifinfo *mli)
680 {
681 SLIST_HEAD(, in6_multi) in6m_dthead;
682 struct ifnet *ifp;
683
684 MLI_LOCK_SPIN(mli);
685
686 if (mli->mli_refcnt == 0) {
687 panic("%s: mli=%p negative refcnt", __func__, mli);
688 /* NOTREACHED */
689 }
690
691 --mli->mli_refcnt;
692 if (mli->mli_refcnt > 0) {
693 MLI_UNLOCK(mli);
694 return;
695 }
696
697 ifp = mli->mli_ifp;
698 mli->mli_ifp = NULL;
699 IF_DRAIN(&mli->mli_gq);
700 IF_DRAIN(&mli->mli_v1q);
701 SLIST_INIT(&in6m_dthead);
702 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
703 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
704 MLI_UNLOCK(mli);
705
706 /* Now that we've dropped all locks, release detached records */
707 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
708
709 MLD_PRINTF(("%s: freeing mld_ifinfo for ifp %p(%s%d)\n",
710 __func__, ifp, ifp->if_name, ifp->if_unit));
711
712 mli_free(mli);
713 }
714
715 /*
716 * Process a received MLDv1 general or address-specific query.
717 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
718 *
719 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
720 * mld_addr. This is OK as we own the mbuf chain.
721 */
722 static int
723 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
724 /*const*/ struct mld_hdr *mld)
725 {
726 struct mld_ifinfo *mli;
727 struct in6_multi *inm;
728 int is_general_query;
729 uint16_t timer;
730
731 is_general_query = 0;
732
733 if (!mld_v1enable) {
734 MLD_PRINTF(("ignore v1 query %s on ifp %p(%s%d)\n",
735 ip6_sprintf(&mld->mld_addr),
736 ifp, ifp->if_name, ifp->if_unit));
737 return (0);
738 }
739
740 /*
741 * RFC3810 Section 6.2: MLD queries must originate from
742 * a router's link-local address.
743 */
744 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
745 MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
746 ip6_sprintf(&ip6->ip6_src),
747 ifp, ifp->if_name, ifp->if_unit));
748 return (0);
749 }
750
751 /*
752 * Do address field validation upfront before we accept
753 * the query.
754 */
755 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
756 /*
757 * MLDv1 General Query.
758 * If this was not sent to the all-nodes group, ignore it.
759 */
760 struct in6_addr dst;
761
762 dst = ip6->ip6_dst;
763 in6_clearscope(&dst);
764 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
765 return (EINVAL);
766 is_general_query = 1;
767 } else {
768 /*
769 * Embed scope ID of receiving interface in MLD query for
770 * lookup whilst we don't hold other locks.
771 */
772 in6_setscope(&mld->mld_addr, ifp, NULL);
773 }
774
775 /*
776 * Switch to MLDv1 host compatibility mode.
777 */
778 mli = MLD_IFINFO(ifp);
779 VERIFY(mli != NULL);
780
781 MLI_LOCK(mli);
782 mld_set_version(mli, MLD_VERSION_1);
783 MLI_UNLOCK(mli);
784
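/*
 * The MLDv1 Maximum Response Delay is carried in milliseconds (RFC 2710);
 * convert it to slow-timeout ticks, using at least one tick.
 */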
785 timer = (ntohs(mld->mld_maxdelay) * PR_SLOWHZ) / MLD_TIMER_SCALE;
786 if (timer == 0)
787 timer = 1;
788
789 if (is_general_query) {
790 struct in6_multistep step;
791
792 MLD_PRINTF(("process v1 general query on ifp %p(%s%d)\n",
793 ifp, ifp->if_name, ifp->if_unit));
794 /*
795 * For each reporting group joined on this
796 * interface, kick the report timer.
797 */
798 in6_multihead_lock_shared();
799 IN6_FIRST_MULTI(step, inm);
800 while (inm != NULL) {
801 IN6M_LOCK(inm);
802 if (inm->in6m_ifp == ifp)
803 mld_v1_update_group(inm, timer);
804 IN6M_UNLOCK(inm);
805 IN6_NEXT_MULTI(step, inm);
806 }
807 in6_multihead_lock_done();
808 } else {
809 /*
810 * MLDv1 Group-Specific Query.
811 * If this is a group-specific MLDv1 query, we need only
812 * look up the single group to process it.
813 */
814 in6_multihead_lock_shared();
815 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
816 in6_multihead_lock_done();
817
818 if (inm != NULL) {
819 IN6M_LOCK(inm);
820 MLD_PRINTF(("process v1 query %s on ifp %p(%s%d)\n",
821 ip6_sprintf(&mld->mld_addr),
822 ifp, ifp->if_name, ifp->if_unit));
823 mld_v1_update_group(inm, timer);
824 IN6M_UNLOCK(inm);
825 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
826 }
827 /* XXX Clear embedded scope ID as userland won't expect it. */
828 in6_clearscope(&mld->mld_addr);
829 }
830
831 return (0);
832 }
833
834 /*
835 * Update the report timer on a group in response to an MLDv1 query.
836 *
837 * If we are becoming the reporting member for this group, start the timer.
838 * If we already are the reporting member for this group, and timer is
839 * below the threshold, reset it.
840 *
841 * We may be updating the group for the first time since we switched
842 * to MLDv2. If we are, then we must clear any recorded source lists,
843 * and transition to REPORTING state; the group timer is overloaded
844 * for group and group-source query responses.
845 *
846 * Unlike MLDv2, the delay per group should be jittered
847 * to avoid bursts of MLDv1 reports.
848 */
849 static void
850 mld_v1_update_group(struct in6_multi *inm, const int timer)
851 {
852 IN6M_LOCK_ASSERT_HELD(inm);
853
854 MLD_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
855 ip6_sprintf(&inm->in6m_addr),
856 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, timer));
857
858 switch (inm->in6m_state) {
859 case MLD_NOT_MEMBER:
860 case MLD_SILENT_MEMBER:
861 break;
862 case MLD_REPORTING_MEMBER:
863 if (inm->in6m_timer != 0 &&
864 inm->in6m_timer <= timer) {
865 MLD_PRINTF(("%s: REPORTING and timer running, "
866 "skipping.\n", __func__));
867 break;
868 }
869 /* FALLTHROUGH */
870 case MLD_SG_QUERY_PENDING_MEMBER:
871 case MLD_G_QUERY_PENDING_MEMBER:
872 case MLD_IDLE_MEMBER:
873 case MLD_LAZY_MEMBER:
874 case MLD_AWAKENING_MEMBER:
875 MLD_PRINTF(("%s: ->REPORTING\n", __func__));
876 inm->in6m_state = MLD_REPORTING_MEMBER;
877 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
878 current_state_timers_running6 = 1;
879 break;
880 case MLD_SLEEPING_MEMBER:
881 MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
882 inm->in6m_state = MLD_AWAKENING_MEMBER;
883 break;
884 case MLD_LEAVING_MEMBER:
885 break;
886 }
887 }
888
889 /*
890 * Process a received MLDv2 general, group-specific or
891 * group-and-source-specific query.
892 *
893 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
894 *
895 * Return 0 if successful, otherwise an appropriate error code is returned.
896 */
897 static int
898 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
899 struct mbuf *m, const int off, const int icmp6len)
900 {
901 struct mld_ifinfo *mli;
902 struct mldv2_query *mld;
903 struct in6_multi *inm;
904 uint32_t maxdelay, nsrc, qqi;
905 int is_general_query;
906 uint16_t timer;
907 uint8_t qrv;
908
909 is_general_query = 0;
910
911 /*
912 * RFC3810 Section 6.2: MLD queries must originate from
913 * a router's link-local address.
914 */
915 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
916 MLD_PRINTF(("ignore v2 query src %s on ifp %p(%s%d)\n",
917 ip6_sprintf(&ip6->ip6_src),
918 ifp, ifp->if_name, ifp->if_unit));
919 return (0);
920 }
921
922 MLD_PRINTF(("input v2 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
923 ifp->if_unit));
924
925 mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
926
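/*
 * Maximum Response Codes of 32768 or more are encoded in the
 * mantissa/exponent (floating point) form of RFC 3810 Section 5.1.3.
 */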
927 maxdelay = ntohs(mld->mld_maxdelay); /* in milliseconds */
928 if (maxdelay >= 32768) {
929 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
930 (MLD_MRC_EXP(maxdelay) + 3);
931 }
932 timer = (maxdelay * PR_SLOWHZ) / MLD_TIMER_SCALE;
933 if (timer == 0)
934 timer = 1;
935
936 qrv = MLD_QRV(mld->mld_misc);
937 if (qrv < 2) {
938 MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
939 qrv, MLD_RV_INIT));
940 qrv = MLD_RV_INIT;
941 }
942
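/*
 * Likewise, QQIC values of 128 or more use the mantissa/exponent
 * encoding of RFC 3810 Section 5.1.9.
 */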
943 qqi = mld->mld_qqi;
944 if (qqi >= 128) {
945 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
946 (MLD_QQIC_EXP(mld->mld_qqi) + 3);
947 }
948
949 nsrc = ntohs(mld->mld_numsrc);
950 if (nsrc > MLD_MAX_GS_SOURCES)
951 return (EMSGSIZE);
952 if (icmp6len < sizeof(struct mldv2_query) +
953 (nsrc * sizeof(struct in6_addr)))
954 return (EMSGSIZE);
955
956 /*
957 * Do further input validation upfront to avoid resetting timers
958 * should we need to discard this query.
959 */
960 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
961 /*
962 * General Queries SHOULD be directed to ff02::1.
963 * A general query with a source list has undefined
964 * behaviour; discard it.
965 */
966 struct in6_addr dst;
967
968 dst = ip6->ip6_dst;
969 in6_clearscope(&dst);
970 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
971 nsrc > 0)
972 return (EINVAL);
973 is_general_query = 1;
974 } else {
975 /*
976 * Embed scope ID of receiving interface in MLD query for
977 * lookup whilst we don't hold other locks (due to KAME
978 * locking lameness). We own this mbuf chain just now.
979 */
980 in6_setscope(&mld->mld_addr, ifp, NULL);
981 }
982
983 mli = MLD_IFINFO(ifp);
984 VERIFY(mli != NULL);
985
986 MLI_LOCK(mli);
987 /*
988 * Discard the v2 query if we're in Compatibility Mode.
989 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
990 * until the Old Version Querier Present timer expires.
991 */
992 if (mli->mli_version != MLD_VERSION_2) {
993 MLI_UNLOCK(mli);
994 return (0);
995 }
996
997 mld_set_version(mli, MLD_VERSION_2);
998 mli->mli_rv = qrv;
999 mli->mli_qi = qqi;
1000 mli->mli_qri = maxdelay;
1001
1002 MLD_PRINTF(("%s: qrv %d qi %d maxdelay %d\n", __func__, qrv, qqi,
1003 maxdelay));
1004
1005 if (is_general_query) {
1006 /*
1007 * MLDv2 General Query.
1008 *
1009 * Schedule a current-state report on this ifp for
1010 * all groups, possibly containing source lists.
1011 *
1012 * If there is a pending General Query response
1013 * scheduled earlier than the selected delay, do
1014 * not schedule any other reports.
1015 * Otherwise, reset the interface timer.
1016 */
1017 MLD_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
1018 ifp, ifp->if_name, ifp->if_unit));
1019 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1020 mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1021 interface_timers_running6 = 1;
1022 }
1023 MLI_UNLOCK(mli);
1024 } else {
1025 MLI_UNLOCK(mli);
1026 /*
1027 * MLDv2 Group-specific or Group-and-source-specific Query.
1028 *
1029 * Group-source-specific queries are throttled on
1030 * a per-group basis to defeat denial-of-service attempts.
1031 * Queries for groups we are not a member of on this
1032 * link are simply ignored.
1033 */
1034 in6_multihead_lock_shared();
1035 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1036 in6_multihead_lock_done();
1037 if (inm == NULL)
1038 return (0);
1039
1040 IN6M_LOCK(inm);
1041 #ifndef __APPLE__
1042 /* TODO: need ratecheck equivalent */
1043 if (nsrc > 0) {
1044 if (!ratecheck(&inm->in6m_lastgsrtv,
1045 &mld_gsrdelay)) {
1046 MLD_PRINTF(("%s: GS query throttled.\n",
1047 __func__));
1048 IN6M_UNLOCK(inm);
1049 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1050 return (0);
1051 }
1052 }
1053 #endif
1054 MLD_PRINTF(("process v2 group query on ifp %p(%s%d)\n",
1055 ifp, ifp->if_name, ifp->if_unit));
1056 /*
1057 * If there is a pending General Query response
1058 * scheduled sooner than the selected delay, no
1059 * further report need be scheduled.
1060 * Otherwise, prepare to respond to the
1061 * group-specific or group-and-source query.
1062 */
1063 MLI_LOCK(mli);
1064 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1065 MLI_UNLOCK(mli);
1066 mld_v2_process_group_query(inm, timer, m, off);
1067 } else {
1068 MLI_UNLOCK(mli);
1069 }
1070 IN6M_UNLOCK(inm);
1071 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1072 /* XXX Clear embedded scope ID as userland won't expect it. */
1073 in6_clearscope(&mld->mld_addr);
1074 }
1075
1076 return (0);
1077 }
1078
1079 /*
1080 * Process a received MLDv2 group-specific or group-and-source-specific
1081 * query.
1082 * Return <0 if any error occurred. Currently this is ignored.
1083 */
1084 static int
1085 mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1086 const int off)
1087 {
1088 struct mldv2_query *mld;
1089 int retval;
1090 uint16_t nsrc;
1091
1092 IN6M_LOCK_ASSERT_HELD(inm);
1093
1094 retval = 0;
1095 mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1096
1097 switch (inm->in6m_state) {
1098 case MLD_NOT_MEMBER:
1099 case MLD_SILENT_MEMBER:
1100 case MLD_SLEEPING_MEMBER:
1101 case MLD_LAZY_MEMBER:
1102 case MLD_AWAKENING_MEMBER:
1103 case MLD_IDLE_MEMBER:
1104 case MLD_LEAVING_MEMBER:
1105 return (retval);
1106 break;
1107 case MLD_REPORTING_MEMBER:
1108 case MLD_G_QUERY_PENDING_MEMBER:
1109 case MLD_SG_QUERY_PENDING_MEMBER:
1110 break;
1111 }
1112
1113 nsrc = ntohs(mld->mld_numsrc);
1114
1115 /*
1116 * Deal with group-specific queries upfront.
1117 * If any group query is already pending, purge any recorded
1118 * source-list state if it exists, and schedule a query response
1119 * for this group-specific query.
1120 */
1121 if (nsrc == 0) {
1122 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1123 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1124 in6m_clear_recorded(inm);
1125 timer = min(inm->in6m_timer, timer);
1126 }
1127 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1128 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1129 current_state_timers_running6 = 1;
1130 return (retval);
1131 }
1132
1133 /*
1134 * Deal with the case where a group-and-source-specific query has
1135 * been received but a group-specific query is already pending.
1136 */
1137 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1138 timer = min(inm->in6m_timer, timer);
1139 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1140 current_state_timers_running6 = 1;
1141 return (retval);
1142 }
1143
1144 /*
1145 * Finally, deal with the case where a group-and-source-specific
1146 * query has been received, where a response to a previous g-s-r
1147 * query exists, or none exists.
1148 * In this case, we need to parse the source-list which the Querier
1149 * has provided us with and check if we have any source list filter
1150 * entries at T1 for these sources. If we do not, there is no need to
1151 * schedule a report and the query may be dropped.
1152 * If we do, we must record them and schedule a current-state
1153 * report for those sources.
1154 */
1155 if (inm->in6m_nsrc > 0) {
1156 struct mbuf *m;
1157 uint8_t *sp;
1158 int i, nrecorded;
1159 int soff;
1160
1161 m = m0;
1162 soff = off + sizeof(struct mldv2_query);
1163 nrecorded = 0;
1164 for (i = 0; i < nsrc; i++) {
1165 sp = mtod(m, uint8_t *) + soff;
1166 retval = in6m_record_source(inm,
1167 (const struct in6_addr *)sp);
1168 if (retval < 0)
1169 break;
1170 nrecorded += retval;
1171 soff += sizeof(struct in6_addr);
1172 if (soff >= m->m_len) {
1173 soff = soff - m->m_len;
1174 m = m->m_next;
1175 if (m == NULL)
1176 break;
1177 }
1178 }
1179 if (nrecorded > 0) {
1180 MLD_PRINTF(( "%s: schedule response to SG query\n",
1181 __func__));
1182 inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1183 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1184 current_state_timers_running6 = 1;
1185 }
1186 }
1187
1188 return (retval);
1189 }
1190
1191 /*
1192 * Process a received MLDv1 host membership report.
1193 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1194 *
1195 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1196 * mld_addr. This is OK as we own the mbuf chain.
1197 */
1198 static int
1199 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
1200 /*const*/ struct mld_hdr *mld)
1201 {
1202 struct in6_addr src, dst;
1203 struct in6_ifaddr *ia;
1204 struct in6_multi *inm;
1205
1206 if (!mld_v1enable) {
1207 MLD_PRINTF(("ignore v1 report %s on ifp %p(%s%d)\n",
1208 ip6_sprintf(&mld->mld_addr),
1209 ifp, ifp->if_name, ifp->if_unit));
1210 return (0);
1211 }
1212
1213 if (ifp->if_flags & IFF_LOOPBACK)
1214 return (0);
1215
1216 /*
1217 * MLDv1 reports must originate from a host's link-local address,
1218 * or the unspecified address (when booting).
1219 */
1220 src = ip6->ip6_src;
1221 in6_clearscope(&src);
1222 if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1223 MLD_PRINTF(("ignore v1 report src %s on ifp %p(%s%d)\n",
1224 ip6_sprintf(&ip6->ip6_src),
1225 ifp, ifp->if_name, ifp->if_unit));
1226 return (EINVAL);
1227 }
1228
1229 /*
1230 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1231 * group, and must be directed to the group itself.
1232 */
1233 dst = ip6->ip6_dst;
1234 in6_clearscope(&dst);
1235 if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1236 !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1237 MLD_PRINTF(("ignore v1 report dst %s on ifp %p(%s%d)\n",
1238 ip6_sprintf(&ip6->ip6_dst),
1239 ifp, ifp->if_name, ifp->if_unit));
1240 return (EINVAL);
1241 }
1242
1243 /*
1244 * Make sure we don't hear our own membership report, as fast
1245 * leave requires knowing that we are the only member of a
1246 * group. Assume we used the link-local address if available,
1247 * otherwise look for ::.
1248 *
1249 * XXX Note that scope ID comparison is needed for the address
1250 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1251 * performed for the on-wire address.
1252 */
1253 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1254 if (ia != NULL) {
1255 IFA_LOCK(&ia->ia_ifa);
1256 if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) {
1257 IFA_UNLOCK(&ia->ia_ifa);
1258 IFA_REMREF(&ia->ia_ifa);
1259 return (0);
1260 }
1261 IFA_UNLOCK(&ia->ia_ifa);
1262 IFA_REMREF(&ia->ia_ifa);
1263 } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1264 return (0);
1265 }
1266
1267 MLD_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
1268 ip6_sprintf(&mld->mld_addr), ifp, ifp->if_name, ifp->if_unit));
1269
1270 /*
1271 * Embed scope ID of receiving interface in MLD query for lookup
1272 * whilst we don't hold other locks (due to KAME locking lameness).
1273 */
1274 if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1275 in6_setscope(&mld->mld_addr, ifp, NULL);
1276
1277 /*
1278 * MLDv1 report suppression.
1279 * If we are a member of this group, and our membership should be
1280 * reported, and our group timer is pending or about to be reset,
1281 * stop our group timer by transitioning to the 'lazy' state.
1282 */
1283 in6_multihead_lock_shared();
1284 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1285 in6_multihead_lock_done();
1286
1287 if (inm != NULL) {
1288 struct mld_ifinfo *mli;
1289
1290 IN6M_LOCK(inm);
1291 mli = inm->in6m_mli;
1292 VERIFY(mli != NULL);
1293
1294 MLI_LOCK(mli);
1295 /*
1296 * If we are in MLDv2 host mode, do not allow the
1297 * other host's MLDv1 report to suppress our reports.
1298 */
1299 if (mli->mli_version == MLD_VERSION_2) {
1300 MLI_UNLOCK(mli);
1301 IN6M_UNLOCK(inm);
1302 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1303 goto out;
1304 }
1305 MLI_UNLOCK(mli);
1306
1307 inm->in6m_timer = 0;
1308
1309 switch (inm->in6m_state) {
1310 case MLD_NOT_MEMBER:
1311 case MLD_SILENT_MEMBER:
1312 case MLD_SLEEPING_MEMBER:
1313 break;
1314 case MLD_REPORTING_MEMBER:
1315 case MLD_IDLE_MEMBER:
1316 case MLD_AWAKENING_MEMBER:
1317 MLD_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1318 ip6_sprintf(&mld->mld_addr),
1319 ifp, ifp->if_name, ifp->if_unit));
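/* FALLTHROUGH */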
1320 case MLD_LAZY_MEMBER:
1321 inm->in6m_state = MLD_LAZY_MEMBER;
1322 break;
1323 case MLD_G_QUERY_PENDING_MEMBER:
1324 case MLD_SG_QUERY_PENDING_MEMBER:
1325 case MLD_LEAVING_MEMBER:
1326 break;
1327 }
1328 IN6M_UNLOCK(inm);
1329 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1330 }
1331
1332 out:
1333 /* XXX Clear embedded scope ID as userland won't expect it. */
1334 in6_clearscope(&mld->mld_addr);
1335
1336 return (0);
1337 }
1338
1339 /*
1340 * MLD input path.
1341 *
1342 * Assume query messages which fit in a single ICMPv6 message header
1343 * have been pulled up.
1344 * Assume that userland will want to see the message, even if it
1345 * otherwise fails kernel input validation; do not free it.
1346 * Pullup may however free the mbuf chain m if it fails.
1347 *
1348 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1349 */
1350 int
1351 mld_input(struct mbuf *m, int off, int icmp6len)
1352 {
1353 struct ifnet *ifp;
1354 struct ip6_hdr *ip6;
1355 struct mld_hdr *mld;
1356 int mldlen;
1357
1358 MLD_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
1359
1360 ifp = m->m_pkthdr.rcvif;
1361
1362 ip6 = mtod(m, struct ip6_hdr *);
1363
1364 /* Pullup to appropriate size. */
1365 mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1366 if (mld->mld_type == MLD_LISTENER_QUERY &&
1367 icmp6len >= sizeof(struct mldv2_query)) {
1368 mldlen = sizeof(struct mldv2_query);
1369 } else {
1370 mldlen = sizeof(struct mld_hdr);
1371 }
1372 IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1373 if (mld == NULL) {
1374 icmp6stat.icp6s_badlen++;
1375 return (IPPROTO_DONE);
1376 }
1377
1378 /*
1379 * Userland needs to see all of this traffic for implementing
1380 * the endpoint discovery portion of multicast routing.
1381 */
1382 switch (mld->mld_type) {
1383 case MLD_LISTENER_QUERY:
1384 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1385 if (icmp6len == sizeof(struct mld_hdr)) {
1386 if (mld_v1_input_query(ifp, ip6, mld) != 0)
1387 return (0);
1388 } else if (icmp6len >= sizeof(struct mldv2_query)) {
1389 if (mld_v2_input_query(ifp, ip6, m, off,
1390 icmp6len) != 0)
1391 return (0);
1392 }
1393 break;
1394 case MLD_LISTENER_REPORT:
1395 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1396 if (mld_v1_input_report(ifp, ip6, mld) != 0)
1397 return (0);
1398 break;
1399 case MLDV2_LISTENER_REPORT:
1400 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1401 break;
1402 case MLD_LISTENER_DONE:
1403 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1404 break;
1405 default:
1406 break;
1407 }
1408
1409 return (0);
1410 }
1411
1412 /*
1413 * MLD6 slowtimo handler.
1414 * Combines both the slow and fast timers into one. We lose some responsiveness,
1415 * but it allows the system to avoid having a pr_fasttimo, thus allowing for power savings.
1416 */
1417 void
1418 mld_slowtimo(void)
1419 {
1420 struct ifqueue scq; /* State-change packets */
1421 struct ifqueue qrq; /* Query response packets */
1422 struct ifnet *ifp;
1423 struct mld_ifinfo *mli;
1424 struct in6_multi *inm;
1425 int uri_fasthz = 0;
1426 SLIST_HEAD(, in6_multi) in6m_dthead;
1427
1428 SLIST_INIT(&in6m_dthead);
1429
1430 MLD_LOCK();
1431
1432 LIST_FOREACH(mli, &mli_head, mli_link) {
1433 MLI_LOCK(mli);
1434 mld_v1_process_querier_timers(mli);
1435 MLI_UNLOCK(mli);
1436 }
1437
1438 /*
1439 * Quick check to see if any work needs to be done, in order to
1440 * minimize the overhead of fasttimo processing.
1441 */
1442 if (!current_state_timers_running6 &&
1443 !interface_timers_running6 &&
1444 !state_change_timers_running6) {
1445 MLD_UNLOCK();
1446 return;
1447 }
1448
1449 /*
1450 * MLDv2 General Query response timer processing.
1451 */
1452 if (interface_timers_running6) {
1453 #if 0
1454 MLD_PRINTF(("%s: interface timers running\n", __func__));
1455 #endif
1456 interface_timers_running6 = 0;
1457 LIST_FOREACH(mli, &mli_head, mli_link) {
1458 MLI_LOCK(mli);
1459 if (mli->mli_v2_timer == 0) {
1460 /* Do nothing. */
1461 } else if (--mli->mli_v2_timer == 0) {
1462 mld_v2_dispatch_general_query(mli);
1463 } else {
1464 interface_timers_running6 = 1;
1465 }
1466 MLI_UNLOCK(mli);
1467 }
1468 }
1469
1470 if (!current_state_timers_running6 &&
1471 !state_change_timers_running6)
1472 goto out_locked;
1473
1474 current_state_timers_running6 = 0;
1475 state_change_timers_running6 = 0;
1476 #if 0
1477 MLD_PRINTF(("%s: state change timers running\n", __func__));
1478 #endif
1479
1480 memset(&qrq, 0, sizeof(struct ifqueue));
1481 qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1482
1483 memset(&scq, 0, sizeof(struct ifqueue));
1484 scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1485
1486 /*
1487 * MLD host report and state-change timer processing.
1488 * Note: Processing a v2 group timer may remove a node.
1489 */
1490 LIST_FOREACH(mli, &mli_head, mli_link) {
1491 struct in6_multistep step;
1492
1493 MLI_LOCK(mli);
1494 ifp = mli->mli_ifp;
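/*
 * Unsolicited Report Interval for this link, converted to slow-timeout
 * ticks and jittered; used below to pace state-change retransmissions.
 */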
1495 uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_SLOWHZ);
1496 MLI_UNLOCK(mli);
1497
1498 in6_multihead_lock_shared();
1499 IN6_FIRST_MULTI(step, inm);
1500 while (inm != NULL) {
1501 IN6M_LOCK(inm);
1502 if (inm->in6m_ifp != ifp)
1503 goto next;
1504
1505 MLI_LOCK(mli);
1506 switch (mli->mli_version) {
1507 case MLD_VERSION_1:
1508 mld_v1_process_group_timer(inm,
1509 mli->mli_version);
1510 break;
1511 case MLD_VERSION_2:
1512 mld_v2_process_group_timers(mli, &qrq,
1513 &scq, inm, uri_fasthz);
1514 break;
1515 }
1516 MLI_UNLOCK(mli);
1517 next:
1518 IN6M_UNLOCK(inm);
1519 IN6_NEXT_MULTI(step, inm);
1520 }
1521 in6_multihead_lock_done();
1522
1523 MLI_LOCK(mli);
1524 if (mli->mli_version == MLD_VERSION_1) {
1525 mld_dispatch_queue(mli, &mli->mli_v1q, 0);
1526 } else if (mli->mli_version == MLD_VERSION_2) {
1527 MLI_UNLOCK(mli);
1528 mld_dispatch_queue(NULL, &qrq, 0);
1529 mld_dispatch_queue(NULL, &scq, 0);
1530 VERIFY(qrq.ifq_len == 0);
1531 VERIFY(scq.ifq_len == 0);
1532 MLI_LOCK(mli);
1533 }
1534 /*
1535 * In case there are still any pending membership reports
1536 * which didn't get drained at version change time.
1537 */
1538 IF_DRAIN(&mli->mli_v1q);
1539 /*
1540 * Release all deferred inm records, and drain any locally
1541 * enqueued packets; do it even if the current MLD version
1542 * for the link is no longer MLDv2, in order to handle the
1543 * version change case.
1544 */
1545 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1546 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1547 MLI_UNLOCK(mli);
1548
1549 IF_DRAIN(&qrq);
1550 IF_DRAIN(&scq);
1551 }
1552
1553 out_locked:
1554 MLD_UNLOCK();
1555
1556 /* Now that we've dropped all locks, release detached records */
1557 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1558 }
1559
1560 /*
1561 * Free the in6_multi reference(s) for this MLD lifecycle.
1562 *
1563 * Caller must be holding mli_lock.
1564 */
1565 static void
1566 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1567 {
1568 struct in6_multi *inm;
1569
1570 again:
1571 MLI_LOCK_ASSERT_HELD(mli);
1572 inm = SLIST_FIRST(&mli->mli_relinmhead);
1573 if (inm != NULL) {
1574 int lastref;
1575
1576 SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1577 MLI_UNLOCK(mli);
1578
1579 in6_multihead_lock_exclusive();
1580 IN6M_LOCK(inm);
1581 VERIFY(inm->in6m_nrelecnt != 0);
1582 inm->in6m_nrelecnt--;
1583 lastref = in6_multi_detach(inm);
1584 VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1585 inm->in6m_reqcnt == 0));
1586 IN6M_UNLOCK(inm);
1587 in6_multihead_lock_done();
1588 /* from mli_relinmhead */
1589 IN6M_REMREF(inm);
1590 /* from in6_multihead_list */
1591 if (lastref) {
1592 /*
1593 * Defer releasing our final reference, as we
1594 * are holding the MLD lock at this point, and
1595 * we could end up with locking issues later on
1596 * (while issuing SIOCDELMULTI) when this is the
1597 * final reference count. Let the caller do it
1598 * when it is safe.
1599 */
1600 MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1601 }
1602 MLI_LOCK(mli);
1603 goto again;
1604 }
1605 }
1606
1607 /*
1608 * Update host report group timer.
1609 * Will update the global pending timer flags.
1610 */
1611 static void
1612 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1613 {
1614 #pragma unused(mld_version)
1615 int report_timer_expired;
1616
1617 IN6M_LOCK_ASSERT_HELD(inm);
1618 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1619
1620 if (inm->in6m_timer == 0) {
1621 report_timer_expired = 0;
1622 } else if (--inm->in6m_timer == 0) {
1623 report_timer_expired = 1;
1624 } else {
1625 current_state_timers_running6 = 1;
1626 return;
1627 }
1628
1629 switch (inm->in6m_state) {
1630 case MLD_NOT_MEMBER:
1631 case MLD_SILENT_MEMBER:
1632 case MLD_IDLE_MEMBER:
1633 case MLD_LAZY_MEMBER:
1634 case MLD_SLEEPING_MEMBER:
1635 case MLD_AWAKENING_MEMBER:
1636 break;
1637 case MLD_REPORTING_MEMBER:
1638 if (report_timer_expired) {
1639 inm->in6m_state = MLD_IDLE_MEMBER;
1640 (void) mld_v1_transmit_report(inm,
1641 MLD_LISTENER_REPORT);
1642 IN6M_LOCK_ASSERT_HELD(inm);
1643 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1644 }
1645 break;
1646 case MLD_G_QUERY_PENDING_MEMBER:
1647 case MLD_SG_QUERY_PENDING_MEMBER:
1648 case MLD_LEAVING_MEMBER:
1649 break;
1650 }
1651 }
1652
1653 /*
1654 * Update a group's timers for MLDv2.
1655 * Will update the global pending timer flags.
1656 * Note: Unlocked read from mli.
1657 */
1658 static void
1659 mld_v2_process_group_timers(struct mld_ifinfo *mli,
1660 struct ifqueue *qrq, struct ifqueue *scq,
1661 struct in6_multi *inm, const int uri_fasthz)
1662 {
1663 int query_response_timer_expired;
1664 int state_change_retransmit_timer_expired;
1665
1666 IN6M_LOCK_ASSERT_HELD(inm);
1667 MLI_LOCK_ASSERT_HELD(mli);
1668 VERIFY(mli == inm->in6m_mli);
1669
1670 query_response_timer_expired = 0;
1671 state_change_retransmit_timer_expired = 0;
1672
1673 /*
1674 * During a transition from compatibility mode back to MLDv2,
1675 * a group record in REPORTING state may still have its group
1676 * timer active. This is a no-op in this function; it is easier
1677 * to deal with it here than to complicate the slow-timeout path.
1678 */
1679 if (inm->in6m_timer == 0) {
1680 query_response_timer_expired = 0;
1681 } else if (--inm->in6m_timer == 0) {
1682 query_response_timer_expired = 1;
1683 } else {
1684 current_state_timers_running6 = 1;
1685 }
1686
1687 if (inm->in6m_sctimer == 0) {
1688 state_change_retransmit_timer_expired = 0;
1689 } else if (--inm->in6m_sctimer == 0) {
1690 state_change_retransmit_timer_expired = 1;
1691 } else {
1692 state_change_timers_running6 = 1;
1693 }
1694
1695 /* We are in fasttimo, so be quick about it. */
1696 if (!state_change_retransmit_timer_expired &&
1697 !query_response_timer_expired)
1698 return;
1699
1700 switch (inm->in6m_state) {
1701 case MLD_NOT_MEMBER:
1702 case MLD_SILENT_MEMBER:
1703 case MLD_SLEEPING_MEMBER:
1704 case MLD_LAZY_MEMBER:
1705 case MLD_AWAKENING_MEMBER:
1706 case MLD_IDLE_MEMBER:
1707 break;
1708 case MLD_G_QUERY_PENDING_MEMBER:
1709 case MLD_SG_QUERY_PENDING_MEMBER:
1710 /*
1711 * Respond to a previously pending Group-Specific
1712 * or Group-and-Source-Specific query by enqueueing
1713 * the appropriate Current-State report for
1714 * immediate transmission.
1715 */
1716 if (query_response_timer_expired) {
1717 int retval;
1718
1719 retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1720 (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
1721 0);
1722 MLD_PRINTF(("%s: enqueue record = %d\n",
1723 __func__, retval));
1724 inm->in6m_state = MLD_REPORTING_MEMBER;
1725 in6m_clear_recorded(inm);
1726 }
1727 /* FALLTHROUGH */
1728 case MLD_REPORTING_MEMBER:
1729 case MLD_LEAVING_MEMBER:
1730 if (state_change_retransmit_timer_expired) {
1731 /*
1732 * State-change retransmission timer fired.
1733 * If there are any further pending retransmissions,
1734 * set the global pending state-change flag, and
1735 * reset the timer.
1736 */
1737 if (--inm->in6m_scrv > 0) {
1738 inm->in6m_sctimer = uri_fasthz;
1739 state_change_timers_running6 = 1;
1740 }
1741 /*
1742 * Retransmit the previously computed state-change
1743 * report. If there are no further pending
1744 * retransmissions, the mbuf queue will be consumed.
1745 * Update T0 state to T1 as we have now sent
1746 * a state-change.
1747 */
1748 (void) mld_v2_merge_state_changes(inm, scq);
1749
1750 in6m_commit(inm);
1751 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
1752 ip6_sprintf(&inm->in6m_addr),
1753 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
1754
1755 /*
1756 * If we are leaving the group for good, make sure
1757 * we release MLD's reference to it.
1758 * This release must be deferred using a SLIST,
1759 * as we are called from a loop which traverses
1760 * the in_ifmultiaddr TAILQ.
1761 */
1762 if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1763 inm->in6m_scrv == 0) {
1764 inm->in6m_state = MLD_NOT_MEMBER;
1765 /*
1766 * A reference has already been held in
1767 * mld_final_leave() for this inm, so
1768 * no need to hold another one. We also
1769 * bumped up its request count then, so
1770 * that it stays in in6_multihead. Both
1771 * of them will be released when it is
1772 * dequeued later on.
1773 */
1774 VERIFY(inm->in6m_nrelecnt != 0);
1775 SLIST_INSERT_HEAD(&mli->mli_relinmhead,
1776 inm, in6m_nrele);
1777 }
1778 }
1779 break;
1780 }
1781 }
1782
1783 /*
1784 * Switch to a different version on the given interface,
1785 * as per Section 9.12.
1786 */
1787 static void
1788 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
1789 {
1790 int old_version_timer;
1791
1792 MLI_LOCK_ASSERT_HELD(mli);
1793
1794 MLD_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
1795 mld_version, mli->mli_ifp, mli->mli_ifp->if_name,
1796 mli->mli_ifp->if_unit));
1797
1798 if (mld_version == MLD_VERSION_1) {
1799 /*
1800 * Compute the "Older Version Querier Present" timer as per
1801 * Section 9.12.
1802 */
1803 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1804 old_version_timer *= PR_SLOWHZ;
1805 mli->mli_v1_timer = old_version_timer;
1806 }
1807
1808 if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1809 mli->mli_version = MLD_VERSION_1;
1810 mld_v2_cancel_link_timers(mli);
1811 }
1812
1813 MLI_LOCK_ASSERT_HELD(mli);
1814 }
1815
1816 /*
1817 * Cancel pending MLDv2 timers for the given link and all groups
1818 * joined on it; state-change, general-query, and group-query timers.
1819 */
1820 static void
1821 mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
1822 {
1823 struct ifnet *ifp;
1824 struct in6_multi *inm;
1825 struct in6_multistep step;
1826
1827 MLI_LOCK_ASSERT_HELD(mli);
1828
1829 MLD_PRINTF(("%s: cancel v2 timers on ifp %p(%s%d)\n", __func__,
1830 mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1831
1832 /*
1833 * Fast-track this potentially expensive operation
1834 * by checking all the global 'timer pending' flags.
1835 */
1836 if (!interface_timers_running6 &&
1837 !state_change_timers_running6 &&
1838 !current_state_timers_running6)
1839 return;
1840
1841 mli->mli_v2_timer = 0;
1842 ifp = mli->mli_ifp;
1843 MLI_UNLOCK(mli);
1844
1845 in6_multihead_lock_shared();
1846 IN6_FIRST_MULTI(step, inm);
1847 while (inm != NULL) {
1848 IN6M_LOCK(inm);
1849 if (inm->in6m_ifp != ifp)
1850 goto next;
1851
1852 switch (inm->in6m_state) {
1853 case MLD_NOT_MEMBER:
1854 case MLD_SILENT_MEMBER:
1855 case MLD_IDLE_MEMBER:
1856 case MLD_LAZY_MEMBER:
1857 case MLD_SLEEPING_MEMBER:
1858 case MLD_AWAKENING_MEMBER:
1859 break;
1860 case MLD_LEAVING_MEMBER:
1861 /*
1862 * If we are leaving the group and switching
1863 * version, we need to release the final
1864 * reference held for issuing the INCLUDE {}.
1865 * During mld_final_leave(), we bumped up both the
1866 * request and reference counts. Since we cannot
1867 * call in6_multi_detach() here, defer this task to
1868 * the timer routine.
1869 */
1870 VERIFY(inm->in6m_nrelecnt != 0);
1871 MLI_LOCK(mli);
1872 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
1873 in6m_nrele);
1874 MLI_UNLOCK(mli);
1875 /* FALLTHROUGH */
1876 case MLD_G_QUERY_PENDING_MEMBER:
1877 case MLD_SG_QUERY_PENDING_MEMBER:
1878 in6m_clear_recorded(inm);
1879 /* FALLTHROUGH */
1880 case MLD_REPORTING_MEMBER:
1881 inm->in6m_sctimer = 0;
1882 inm->in6m_timer = 0;
1883 inm->in6m_state = MLD_REPORTING_MEMBER;
1884 /*
1885 * Free any pending MLDv2 state-change records.
1886 */
1887 IF_DRAIN(&inm->in6m_scq);
1888 break;
1889 }
1890 next:
1891 IN6M_UNLOCK(inm);
1892 IN6_NEXT_MULTI(step, inm);
1893 }
1894 in6_multihead_lock_done();
1895
1896 MLI_LOCK(mli);
1897 }
1898
1899 /*
1900 * Update the Older Version Querier Present timers for a link.
1901 * See Section 9.12 of RFC 3810.
1902 */
1903 static void
1904 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
1905 {
1906 MLI_LOCK_ASSERT_HELD(mli);
1907
1908 if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
1909 /*
1910 * MLDv1 Querier Present timer expired; revert to MLDv2.
1911 */
1912 MLD_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
1913 __func__, mli->mli_version, MLD_VERSION_2,
1914 mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1915 mli->mli_version = MLD_VERSION_2;
1916 }
1917 }
1918
1919 /*
1920 * Transmit an MLDv1 report immediately.
1921 */
1922 static int
1923 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
1924 {
1925 struct ifnet *ifp;
1926 struct in6_ifaddr *ia;
1927 struct ip6_hdr *ip6;
1928 struct mbuf *mh, *md;
1929 struct mld_hdr *mld;
1930 int error = 0;
1931
1932 IN6M_LOCK_ASSERT_HELD(in6m);
1933 MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
1934
1935 ifp = in6m->in6m_ifp;
1936 /* ia may be NULL if link-local address is tentative. */
1937 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1938
1939 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
1940 if (mh == NULL) {
1941 if (ia != NULL)
1942 IFA_REMREF(&ia->ia_ifa);
1943 return (ENOMEM);
1944 }
1945 MGET(md, M_DONTWAIT, MT_DATA);
1946 if (md == NULL) {
1947 m_free(mh);
1948 if (ia != NULL)
1949 IFA_REMREF(&ia->ia_ifa);
1950 return (ENOMEM);
1951 }
1952 mh->m_next = md;
1953
1954 /*
1955 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
1956 * that ether_output() does not need to allocate another mbuf
1957 * for the header in the most common case.
1958 */
1959 MH_ALIGN(mh, sizeof(struct ip6_hdr));
1960 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
1961 mh->m_len = sizeof(struct ip6_hdr);
1962
1963 ip6 = mtod(mh, struct ip6_hdr *);
1964 ip6->ip6_flow = 0;
1965 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1966 ip6->ip6_vfc |= IPV6_VERSION;
1967 ip6->ip6_nxt = IPPROTO_ICMPV6;
1968 if (ia != NULL)
1969 IFA_LOCK(&ia->ia_ifa);
1970 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
1971 if (ia != NULL) {
1972 IFA_UNLOCK(&ia->ia_ifa);
1973 IFA_REMREF(&ia->ia_ifa);
1974 ia = NULL;
1975 }
1976 ip6->ip6_dst = in6m->in6m_addr;
1977
1978 md->m_len = sizeof(struct mld_hdr);
1979 mld = mtod(md, struct mld_hdr *);
1980 mld->mld_type = type;
1981 mld->mld_code = 0;
1982 mld->mld_cksum = 0;
1983 mld->mld_maxdelay = 0;
1984 mld->mld_reserved = 0;
1985 mld->mld_addr = in6m->in6m_addr;
1986 in6_clearscope(&mld->mld_addr);
1987 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
1988 sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
1989
1990 mh->m_flags |= M_MLDV1;
1991
1992
1993 /*
1994 * Because at this point we may be holding
1995 * in6_multihead_lock in shared or exclusive mode, we can't call
1996 * mld_dispatch_packet() here since that will eventually call
1997 * ip6_output(), which will try to lock in6_multihead_lock and cause
1998 * a deadlock.
1999 * Instead we defer the work to the mld_slowtimo() thread, thus
2000 * avoiding the need to unlock in6_multihead_lock here.
2001 */
2002 if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2003 MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2004 error = ENOMEM;
2005 m_freem(mh);
2006 } else
2007 IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2008
2009 return (error);
2010 }
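/*
 * For reference, the MLDv1 message assembled above uses the fixed
 * RFC 2710 ICMPv6 layout; this simply restates the fields set by
 * this function and is illustrative, not a new definition:
 *
 *     Type (1 byte)                  MLD_LISTENER_REPORT / _DONE
 *     Code (1 byte)                  0
 *     Checksum (2 bytes)             ICMPv6 checksum over the message
 *     Maximum Response Delay (2)     0 when sent on our own behalf
 *     Reserved (2)                   0
 *     Multicast Address (16)         group, embedded scope cleared
 */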
2011
2012 /*
2013 * Process a state change from the upper layer for the given IPv6 group.
2014 *
2015 * Each socket holds a reference on the in6_multi in its own ip6_moptions.
2016 * The socket layer will have made the necessary updates to the group
2017 * state; it is now up to MLD to issue a state-change report if there
2018 * has been any change between T0 (when the last state-change was issued)
2019 * and T1 (now).
2020 *
2021 * We use the MLDv2 state machine at group level. The MLD module,
2022 * however, makes the decision as to which MLD protocol version to speak.
2023 * A state change *from* INCLUDE {} always means an initial join.
2024 * A state change *to* INCLUDE {} always means a final leave.
2025 *
2026 * If delay is non-zero, and the state change is an initial multicast
2027 * join, the state change report will be delayed by 'delay' ticks
2028 * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
2029 * the initial MLDv2 state-change report will be delayed by the pending
2030 * state-change timer or by 'delay' itself, whichever fires sooner.
2031 */
2032 int
2033 mld_change_state(struct in6_multi *inm, const int delay)
2034 {
2035 struct mld_ifinfo *mli;
2036 struct ifnet *ifp;
2037 int error = 0;
2038
2039 IN6M_LOCK_ASSERT_HELD(inm);
2040 VERIFY(inm->in6m_mli != NULL);
2041 MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2042
2043 /*
2044 * Try to detect if the upper layer just asked us to change state
2045 * for an interface which has now gone away.
2046 */
2047 VERIFY(inm->in6m_ifma != NULL);
2048 ifp = inm->in6m_ifma->ifma_ifp;
2049 /*
2050 * Sanity check that netinet6's notion of ifp is the same as net's.
2051 */
2052 VERIFY(inm->in6m_ifp == ifp);
2053
2054 mli = MLD_IFINFO(ifp);
2055 VERIFY(mli != NULL);
2056
2057 /*
2058 * If we detect a state transition to or from MCAST_UNDEFINED
2059 * for this group, then we are starting or finishing an MLD
2060 * life cycle for this group.
2061 */
2062 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2063 MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2064 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2065 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2066 MLD_PRINTF(("%s: initial join\n", __func__));
2067 error = mld_initial_join(inm, mli, delay);
2068 goto out;
2069 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2070 MLD_PRINTF(("%s: final leave\n", __func__));
2071 mld_final_leave(inm, mli);
2072 goto out;
2073 }
2074 } else {
2075 MLD_PRINTF(("%s: filter set change\n", __func__));
2076 }
2077
2078 error = mld_handle_state_change(inm, mli);
2079
2080 out:
2081 return (error);
2082 }
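/*
 * A minimal sketch of the expected calling pattern, inferred from the
 * lock assertions above; the caller shown is illustrative and is not
 * a function in this file:
 *
 *     IN6M_LOCK(inm);
 *     (socket layer updates the T1 state in inm->in6m_st[1] here)
 *     error = mld_change_state(inm, 0);   (in6m lock held, mli lock not)
 *     IN6M_UNLOCK(inm);
 *
 * A T0 filter mode of MCAST_UNDEFINED is reported as an initial join;
 * a T1 mode of MCAST_UNDEFINED is reported as a final leave; anything
 * else is treated as a source filter-set change.
 */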
2083
2084 /*
2085 * Perform the initial join for an MLD group.
2086 *
2087 * When joining a group:
2088 * If the group should have its MLD traffic suppressed, do nothing.
2089 * MLDv1 starts sending MLDv1 host membership reports.
2090 * MLDv2 will schedule an MLDv2 state-change report containing the
2091 * initial state of the membership.
2092 *
2093 * If the delay argument is non-zero, then we must delay sending the
2094 * initial state change for delay ticks (in units of PR_FASTHZ).
2095 */
2096 static int
2097 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2098 const int delay)
2099 {
2100 struct ifnet *ifp;
2101 struct ifqueue *ifq;
2102 int error, retval, syncstates;
2103 int odelay;
2104
2105 IN6M_LOCK_ASSERT_HELD(inm);
2106 MLI_LOCK_ASSERT_NOTHELD(mli);
2107
2108 MLD_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
2109 __func__, ip6_sprintf(&inm->in6m_addr),
2110 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2111
2112 error = 0;
2113 syncstates = 1;
2114
2115 ifp = inm->in6m_ifp;
2116
2117 MLI_LOCK(mli);
2118 VERIFY(mli->mli_ifp == ifp);
2119
2120 /*
2121 * Groups joined on loopback or marked as 'not reported'
2122 * enter the MLD_SILENT_MEMBER state and
2123 * are never reported in any protocol exchanges.
2124 * All other groups enter the appropriate state machine
2125 * for the version in use on this link.
2126 * A link marked as MLIF_SILENT causes MLD to be completely
2127 * disabled for the link.
2128 */
2129 if ((ifp->if_flags & IFF_LOOPBACK) ||
2130 (mli->mli_flags & MLIF_SILENT) ||
2131 !mld_is_addr_reported(&inm->in6m_addr)) {
2132 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2133 __func__));
2134 inm->in6m_state = MLD_SILENT_MEMBER;
2135 inm->in6m_timer = 0;
2136 } else {
2137 /*
2138 * Deal with overlapping in6_multi lifecycle.
2139 * If this group was LEAVING, then make sure
2140 * we drop the reference we picked up to keep the
2141 * group around for the final INCLUDE {} enqueue.
2142 * Since we cannot call in6_multi_detach() here,
2143 * defer this task to the timer routine.
2144 */
2145 if (mli->mli_version == MLD_VERSION_2 &&
2146 inm->in6m_state == MLD_LEAVING_MEMBER) {
2147 VERIFY(inm->in6m_nrelecnt != 0);
2148 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2149 in6m_nrele);
2150 }
2151
2152 inm->in6m_state = MLD_REPORTING_MEMBER;
2153
2154 switch (mli->mli_version) {
2155 case MLD_VERSION_1:
2156 /*
2157 * If a delay was provided, only use it if
2158 * it is greater than the delay normally
2159 * used for an MLDv1 state change report,
2160 * and delay sending the initial MLDv1 report
2161 * by not transitioning to the IDLE state.
2162 */
2163 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_SLOWHZ);
2164 if (delay) {
2165 inm->in6m_timer = max(delay, odelay);
2166 current_state_timers_running6 = 1;
2167 } else {
2168 inm->in6m_state = MLD_IDLE_MEMBER;
2169 error = mld_v1_transmit_report(inm,
2170 MLD_LISTENER_REPORT);
2171
2172 IN6M_LOCK_ASSERT_HELD(inm);
2173 MLI_LOCK_ASSERT_HELD(mli);
2174
2175 if (error == 0) {
2176 inm->in6m_timer = odelay;
2177 current_state_timers_running6 = 1;
2178 }
2179 }
2180 break;
2181
2182 case MLD_VERSION_2:
2183 /*
2184 * Defer update of T0 to T1, until the first copy
2185 * of the state change has been transmitted.
2186 */
2187 syncstates = 0;
2188
2189 /*
2190 * Immediately enqueue a State-Change Report for
2191 * this interface, freeing any previous reports.
2192 * Don't kick the timers if there is nothing to do,
2193 * or if an error occurred.
2194 */
2195 ifq = &inm->in6m_scq;
2196 IF_DRAIN(ifq);
2197 retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2198 0, 0, (mli->mli_flags & MLIF_USEALLOW));
2199 MLD_PRINTF(("%s: enqueue record = %d\n",
2200 __func__, retval));
2201 if (retval <= 0) {
2202 error = retval * -1;
2203 break;
2204 }
2205
2206 /*
2207 * Schedule transmission of pending state-change
2208 * report up to RV times for this link. The timer
2209 * will fire at the next mld_fasttimo (~200ms),
2210 * giving us an opportunity to merge the reports.
2211 *
2212 * If a delay was provided to this function, only
2213 * use this delay if sooner than the existing one.
2214 */
2215 VERIFY(mli->mli_rv > 1);
2216 inm->in6m_scrv = mli->mli_rv;
2217 if (delay) {
2218 if (inm->in6m_sctimer > 1) {
2219 inm->in6m_sctimer =
2220 min(inm->in6m_sctimer, delay);
2221 } else
2222 inm->in6m_sctimer = delay;
2223 } else
2224 inm->in6m_sctimer = 1;
2225 state_change_timers_running6 = 1;
2226
2227 error = 0;
2228 break;
2229 }
2230 }
2231 MLI_UNLOCK(mli);
2232
2233 /*
2234 * Only update the T0 state if state change is atomic,
2235 * i.e. we don't need to wait for a timer to fire before we
2236 * can consider the state change to have been communicated.
2237 */
2238 if (syncstates) {
2239 in6m_commit(inm);
2240 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2241 ip6_sprintf(&inm->in6m_addr),
2242 inm->in6m_ifp->if_name, ifp->if_unit));
2243 }
2244
2245 return (error);
2246 }
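/*
 * A worked example of how the 'delay' argument interacts with the
 * timers above; the values are illustrative:
 *
 *   MLDv1 link, delay = 5 fast ticks: the report is not sent
 *     immediately; in6m_timer = max(5, MLD_RANDOM_DELAY(...)) and the
 *     group stays in MLD_REPORTING_MEMBER until that timer fires.
 *
 *   MLDv2 link, delay = 5, in6m_sctimer already pending at 2: the
 *     existing 2-tick deadline wins (min of the two); with no pending
 *     timer, in6m_sctimer = 5.  Either way in6m_scrv is reset to the
 *     link's Robustness Variable so the report is retransmitted.
 */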
2247
2248 /*
2249 * Issue an intermediate state change during the life-cycle.
2250 */
2251 static int
2252 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
2253 {
2254 struct ifnet *ifp;
2255 int retval;
2256
2257 IN6M_LOCK_ASSERT_HELD(inm);
2258 MLI_LOCK_ASSERT_NOTHELD(mli);
2259
2260 MLD_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
2261 __func__, ip6_sprintf(&inm->in6m_addr),
2262 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2263
2264 ifp = inm->in6m_ifp;
2265
2266 MLI_LOCK(mli);
2267 VERIFY(mli->mli_ifp == ifp);
2268
2269 if ((ifp->if_flags & IFF_LOOPBACK) ||
2270 (mli->mli_flags & MLIF_SILENT) ||
2271 !mld_is_addr_reported(&inm->in6m_addr) ||
2272 (mli->mli_version != MLD_VERSION_2)) {
2273 MLI_UNLOCK(mli);
2274 if (!mld_is_addr_reported(&inm->in6m_addr)) {
2275 MLD_PRINTF(("%s: not kicking state machine for silent "
2276 "group\n", __func__));
2277 }
2278 MLD_PRINTF(("%s: nothing to do\n", __func__));
2279 in6m_commit(inm);
2280 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2281 ip6_sprintf(&inm->in6m_addr),
2282 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2283 return (0);
2284 }
2285
2286 IF_DRAIN(&inm->in6m_scq);
2287
2288 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2289 (mli->mli_flags & MLIF_USEALLOW));
2290 MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2291 if (retval <= 0) {
2292 MLI_UNLOCK(mli);
2293 return (-retval);
2294 }
2295 /*
2296 * If record(s) were enqueued, start the state-change
2297 * report timer for this group.
2298 */
2299 inm->in6m_scrv = mli->mli_rv;
2300 inm->in6m_sctimer = 1;
2301 state_change_timers_running6 = 1;
2302 MLI_UNLOCK(mli);
2303
2304 return (0);
2305 }
2306
2307 /*
2308 * Perform the final leave for a multicast address.
2309 *
2310 * When leaving a group:
2311 * MLDv1 sends a DONE message, if and only if we are the reporter.
2312 * MLDv2 enqueues a state-change report containing a transition
2313 * to INCLUDE {} for immediate transmission.
2314 */
2315 static void
2316 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
2317 {
2318 int syncstates = 1;
2319
2320 IN6M_LOCK_ASSERT_HELD(inm);
2321 MLI_LOCK_ASSERT_NOTHELD(mli);
2322
2323 MLD_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
2324 __func__, ip6_sprintf(&inm->in6m_addr),
2325 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2326
2327 switch (inm->in6m_state) {
2328 case MLD_NOT_MEMBER:
2329 case MLD_SILENT_MEMBER:
2330 case MLD_LEAVING_MEMBER:
2331 /* Already leaving or left; do nothing. */
2332 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2333 __func__));
2334 break;
2335 case MLD_REPORTING_MEMBER:
2336 case MLD_IDLE_MEMBER:
2337 case MLD_G_QUERY_PENDING_MEMBER:
2338 case MLD_SG_QUERY_PENDING_MEMBER:
2339 MLI_LOCK(mli);
2340 if (mli->mli_version == MLD_VERSION_1) {
2341 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2342 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2343 panic("%s: MLDv2 state reached, not MLDv2 "
2344 "mode\n", __func__);
2345 /* NOTREACHED */
2346 }
2347 mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
2348
2349 IN6M_LOCK_ASSERT_HELD(inm);
2350 MLI_LOCK_ASSERT_HELD(mli);
2351
2352 inm->in6m_state = MLD_NOT_MEMBER;
2353 } else if (mli->mli_version == MLD_VERSION_2) {
2354 /*
2355 * Stop group timer and all pending reports.
2356 * Immediately enqueue a state-change report
2357 * TO_IN {} to be sent on the next fast timeout,
2358 * giving us an opportunity to merge reports.
2359 */
2360 IF_DRAIN(&inm->in6m_scq);
2361 inm->in6m_timer = 0;
2362 inm->in6m_scrv = mli->mli_rv;
2363 MLD_PRINTF(("%s: Leaving %s/%s%d with %d "
2364 "pending retransmissions.\n", __func__,
2365 ip6_sprintf(&inm->in6m_addr),
2366 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit,
2367 inm->in6m_scrv));
2368 if (inm->in6m_scrv == 0) {
2369 inm->in6m_state = MLD_NOT_MEMBER;
2370 inm->in6m_sctimer = 0;
2371 } else {
2372 int retval;
2373 /*
2374 * Stick around in the in6_multihead list;
2375 * the final detach will be issued by
2376 * mld_v2_process_group_timers() when
2377 * the retransmit timer expires.
2378 */
2379 IN6M_ADDREF_LOCKED(inm);
2380 VERIFY(inm->in6m_debug & IFD_ATTACHED);
2381 inm->in6m_reqcnt++;
2382 VERIFY(inm->in6m_reqcnt >= 1);
2383 inm->in6m_nrelecnt++;
2384 VERIFY(inm->in6m_nrelecnt != 0);
2385
2386 retval = mld_v2_enqueue_group_record(
2387 &inm->in6m_scq, inm, 1, 0, 0,
2388 (mli->mli_flags & MLIF_USEALLOW));
2389 KASSERT(retval != 0,
2390 ("%s: enqueue record = %d\n", __func__,
2391 retval));
2392
2393 inm->in6m_state = MLD_LEAVING_MEMBER;
2394 inm->in6m_sctimer = 1;
2395 state_change_timers_running6 = 1;
2396 syncstates = 0;
2397 }
2398 }
2399 MLI_UNLOCK(mli);
2400 break;
2401 case MLD_LAZY_MEMBER:
2402 case MLD_SLEEPING_MEMBER:
2403 case MLD_AWAKENING_MEMBER:
2404 /* Our reports are suppressed; do nothing. */
2405 break;
2406 }
2407
2408 if (syncstates) {
2409 in6m_commit(inm);
2410 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2411 ip6_sprintf(&inm->in6m_addr),
2412 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2413 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2414 MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for %p/%s%d\n",
2415 __func__, &inm->in6m_addr, inm->in6m_ifp->if_name,
2416 inm->in6m_ifp->if_unit));
2417 }
2418 }
2419
2420 /*
2421 * Enqueue an MLDv2 group record to the given output queue.
2422 *
2423 * If is_state_change is zero, a current-state record is appended.
2424 * If is_state_change is non-zero, a state-change report is appended.
2425 *
2426 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2427 * If is_group_query is zero, and if there is a packet with free space
2428 * at the tail of the queue, the record will be appended to it,
2429 * provided there is enough free space.
2430 * Otherwise a new mbuf packet chain is allocated.
2431 *
2432 * If is_source_query is non-zero, each source is checked to see if
2433 * it was recorded for a Group-Source query, and will be omitted if
2434 * it is not both in-mode and recorded.
2435 *
2436 * If use_block_allow is non-zero, state change reports for initial join
2437 * and final leave, on an inclusive mode group with a source list, will be
2438 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2439 *
2440 * The function will attempt to allocate leading space in the packet
2441 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2442 *
2443 * If successful the size of all data appended to the queue is returned,
2444 * otherwise an error code less than zero is returned, or zero if
2445 * no record(s) were appended.
2446 */
2447 static int
2448 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2449 const int is_state_change, const int is_group_query,
2450 const int is_source_query, const int use_block_allow)
2451 {
2452 struct mldv2_record mr;
2453 struct mldv2_record *pmr;
2454 struct ifnet *ifp;
2455 struct ip6_msource *ims, *nims;
2456 struct mbuf *m0, *m, *md;
2457 int error, is_filter_list_change;
2458 int minrec0len, m0srcs, msrcs, nbytes, off;
2459 int record_has_sources;
2460 int now;
2461 int type;
2462 uint8_t mode;
2463
2464 IN6M_LOCK_ASSERT_HELD(inm);
2465 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2466
2467 error = 0;
2468 ifp = inm->in6m_ifp;
2469 is_filter_list_change = 0;
2470 m = NULL;
2471 m0 = NULL;
2472 m0srcs = 0;
2473 msrcs = 0;
2474 nbytes = 0;
2475 nims = NULL;
2476 record_has_sources = 1;
2477 pmr = NULL;
2478 type = MLD_DO_NOTHING;
2479 mode = inm->in6m_st[1].iss_fmode;
2480
2481 /*
2482 * If we did not transition out of ASM mode during t0->t1,
2483 * and there are no source nodes to process, we can skip
2484 * the generation of source records.
2485 */
2486 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2487 inm->in6m_nsrc == 0)
2488 record_has_sources = 0;
2489
2490 if (is_state_change) {
2491 /*
2492 * Queue a state change record.
2493 * If the mode did not change, and there are non-ASM
2494 * listeners or source filters present,
2495 * we potentially need to issue two records for the group.
2496 * If there are ASM listeners, and there was no filter
2497 * mode transition of any kind, do nothing.
2498 *
2499 * If we are transitioning to MCAST_UNDEFINED, we need
2500 * not send any sources. A transition to/from this state is
2501 * considered inclusive with some special treatment.
2502 *
2503 * If we are rewriting initial joins/leaves to use
2504 * ALLOW/BLOCK, and the group's membership is inclusive,
2505 * we need to send sources in all cases.
2506 */
2507 if (mode != inm->in6m_st[0].iss_fmode) {
2508 if (mode == MCAST_EXCLUDE) {
2509 MLD_PRINTF(("%s: change to EXCLUDE\n",
2510 __func__));
2511 type = MLD_CHANGE_TO_EXCLUDE_MODE;
2512 } else {
2513 MLD_PRINTF(("%s: change to INCLUDE\n",
2514 __func__));
2515 if (use_block_allow) {
2516 /*
2517 * XXX
2518 * Here we're interested in state
2519 * edges either direction between
2520 * MCAST_UNDEFINED and MCAST_INCLUDE.
2521 * Perhaps we should just check
2522 * the group state, rather than
2523 * the filter mode.
2524 */
2525 if (mode == MCAST_UNDEFINED) {
2526 type = MLD_BLOCK_OLD_SOURCES;
2527 } else {
2528 type = MLD_ALLOW_NEW_SOURCES;
2529 }
2530 } else {
2531 type = MLD_CHANGE_TO_INCLUDE_MODE;
2532 if (mode == MCAST_UNDEFINED)
2533 record_has_sources = 0;
2534 }
2535 }
2536 } else {
2537 if (record_has_sources) {
2538 is_filter_list_change = 1;
2539 } else {
2540 type = MLD_DO_NOTHING;
2541 }
2542 }
2543 } else {
2544 /*
2545 * Queue a current state record.
2546 */
2547 if (mode == MCAST_EXCLUDE) {
2548 type = MLD_MODE_IS_EXCLUDE;
2549 } else if (mode == MCAST_INCLUDE) {
2550 type = MLD_MODE_IS_INCLUDE;
2551 VERIFY(inm->in6m_st[1].iss_asm == 0);
2552 }
2553 }
2554
2555 /*
2556 * Generate the filter list changes using a separate function.
2557 */
2558 if (is_filter_list_change)
2559 return (mld_v2_enqueue_filter_change(ifq, inm));
2560
2561 if (type == MLD_DO_NOTHING) {
2562 MLD_PRINTF(("%s: nothing to do for %s/%s%d\n",
2563 __func__, ip6_sprintf(&inm->in6m_addr),
2564 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2565 return (0);
2566 }
2567
2568 /*
2569 * If any sources are present, we must be able to fit at least
2570 * one in the trailing space of the tail packet's mbuf,
2571 * ideally more.
2572 */
2573 minrec0len = sizeof(struct mldv2_record);
2574 if (record_has_sources)
2575 minrec0len += sizeof(struct in6_addr);
2576 MLD_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
2577 mld_rec_type_to_str(type),
2578 ip6_sprintf(&inm->in6m_addr),
2579 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2580
2581 /*
2582 * Check if we have a packet in the tail of the queue for this
2583 * group into which the first group record for this group will fit.
2584 * Otherwise allocate a new packet.
2585 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2586 * Note: Group records for G/GSR query responses MUST be sent
2587 * in their own packet.
2588 */
2589 m0 = ifq->ifq_tail;
2590 if (!is_group_query &&
2591 m0 != NULL &&
2592 (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2593 (m0->m_pkthdr.len + minrec0len) <
2594 (ifp->if_mtu - MLD_MTUSPACE)) {
2595 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2596 sizeof(struct mldv2_record)) /
2597 sizeof(struct in6_addr);
2598 m = m0;
2599 MLD_PRINTF(("%s: use existing packet\n", __func__));
2600 } else {
2601 if (IF_QFULL(ifq)) {
2602 MLD_PRINTF(("%s: outbound queue full\n", __func__));
2603 return (-ENOMEM);
2604 }
2605 m = NULL;
2606 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2607 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2608 if (!is_state_change && !is_group_query)
2609 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2610 if (m == NULL)
2611 m = m_gethdr(M_DONTWAIT, MT_DATA);
2612 if (m == NULL)
2613 return (-ENOMEM);
2614
2615 MLD_PRINTF(("%s: allocated first packet\n", __func__));
2616 }
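/*
 * A rough worked example of the sizing above; the exact constants
 * live in the structure definitions and MLD_MTUSPACE, so treat these
 * numbers as illustrative only.  Assuming a 1500-byte MTU, roughly
 * 56 bytes reserved by MLD_MTUSPACE (IPv6 header + hop-by-hop Router
 * Alert option + report header) and a 20-byte group record header:
 *
 *     m0srcs = (1500 - 56 - 20) / sizeof(struct in6_addr)
 *            = 1424 / 16
 *            = 89 sources in the first record of a fresh packet.
 *
 * Subsequent packets for the same record type are sized the same way
 * in the loop further below.
 */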
2617
2618 /*
2619 * Append group record.
2620 * If we have sources, we don't know how many yet.
2621 */
2622 mr.mr_type = type;
2623 mr.mr_datalen = 0;
2624 mr.mr_numsrc = 0;
2625 mr.mr_addr = inm->in6m_addr;
2626 in6_clearscope(&mr.mr_addr);
2627 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2628 if (m != m0)
2629 m_freem(m);
2630 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2631 return (-ENOMEM);
2632 }
2633 nbytes += sizeof(struct mldv2_record);
2634
2635 /*
2636 * Append as many sources as will fit in the first packet.
2637 * If we are appending to a new packet, the chain allocation
2638 * may potentially use clusters; use m_getptr() in this case.
2639 * If we are appending to an existing packet, we need to obtain
2640 * a pointer to the group record after m_append(), in case a new
2641 * mbuf was allocated.
2642 *
2643 * Only append sources which are in-mode at t1. If we are
2644 * transitioning to MCAST_UNDEFINED state on the group, and
2645 * use_block_allow is zero, do not include source entries.
2646 * Otherwise, we need to include this source in the report.
2647 *
2648 * Only report recorded sources in our filter set when responding
2649 * to a group-source query.
2650 */
2651 if (record_has_sources) {
2652 if (m == m0) {
2653 md = m_last(m);
2654 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2655 md->m_len - nbytes);
2656 } else {
2657 md = m_getptr(m, 0, &off);
2658 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2659 off);
2660 }
2661 msrcs = 0;
2662 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2663 nims) {
2664 MLD_PRINTF(("%s: visit node %s\n", __func__,
2665 ip6_sprintf(&ims->im6s_addr)));
2666 now = im6s_get_mode(inm, ims, 1);
2667 MLD_PRINTF(("%s: node is %d\n", __func__, now));
2668 if ((now != mode) ||
2669 (now == mode &&
2670 (!use_block_allow && mode == MCAST_UNDEFINED))) {
2671 MLD_PRINTF(("%s: skip node\n", __func__));
2672 continue;
2673 }
2674 if (is_source_query && ims->im6s_stp == 0) {
2675 MLD_PRINTF(("%s: skip unrecorded node\n",
2676 __func__));
2677 continue;
2678 }
2679 MLD_PRINTF(("%s: append node\n", __func__));
2680 if (!m_append(m, sizeof(struct in6_addr),
2681 (void *)&ims->im6s_addr)) {
2682 if (m != m0)
2683 m_freem(m);
2684 MLD_PRINTF(("%s: m_append() failed.\n",
2685 __func__));
2686 return (-ENOMEM);
2687 }
2688 nbytes += sizeof(struct in6_addr);
2689 ++msrcs;
2690 if (msrcs == m0srcs)
2691 break;
2692 }
2693 MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
2694 msrcs));
2695 pmr->mr_numsrc = htons(msrcs);
2696 nbytes += (msrcs * sizeof(struct in6_addr));
2697 }
2698
2699 if (is_source_query && msrcs == 0) {
2700 MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
2701 if (m != m0)
2702 m_freem(m);
2703 return (0);
2704 }
2705
2706 /*
2707 * We are good to go with first packet.
2708 */
2709 if (m != m0) {
2710 MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
2711 m->m_pkthdr.vt_nrecs = 1;
2712 m->m_pkthdr.rcvif = ifp;
2713 IF_ENQUEUE(ifq, m);
2714 } else {
2715 m->m_pkthdr.vt_nrecs++;
2716 }
2717 /*
2718 * No further work needed if no source list in packet(s).
2719 */
2720 if (!record_has_sources)
2721 return (nbytes);
2722
2723 /*
2724 * Whilst sources remain to be announced, we need to allocate
2725 * a new packet and fill out as many sources as will fit.
2726 * Always try for a cluster first.
2727 */
2728 while (nims != NULL) {
2729 if (IF_QFULL(ifq)) {
2730 MLD_PRINTF(("%s: outbound queue full\n", __func__));
2731 return (-ENOMEM);
2732 }
2733 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2734 if (m == NULL)
2735 m = m_gethdr(M_DONTWAIT, MT_DATA);
2736 if (m == NULL)
2737 return (-ENOMEM);
2738 md = m_getptr(m, 0, &off);
2739 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2740 MLD_PRINTF(("%s: allocated next packet\n", __func__));
2741
2742 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2743 if (m != m0)
2744 m_freem(m);
2745 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2746 return (-ENOMEM);
2747 }
2748 m->m_pkthdr.vt_nrecs = 1;
2749 nbytes += sizeof(struct mldv2_record);
2750
2751 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2752 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2753
2754 msrcs = 0;
2755 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2756 MLD_PRINTF(("%s: visit node %s\n",
2757 __func__, ip6_sprintf(&ims->im6s_addr)));
2758 now = im6s_get_mode(inm, ims, 1);
2759 if ((now != mode) ||
2760 (now == mode &&
2761 (!use_block_allow && mode == MCAST_UNDEFINED))) {
2762 MLD_PRINTF(("%s: skip node\n", __func__));
2763 continue;
2764 }
2765 if (is_source_query && ims->im6s_stp == 0) {
2766 MLD_PRINTF(("%s: skip unrecorded node\n",
2767 __func__));
2768 continue;
2769 }
2770 MLD_PRINTF(("%s: append node\n", __func__));
2771 if (!m_append(m, sizeof(struct in6_addr),
2772 (void *)&ims->im6s_addr)) {
2773 if (m != m0)
2774 m_freem(m);
2775 MLD_PRINTF(("%s: m_append() failed.\n",
2776 __func__));
2777 return (-ENOMEM);
2778 }
2779 ++msrcs;
2780 if (msrcs == m0srcs)
2781 break;
2782 }
2783 pmr->mr_numsrc = htons(msrcs);
2784 nbytes += (msrcs * sizeof(struct in6_addr));
2785
2786 MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
2787 m->m_pkthdr.rcvif = ifp;
2788 IF_ENQUEUE(ifq, m);
2789 }
2790
2791 return (nbytes);
2792 }
2793
2794 /*
2795 * Type used to mark record pass completion.
2796 * We exploit the fact we can cast to this easily from the
2797 * current filter modes on each ip6_msource node.
2798 */
2799 typedef enum {
2800 REC_NONE = 0x00, /* MCAST_UNDEFINED */
2801 REC_ALLOW = 0x01, /* MCAST_INCLUDE */
2802 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */
2803 REC_FULL = REC_ALLOW | REC_BLOCK
2804 } rectype_t;
2805
2806 /*
2807 * Enqueue an MLDv2 filter list change to the given output queue.
2808 *
2809 * Source list filter state is held in an RB-tree. When the filter list
2810 * for a group is changed without changing its mode, we need to compute
2811 * the deltas between T0 and T1 for each source in the filter set,
2812 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
2813 *
2814 * As we may potentially queue two record types, and the entire RB-tree
2815 * needs to be walked at once, we break this out into its own function
2816 * so we can generate a tightly packed queue of packets.
2817 *
2818 * XXX This could be written to only use one tree walk, although that makes
2819 * serializing into the mbuf chains a bit harder. For now we do two walks
2820 * which makes things easier on us, and it may or may not be harder on
2821 * the L2 cache.
2822 *
2823 * If successful the size of all data appended to the queue is returned,
2824 * otherwise an error code less than zero is returned, or zero if
2825 * no record(s) were appended.
2826 */
2827 static int
2828 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
2829 {
2830 static const int MINRECLEN =
2831 sizeof(struct mldv2_record) + sizeof(struct in6_addr);
2832 struct ifnet *ifp;
2833 struct mldv2_record mr;
2834 struct mldv2_record *pmr;
2835 struct ip6_msource *ims, *nims;
2836 struct mbuf *m, *m0, *md;
2837 int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
2838 int nallow, nblock;
2839 uint8_t mode, now, then;
2840 rectype_t crt, drt, nrt;
2841
2842 IN6M_LOCK_ASSERT_HELD(inm);
2843
2844 if (inm->in6m_nsrc == 0 ||
2845 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
2846 return (0);
2847
2848 ifp = inm->in6m_ifp; /* interface */
2849 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */
2850 crt = REC_NONE; /* current group record type */
2851 drt = REC_NONE; /* mask of completed group record types */
2852 nrt = REC_NONE; /* record type for current node */
2853 m0srcs = 0; /* # sources which will fit in current mbuf chain */
2854 npbytes = 0; /* # of bytes appended this packet */
2855 nbytes = 0; /* # of bytes appended to group's state-change queue */
2856 rsrcs = 0; /* # sources encoded in current record */
2857 schanged = 0; /* # nodes encoded in overall filter change */
2858 nallow = 0; /* # of source entries in ALLOW_NEW */
2859 nblock = 0; /* # of source entries in BLOCK_OLD */
2860 nims = NULL; /* next tree node pointer */
2861
2862 /*
2863 * Walk the tree once for each possible filter record mode.
2864 * The first kind of source we encounter tells us which
2865 * is the first kind of record we start appending.
2866 * If a node transitioned to UNDEFINED at t1, its mode is treated
2867 * as the inverse of the group's filter mode.
2868 */
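/*
 * A concrete illustration of the UNDEFINED inversion rule above:
 * on a group whose filter mode is MCAST_EXCLUDE, a source that was
 * in the exclude list at t0 and removed at t1 reads back as
 * MCAST_UNDEFINED at t1.  Inverting the group mode maps it to
 * REC_ALLOW, so it is announced in an ALLOW_NEW_SOURCES record,
 * which matches the "stop blocking this source" semantic.  The
 * symmetric case on an MCAST_INCLUDE group maps to REC_BLOCK.
 */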
2869 while (drt != REC_FULL) {
2870 do {
2871 m0 = ifq->ifq_tail;
2872 if (m0 != NULL &&
2873 (m0->m_pkthdr.vt_nrecs + 1 <=
2874 MLD_V2_REPORT_MAXRECS) &&
2875 (m0->m_pkthdr.len + MINRECLEN) <
2876 (ifp->if_mtu - MLD_MTUSPACE)) {
2877 m = m0;
2878 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2879 sizeof(struct mldv2_record)) /
2880 sizeof(struct in6_addr);
2881 MLD_PRINTF(("%s: use previous packet\n",
2882 __func__));
2883 } else {
2884 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2885 if (m == NULL)
2886 m = m_gethdr(M_DONTWAIT, MT_DATA);
2887 if (m == NULL) {
2888 MLD_PRINTF(("%s: m_get*() failed\n",
2889 __func__));
2890 return (-ENOMEM);
2891 }
2892 m->m_pkthdr.vt_nrecs = 0;
2893 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2894 sizeof(struct mldv2_record)) /
2895 sizeof(struct in6_addr);
2896 npbytes = 0;
2897 MLD_PRINTF(("%s: allocated new packet\n",
2898 __func__));
2899 }
2900 /*
2901 * Append the MLD group record header to the
2902 * current packet's data area.
2903 * Recalculate pointer to free space for next
2904 * group record, in case m_append() allocated
2905 * a new mbuf or cluster.
2906 */
2907 memset(&mr, 0, sizeof(mr));
2908 mr.mr_addr = inm->in6m_addr;
2909 in6_clearscope(&mr.mr_addr);
2910 if (!m_append(m, sizeof(mr), (void *)&mr)) {
2911 if (m != m0)
2912 m_freem(m);
2913 MLD_PRINTF(("%s: m_append() failed\n",
2914 __func__));
2915 return (-ENOMEM);
2916 }
2917 npbytes += sizeof(struct mldv2_record);
2918 if (m != m0) {
2919 /* new packet; offset in chain */
2920 md = m_getptr(m, npbytes -
2921 sizeof(struct mldv2_record), &off);
2922 pmr = (struct mldv2_record *)(mtod(md,
2923 uint8_t *) + off);
2924 } else {
2925 /* current packet; offset from last append */
2926 md = m_last(m);
2927 pmr = (struct mldv2_record *)(mtod(md,
2928 uint8_t *) + md->m_len -
2929 sizeof(struct mldv2_record));
2930 }
2931 /*
2932 * Begin walking the tree for this record type
2933 * pass, or continue from where we left off
2934 * previously if we had to allocate a new packet.
2935 * Only report deltas in-mode at t1.
2936 * We need not report included sources as allowed
2937 * if we are in inclusive mode on the group,
2938 * however the converse is not true.
2939 */
2940 rsrcs = 0;
2941 if (nims == NULL) {
2942 nims = RB_MIN(ip6_msource_tree,
2943 &inm->in6m_srcs);
2944 }
2945 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2946 MLD_PRINTF(("%s: visit node %s\n", __func__,
2947 ip6_sprintf(&ims->im6s_addr)));
2948 now = im6s_get_mode(inm, ims, 1);
2949 then = im6s_get_mode(inm, ims, 0);
2950 MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
2951 __func__, then, now));
2952 if (now == then) {
2953 MLD_PRINTF(("%s: skip unchanged\n",
2954 __func__));
2955 continue;
2956 }
2957 if (mode == MCAST_EXCLUDE &&
2958 now == MCAST_INCLUDE) {
2959 MLD_PRINTF(("%s: skip IN src on EX "
2960 "group\n", __func__));
2961 continue;
2962 }
2963 nrt = (rectype_t)now;
2964 if (nrt == REC_NONE)
2965 nrt = (rectype_t)(~mode & REC_FULL);
2966 if (schanged++ == 0) {
2967 crt = nrt;
2968 } else if (crt != nrt)
2969 continue;
2970 if (!m_append(m, sizeof(struct in6_addr),
2971 (void *)&ims->im6s_addr)) {
2972 if (m != m0)
2973 m_freem(m);
2974 MLD_PRINTF(("%s: m_append() failed\n",
2975 __func__));
2976 return (-ENOMEM);
2977 }
2978 nallow += !!(crt == REC_ALLOW);
2979 nblock += !!(crt == REC_BLOCK);
2980 if (++rsrcs == m0srcs)
2981 break;
2982 }
2983 /*
2984 * If we did not append any tree nodes on this
2985 * pass, back out of allocations.
2986 */
2987 if (rsrcs == 0) {
2988 npbytes -= sizeof(struct mldv2_record);
2989 if (m != m0) {
2990 MLD_PRINTF(("%s: m_free(m)\n",
2991 __func__));
2992 m_freem(m);
2993 } else {
2994 MLD_PRINTF(("%s: m_adj(m, -mr)\n",
2995 __func__));
2996 m_adj(m, -((int)sizeof(
2997 struct mldv2_record)));
2998 }
2999 continue;
3000 }
3001 npbytes += (rsrcs * sizeof(struct in6_addr));
3002 if (crt == REC_ALLOW)
3003 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3004 else if (crt == REC_BLOCK)
3005 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3006 pmr->mr_numsrc = htons(rsrcs);
3007 /*
3008 * Count the new group record, and enqueue this
3009 * packet if it wasn't already queued.
3010 */
3011 m->m_pkthdr.vt_nrecs++;
3012 m->m_pkthdr.rcvif = ifp;
3013 if (m != m0)
3014 IF_ENQUEUE(ifq, m);
3015 nbytes += npbytes;
3016 } while (nims != NULL);
3017 drt |= crt;
3018 crt = (~crt & REC_FULL);
3019 }
3020
3021 MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3022 nallow, nblock));
3023
3024 return (nbytes);
3025 }
3026
3027 static int
3028 mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3029 {
3030 struct ifqueue *gq;
3031 struct mbuf *m; /* pending state-change */
3032 struct mbuf *m0; /* copy of pending state-change */
3033 struct mbuf *mt; /* last state-change in packet */
3034 struct mbuf *n;
3035 int docopy, domerge;
3036 u_int recslen;
3037
3038 IN6M_LOCK_ASSERT_HELD(inm);
3039
3040 docopy = 0;
3041 domerge = 0;
3042 recslen = 0;
3043
3044 /*
3045 * If there are further pending retransmissions, make a writable
3046 * copy of each queued state-change message before merging.
3047 */
3048 if (inm->in6m_scrv > 0)
3049 docopy = 1;
3050
3051 gq = &inm->in6m_scq;
3052 #ifdef MLD_DEBUG
3053 if (gq->ifq_head == NULL) {
3054 MLD_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
3055 __func__, inm));
3056 }
3057 #endif
3058
3059 /*
3060 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3061 * packet might not always be at the head of the ifqueue.
3062 */
3063 m = gq->ifq_head;
3064 while (m != NULL) {
3065 /*
3066 * Only merge the report into the current packet if
3067 * there is sufficient space to do so; an MLDv2 report
3068 * packet may only contain 65,535 group records.
3069 * Always use a simple mbuf chain concatenation to do this,
3070 * as large state changes for single groups may have
3071 * allocated clusters.
3072 */
3073 domerge = 0;
3074 mt = ifscq->ifq_tail;
3075 if (mt != NULL) {
3076 recslen = m_length(m);
3077
3078 if ((mt->m_pkthdr.vt_nrecs +
3079 m->m_pkthdr.vt_nrecs <=
3080 MLD_V2_REPORT_MAXRECS) &&
3081 (mt->m_pkthdr.len + recslen <=
3082 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
3083 domerge = 1;
3084 }
3085
3086 if (!domerge && IF_QFULL(gq)) {
3087 MLD_PRINTF(("%s: outbound queue full, skipping whole "
3088 "packet %p\n", __func__, m));
3089 n = m->m_nextpkt;
3090 if (!docopy) {
3091 IF_REMQUEUE(gq, m);
3092 m_freem(m);
3093 }
3094 m = n;
3095 continue;
3096 }
3097
3098 if (!docopy) {
3099 MLD_PRINTF(("%s: dequeueing %p\n", __func__, m));
3100 n = m->m_nextpkt;
3101 IF_REMQUEUE(gq, m);
3102 m0 = m;
3103 m = n;
3104 } else {
3105 MLD_PRINTF(("%s: copying %p\n", __func__, m));
3106 m0 = m_dup(m, M_NOWAIT);
3107 if (m0 == NULL)
3108 return (ENOMEM);
3109 m0->m_nextpkt = NULL;
3110 m = m->m_nextpkt;
3111 }
3112
3113 if (!domerge) {
3114 MLD_PRINTF(("%s: queueing %p to ifscq %p\n",
3115 __func__, m0, ifscq));
3116 m0->m_pkthdr.rcvif = inm->in6m_ifp;
3117 IF_ENQUEUE(ifscq, m0);
3118 } else {
3119 struct mbuf *mtl; /* last mbuf of packet mt */
3120
3121 MLD_PRINTF(("%s: merging %p with ifscq tail %p\n",
3122 __func__, m0, mt));
3123
3124 mtl = m_last(mt);
3125 m0->m_flags &= ~M_PKTHDR;
3126 mt->m_pkthdr.len += recslen;
3127 mt->m_pkthdr.vt_nrecs +=
3128 m0->m_pkthdr.vt_nrecs;
3129
3130 mtl->m_next = m0;
3131 }
3132 }
3133
3134 return (0);
3135 }
3136
3137 /*
3138 * Respond to a pending MLDv2 General Query.
3139 */
3140 static void
3141 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3142 {
3143 struct ifnet *ifp;
3144 struct in6_multi *inm;
3145 struct in6_multistep step;
3146 int retval;
3147
3148 MLI_LOCK_ASSERT_HELD(mli);
3149
3150 VERIFY(mli->mli_version == MLD_VERSION_2);
3151
3152 ifp = mli->mli_ifp;
3153 MLI_UNLOCK(mli);
3154
3155 in6_multihead_lock_shared();
3156 IN6_FIRST_MULTI(step, inm);
3157 while (inm != NULL) {
3158 IN6M_LOCK(inm);
3159 if (inm->in6m_ifp != ifp)
3160 goto next;
3161
3162 switch (inm->in6m_state) {
3163 case MLD_NOT_MEMBER:
3164 case MLD_SILENT_MEMBER:
3165 break;
3166 case MLD_REPORTING_MEMBER:
3167 case MLD_IDLE_MEMBER:
3168 case MLD_LAZY_MEMBER:
3169 case MLD_SLEEPING_MEMBER:
3170 case MLD_AWAKENING_MEMBER:
3171 inm->in6m_state = MLD_REPORTING_MEMBER;
3172 MLI_LOCK(mli);
3173 retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3174 inm, 0, 0, 0, 0);
3175 MLI_UNLOCK(mli);
3176 MLD_PRINTF(("%s: enqueue record = %d\n",
3177 __func__, retval));
3178 break;
3179 case MLD_G_QUERY_PENDING_MEMBER:
3180 case MLD_SG_QUERY_PENDING_MEMBER:
3181 case MLD_LEAVING_MEMBER:
3182 break;
3183 }
3184 next:
3185 IN6M_UNLOCK(inm);
3186 IN6_NEXT_MULTI(step, inm);
3187 }
3188 in6_multihead_lock_done();
3189
3190 MLI_LOCK(mli);
3191 mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3192 MLI_LOCK_ASSERT_HELD(mli);
3193
3194 /*
3195 * Slew transmission of bursts over 500ms intervals.
3196 */
3197 if (mli->mli_gq.ifq_head != NULL) {
3198 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3199 MLD_RESPONSE_BURST_INTERVAL);
3200 interface_timers_running6 = 1;
3201 }
3202 }
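/*
 * For illustration of the pacing above: mld_dispatch_queue() sends at
 * most MLD_MAX_RESPONSE_BURST packets per pass.  If, say, ten report
 * packets were queued for the general query and the burst limit were
 * four (an illustrative figure), the response would drain over three
 * interface-timer passes, each re-armed with a small random delay so
 * the bursts are spread over successive ~500ms intervals instead of
 * being emitted all at once.
 */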
3203
3204 /*
3205 * Transmit the next pending message in the output queue.
3206 *
3207 * Must not be called with in6m_lock or mli_lock held.
3208 */
3209 static void
3210 mld_dispatch_packet(struct mbuf *m)
3211 {
3212 struct ip6_moptions *im6o;
3213 struct ifnet *ifp;
3214 struct ifnet *oifp = NULL;
3215 struct mbuf *m0;
3216 struct mbuf *md;
3217 struct ip6_hdr *ip6;
3218 struct mld_hdr *mld;
3219 int error;
3220 int off;
3221 int type;
3222
3223 MLD_PRINTF(("%s: transmit %p\n", __func__, m));
3224
3225 /*
3226 * Check if the ifnet is still attached.
3227 */
3228 ifp = m->m_pkthdr.rcvif;
3229 if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3230 MLD_PRINTF(("%s: dropped %p as ifindex %u went away.\n",
3231 __func__, m, (u_int)if_index));
3232 m_freem(m);
3233 ip6stat.ip6s_noroute++;
3234 return;
3235 }
3236
3237 im6o = ip6_allocmoptions(M_WAITOK);
3238 if (im6o == NULL) {
3239 m_freem(m);
3240 return;
3241 }
3242
3243 im6o->im6o_multicast_hlim = 1;
3244 #if MROUTING
3245 im6o->im6o_multicast_loop = (ip6_mrouter != NULL);
3246 #else
3247 im6o->im6o_multicast_loop = 0;
3248 #endif
3249 im6o->im6o_multicast_ifp = ifp;
3250
3251 if (m->m_flags & M_MLDV1) {
3252 m0 = m;
3253 } else {
3254 m0 = mld_v2_encap_report(ifp, m);
3255 if (m0 == NULL) {
3256 MLD_PRINTF(("%s: dropped %p\n", __func__, m));
3257 /*
3258 * mld_v2_encap_report() has already freed our mbuf.
3259 */
3260 IM6O_REMREF(im6o);
3261 ip6stat.ip6s_odropped++;
3262 return;
3263 }
3264 }
3265
3266 m->m_flags &= ~(M_PROTOFLAGS);
3267 m0->m_pkthdr.rcvif = lo_ifp;
3268
3269 ip6 = mtod(m0, struct ip6_hdr *);
3270 #if 0
3271 (void) in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */
3272 #else
3273 /*
3274 * XXX XXX Break some KPI rules to prevent an LOR which would
3275 * occur if we called in6_setscope() at transmission.
3276 * See comments at top of file.
3277 */
3278 MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
3279 #endif
3280
3281 /*
3282 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3283 * so we can bump the stats.
3284 */
3285 md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3286 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3287 type = mld->mld_type;
3288
3289 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3290 &oifp, NULL);
3291
3292 IM6O_REMREF(im6o);
3293
3294 if (error) {
3295 MLD_PRINTF(("%s: ip6_output(%p) = %d\n", __func__, m0, error));
3296 if (oifp != NULL)
3297 ifnet_release(oifp);
3298 return;
3299 }
3300
3301 icmp6stat.icp6s_outhist[type]++;
3302 if (oifp != NULL) {
3303 icmp6_ifstat_inc(oifp, ifs6_out_msg);
3304 switch (type) {
3305 case MLD_LISTENER_REPORT:
3306 case MLDV2_LISTENER_REPORT:
3307 icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3308 break;
3309 case MLD_LISTENER_DONE:
3310 icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3311 break;
3312 }
3313 ifnet_release(oifp);
3314 }
3315 }
3316
3317 /*
3318 * Encapsulate an MLDv2 report.
3319 *
3320 * KAME IPv6 requires that hop-by-hop options be passed separately,
3321 * and that the IPv6 header be prepended in a separate mbuf.
3322 *
3323 * Returns a pointer to the new mbuf chain head, or NULL if the
3324 * allocation failed.
3325 */
3326 static struct mbuf *
3327 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3328 {
3329 struct mbuf *mh;
3330 struct mldv2_report *mld;
3331 struct ip6_hdr *ip6;
3332 struct in6_ifaddr *ia;
3333 int mldreclen;
3334
3335 VERIFY(m->m_flags & M_PKTHDR);
3336
3337 /*
3338 * RFC3590: OK to send as :: or tentative during DAD.
3339 */
3340 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3341 if (ia == NULL)
3342 MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3343
3344 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3345 if (mh == NULL) {
3346 if (ia != NULL)
3347 IFA_REMREF(&ia->ia_ifa);
3348 m_freem(m);
3349 return (NULL);
3350 }
3351 MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3352
3353 mldreclen = m_length(m);
3354 MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3355
3356 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3357 mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3358 sizeof(struct mldv2_report) + mldreclen;
3359
3360 ip6 = mtod(mh, struct ip6_hdr *);
3361 ip6->ip6_flow = 0;
3362 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3363 ip6->ip6_vfc |= IPV6_VERSION;
3364 ip6->ip6_nxt = IPPROTO_ICMPV6;
3365 if (ia != NULL)
3366 IFA_LOCK(&ia->ia_ifa);
3367 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3368 if (ia != NULL) {
3369 IFA_UNLOCK(&ia->ia_ifa);
3370 IFA_REMREF(&ia->ia_ifa);
3371 ia = NULL;
3372 }
3373 ip6->ip6_dst = in6addr_linklocal_allv2routers;
3374 /* scope ID will be set in netisr */
3375
3376 mld = (struct mldv2_report *)(ip6 + 1);
3377 mld->mld_type = MLDV2_LISTENER_REPORT;
3378 mld->mld_code = 0;
3379 mld->mld_cksum = 0;
3380 mld->mld_v2_reserved = 0;
3381 mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3382 m->m_pkthdr.vt_nrecs = 0;
3383 m->m_flags &= ~M_PKTHDR;
3384
3385 mh->m_next = m;
3386 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3387 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3388 return (mh);
3389 }
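/*
 * A sketch of the finished datagram produced by the encapsulation
 * above, once ip6_output() has inserted the hop-by-hop options
 * carried in mld_po; the sizes are illustrative and assume no
 * extension headers beyond the Router Alert option:
 *
 *     [ IPv6 header, 40 bytes, hlim 1, dst ff02::16          ]
 *     [ Hop-by-Hop options with Router Alert, 8 bytes        ]
 *     [ MLDv2 report header, 8 bytes, numrecs from vt_nrecs  ]
 *     [ group records, mldreclen bytes                       ]
 *
 * The checksum computed here covers the report header plus records;
 * the source address may legitimately be :: while DAD is still in
 * progress (RFC 3590).
 */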
3390
3391 #ifdef MLD_DEBUG
3392 static const char *
3393 mld_rec_type_to_str(const int type)
3394 {
3395 switch (type) {
3396 case MLD_CHANGE_TO_EXCLUDE_MODE:
3397 return "TO_EX";
3398 break;
3399 case MLD_CHANGE_TO_INCLUDE_MODE:
3400 return "TO_IN";
3401 break;
3402 case MLD_MODE_IS_EXCLUDE:
3403 return "MODE_EX";
3404 break;
3405 case MLD_MODE_IS_INCLUDE:
3406 return "MODE_IN";
3407 break;
3408 case MLD_ALLOW_NEW_SOURCES:
3409 return "ALLOW_NEW";
3410 break;
3411 case MLD_BLOCK_OLD_SOURCES:
3412 return "BLOCK_OLD";
3413 break;
3414 default:
3415 break;
3416 }
3417 return "unknown";
3418 }
3419 #endif
3420
3421 void
3422 mld_init(void)
3423 {
3424
3425 MLD_PRINTF(("%s: initializing\n", __func__));
3426
3427 /* Setup lock group and attribute for mld6_mtx */
3428 mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3429 mld_mtx_grp = lck_grp_alloc_init("mld_mtx", mld_mtx_grp_attr);
3430 mld_mtx_attr = lck_attr_alloc_init();
3431 lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3432
3433 ip6_initpktopts(&mld_po);
3434 mld_po.ip6po_hlim = 1;
3435 mld_po.ip6po_hbh = &mld_ra.hbh;
3436 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3437 mld_po.ip6po_flags = IP6PO_DONTFRAG;
3438 LIST_INIT(&mli_head);
3439
3440 mli_size = sizeof (struct mld_ifinfo);
3441 mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3442 0, MLI_ZONE_NAME);
3443 if (mli_zone == NULL) {
3444 panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3445 /* NOTREACHED */
3446 }
3447 zone_change(mli_zone, Z_EXPAND, TRUE);
3448 zone_change(mli_zone, Z_CALLERACCT, FALSE);
3449 }