1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2009 Bruce Simpson.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. The name of the author may not be used to endorse or promote
40 * products derived from this software without specific prior written
41 * permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56 /*
57 * Copyright (c) 1988 Stephen Deering.
58 * Copyright (c) 1992, 1993
59 * The Regents of the University of California. All rights reserved.
60 *
61 * This code is derived from software contributed to Berkeley by
62 * Stephen Deering of Stanford University.
63 *
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
66 * are met:
67 * 1. Redistributions of source code must retain the above copyright
68 * notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 * notice, this list of conditions and the following disclaimer in the
71 * documentation and/or other materials provided with the distribution.
72 * 3. All advertising materials mentioning features or use of this software
73 * must display the following acknowledgement:
74 * This product includes software developed by the University of
75 * California, Berkeley and its contributors.
76 * 4. Neither the name of the University nor the names of its contributors
77 * may be used to endorse or promote products derived from this software
78 * without specific prior written permission.
79 *
80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
83 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * SUCH DAMAGE.
91 *
92 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
93 */
94 /*
95 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
96 * support for mandatory and extensible security protections. This notice
97 * is included in support of clause 2.2 (b) of the Apple Public License,
98 * Version 2.0.
99 */
100
101 #include <sys/cdefs.h>
102
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/mbuf.h>
106 #include <sys/socket.h>
107 #include <sys/protosw.h>
108 #include <sys/sysctl.h>
109 #include <sys/kernel.h>
110 #include <sys/malloc.h>
111 #include <sys/mcache.h>
112
113 #include <dev/random/randomdev.h>
114
115 #include <kern/zalloc.h>
116
117 #include <net/if.h>
118 #include <net/route.h>
119
120 #include <netinet/in.h>
121 #include <netinet/in_var.h>
122 #include <netinet6/in6_var.h>
123 #include <netinet/ip6.h>
124 #include <netinet6/ip6_var.h>
125 #include <netinet6/scope6_var.h>
126 #include <netinet/icmp6.h>
127 #include <netinet6/mld6.h>
128 #include <netinet6/mld6_var.h>
129
130 /* Lock group and attribute for mld_mtx */
131 static lck_attr_t *mld_mtx_attr;
132 static lck_grp_t *mld_mtx_grp;
133 static lck_grp_attr_t *mld_mtx_grp_attr;
134
135 /*
136 * Locking and reference counting:
137 *
138 * mld_mtx mainly protects mli_head. In cases where both mld_mtx and
139 * in6_multihead_lock must be held, the former must be acquired first in order
140 * to maintain lock ordering. It is not a requirement that mld_mtx be
141 * acquired first before in6_multihead_lock, but in case both must be acquired
142 * in succession, the correct lock ordering must be followed.
143 *
144 * Instead of walking the if_multiaddrs list at the interface and returning
145 * the ifma_protospec value of a matching entry, we search the global list
146 * of in6_multi records and find it that way; this is done with in6_multihead
147 * lock held. Doing so avoids the race condition issues that many other BSDs
148 * suffer from (therefore in our implementation, ifma_protospec will never be
149 * NULL for as long as the in6_multi is valid.)
150 *
151  * The above creates a requirement for the in6_multi to stay in the in6_multihead
152  * list even after the final MLD leave (in MLDv2 mode), until the leave no longer
153  * needs to be retransmitted (this is not required for MLDv1.)  In order to handle
154 * this, the request and reference counts of the in6_multi are bumped up when
155 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
156 * handler. Each in6_multi holds a reference to the underlying mld_ifinfo.
157 *
158 * Thus, the permitted lock order is:
159 *
160 * mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
161 *
162 * Any may be taken independently, but if any are held at the same time,
163 * the above lock order must be followed.
164 */
165 static decl_lck_mtx_data(, mld_mtx);
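/*
 * Illustrative sketch (not part of the original source): a path that needs
 * several of the above locks takes them in the documented order, e.g.
 *
 *        MLD_LOCK();                     // mld_mtx first
 *        in6_multihead_lock_shared();    // then the global membership lock
 *        IN6M_LOCK(inm);                 // then the per-group lock
 *        MLI_LOCK(mli);                  // finally the per-interface lock
 *        ...
 *        MLI_UNLOCK(mli);
 *        IN6M_UNLOCK(inm);
 *        in6_multihead_lock_done();
 *        MLD_UNLOCK();
 *
 * Any subset may be taken on its own; only the relative order matters.
 */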
166
167 SLIST_HEAD(mld_in6m_relhead, in6_multi);
168
169 static void mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
170 static struct mld_ifinfo *mli_alloc(int);
171 static void mli_free(struct mld_ifinfo *);
172 static void mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
173 static void mld_dispatch_packet(struct mbuf *);
174 static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *,
175 struct mld_tparams *);
176 static int mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *,
177 struct mld_tparams *);
178 static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
179 struct mld_tparams *, const int);
180 #ifdef MLD_DEBUG
181 static const char * mld_rec_type_to_str(const int);
182 #endif
183 static uint32_t mld_set_version(struct mld_ifinfo *, const int);
184 static void mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
185 static void mld_dispatch_queue_locked(struct mld_ifinfo *, struct ifqueue *, int);
186 static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
187 /*const*/ struct mld_hdr *);
188 static int mld_v1_input_report(struct ifnet *, struct mbuf *,
189 const struct ip6_hdr *, /*const*/ struct mld_hdr *);
190 static void mld_v1_process_group_timer(struct in6_multi *, const int);
191 static void mld_v1_process_querier_timers(struct mld_ifinfo *);
192 static int mld_v1_transmit_report(struct in6_multi *, const int);
193 static uint32_t mld_v1_update_group(struct in6_multi *, const int);
194 static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
195 static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
196 static struct mbuf *
197 mld_v2_encap_report(struct ifnet *, struct mbuf *);
198 static int mld_v2_enqueue_filter_change(struct ifqueue *,
199 struct in6_multi *);
200 static int mld_v2_enqueue_group_record(struct ifqueue *,
201 struct in6_multi *, const int, const int, const int,
202 const int);
203 static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
204 struct mbuf *, const int, const int);
205 static int mld_v2_merge_state_changes(struct in6_multi *,
206 struct ifqueue *);
207 static void mld_v2_process_group_timers(struct mld_ifinfo *,
208 struct ifqueue *, struct ifqueue *,
209 struct in6_multi *, const int);
210 static int mld_v2_process_group_query(struct in6_multi *,
211 int, struct mbuf *, const int);
212 static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
213 static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
214 static int sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;
215
216 static int mld_timeout_run; /* MLD timer is scheduled to run */
217 static void mld_timeout(void *);
218 static void mld_sched_timeout(void);
219
220 /*
221 * Normative references: RFC 2710, RFC 3590, RFC 3810.
222 */
223 static struct timeval mld_gsrdelay = {.tv_sec = 10, .tv_usec = 0};
224 static LIST_HEAD(, mld_ifinfo) mli_head;
225
226 static int querier_present_timers_running6;
227 static int interface_timers_running6;
228 static int state_change_timers_running6;
229 static int current_state_timers_running6;
230
231 static unsigned int mld_mli_list_genid;
232 /*
233 * Subsystem lock macros.
234 */
235 #define MLD_LOCK() \
236 lck_mtx_lock(&mld_mtx)
237 #define MLD_LOCK_ASSERT_HELD() \
238 LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_OWNED)
239 #define MLD_LOCK_ASSERT_NOTHELD() \
240 LCK_MTX_ASSERT(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
241 #define MLD_UNLOCK() \
242 lck_mtx_unlock(&mld_mtx)
243
244 #define MLD_ADD_DETACHED_IN6M(_head, _in6m) { \
245 SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle); \
246 }
247
248 #define MLD_REMOVE_DETACHED_IN6M(_head) { \
249 struct in6_multi *_in6m, *_inm_tmp; \
250 SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) { \
251 SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle); \
252 IN6M_REMREF(_in6m); \
253 } \
254 VERIFY(SLIST_EMPTY(_head)); \
255 }
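/*
 * Usage sketch (illustrative, not part of the original source): callers that
 * may drop the final in6_multi references declare a local list head, let the
 * MLD code collect detached records into it while the locks are held, and
 * only release them once every lock has been dropped, e.g. as
 * mld_domifdetach() does below:
 *
 *        SLIST_HEAD(, in6_multi) in6m_dthead;
 *
 *        SLIST_INIT(&in6m_dthead);
 *        MLD_LOCK();
 *        mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
 *        MLD_UNLOCK();
 *        MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 */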
256
257 #define MLI_ZONE_MAX 64 /* maximum elements in zone */
258 #define MLI_ZONE_NAME "mld_ifinfo" /* zone name */
259
260 static unsigned int mli_size; /* size of zone element */
261 static struct zone *mli_zone; /* zone for mld_ifinfo */
262
263 SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */
264
265 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
266 "IPv6 Multicast Listener Discovery");
267 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
268 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
269 &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
270 "Rate limit for MLDv2 Group-and-Source queries in seconds");
271
272 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
273 sysctl_mld_ifinfo, "Per-interface MLDv2 state");
274
275 static int mld_v1enable = 1;
276 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
277 &mld_v1enable, 0, "Enable fallback to MLDv1");
278
279 static int mld_v2enable = 1;
280 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
281 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
282 &mld_v2enable, 0, sysctl_mld_v2enable, "I",
283 "Enable MLDv2 (debug purposes only)");
284
285 static int mld_use_allow = 1;
286 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
287 &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
288
289 #ifdef MLD_DEBUG
290 int mld_debug = 0;
291 SYSCTL_INT(_net_inet6_mld, OID_AUTO,
292 debug, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_debug, 0, "");
293 #endif
294 /*
295 * Packed Router Alert option structure declaration.
296 */
297 struct mld_raopt {
298 struct ip6_hbh hbh;
299 struct ip6_opt pad;
300 struct ip6_opt_router ra;
301 } __packed;
302
303 /*
304 * Router Alert hop-by-hop option header.
305 */
306 static struct mld_raopt mld_ra = {
307 .hbh = { .ip6h_nxt = 0, .ip6h_len = 0 },
308 .pad = { .ip6o_type = IP6OPT_PADN, .ip6o_len = 0 },
309 .ra = {
310 .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
311 .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
312 .ip6or_value = {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
313 (IP6OPT_RTALERT_MLD & 0xFF) }
314 }
315 };
316 static struct ip6_pktopts mld_po;
317
318 /* Store the MLDv2 record count in the mbuf's module-private scratch space */
319 #define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
320
321 static __inline void
322 mld_save_context(struct mbuf *m, struct ifnet *ifp)
323 {
324 m->m_pkthdr.rcvif = ifp;
325 }
326
327 static __inline void
328 mld_scrub_context(struct mbuf *m)
329 {
330 m->m_pkthdr.rcvif = NULL;
331 }
332
333 /*
334 * Restore context from a queued output chain.
335 * Return saved ifp.
336 */
337 static __inline struct ifnet *
338 mld_restore_context(struct mbuf *m)
339 {
340 return m->m_pkthdr.rcvif;
341 }
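/*
 * Illustrative sketch (not part of the original source): reports are built
 * ahead of time and sit on an MLD output queue, so the transmitting
 * interface is stashed in the mbuf itself and recovered at dispatch time:
 *
 *        mld_save_context(m, ifp);        // when the report is enqueued
 *        ...                              // later, from the timeout path
 *        ifp = mld_restore_context(m);    // recover the saved interface
 *        mld_scrub_context(m);            // clear it before transmission
 *
 * rcvif is only borrowed for this purpose while the mbuf is queued; the
 * dispatch path is expected to scrub it before handing the chain to output.
 */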
342
343 /*
344  * Retrieve or set the threshold between group-and-source queries, in seconds.
345 */
346 static int
347 sysctl_mld_gsr SYSCTL_HANDLER_ARGS
348 {
349 #pragma unused(arg1, arg2)
350 int error;
351 int i;
352
353 MLD_LOCK();
354
355 i = mld_gsrdelay.tv_sec;
356
357 error = sysctl_handle_int(oidp, &i, 0, req);
358 if (error || !req->newptr) {
359 goto out_locked;
360 }
361
362 if (i < -1 || i >= 60) {
363 error = EINVAL;
364 goto out_locked;
365 }
366
367 mld_gsrdelay.tv_sec = i;
368
369 out_locked:
370 MLD_UNLOCK();
371 return error;
372 }
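/*
 * Example (illustrative, not part of the original source): this handler
 * backs the net.inet6.mld.gsrdelay MIB entry, so the rate limit can be
 * inspected or tuned from userland with sysctl(8), e.g.
 *
 *        # sysctl net.inet6.mld.gsrdelay
 *        net.inet6.mld.gsrdelay: 10
 *        # sysctl -w net.inet6.mld.gsrdelay=20
 *
 * Values outside the range [-1, 59] are rejected with EINVAL above.
 */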
373 /*
374 * Expose struct mld_ifinfo to userland, keyed by ifindex.
375 * For use by ifmcstat(8).
376 *
377 */
378 static int
379 sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
380 {
381 #pragma unused(oidp)
382 int *name;
383 int error;
384 u_int namelen;
385 struct ifnet *ifp;
386 struct mld_ifinfo *mli;
387 struct mld_ifinfo_u mli_u;
388
389 name = (int *)arg1;
390 namelen = arg2;
391
392 if (req->newptr != USER_ADDR_NULL) {
393 return EPERM;
394 }
395
396 if (namelen != 1) {
397 return EINVAL;
398 }
399
400 MLD_LOCK();
401
402 if (name[0] <= 0 || name[0] > (u_int)if_index) {
403 error = ENOENT;
404 goto out_locked;
405 }
406
407 error = ENOENT;
408
409 ifnet_head_lock_shared();
410 ifp = ifindex2ifnet[name[0]];
411 ifnet_head_done();
412 if (ifp == NULL) {
413 goto out_locked;
414 }
415
416 bzero(&mli_u, sizeof(mli_u));
417
418 LIST_FOREACH(mli, &mli_head, mli_link) {
419 MLI_LOCK(mli);
420 if (ifp != mli->mli_ifp) {
421 MLI_UNLOCK(mli);
422 continue;
423 }
424
425 mli_u.mli_ifindex = mli->mli_ifp->if_index;
426 mli_u.mli_version = mli->mli_version;
427 mli_u.mli_v1_timer = mli->mli_v1_timer;
428 mli_u.mli_v2_timer = mli->mli_v2_timer;
429 mli_u.mli_flags = mli->mli_flags;
430 mli_u.mli_rv = mli->mli_rv;
431 mli_u.mli_qi = mli->mli_qi;
432 mli_u.mli_qri = mli->mli_qri;
433 mli_u.mli_uri = mli->mli_uri;
434 MLI_UNLOCK(mli);
435
436 error = SYSCTL_OUT(req, &mli_u, sizeof(mli_u));
437 break;
438 }
439
440 out_locked:
441 MLD_UNLOCK();
442 return error;
443 }
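/*
 * Userland usage sketch (illustrative, not part of the original source):
 * the node takes exactly one extra name component, the interface index,
 * and is read-only.  Something along these lines is what ifmcstat(8) is
 * expected to do:
 *
 *        int mib[CTL_MAXNAME];
 *        size_t miblen = CTL_MAXNAME;
 *        struct mld_ifinfo_u mli_u;
 *        size_t len = sizeof (mli_u);
 *
 *        sysctlnametomib("net.inet6.mld.ifinfo", mib, &miblen);
 *        mib[miblen] = if_nametoindex("en0");    // hypothetical interface
 *        sysctl(mib, (u_int)miblen + 1, &mli_u, &len, NULL, 0);
 */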
444
445 static int
446 sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
447 {
448 #pragma unused(arg1, arg2)
449 int error;
450 int i;
451 struct mld_ifinfo *mli;
452 struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
453
454 MLD_LOCK();
455
456 i = mld_v2enable;
457
458 error = sysctl_handle_int(oidp, &i, 0, req);
459 if (error || !req->newptr) {
460 goto out_locked;
461 }
462
463 if (i < 0 || i > 1) {
464 error = EINVAL;
465 goto out_locked;
466 }
467
468 mld_v2enable = i;
469 /*
470 * If we enabled v2, the state transition will take care of upgrading
471 * the MLD version back to v2. Otherwise, we have to explicitly
472 * downgrade. Note that this functionality is to be used for debugging.
473 */
474 if (mld_v2enable == 1) {
475 goto out_locked;
476 }
477
478 LIST_FOREACH(mli, &mli_head, mli_link) {
479 MLI_LOCK(mli);
480 if (mld_set_version(mli, MLD_VERSION_1) > 0) {
481 mtp.qpt = 1;
482 }
483 MLI_UNLOCK(mli);
484 }
485
486 out_locked:
487 MLD_UNLOCK();
488
489 mld_set_timeout(&mtp);
490
491 return error;
492 }
493
494 /*
495 * Dispatch an entire queue of pending packet chains.
496 *
497 * Must not be called with in6m_lock held.
498  * XXX This routine drops both the MLD global lock and the mli lock.
499  * Make sure that the calling routine takes a reference on the mli
500  * before calling this routine.
501  * Also, if we are traversing mli_head, remember to check the mli list
502  * generation count and restart the loop if the generation count has
503  * changed.
504 */
505 static void
506 mld_dispatch_queue_locked(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
507 {
508 struct mbuf *m;
509
510 MLD_LOCK_ASSERT_HELD();
511
512 if (mli != NULL) {
513 MLI_LOCK_ASSERT_HELD(mli);
514 }
515
516 for (;;) {
517 IF_DEQUEUE(ifq, m);
518 if (m == NULL) {
519 break;
520 }
521 MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
522 (uint64_t)VM_KERNEL_ADDRPERM(ifq),
523 (uint64_t)VM_KERNEL_ADDRPERM(m)));
524
525 if (mli != NULL) {
526 MLI_UNLOCK(mli);
527 }
528 MLD_UNLOCK();
529
530 mld_dispatch_packet(m);
531
532 MLD_LOCK();
533 if (mli != NULL) {
534 MLI_LOCK(mli);
535 }
536
537 if (--limit == 0) {
538 break;
539 }
540 }
541
542 if (mli != NULL) {
543 MLI_LOCK_ASSERT_HELD(mli);
544 }
545 }
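/*
 * Caller pattern sketch (illustrative, not part of the original source):
 * since the routine above drops both the mli lock and the MLD global lock
 * around each transmission, callers walking mli_head hold an extra
 * reference and re-check the list generation afterwards, e.g.
 *
 *        MLI_LOCK(mli);
 *        MLI_ADDREF_LOCKED(mli);
 *        mld_dispatch_queue_locked(mli, &mli->mli_v1q, 0);
 *        MLI_UNLOCK(mli);
 *        MLI_REMREF(mli);
 *        if (genid != mld_mli_list_genid) {
 *                // restart the mli_head walk
 *        }
 */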
546
547 /*
548 * Filter outgoing MLD report state by group.
549 *
550 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
551 * and node-local addresses. However, kernel and socket consumers
552 * always embed the KAME scope ID in the address provided, so strip it
553  * when performing the comparison.
554 * Note: This is not the same as the *multicast* scope.
555 *
556 * Return zero if the given group is one for which MLD reports
557 * should be suppressed, or non-zero if reports should be issued.
558 */
559 static __inline__ int
560 mld_is_addr_reported(const struct in6_addr *addr)
561 {
562 VERIFY(IN6_IS_ADDR_MULTICAST(addr));
563
564 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) {
565 return 0;
566 }
567
568 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
569 struct in6_addr tmp = *addr;
570 in6_clearscope(&tmp);
571 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) {
572 return 0;
573 }
574 }
575
576 return 1;
577 }
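/*
 * Examples (illustrative, not part of the original source), based on the
 * checks above:
 *
 *        ff01::1               node-local scope    -> suppressed (0)
 *        ff02::1               all-nodes           -> suppressed (0)
 *        ff02::1:ff12:3456     solicited-node      -> reported   (1)
 *        ff05::1:3             site-local          -> reported   (1)
 */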
578
579 /*
580 * Attach MLD when PF_INET6 is attached to an interface.
581 */
582 struct mld_ifinfo *
583 mld_domifattach(struct ifnet *ifp, int how)
584 {
585 struct mld_ifinfo *mli;
586
587 MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
588 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
589
590 mli = mli_alloc(how);
591 if (mli == NULL) {
592 return NULL;
593 }
594
595 MLD_LOCK();
596
597 MLI_LOCK(mli);
598 mli_initvar(mli, ifp, 0);
599 mli->mli_debug |= IFD_ATTACHED;
600 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
601 MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
602 MLI_UNLOCK(mli);
603 ifnet_lock_shared(ifp);
604 mld6_initsilent(ifp, mli);
605 ifnet_lock_done(ifp);
606
607 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
608 mld_mli_list_genid++;
609
610 MLD_UNLOCK();
611
612 MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
613 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
614
615 return mli;
616 }
617
618 /*
619 * Attach MLD when PF_INET6 is reattached to an interface. Caller is
620 * expected to have an outstanding reference to the mli.
621 */
622 void
623 mld_domifreattach(struct mld_ifinfo *mli)
624 {
625 struct ifnet *ifp;
626
627 MLD_LOCK();
628
629 MLI_LOCK(mli);
630 VERIFY(!(mli->mli_debug & IFD_ATTACHED));
631 ifp = mli->mli_ifp;
632 VERIFY(ifp != NULL);
633 mli_initvar(mli, ifp, 1);
634 mli->mli_debug |= IFD_ATTACHED;
635 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
636 MLI_UNLOCK(mli);
637 ifnet_lock_shared(ifp);
638 mld6_initsilent(ifp, mli);
639 ifnet_lock_done(ifp);
640
641 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
642 mld_mli_list_genid++;
643
644 MLD_UNLOCK();
645
646 MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
647 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
648 }
649
650 /*
651 * Hook for domifdetach.
652 */
653 void
654 mld_domifdetach(struct ifnet *ifp)
655 {
656 SLIST_HEAD(, in6_multi) in6m_dthead;
657
658 SLIST_INIT(&in6m_dthead);
659
660 MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
661 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
662
663 MLD_LOCK();
664 mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
665 MLD_UNLOCK();
666
667 /* Now that we've dropped all locks, release the detached records */
668 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
669 }
670
671 /*
672 * Called at interface detach time. Note that we only flush all deferred
673 * responses and record releases; all remaining inm records and their source
674 * entries related to this interface are left intact, in order to handle
675 * the reattach case.
676 */
677 static void
678 mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
679 {
680 struct mld_ifinfo *mli, *tmli;
681
682 MLD_LOCK_ASSERT_HELD();
683
684 LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
685 MLI_LOCK(mli);
686 if (mli->mli_ifp == ifp) {
687 /*
688 * Free deferred General Query responses.
689 */
690 IF_DRAIN(&mli->mli_gq);
691 IF_DRAIN(&mli->mli_v1q);
692 mld_flush_relq(mli, in6m_dthead);
693 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
694 mli->mli_debug &= ~IFD_ATTACHED;
695 MLI_UNLOCK(mli);
696
697 LIST_REMOVE(mli, mli_link);
698 MLI_REMREF(mli); /* release mli_head reference */
699 mld_mli_list_genid++;
700 return;
701 }
702 MLI_UNLOCK(mli);
703 }
704 panic("%s: mld_ifinfo not found for ifp %p(%s)\n", __func__,
705 ifp, ifp->if_xname);
706 }
707
708 __private_extern__ void
709 mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
710 {
711 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
712
713 MLI_LOCK_ASSERT_NOTHELD(mli);
714 MLI_LOCK(mli);
715 if (!(ifp->if_flags & IFF_MULTICAST) &&
716 (ifp->if_eflags & (IFEF_IPV6_ND6ALT | IFEF_LOCALNET_PRIVATE))) {
717 mli->mli_flags |= MLIF_SILENT;
718 } else {
719 mli->mli_flags &= ~MLIF_SILENT;
720 }
721 MLI_UNLOCK(mli);
722 }
723
724 static void
725 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
726 {
727 MLI_LOCK_ASSERT_HELD(mli);
728
729 mli->mli_ifp = ifp;
730 if (mld_v2enable) {
731 mli->mli_version = MLD_VERSION_2;
732 } else {
733 mli->mli_version = MLD_VERSION_1;
734 }
735 mli->mli_flags = 0;
736 mli->mli_rv = MLD_RV_INIT;
737 mli->mli_qi = MLD_QI_INIT;
738 mli->mli_qri = MLD_QRI_INIT;
739 mli->mli_uri = MLD_URI_INIT;
740
741 if (mld_use_allow) {
742 mli->mli_flags |= MLIF_USEALLOW;
743 }
744 if (!reattach) {
745 SLIST_INIT(&mli->mli_relinmhead);
746 }
747
748 /*
749 * Responses to general queries are subject to bounds.
750 */
751 mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
752 mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
753 }
754
755 static struct mld_ifinfo *
756 mli_alloc(int how)
757 {
758 struct mld_ifinfo *mli;
759
760 mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
761 if (mli != NULL) {
762 bzero(mli, mli_size);
763 lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
764 mli->mli_debug |= IFD_ALLOC;
765 }
766 return mli;
767 }
768
769 static void
770 mli_free(struct mld_ifinfo *mli)
771 {
772 MLI_LOCK(mli);
773 if (mli->mli_debug & IFD_ATTACHED) {
774 panic("%s: attached mli=%p is being freed", __func__, mli);
775 /* NOTREACHED */
776 } else if (mli->mli_ifp != NULL) {
777 panic("%s: ifp not NULL for mli=%p", __func__, mli);
778 /* NOTREACHED */
779 } else if (!(mli->mli_debug & IFD_ALLOC)) {
780 panic("%s: mli %p cannot be freed", __func__, mli);
781 /* NOTREACHED */
782 } else if (mli->mli_refcnt != 0) {
783 panic("%s: non-zero refcnt mli=%p", __func__, mli);
784 /* NOTREACHED */
785 }
786 mli->mli_debug &= ~IFD_ALLOC;
787 MLI_UNLOCK(mli);
788
789 lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
790 zfree(mli_zone, mli);
791 }
792
793 void
794 mli_addref(struct mld_ifinfo *mli, int locked)
795 {
796 if (!locked) {
797 MLI_LOCK_SPIN(mli);
798 } else {
799 MLI_LOCK_ASSERT_HELD(mli);
800 }
801
802 if (++mli->mli_refcnt == 0) {
803 panic("%s: mli=%p wraparound refcnt", __func__, mli);
804 /* NOTREACHED */
805 }
806 if (!locked) {
807 MLI_UNLOCK(mli);
808 }
809 }
810
811 void
812 mli_remref(struct mld_ifinfo *mli)
813 {
814 SLIST_HEAD(, in6_multi) in6m_dthead;
815 struct ifnet *ifp;
816
817 MLI_LOCK_SPIN(mli);
818
819 if (mli->mli_refcnt == 0) {
820 panic("%s: mli=%p negative refcnt", __func__, mli);
821 /* NOTREACHED */
822 }
823
824 --mli->mli_refcnt;
825 if (mli->mli_refcnt > 0) {
826 MLI_UNLOCK(mli);
827 return;
828 }
829
830 ifp = mli->mli_ifp;
831 mli->mli_ifp = NULL;
832 IF_DRAIN(&mli->mli_gq);
833 IF_DRAIN(&mli->mli_v1q);
834 SLIST_INIT(&in6m_dthead);
835 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
836 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
837 MLI_UNLOCK(mli);
838
839 /* Now that we've dropped all locks, release the detached records */
840 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
841
842 MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
843 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
844
845 mli_free(mli);
846 }
847
848 /*
849 * Process a received MLDv1 general or address-specific query.
850 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
851 *
852 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
853 * mld_addr. This is OK as we own the mbuf chain.
854 */
855 static int
856 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
857 /*const*/ struct mld_hdr *mld)
858 {
859 struct mld_ifinfo *mli;
860 struct in6_multi *inm;
861 int err = 0, is_general_query;
862 uint16_t timer;
863 struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
864
865 MLD_LOCK_ASSERT_NOTHELD();
866
867 is_general_query = 0;
868
869 if (!mld_v1enable) {
870 MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
871 __func__, ip6_sprintf(&mld->mld_addr),
872 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
873 goto done;
874 }
875
876 /*
877 * RFC3810 Section 6.2: MLD queries must originate from
878 * a router's link-local address.
879 */
880 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
881 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
882 __func__, ip6_sprintf(&ip6->ip6_src),
883 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
884 goto done;
885 }
886
887 /*
888 * Do address field validation upfront before we accept
889 * the query.
890 */
891 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
892 /*
893 * MLDv1 General Query.
894 * If this was not sent to the all-nodes group, ignore it.
895 */
896 struct in6_addr dst;
897
898 dst = ip6->ip6_dst;
899 in6_clearscope(&dst);
900 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
901 err = EINVAL;
902 goto done;
903 }
904 is_general_query = 1;
905 } else {
906 /*
907 * Embed scope ID of receiving interface in MLD query for
908 * lookup whilst we don't hold other locks.
909 */
910 (void)in6_setscope(&mld->mld_addr, ifp, NULL);
911 }
912
913 /*
914 * Switch to MLDv1 host compatibility mode.
915 */
916 mli = MLD_IFINFO(ifp);
917 VERIFY(mli != NULL);
918
919 MLI_LOCK(mli);
920 mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
921 MLI_UNLOCK(mli);
922
923 timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
924 if (timer == 0) {
925 timer = 1;
926 }
927
928 if (is_general_query) {
929 struct in6_multistep step;
930
931 MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
932 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
933 /*
934 * For each reporting group joined on this
935 * interface, kick the report timer.
936 */
937 in6_multihead_lock_shared();
938 IN6_FIRST_MULTI(step, inm);
939 while (inm != NULL) {
940 IN6M_LOCK(inm);
941 if (inm->in6m_ifp == ifp) {
942 mtp.cst += mld_v1_update_group(inm, timer);
943 }
944 IN6M_UNLOCK(inm);
945 IN6_NEXT_MULTI(step, inm);
946 }
947 in6_multihead_lock_done();
948 } else {
949 /*
950 * MLDv1 Group-Specific Query.
951 * If this is a group-specific MLDv1 query, we need only
952 * look up the single group to process it.
953 */
954 in6_multihead_lock_shared();
955 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
956 in6_multihead_lock_done();
957
958 if (inm != NULL) {
959 IN6M_LOCK(inm);
960 MLD_PRINTF(("%s: process v1 query %s on "
961 "ifp 0x%llx(%s)\n", __func__,
962 ip6_sprintf(&mld->mld_addr),
963 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
964 mtp.cst = mld_v1_update_group(inm, timer);
965 IN6M_UNLOCK(inm);
966 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
967 }
968 /* XXX Clear embedded scope ID as userland won't expect it. */
969 in6_clearscope(&mld->mld_addr);
970 }
971 done:
972 mld_set_timeout(&mtp);
973
974 return err;
975 }
976
977 /*
978 * Update the report timer on a group in response to an MLDv1 query.
979 *
980 * If we are becoming the reporting member for this group, start the timer.
981 * If we already are the reporting member for this group, and timer is
982 * below the threshold, reset it.
983 *
984 * We may be updating the group for the first time since we switched
985 * to MLDv2. If we are, then we must clear any recorded source lists,
986 * and transition to REPORTING state; the group timer is overloaded
987 * for group and group-source query responses.
988 *
989 * Unlike MLDv2, the delay per group should be jittered
990 * to avoid bursts of MLDv1 reports.
991 */
992 static uint32_t
993 mld_v1_update_group(struct in6_multi *inm, const int timer)
994 {
995 IN6M_LOCK_ASSERT_HELD(inm);
996
997 MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
998 ip6_sprintf(&inm->in6m_addr),
999 if_name(inm->in6m_ifp), timer));
1000
1001 switch (inm->in6m_state) {
1002 case MLD_NOT_MEMBER:
1003 case MLD_SILENT_MEMBER:
1004 break;
1005 case MLD_REPORTING_MEMBER:
1006 if (inm->in6m_timer != 0 &&
1007 inm->in6m_timer <= timer) {
1008 MLD_PRINTF(("%s: REPORTING and timer running, "
1009 "skipping.\n", __func__));
1010 break;
1011 }
1012 /* FALLTHROUGH */
1013 case MLD_SG_QUERY_PENDING_MEMBER:
1014 case MLD_G_QUERY_PENDING_MEMBER:
1015 case MLD_IDLE_MEMBER:
1016 case MLD_LAZY_MEMBER:
1017 case MLD_AWAKENING_MEMBER:
1018 MLD_PRINTF(("%s: ->REPORTING\n", __func__));
1019 inm->in6m_state = MLD_REPORTING_MEMBER;
1020 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1021 break;
1022 case MLD_SLEEPING_MEMBER:
1023 MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
1024 inm->in6m_state = MLD_AWAKENING_MEMBER;
1025 break;
1026 case MLD_LEAVING_MEMBER:
1027 break;
1028 }
1029
1030 return inm->in6m_timer;
1031 }
1032
1033 /*
1034 * Process a received MLDv2 general, group-specific or
1035 * group-and-source-specific query.
1036 *
1037 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
1038 *
1039 * Return 0 if successful, otherwise an appropriate error code is returned.
1040 */
1041 static int
1042 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
1043 struct mbuf *m, const int off, const int icmp6len)
1044 {
1045 struct mld_ifinfo *mli;
1046 struct mldv2_query *mld;
1047 struct in6_multi *inm;
1048 uint32_t maxdelay, nsrc, qqi;
1049 int err = 0, is_general_query;
1050 uint16_t timer;
1051 uint8_t qrv;
1052 struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
1053
1054 MLD_LOCK_ASSERT_NOTHELD();
1055
1056 is_general_query = 0;
1057
1058 if (!mld_v2enable) {
1059 MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
1060 __func__, ip6_sprintf(&ip6->ip6_src),
1061 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1062 goto done;
1063 }
1064
1065 /*
1066 * RFC3810 Section 6.2: MLD queries must originate from
1067 * a router's link-local address.
1068 */
1069 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
1070 MLD_PRINTF(("%s: ignore v2 query src %s on ifp 0x%llx(%s)\n",
1071 __func__, ip6_sprintf(&ip6->ip6_src),
1072 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1073 goto done;
1074 }
1075
1076 MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
1077 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1078
1079 mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
1080
1081 maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
1082 if (maxdelay >= 32768) {
1083 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
1084 (MLD_MRC_EXP(maxdelay) + 3);
1085 }
1086 timer = maxdelay / MLD_TIMER_SCALE;
1087 if (timer == 0) {
1088 timer = 1;
1089 }
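        /*
         * Worked example (illustrative, not part of the original source),
         * assuming MLD_MRC_MANT()/MLD_MRC_EXP() extract the 12-bit mantissa
         * and 3-bit exponent of the RFC 3810 Maximum Response Code:
         *
         *        code < 32768  : used as-is, e.g. 1000 stays 1000
         *        code 0x8000   : exp 0, mant 0 -> (0 | 0x1000) << 3 = 32768
         *        code 0x9000   : exp 1, mant 0 ->      0x1000  << 4 = 65536
         *
         * i.e. the exponential range picks up exactly where the linear range
         * ends; the QQIC field below is decoded along the same lines.
         */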
1090
1091 qrv = MLD_QRV(mld->mld_misc);
1092 if (qrv < 2) {
1093 MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1094 qrv, MLD_RV_INIT));
1095 qrv = MLD_RV_INIT;
1096 }
1097
1098 qqi = mld->mld_qqi;
1099 if (qqi >= 128) {
1100 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
1101 (MLD_QQIC_EXP(mld->mld_qqi) + 3);
1102 }
1103
1104 nsrc = ntohs(mld->mld_numsrc);
1105 if (nsrc > MLD_MAX_GS_SOURCES) {
1106 err = EMSGSIZE;
1107 goto done;
1108 }
1109 if (icmp6len < sizeof(struct mldv2_query) +
1110 (nsrc * sizeof(struct in6_addr))) {
1111 err = EMSGSIZE;
1112 goto done;
1113 }
1114
1115 /*
1116 * Do further input validation upfront to avoid resetting timers
1117 * should we need to discard this query.
1118 */
1119 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
1120 /*
1121 * A general query with a source list has undefined
1122 * behaviour; discard it.
1123 */
1124 if (nsrc > 0) {
1125 err = EINVAL;
1126 goto done;
1127 }
1128 is_general_query = 1;
1129 } else {
1130 /*
1131 * Embed scope ID of receiving interface in MLD query for
1132 * lookup whilst we don't hold other locks (due to KAME
1133 * locking lameness). We own this mbuf chain just now.
1134 */
1135 (void)in6_setscope(&mld->mld_addr, ifp, NULL);
1136 }
1137
1138 mli = MLD_IFINFO(ifp);
1139 VERIFY(mli != NULL);
1140
1141 MLI_LOCK(mli);
1142 /*
1143 * Discard the v2 query if we're in Compatibility Mode.
1144 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
1145 * until the Old Version Querier Present timer expires.
1146 */
1147 if (mli->mli_version != MLD_VERSION_2) {
1148 MLI_UNLOCK(mli);
1149 goto done;
1150 }
1151
1152 mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
1153 mli->mli_rv = qrv;
1154 mli->mli_qi = qqi;
1155 mli->mli_qri = MAX(timer, MLD_QRI_MIN);
1156
1157 MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
1158 mli->mli_qi, mli->mli_qri));
1159
1160 if (is_general_query) {
1161 /*
1162 * MLDv2 General Query.
1163 *
1164 * Schedule a current-state report on this ifp for
1165 * all groups, possibly containing source lists.
1166 *
1167 * If there is a pending General Query response
1168 * scheduled earlier than the selected delay, do
1169 * not schedule any other reports.
1170 * Otherwise, reset the interface timer.
1171 */
1172 MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
1173 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1174 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1175 mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1176 }
1177 MLI_UNLOCK(mli);
1178 } else {
1179 MLI_UNLOCK(mli);
1180 /*
1181 * MLDv2 Group-specific or Group-and-source-specific Query.
1182 *
1183 * Group-source-specific queries are throttled on
1184 * a per-group basis to defeat denial-of-service attempts.
1185 * Queries for groups we are not a member of on this
1186 * link are simply ignored.
1187 */
1188 in6_multihead_lock_shared();
1189 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1190 in6_multihead_lock_done();
1191 if (inm == NULL) {
1192 goto done;
1193 }
1194
1195 IN6M_LOCK(inm);
1196 if (nsrc > 0) {
1197 if (!ratecheck(&inm->in6m_lastgsrtv,
1198 &mld_gsrdelay)) {
1199 MLD_PRINTF(("%s: GS query throttled.\n",
1200 __func__));
1201 IN6M_UNLOCK(inm);
1202 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1203 goto done;
1204 }
1205 }
1206 MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
1207 __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1208 /*
1209 * If there is a pending General Query response
1210 * scheduled sooner than the selected delay, no
1211 * further report need be scheduled.
1212 * Otherwise, prepare to respond to the
1213 * group-specific or group-and-source query.
1214 */
1215 MLI_LOCK(mli);
1216 mtp.it = mli->mli_v2_timer;
1217 MLI_UNLOCK(mli);
1218 if (mtp.it == 0 || mtp.it >= timer) {
1219 (void) mld_v2_process_group_query(inm, timer, m, off);
1220 mtp.cst = inm->in6m_timer;
1221 }
1222 IN6M_UNLOCK(inm);
1223 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1224 /* XXX Clear embedded scope ID as userland won't expect it. */
1225 in6_clearscope(&mld->mld_addr);
1226 }
1227 done:
1228 if (mtp.it > 0) {
1229 MLD_PRINTF(("%s: v2 general query response scheduled in "
1230 "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
1231 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1232 }
1233 mld_set_timeout(&mtp);
1234
1235 return err;
1236 }
1237
1238 /*
1239  * Process a received MLDv2 group-specific or group-and-source-specific
1240  * query.
1241  * Return <0 if any error occurred.  Currently this is ignored.
1242 */
1243 static int
1244 mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1245 const int off)
1246 {
1247 struct mldv2_query *mld;
1248 int retval;
1249 uint16_t nsrc;
1250
1251 IN6M_LOCK_ASSERT_HELD(inm);
1252
1253 retval = 0;
1254 mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1255
1256 switch (inm->in6m_state) {
1257 case MLD_NOT_MEMBER:
1258 case MLD_SILENT_MEMBER:
1259 case MLD_SLEEPING_MEMBER:
1260 case MLD_LAZY_MEMBER:
1261 case MLD_AWAKENING_MEMBER:
1262 case MLD_IDLE_MEMBER:
1263 case MLD_LEAVING_MEMBER:
1264 return retval;
1265 case MLD_REPORTING_MEMBER:
1266 case MLD_G_QUERY_PENDING_MEMBER:
1267 case MLD_SG_QUERY_PENDING_MEMBER:
1268 break;
1269 }
1270
1271 nsrc = ntohs(mld->mld_numsrc);
1272
1273 /*
1274 * Deal with group-specific queries upfront.
1275 * If any group query is already pending, purge any recorded
1276 * source-list state if it exists, and schedule a query response
1277 * for this group-specific query.
1278 */
1279 if (nsrc == 0) {
1280 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1281 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1282 in6m_clear_recorded(inm);
1283 timer = min(inm->in6m_timer, timer);
1284 }
1285 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1286 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1287 return retval;
1288 }
1289
1290 /*
1291 * Deal with the case where a group-and-source-specific query has
1292 * been received but a group-specific query is already pending.
1293 */
1294 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1295 timer = min(inm->in6m_timer, timer);
1296 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1297 return retval;
1298 }
1299
1300 /*
1301 * Finally, deal with the case where a group-and-source-specific
1302 * query has been received, where a response to a previous g-s-r
1303 * query exists, or none exists.
1304 * In this case, we need to parse the source-list which the Querier
1305 * has provided us with and check if we have any source list filter
1306  * entries at T1 for these sources. If we do not, there is no need to
1307  * schedule a report and the query may be dropped.
1308 * If we do, we must record them and schedule a current-state
1309 * report for those sources.
1310 */
1311 if (inm->in6m_nsrc > 0) {
1312 struct mbuf *m;
1313 uint8_t *sp;
1314 int i, nrecorded;
1315 int soff;
1316
1317 m = m0;
1318 soff = off + sizeof(struct mldv2_query);
1319 nrecorded = 0;
1320 for (i = 0; i < nsrc; i++) {
1321 sp = mtod(m, uint8_t *) + soff;
1322 retval = in6m_record_source(inm,
1323 (const struct in6_addr *)(void *)sp);
1324 if (retval < 0) {
1325 break;
1326 }
1327 nrecorded += retval;
1328 soff += sizeof(struct in6_addr);
1329 if (soff >= m->m_len) {
1330 soff = soff - m->m_len;
1331 m = m->m_next;
1332 if (m == NULL) {
1333 break;
1334 }
1335 }
1336 }
1337 if (nrecorded > 0) {
1338 MLD_PRINTF(("%s: schedule response to SG query\n",
1339 __func__));
1340 inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1341 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1342 }
1343 }
1344
1345 return retval;
1346 }
1347
1348 /*
1349 * Process a received MLDv1 host membership report.
1350 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1351 *
1352 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1353 * mld_addr. This is OK as we own the mbuf chain.
1354 */
1355 static int
1356 mld_v1_input_report(struct ifnet *ifp, struct mbuf *m,
1357 const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld)
1358 {
1359 struct in6_addr src, dst;
1360 struct in6_ifaddr *ia;
1361 struct in6_multi *inm;
1362
1363 if (!mld_v1enable) {
1364 MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
1365 __func__, ip6_sprintf(&mld->mld_addr),
1366 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1367 return 0;
1368 }
1369
1370 if ((ifp->if_flags & IFF_LOOPBACK) ||
1371 (m->m_pkthdr.pkt_flags & PKTF_LOOP)) {
1372 return 0;
1373 }
1374
1375 /*
1376 * MLDv1 reports must originate from a host's link-local address,
1377 * or the unspecified address (when booting).
1378 */
1379 src = ip6->ip6_src;
1380 in6_clearscope(&src);
1381 if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1382 MLD_PRINTF(("%s: ignore v1 report src %s on ifp 0x%llx(%s)\n",
1383 __func__, ip6_sprintf(&ip6->ip6_src),
1384 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1385 return EINVAL;
1386 }
1387
1388 /*
1389 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1390 * group, and must be directed to the group itself.
1391 */
1392 dst = ip6->ip6_dst;
1393 in6_clearscope(&dst);
1394 if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1395 !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1396 MLD_PRINTF(("%s: ignore v1 report dst %s on ifp 0x%llx(%s)\n",
1397 __func__, ip6_sprintf(&ip6->ip6_dst),
1398 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1399 return EINVAL;
1400 }
1401
1402 /*
1403 * Make sure we don't hear our own membership report, as fast
1404 * leave requires knowing that we are the only member of a
1405 * group. Assume we used the link-local address if available,
1406 * otherwise look for ::.
1407 *
1408 * XXX Note that scope ID comparison is needed for the address
1409 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1410 * performed for the on-wire address.
1411 */
1412 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
1413 if (ia != NULL) {
1414 IFA_LOCK(&ia->ia_ifa);
1415 if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))) {
1416 IFA_UNLOCK(&ia->ia_ifa);
1417 IFA_REMREF(&ia->ia_ifa);
1418 return 0;
1419 }
1420 IFA_UNLOCK(&ia->ia_ifa);
1421 IFA_REMREF(&ia->ia_ifa);
1422 } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1423 return 0;
1424 }
1425
1426 MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
1427 __func__, ip6_sprintf(&mld->mld_addr),
1428 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1429
1430 /*
1431 * Embed scope ID of receiving interface in MLD query for lookup
1432 * whilst we don't hold other locks (due to KAME locking lameness).
1433 */
1434 if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
1435 (void)in6_setscope(&mld->mld_addr, ifp, NULL);
1436 }
1437
1438 /*
1439 * MLDv1 report suppression.
1440 * If we are a member of this group, and our membership should be
1441 * reported, and our group timer is pending or about to be reset,
1442 * stop our group timer by transitioning to the 'lazy' state.
1443 */
1444 in6_multihead_lock_shared();
1445 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1446 in6_multihead_lock_done();
1447
1448 if (inm != NULL) {
1449 struct mld_ifinfo *mli;
1450
1451 IN6M_LOCK(inm);
1452 mli = inm->in6m_mli;
1453 VERIFY(mli != NULL);
1454
1455 MLI_LOCK(mli);
1456 /*
1457 * If we are in MLDv2 host mode, do not allow the
1458 * other host's MLDv1 report to suppress our reports.
1459 */
1460 if (mli->mli_version == MLD_VERSION_2) {
1461 MLI_UNLOCK(mli);
1462 IN6M_UNLOCK(inm);
1463 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1464 goto out;
1465 }
1466 MLI_UNLOCK(mli);
1467
1468 inm->in6m_timer = 0;
1469
1470 switch (inm->in6m_state) {
1471 case MLD_NOT_MEMBER:
1472 case MLD_SILENT_MEMBER:
1473 case MLD_SLEEPING_MEMBER:
1474 break;
1475 case MLD_REPORTING_MEMBER:
1476 case MLD_IDLE_MEMBER:
1477 case MLD_AWAKENING_MEMBER:
1478 MLD_PRINTF(("%s: report suppressed for %s on "
1479 "ifp 0x%llx(%s)\n", __func__,
1480 ip6_sprintf(&mld->mld_addr),
1481 (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
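                /* FALLTHROUGH (report heard; become a lazy member) */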
1482 case MLD_LAZY_MEMBER:
1483 inm->in6m_state = MLD_LAZY_MEMBER;
1484 break;
1485 case MLD_G_QUERY_PENDING_MEMBER:
1486 case MLD_SG_QUERY_PENDING_MEMBER:
1487 case MLD_LEAVING_MEMBER:
1488 break;
1489 }
1490 IN6M_UNLOCK(inm);
1491 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1492 }
1493
1494 out:
1495 /* XXX Clear embedded scope ID as userland won't expect it. */
1496 in6_clearscope(&mld->mld_addr);
1497
1498 return 0;
1499 }
1500
1501 /*
1502 * MLD input path.
1503 *
1504 * Assume query messages which fit in a single ICMPv6 message header
1505 * have been pulled up.
1506 * Assume that userland will want to see the message, even if it
1507 * otherwise fails kernel input validation; do not free it.
1508  * Pullup may, however, free the mbuf chain m if it fails.
1509 *
1510 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1511 */
1512 int
1513 mld_input(struct mbuf *m, int off, int icmp6len)
1514 {
1515 struct ifnet *ifp;
1516 struct ip6_hdr *ip6;
1517 struct mld_hdr *mld;
1518 int mldlen;
1519
1520 MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
1521 (uint64_t)VM_KERNEL_ADDRPERM(m), off));
1522
1523 ifp = m->m_pkthdr.rcvif;
1524
1525 ip6 = mtod(m, struct ip6_hdr *);
1526
1527 /* Pullup to appropriate size. */
1528 mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1529 if (mld->mld_type == MLD_LISTENER_QUERY &&
1530 icmp6len >= sizeof(struct mldv2_query)) {
1531 mldlen = sizeof(struct mldv2_query);
1532 } else {
1533 mldlen = sizeof(struct mld_hdr);
1534 }
1535 // check if mldv2_query/mld_hdr fits in the first mbuf
1536 IP6_EXTHDR_CHECK(m, off, mldlen, return IPPROTO_DONE);
1537 IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1538 if (mld == NULL) {
1539 icmp6stat.icp6s_badlen++;
1540 return IPPROTO_DONE;
1541 }
1542
1543 /*
1544 * Userland needs to see all of this traffic for implementing
1545 * the endpoint discovery portion of multicast routing.
1546 */
1547 switch (mld->mld_type) {
1548 case MLD_LISTENER_QUERY:
1549 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1550 if (icmp6len == sizeof(struct mld_hdr)) {
1551 if (mld_v1_input_query(ifp, ip6, mld) != 0) {
1552 return 0;
1553 }
1554 } else if (icmp6len >= sizeof(struct mldv2_query)) {
1555 if (mld_v2_input_query(ifp, ip6, m, off,
1556 icmp6len) != 0) {
1557 return 0;
1558 }
1559 }
1560 break;
1561 case MLD_LISTENER_REPORT:
1562 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1563 if (mld_v1_input_report(ifp, m, ip6, mld) != 0) {
1564 return 0;
1565 }
1566 break;
1567 case MLDV2_LISTENER_REPORT:
1568 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1569 break;
1570 case MLD_LISTENER_DONE:
1571 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1572 break;
1573 default:
1574 break;
1575 }
1576
1577 return 0;
1578 }
1579
1580 /*
1581  * Schedule the MLD timer based on various parameters; the caller must ensure
1582  * that lock ordering is maintained, as this routine acquires the MLD global lock.
1583 */
1584 void
1585 mld_set_timeout(struct mld_tparams *mtp)
1586 {
1587 MLD_LOCK_ASSERT_NOTHELD();
1588 VERIFY(mtp != NULL);
1589
1590 if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) {
1591 MLD_LOCK();
1592 if (mtp->qpt != 0) {
1593 querier_present_timers_running6 = 1;
1594 }
1595 if (mtp->it != 0) {
1596 interface_timers_running6 = 1;
1597 }
1598 if (mtp->cst != 0) {
1599 current_state_timers_running6 = 1;
1600 }
1601 if (mtp->sct != 0) {
1602 state_change_timers_running6 = 1;
1603 }
1604 mld_sched_timeout();
1605 MLD_UNLOCK();
1606 }
1607 }
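/*
 * Typical call pattern (illustrative, not part of the original source):
 * the input and state-change paths fill in a struct mld_tparams on the
 * stack while holding their own locks, then call mld_set_timeout() once
 * those locks have been dropped:
 *
 *        struct mld_tparams mtp = { .qpt = 0, .it = 0, .cst = 0, .sct = 0 };
 *
 *        MLI_LOCK(mli);
 *        mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
 *        MLI_UNLOCK(mli);
 *        ...
 *        mld_set_timeout(&mtp);    // asserts the MLD lock is not held
 */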
1608
1609 /*
1610  * MLD6 timer handler (runs once per second).
1611 */
1612 static void
1613 mld_timeout(void *arg)
1614 {
1615 #pragma unused(arg)
1616 struct ifqueue scq; /* State-change packets */
1617 struct ifqueue qrq; /* Query response packets */
1618 struct ifnet *ifp;
1619 struct mld_ifinfo *mli;
1620 struct in6_multi *inm;
1621 int uri_sec = 0;
1622 unsigned int genid = mld_mli_list_genid;
1623
1624 SLIST_HEAD(, in6_multi) in6m_dthead;
1625
1626 SLIST_INIT(&in6m_dthead);
1627
1628 /*
1629 * Update coarse-grained networking timestamp (in sec.); the idea
1630 * is to piggy-back on the timeout callout to update the counter
1631 * returnable via net_uptime().
1632 */
1633 net_update_uptime();
1634
1635 MLD_LOCK();
1636
1637 MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__,
1638 querier_present_timers_running6, interface_timers_running6,
1639 current_state_timers_running6, state_change_timers_running6));
1640
1641 /*
1642 * MLDv1 querier present timer processing.
1643 */
1644 if (querier_present_timers_running6) {
1645 querier_present_timers_running6 = 0;
1646 LIST_FOREACH(mli, &mli_head, mli_link) {
1647 MLI_LOCK(mli);
1648 mld_v1_process_querier_timers(mli);
1649 if (mli->mli_v1_timer > 0) {
1650 querier_present_timers_running6 = 1;
1651 }
1652 MLI_UNLOCK(mli);
1653 }
1654 }
1655
1656 /*
1657 * MLDv2 General Query response timer processing.
1658 */
1659 if (interface_timers_running6) {
1660 MLD_PRINTF(("%s: interface timers running\n", __func__));
1661 interface_timers_running6 = 0;
1662 mli = LIST_FIRST(&mli_head);
1663
1664 while (mli != NULL) {
1665 if (mli->mli_flags & MLIF_PROCESSED) {
1666 mli = LIST_NEXT(mli, mli_link);
1667 continue;
1668 }
1669
1670 MLI_LOCK(mli);
1671 if (mli->mli_version != MLD_VERSION_2) {
1672 MLI_UNLOCK(mli);
1673 mli = LIST_NEXT(mli, mli_link);
1674 continue;
1675 }
1676 /*
1677 * XXX The logic below ends up calling
1678 * mld_dispatch_packet which can unlock mli
1679 * and the global MLD lock.
1680  * Therefore grab a reference on the mli and also
1681  * check the generation count to see if we should
1682 * iterate the list again.
1683 */
1684 MLI_ADDREF_LOCKED(mli);
1685
1686 if (mli->mli_v2_timer == 0) {
1687 /* Do nothing. */
1688 } else if (--mli->mli_v2_timer == 0) {
1689 if (mld_v2_dispatch_general_query(mli) > 0) {
1690 interface_timers_running6 = 1;
1691 }
1692 } else {
1693 interface_timers_running6 = 1;
1694 }
1695 mli->mli_flags |= MLIF_PROCESSED;
1696 MLI_UNLOCK(mli);
1697 MLI_REMREF(mli);
1698
1699 if (genid != mld_mli_list_genid) {
1700 MLD_PRINTF(("%s: MLD information list changed "
1701 "in the middle of iteration! Restart iteration.\n",
1702 __func__));
1703 mli = LIST_FIRST(&mli_head);
1704 genid = mld_mli_list_genid;
1705 } else {
1706 mli = LIST_NEXT(mli, mli_link);
1707 }
1708 }
1709
1710 LIST_FOREACH(mli, &mli_head, mli_link)
1711 mli->mli_flags &= ~MLIF_PROCESSED;
1712 }
1713
1714
1715
1716 if (!current_state_timers_running6 &&
1717 !state_change_timers_running6) {
1718 goto out_locked;
1719 }
1720
1721 current_state_timers_running6 = 0;
1722 state_change_timers_running6 = 0;
1723
1724 MLD_PRINTF(("%s: state change timers running\n", __func__));
1725
1726 memset(&qrq, 0, sizeof(struct ifqueue));
1727 qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1728
1729 memset(&scq, 0, sizeof(struct ifqueue));
1730 scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1731
1732 /*
1733 * MLD host report and state-change timer processing.
1734 * Note: Processing a v2 group timer may remove a node.
1735 */
1736 mli = LIST_FIRST(&mli_head);
1737
1738 while (mli != NULL) {
1739 struct in6_multistep step;
1740
1741 if (mli->mli_flags & MLIF_PROCESSED) {
1742 mli = LIST_NEXT(mli, mli_link);
1743 continue;
1744 }
1745
1746 MLI_LOCK(mli);
1747 ifp = mli->mli_ifp;
1748 uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
1749 MLI_UNLOCK(mli);
1750
1751 in6_multihead_lock_shared();
1752 IN6_FIRST_MULTI(step, inm);
1753 while (inm != NULL) {
1754 IN6M_LOCK(inm);
1755 if (inm->in6m_ifp != ifp) {
1756 goto next;
1757 }
1758
1759 MLI_LOCK(mli);
1760 switch (mli->mli_version) {
1761 case MLD_VERSION_1:
1762 mld_v1_process_group_timer(inm,
1763 mli->mli_version);
1764 break;
1765 case MLD_VERSION_2:
1766 mld_v2_process_group_timers(mli, &qrq,
1767 &scq, inm, uri_sec);
1768 break;
1769 }
1770 MLI_UNLOCK(mli);
1771 next:
1772 IN6M_UNLOCK(inm);
1773 IN6_NEXT_MULTI(step, inm);
1774 }
1775 in6_multihead_lock_done();
1776
1777 /*
1778 * XXX The logic below ends up calling
1779 * mld_dispatch_packet which can unlock mli
1780 * and the global MLD lock.
1781  * Therefore grab a reference on the mli and also
1782  * check the generation count to see if we should
1783 * iterate the list again.
1784 */
1785 MLI_LOCK(mli);
1786 MLI_ADDREF_LOCKED(mli);
1787 if (mli->mli_version == MLD_VERSION_1) {
1788 mld_dispatch_queue_locked(mli, &mli->mli_v1q, 0);
1789 } else if (mli->mli_version == MLD_VERSION_2) {
1790 MLI_UNLOCK(mli);
1791 mld_dispatch_queue_locked(NULL, &qrq, 0);
1792 mld_dispatch_queue_locked(NULL, &scq, 0);
1793 VERIFY(qrq.ifq_len == 0);
1794 VERIFY(scq.ifq_len == 0);
1795 MLI_LOCK(mli);
1796 }
1797 /*
1798 * In case there are still any pending membership reports
1799 * which didn't get drained at version change time.
1800 */
1801 IF_DRAIN(&mli->mli_v1q);
1802 /*
1803 * Release all deferred inm records, and drain any locally
1804 * enqueued packets; do it even if the current MLD version
1805 * for the link is no longer MLDv2, in order to handle the
1806 * version change case.
1807 */
1808 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1809 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1810 mli->mli_flags |= MLIF_PROCESSED;
1811 MLI_UNLOCK(mli);
1812 MLI_REMREF(mli);
1813
1814 IF_DRAIN(&qrq);
1815 IF_DRAIN(&scq);
1816
1817 if (genid != mld_mli_list_genid) {
1818 MLD_PRINTF(("%s: MLD information list changed "
1819 "in the middle of iteration! Restart iteration.\n",
1820 __func__));
1821 mli = LIST_FIRST(&mli_head);
1822 genid = mld_mli_list_genid;
1823 } else {
1824 mli = LIST_NEXT(mli, mli_link);
1825 }
1826 }
1827
1828 LIST_FOREACH(mli, &mli_head, mli_link)
1829 mli->mli_flags &= ~MLIF_PROCESSED;
1830
1831 out_locked:
1832 /* re-arm the timer if there's work to do */
1833 mld_timeout_run = 0;
1834 mld_sched_timeout();
1835 MLD_UNLOCK();
1836
1837 /* Now that we've dropped all locks, release the detached records */
1838 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1839 }
1840
1841 static void
1842 mld_sched_timeout(void)
1843 {
1844 MLD_LOCK_ASSERT_HELD();
1845
1846 if (!mld_timeout_run &&
1847 (querier_present_timers_running6 || current_state_timers_running6 ||
1848 interface_timers_running6 || state_change_timers_running6)) {
1849 mld_timeout_run = 1;
1850 timeout(mld_timeout, NULL, hz);
1851 }
1852 }
1853
1854 /*
1855 * Free the in6_multi reference(s) for this MLD lifecycle.
1856 *
1857 * Caller must be holding mli_lock.
1858 */
1859 static void
1860 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1861 {
1862 struct in6_multi *inm;
1863
1864 again:
1865 MLI_LOCK_ASSERT_HELD(mli);
1866 inm = SLIST_FIRST(&mli->mli_relinmhead);
1867 if (inm != NULL) {
1868 int lastref;
1869
1870 SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1871 MLI_UNLOCK(mli);
1872
1873 in6_multihead_lock_exclusive();
1874 IN6M_LOCK(inm);
1875 VERIFY(inm->in6m_nrelecnt != 0);
1876 inm->in6m_nrelecnt--;
1877 lastref = in6_multi_detach(inm);
1878 VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1879 inm->in6m_reqcnt == 0));
1880 IN6M_UNLOCK(inm);
1881 in6_multihead_lock_done();
1882 /* from mli_relinmhead */
1883 IN6M_REMREF(inm);
1884 /* from in6_multihead_list */
1885 if (lastref) {
1886 /*
1887 * Defer releasing our final reference, as we
1888 * are holding the MLD lock at this point, and
1889 * we could end up with locking issues later on
1890 * (while issuing SIOCDELMULTI) when this is the
1891 * final reference count. Let the caller do it
1892 * when it is safe.
1893 */
1894 MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1895 }
1896 MLI_LOCK(mli);
1897 goto again;
1898 }
1899 }
1900
1901 /*
1902 * Update host report group timer.
1903 * Will update the global pending timer flags.
1904 */
1905 static void
1906 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1907 {
1908 #pragma unused(mld_version)
1909 int report_timer_expired;
1910
1911 MLD_LOCK_ASSERT_HELD();
1912 IN6M_LOCK_ASSERT_HELD(inm);
1913 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1914
1915 if (inm->in6m_timer == 0) {
1916 report_timer_expired = 0;
1917 } else if (--inm->in6m_timer == 0) {
1918 report_timer_expired = 1;
1919 } else {
1920 current_state_timers_running6 = 1;
1921 /* caller will schedule timer */
1922 return;
1923 }
1924
1925 switch (inm->in6m_state) {
1926 case MLD_NOT_MEMBER:
1927 case MLD_SILENT_MEMBER:
1928 case MLD_IDLE_MEMBER:
1929 case MLD_LAZY_MEMBER:
1930 case MLD_SLEEPING_MEMBER:
1931 case MLD_AWAKENING_MEMBER:
1932 break;
1933 case MLD_REPORTING_MEMBER:
1934 if (report_timer_expired) {
1935 inm->in6m_state = MLD_IDLE_MEMBER;
1936 (void) mld_v1_transmit_report(inm,
1937 MLD_LISTENER_REPORT);
1938 IN6M_LOCK_ASSERT_HELD(inm);
1939 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1940 }
1941 break;
1942 case MLD_G_QUERY_PENDING_MEMBER:
1943 case MLD_SG_QUERY_PENDING_MEMBER:
1944 case MLD_LEAVING_MEMBER:
1945 break;
1946 }
1947 }
1948
1949 /*
1950 * Update a group's timers for MLDv2.
1951 * Will update the global pending timer flags.
1952 * Note: Unlocked read from mli.
1953 */
1954 static void
1955 mld_v2_process_group_timers(struct mld_ifinfo *mli,
1956 struct ifqueue *qrq, struct ifqueue *scq,
1957 struct in6_multi *inm, const int uri_sec)
1958 {
1959 int query_response_timer_expired;
1960 int state_change_retransmit_timer_expired;
1961
1962 MLD_LOCK_ASSERT_HELD();
1963 IN6M_LOCK_ASSERT_HELD(inm);
1964 MLI_LOCK_ASSERT_HELD(mli);
1965 VERIFY(mli == inm->in6m_mli);
1966
1967 query_response_timer_expired = 0;
1968 state_change_retransmit_timer_expired = 0;
1969
1970 /*
1971 * During a transition from compatibility mode back to MLDv2,
1972 * a group record in REPORTING state may still have its group
1973 * timer active. This is a no-op in this function; it is easier
1974 * to deal with it here than to complicate the timeout path.
1975 */
1976 if (inm->in6m_timer == 0) {
1977 query_response_timer_expired = 0;
1978 } else if (--inm->in6m_timer == 0) {
1979 query_response_timer_expired = 1;
1980 } else {
1981 current_state_timers_running6 = 1;
1982 /* caller will schedule timer */
1983 }
1984
1985 if (inm->in6m_sctimer == 0) {
1986 state_change_retransmit_timer_expired = 0;
1987 } else if (--inm->in6m_sctimer == 0) {
1988 state_change_retransmit_timer_expired = 1;
1989 } else {
1990 state_change_timers_running6 = 1;
1991 /* caller will schedule timer */
1992 }
1993
1994 /* We are in timer callback, so be quick about it. */
1995 if (!state_change_retransmit_timer_expired &&
1996 !query_response_timer_expired) {
1997 return;
1998 }
1999
2000 switch (inm->in6m_state) {
2001 case MLD_NOT_MEMBER:
2002 case MLD_SILENT_MEMBER:
2003 case MLD_SLEEPING_MEMBER:
2004 case MLD_LAZY_MEMBER:
2005 case MLD_AWAKENING_MEMBER:
2006 case MLD_IDLE_MEMBER:
2007 break;
2008 case MLD_G_QUERY_PENDING_MEMBER:
2009 case MLD_SG_QUERY_PENDING_MEMBER:
2010 /*
2011 * Respond to a previously pending Group-Specific
2012 * or Group-and-Source-Specific query by enqueueing
2013 * the appropriate Current-State report for
2014 * immediate transmission.
2015 */
2016 if (query_response_timer_expired) {
2017 int retval;
2018
2019 retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
2020 (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
2021 0);
2022 MLD_PRINTF(("%s: enqueue record = %d\n",
2023 __func__, retval));
2024 inm->in6m_state = MLD_REPORTING_MEMBER;
2025 in6m_clear_recorded(inm);
2026 }
2027 /* FALLTHROUGH */
2028 case MLD_REPORTING_MEMBER:
2029 case MLD_LEAVING_MEMBER:
2030 if (state_change_retransmit_timer_expired) {
2031 /*
2032 * State-change retransmission timer fired.
2033 * If there are any further pending retransmissions,
2034 * set the global pending state-change flag, and
2035 * reset the timer.
2036 */
2037 if (--inm->in6m_scrv > 0) {
2038 inm->in6m_sctimer = uri_sec;
2039 state_change_timers_running6 = 1;
2040 /* caller will schedule timer */
2041 }
2042 /*
2043 * Retransmit the previously computed state-change
2044 * report. If there are no further pending
2045 * retransmissions, the mbuf queue will be consumed.
2046 * Update T0 state to T1 as we have now sent
2047 * a state-change.
2048 */
2049 (void) mld_v2_merge_state_changes(inm, scq);
2050
2051 in6m_commit(inm);
2052 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2053 ip6_sprintf(&inm->in6m_addr),
2054 if_name(inm->in6m_ifp)));
2055
2056 /*
2057 * If we are leaving the group for good, make sure
2058 * we release MLD's reference to it.
2059 * This release must be deferred using a SLIST,
2060 * as we are called from a loop which traverses
2061 * the in6_multihead list.
2062 */
2063 if (inm->in6m_state == MLD_LEAVING_MEMBER &&
2064 inm->in6m_scrv == 0) {
2065 inm->in6m_state = MLD_NOT_MEMBER;
2066 /*
2067 * A reference has already been held in
2068 * mld_final_leave() for this inm, so
2069 * no need to hold another one. We also
2070 * bumped up its request count then, so
2071 * that it stays in in6_multihead. Both
2072 * of them will be released when it is
2073 * dequeued later on.
2074 */
2075 VERIFY(inm->in6m_nrelecnt != 0);
2076 SLIST_INSERT_HEAD(&mli->mli_relinmhead,
2077 inm, in6m_nrele);
2078 }
2079 }
2080 break;
2081 }
2082 }
2083
2084 /*
2085 * Switch to a different version on the given interface,
2086 * as per Section 9.12 of RFC 3810.
2087 */
2088 static uint32_t
2089 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
2090 {
2091 int old_version_timer;
2092
2093 MLI_LOCK_ASSERT_HELD(mli);
2094
2095 MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
2096 mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2097 if_name(mli->mli_ifp)));
2098
2099 if (mld_version == MLD_VERSION_1) {
2100 /*
2101 * Compute the "Older Version Querier Present" timer as per
2102 * Section 9.12, in seconds.
2103 */
2104 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
2105 mli->mli_v1_timer = old_version_timer;
2106 }
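/*
 * Worked example (using the RFC 3810 default values, which may be
 * overridden per link): with Robustness Variable mli_rv = 2, Query
 * Interval mli_qi = 125 seconds and Query Response Interval
 * mli_qri = 10 seconds, the Older Version Querier Present timeout is
 *
 *	(2 * 125) + 10 = 260 seconds
 *
 * i.e. the link stays in MLDv1 compatibility mode for roughly 260
 * one-second ticks after the last MLDv1 query was heard.
 */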
2107
2108 if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
2109 mli->mli_version = MLD_VERSION_1;
2110 mld_v2_cancel_link_timers(mli);
2111 }
2112
2113 MLI_LOCK_ASSERT_HELD(mli);
2114
2115 return mli->mli_v1_timer;
2116 }
2117
2118 /*
2119 * Cancel pending MLDv2 timers for the given link and all groups
2120 * joined on it; state-change, general-query, and group-query timers.
2121 *
2122 * Only ever called on a transition from v2 to Compatibility mode. Kill
2123 * the timers stone dead (this may be expensive for a large number of
2124 * groups); they will be restarted if Compatibility Mode deems that they
2125 * must be, due to query processing.
2126 */
2127 static void
2128 mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
2129 {
2130 struct ifnet *ifp;
2131 struct in6_multi *inm;
2132 struct in6_multistep step;
2133
2134 MLI_LOCK_ASSERT_HELD(mli);
2135
2136 MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
2137 (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));
2138
2139 /*
2140 * Stop the v2 General Query Response on this link stone dead.
2141 * If the timer is woken up due to interface_timers_running6,
2142 * the flag will be cleared if there are no pending link timers.
2143 */
2144 mli->mli_v2_timer = 0;
2145
2146 /*
2147 * Now clear the current-state and state-change report timers
2148 * for all memberships scoped to this link.
2149 */
2150 ifp = mli->mli_ifp;
2151 MLI_UNLOCK(mli);
2152
2153 in6_multihead_lock_shared();
2154 IN6_FIRST_MULTI(step, inm);
2155 while (inm != NULL) {
2156 IN6M_LOCK(inm);
2157 if (inm->in6m_ifp != ifp) {
2158 goto next;
2159 }
2160
2161 switch (inm->in6m_state) {
2162 case MLD_NOT_MEMBER:
2163 case MLD_SILENT_MEMBER:
2164 case MLD_IDLE_MEMBER:
2165 case MLD_LAZY_MEMBER:
2166 case MLD_SLEEPING_MEMBER:
2167 case MLD_AWAKENING_MEMBER:
2168 /*
2169 * These states are either not relevant in v2 mode,
2170 * or are unreported. Do nothing.
2171 */
2172 break;
2173 case MLD_LEAVING_MEMBER:
2174 /*
2175 * If we are leaving the group and switching
2176 * version, we need to release the final
2177 * reference held for issuing the INCLUDE {}.
2178 * During mld_final_leave(), we bumped up both the
2179 * request and reference counts. Since we cannot
2180 * call in6_multi_detach() here, defer this task to
2181 * the timer routine.
2182 */
2183 VERIFY(inm->in6m_nrelecnt != 0);
2184 MLI_LOCK(mli);
2185 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2186 in6m_nrele);
2187 MLI_UNLOCK(mli);
2188 /* FALLTHROUGH */
2189 case MLD_G_QUERY_PENDING_MEMBER:
2190 case MLD_SG_QUERY_PENDING_MEMBER:
2191 in6m_clear_recorded(inm);
2192 /* FALLTHROUGH */
2193 case MLD_REPORTING_MEMBER:
2194 inm->in6m_state = MLD_REPORTING_MEMBER;
2195 break;
2196 }
2197 /*
2198 * Always clear state-change and group report timers.
2199 * Free any pending MLDv2 state-change records.
2200 */
2201 inm->in6m_sctimer = 0;
2202 inm->in6m_timer = 0;
2203 IF_DRAIN(&inm->in6m_scq);
2204 next:
2205 IN6M_UNLOCK(inm);
2206 IN6_NEXT_MULTI(step, inm);
2207 }
2208 in6_multihead_lock_done();
2209
2210 MLI_LOCK(mli);
2211 }
2212
2213 /*
2214 * Update the Older Version Querier Present timers for a link.
2215 * See Section 9.12 of RFC 3810.
2216 */
2217 static void
2218 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
2219 {
2220 MLI_LOCK_ASSERT_HELD(mli);
2221
2222 if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
2223 --mli->mli_v1_timer == 0) {
2224 /*
2225 * MLDv1 Querier Present timer expired; revert to MLDv2.
2226 */
2227 MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
2228 __func__, mli->mli_version, MLD_VERSION_2,
2229 (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2230 if_name(mli->mli_ifp)));
2231 mli->mli_version = MLD_VERSION_2;
2232 }
2233 }
2234
2235 /*
2236 * Transmit an MLDv1 report immediately.
2237 */
2238 static int
2239 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
2240 {
2241 struct ifnet *ifp;
2242 struct in6_ifaddr *ia;
2243 struct ip6_hdr *ip6;
2244 struct mbuf *mh, *md;
2245 struct mld_hdr *mld;
2246 int error = 0;
2247
2248 IN6M_LOCK_ASSERT_HELD(in6m);
2249 MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
2250
2251 ifp = in6m->in6m_ifp;
2252 /* ia may be NULL if link-local address is tentative. */
2253 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
2254
2255 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2256 if (mh == NULL) {
2257 if (ia != NULL) {
2258 IFA_REMREF(&ia->ia_ifa);
2259 }
2260 return ENOMEM;
2261 }
2262 MGET(md, M_DONTWAIT, MT_DATA);
2263 if (md == NULL) {
2264 m_free(mh);
2265 if (ia != NULL) {
2266 IFA_REMREF(&ia->ia_ifa);
2267 }
2268 return ENOMEM;
2269 }
2270 mh->m_next = md;
2271
2272 /*
2273 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
2274 * that ether_output() does not need to allocate another mbuf
2275 * for the header in the most common case.
2276 */
2277 MH_ALIGN(mh, sizeof(struct ip6_hdr));
2278 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
2279 mh->m_len = sizeof(struct ip6_hdr);
2280
2281 ip6 = mtod(mh, struct ip6_hdr *);
2282 ip6->ip6_flow = 0;
2283 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2284 ip6->ip6_vfc |= IPV6_VERSION;
2285 ip6->ip6_nxt = IPPROTO_ICMPV6;
2286 if (ia != NULL) {
2287 IFA_LOCK(&ia->ia_ifa);
2288 }
2289 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
2290 if (ia != NULL) {
2291 IFA_UNLOCK(&ia->ia_ifa);
2292 IFA_REMREF(&ia->ia_ifa);
2293 ia = NULL;
2294 }
2295 ip6->ip6_dst = in6m->in6m_addr;
2296
2297 md->m_len = sizeof(struct mld_hdr);
2298 mld = mtod(md, struct mld_hdr *);
2299 mld->mld_type = type;
2300 mld->mld_code = 0;
2301 mld->mld_cksum = 0;
2302 mld->mld_maxdelay = 0;
2303 mld->mld_reserved = 0;
2304 mld->mld_addr = in6m->in6m_addr;
2305 in6_clearscope(&mld->mld_addr);
2306 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2307 sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2308
2309 mld_save_context(mh, ifp);
2310 mh->m_flags |= M_MLDV1;
2311
2312 /*
2313 * Because at this point we are possibly holding
2314 * in6_multihead_lock in shared or exclusive mode, we can't call
2315 * mld_dispatch_packet() here since that will eventually call
2316 * ip6_output(), which will try to lock in6_multihead_lock and cause
2317 * a deadlock.
2318 * Instead we defer the work to the mld_timeout() thread, thus
2319 * avoiding the need to unlock in6_multihead_lock here.
2320 */
2321 if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2322 MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2323 error = ENOMEM;
2324 m_freem(mh);
2325 } else {
2326 IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2327 VERIFY(error == 0);
2328 }
2329
2330 return error;
2331 }
2332
2333 /*
2334 * Process a state change from the upper layer for the given IPv6 group.
2335 *
2336 * Each socket holds a reference on the in6_multi in its own ip_moptions.
2337 * The socket layer will have made the necessary updates to the group
2338 * state, it is now up to MLD to issue a state change report if there
2339 * has been any change between T0 (when the last state-change was issued)
2340 * and T1 (now).
2341 *
2342 * We use the MLDv2 state machine at group level. The MLD module
2343 * however makes the decision as to which MLD protocol version to speak.
2344 * A state change *from* INCLUDE {} always means an initial join.
2345 * A state change *to* INCLUDE {} always means a final leave.
2346 *
2347 * If delay is non-zero, and the state change is an initial multicast
2348 * join, the state change report will be delayed by 'delay' ticks
2349 * in units of seconds if MLDv1 is active on the link; otherwise
2350 * the initial MLDv2 state change report will be delayed by whichever
2351 * is sooner, a pending state-change timer or delay itself.
2352 */
2353 int
2354 mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp,
2355 const int delay)
2356 {
2357 struct mld_ifinfo *mli;
2358 struct ifnet *ifp;
2359 int error = 0;
2360
2361 VERIFY(mtp != NULL);
2362 bzero(mtp, sizeof(*mtp));
2363
2364 IN6M_LOCK_ASSERT_HELD(inm);
2365 VERIFY(inm->in6m_mli != NULL);
2366 MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2367
2368 /*
2369 * Try to detect if the upper layer just asked us to change state
2370 * for an interface which has now gone away.
2371 */
2372 VERIFY(inm->in6m_ifma != NULL);
2373 ifp = inm->in6m_ifma->ifma_ifp;
2374 /*
2375 * Sanity check that netinet6's notion of ifp is the same as net's.
2376 */
2377 VERIFY(inm->in6m_ifp == ifp);
2378
2379 mli = MLD_IFINFO(ifp);
2380 VERIFY(mli != NULL);
2381
2382 /*
2383 * If we detect a state transition to or from MCAST_UNDEFINED
2384 * for this group, then we are starting or finishing an MLD
2385 * life cycle for this group.
2386 */
2387 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2388 MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2389 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2390 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2391 MLD_PRINTF(("%s: initial join\n", __func__));
2392 error = mld_initial_join(inm, mli, mtp, delay);
2393 goto out;
2394 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2395 MLD_PRINTF(("%s: final leave\n", __func__));
2396 mld_final_leave(inm, mli, mtp);
2397 goto out;
2398 }
2399 } else {
2400 MLD_PRINTF(("%s: filter set change\n", __func__));
2401 }
2402
2403 error = mld_handle_state_change(inm, mli, mtp);
2404 out:
2405 return error;
2406 }
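/*
 * Hypothetical caller sketch, for exposition only (the real callers live
 * in the IPv6 multicast socket layer): the upper layer provides an
 * mld_tparams, invokes mld_change_state() with the in6_multi lock held,
 * and then arms the MLD timers according to the cst/sct flags MLD
 * filled in:
 *
 *	struct mld_tparams mtp;
 *	int error;
 *
 *	IN6M_LOCK(inm);
 *	error = mld_change_state(inm, &mtp, 0);
 *	IN6M_UNLOCK(inm);
 *	if (error == 0 && (mtp.cst || mtp.sct))
 *		(schedule the current-state / state-change timers)
 */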
2407
2408 /*
2409 * Perform the initial join for an MLD group.
2410 *
2411 * When joining a group:
2412 * If the group should have its MLD traffic suppressed, do nothing.
2413 * MLDv1 starts sending MLDv1 host membership reports.
2414 * MLDv2 will schedule an MLDv2 state-change report containing the
2415 * initial state of the membership.
2416 *
2417 * If the delay argument is non-zero, then we must delay sending the
2418 * initial state change for delay ticks (in units of seconds).
2419 */
2420 static int
2421 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2422 struct mld_tparams *mtp, const int delay)
2423 {
2424 struct ifnet *ifp;
2425 struct ifqueue *ifq;
2426 int error, retval, syncstates;
2427 int odelay;
2428
2429 IN6M_LOCK_ASSERT_HELD(inm);
2430 MLI_LOCK_ASSERT_NOTHELD(mli);
2431 VERIFY(mtp != NULL);
2432
2433 MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
2434 __func__, ip6_sprintf(&inm->in6m_addr),
2435 (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2436 if_name(inm->in6m_ifp)));
2437
2438 error = 0;
2439 syncstates = 1;
2440
2441 ifp = inm->in6m_ifp;
2442
2443 MLI_LOCK(mli);
2444 VERIFY(mli->mli_ifp == ifp);
2445
2446 /*
2447 * Avoid MLD if the group is:
2448 * 1. Joined on loopback, OR
2449 * 2. On a link that is marked MLIF_SILENT, OR
2450 * 3. rdar://problem/19227650 Is link-local scoped and
2451 * joined on a cellular interface, OR
2452 * 4. Is a type that should not be reported (node-local
2453 * or all-nodes link-local multicast).
2454 * All other groups enter the appropriate state machine
2455 * for the version in use on this link.
2456 */
2457 if ((ifp->if_flags & IFF_LOOPBACK) ||
2458 (mli->mli_flags & MLIF_SILENT) ||
2459 (IFNET_IS_CELLULAR(ifp) &&
2460 IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr)) ||
2461 !mld_is_addr_reported(&inm->in6m_addr)) {
2462 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2463 __func__));
2464 inm->in6m_state = MLD_SILENT_MEMBER;
2465 inm->in6m_timer = 0;
2466 } else {
2467 /*
2468 * Deal with overlapping in6_multi lifecycle.
2469 * If this group was LEAVING, then make sure
2470 * we drop the reference we picked up to keep the
2471 * group around for the final INCLUDE {} enqueue.
2472 * Since we cannot call in6_multi_detach() here,
2473 * defer this task to the timer routine.
2474 */
2475 if (mli->mli_version == MLD_VERSION_2 &&
2476 inm->in6m_state == MLD_LEAVING_MEMBER) {
2477 VERIFY(inm->in6m_nrelecnt != 0);
2478 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2479 in6m_nrele);
2480 }
2481
2482 inm->in6m_state = MLD_REPORTING_MEMBER;
2483
2484 switch (mli->mli_version) {
2485 case MLD_VERSION_1:
2486 /*
2487 * If a delay was provided, only use it if
2488 * it is greater than the delay normally
2489 * used for an MLDv1 state change report,
2490 * and delay sending the initial MLDv1 report
2491 * by not transitioning to the IDLE state.
2492 */
2493 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
2494 if (delay) {
2495 inm->in6m_timer = max(delay, odelay);
2496 mtp->cst = 1;
2497 } else {
2498 inm->in6m_state = MLD_IDLE_MEMBER;
2499 error = mld_v1_transmit_report(inm,
2500 MLD_LISTENER_REPORT);
2501
2502 IN6M_LOCK_ASSERT_HELD(inm);
2503 MLI_LOCK_ASSERT_HELD(mli);
2504
2505 if (error == 0) {
2506 inm->in6m_timer = odelay;
2507 mtp->cst = 1;
2508 }
2509 }
2510 break;
2511
2512 case MLD_VERSION_2:
2513 /*
2514 * Defer update of T0 to T1, until the first copy
2515 * of the state change has been transmitted.
2516 */
2517 syncstates = 0;
2518
2519 /*
2520 * Immediately enqueue a State-Change Report for
2521 * this interface, freeing any previous reports.
2522 * Don't kick the timers if there is nothing to do,
2523 * or if an error occurred.
2524 */
2525 ifq = &inm->in6m_scq;
2526 IF_DRAIN(ifq);
2527 retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2528 0, 0, (mli->mli_flags & MLIF_USEALLOW));
2529 mtp->cst = (ifq->ifq_len > 0);
2530 MLD_PRINTF(("%s: enqueue record = %d\n",
2531 __func__, retval));
2532 if (retval <= 0) {
2533 error = retval * -1;
2534 break;
2535 }
2536
2537 /*
2538 * Schedule transmission of pending state-change
2539 * report up to RV times for this link. The timer
2540 * will fire at the next mld_timeout (1 second),
2541 * giving us an opportunity to merge the reports.
2542 *
2543 * If a delay was provided to this function, only
2544 * use this delay if it is sooner than the existing one.
2545 */
2546 VERIFY(mli->mli_rv > 1);
2547 inm->in6m_scrv = mli->mli_rv;
2548 if (delay) {
2549 if (inm->in6m_sctimer > 1) {
2550 inm->in6m_sctimer =
2551 min(inm->in6m_sctimer, delay);
2552 } else {
2553 inm->in6m_sctimer = delay;
2554 }
2555 } else {
2556 inm->in6m_sctimer = 1;
2557 }
2558 mtp->sct = 1;
2559 error = 0;
2560 break;
2561 }
2562 }
2563 MLI_UNLOCK(mli);
2564
2565 /*
2566 * Only update the T0 state if state change is atomic,
2567 * i.e. we don't need to wait for a timer to fire before we
2568 * can consider the state change to have been communicated.
2569 */
2570 if (syncstates) {
2571 in6m_commit(inm);
2572 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2573 ip6_sprintf(&inm->in6m_addr),
2574 if_name(inm->in6m_ifp)));
2575 }
2576
2577 return error;
2578 }
2579
2580 /*
2581 * Issue an intermediate state change during the life-cycle.
2582 */
2583 static int
2584 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli,
2585 struct mld_tparams *mtp)
2586 {
2587 struct ifnet *ifp;
2588 int retval = 0;
2589
2590 IN6M_LOCK_ASSERT_HELD(inm);
2591 MLI_LOCK_ASSERT_NOTHELD(mli);
2592 VERIFY(mtp != NULL);
2593
2594 MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
2595 __func__, ip6_sprintf(&inm->in6m_addr),
2596 (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2597 if_name(inm->in6m_ifp)));
2598
2599 ifp = inm->in6m_ifp;
2600
2601 MLI_LOCK(mli);
2602 VERIFY(mli->mli_ifp == ifp);
2603
2604 if ((ifp->if_flags & IFF_LOOPBACK) ||
2605 (mli->mli_flags & MLIF_SILENT) ||
2606 !mld_is_addr_reported(&inm->in6m_addr) ||
2607 (mli->mli_version != MLD_VERSION_2)) {
2608 MLI_UNLOCK(mli);
2609 if (!mld_is_addr_reported(&inm->in6m_addr)) {
2610 MLD_PRINTF(("%s: not kicking state machine for silent "
2611 "group\n", __func__));
2612 }
2613 MLD_PRINTF(("%s: nothing to do\n", __func__));
2614 in6m_commit(inm);
2615 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2616 ip6_sprintf(&inm->in6m_addr),
2617 if_name(inm->in6m_ifp)));
2618 goto done;
2619 }
2620
2621 IF_DRAIN(&inm->in6m_scq);
2622
2623 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2624 (mli->mli_flags & MLIF_USEALLOW));
2625 mtp->cst = (inm->in6m_scq.ifq_len > 0);
2626 MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2627 if (retval <= 0) {
2628 MLI_UNLOCK(mli);
2629 retval *= -1;
2630 goto done;
2631 } else {
2632 retval = 0;
2633 }
2634
2635 /*
2636 * If record(s) were enqueued, start the state-change
2637 * report timer for this group.
2638 */
2639 inm->in6m_scrv = mli->mli_rv;
2640 inm->in6m_sctimer = 1;
2641 mtp->sct = 1;
2642 MLI_UNLOCK(mli);
2643
2644 done:
2645 return retval;
2646 }
2647
2648 /*
2649 * Perform the final leave for a multicast address.
2650 *
2651 * When leaving a group:
2652 * MLDv1 sends a DONE message, if and only if we are the reporter.
2653 * MLDv2 enqueues a state-change report containing a transition
2654 * to INCLUDE {} for immediate transmission.
2655 */
2656 static void
2657 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli,
2658 struct mld_tparams *mtp)
2659 {
2660 int syncstates = 1;
2661
2662 IN6M_LOCK_ASSERT_HELD(inm);
2663 MLI_LOCK_ASSERT_NOTHELD(mli);
2664 VERIFY(mtp != NULL);
2665
2666 MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
2667 __func__, ip6_sprintf(&inm->in6m_addr),
2668 (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2669 if_name(inm->in6m_ifp)));
2670
2671 switch (inm->in6m_state) {
2672 case MLD_NOT_MEMBER:
2673 case MLD_SILENT_MEMBER:
2674 case MLD_LEAVING_MEMBER:
2675 /* Already leaving or left; do nothing. */
2676 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2677 __func__));
2678 break;
2679 case MLD_REPORTING_MEMBER:
2680 case MLD_IDLE_MEMBER:
2681 case MLD_G_QUERY_PENDING_MEMBER:
2682 case MLD_SG_QUERY_PENDING_MEMBER:
2683 MLI_LOCK(mli);
2684 if (mli->mli_version == MLD_VERSION_1) {
2685 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2686 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2687 panic("%s: MLDv2 state reached, not MLDv2 "
2688 "mode\n", __func__);
2689 /* NOTREACHED */
2690 }
2691 /* schedule timer if enqueue is successful */
2692 mtp->cst = (mld_v1_transmit_report(inm,
2693 MLD_LISTENER_DONE) == 0);
2694
2695 IN6M_LOCK_ASSERT_HELD(inm);
2696 MLI_LOCK_ASSERT_HELD(mli);
2697
2698 inm->in6m_state = MLD_NOT_MEMBER;
2699 } else if (mli->mli_version == MLD_VERSION_2) {
2700 /*
2701 * Stop group timer and all pending reports.
2702 * Immediately enqueue a state-change report
2703 * TO_IN {} to be sent on the next timeout,
2704 * giving us an opportunity to merge reports.
2705 */
2706 IF_DRAIN(&inm->in6m_scq);
2707 inm->in6m_timer = 0;
2708 inm->in6m_scrv = mli->mli_rv;
2709 MLD_PRINTF(("%s: Leaving %s/%s with %d "
2710 "pending retransmissions.\n", __func__,
2711 ip6_sprintf(&inm->in6m_addr),
2712 if_name(inm->in6m_ifp),
2713 inm->in6m_scrv));
2714 if (inm->in6m_scrv == 0) {
2715 inm->in6m_state = MLD_NOT_MEMBER;
2716 inm->in6m_sctimer = 0;
2717 } else {
2718 int retval;
2719 /*
2720 * Stick around in the in6_multihead list;
2721 * the final detach will be issued by
2722 * mld_v2_process_group_timers() when
2723 * the retransmit timer expires.
2724 */
2725 IN6M_ADDREF_LOCKED(inm);
2726 VERIFY(inm->in6m_debug & IFD_ATTACHED);
2727 inm->in6m_reqcnt++;
2728 VERIFY(inm->in6m_reqcnt >= 1);
2729 inm->in6m_nrelecnt++;
2730 VERIFY(inm->in6m_nrelecnt != 0);
2731
2732 retval = mld_v2_enqueue_group_record(
2733 &inm->in6m_scq, inm, 1, 0, 0,
2734 (mli->mli_flags & MLIF_USEALLOW));
2735 mtp->cst = (inm->in6m_scq.ifq_len > 0);
2736 KASSERT(retval != 0,
2737 ("%s: enqueue record = %d\n", __func__,
2738 retval));
2739
2740 inm->in6m_state = MLD_LEAVING_MEMBER;
2741 inm->in6m_sctimer = 1;
2742 mtp->sct = 1;
2743 syncstates = 0;
2744 }
2745 }
2746 MLI_UNLOCK(mli);
2747 break;
2748 case MLD_LAZY_MEMBER:
2749 case MLD_SLEEPING_MEMBER:
2750 case MLD_AWAKENING_MEMBER:
2751 /* Our reports are suppressed; do nothing. */
2752 break;
2753 }
2754
2755 if (syncstates) {
2756 in6m_commit(inm);
2757 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2758 ip6_sprintf(&inm->in6m_addr),
2759 if_name(inm->in6m_ifp)));
2760 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2761 MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
2762 __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
2763 if_name(inm->in6m_ifp)));
2764 }
2765 }
2766
2767 /*
2768 * Enqueue an MLDv2 group record to the given output queue.
2769 *
2770 * If is_state_change is zero, a current-state record is appended.
2771 * If is_state_change is non-zero, a state-change report is appended.
2772 *
2773 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2774 * If is_group_query is zero, and there is a packet with free space
2775 * at the tail of the queue, the record will be appended to it,
2776 * provided there is enough free space.
2777 * Otherwise a new mbuf packet chain is allocated.
2778 *
2779 * If is_source_query is non-zero, each source is checked to see if
2780 * it was recorded for a Group-Source query, and will be omitted if
2781 * it is not both in-mode and recorded.
2782 *
2783 * If use_block_allow is non-zero, state change reports for initial join
2784 * and final leave, on an inclusive mode group with a source list, will be
2785 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2786 *
2787 * The function will attempt to allocate leading space in the packet
2788 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2789 *
2790 * If successful the size of all data appended to the queue is returned,
2791 * otherwise an error code less than zero is returned, or zero if
2792 * no record(s) were appended.
2793 */
2794 static int
2795 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2796 const int is_state_change, const int is_group_query,
2797 const int is_source_query, const int use_block_allow)
2798 {
2799 struct mldv2_record mr;
2800 struct mldv2_record *pmr;
2801 struct ifnet *ifp;
2802 struct ip6_msource *ims, *nims;
2803 struct mbuf *m0, *m, *md;
2804 int error, is_filter_list_change;
2805 int minrec0len, m0srcs, msrcs, nbytes, off;
2806 int record_has_sources;
2807 int now;
2808 int type;
2809 uint8_t mode;
2810
2811 IN6M_LOCK_ASSERT_HELD(inm);
2812 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2813
2814 error = 0;
2815 ifp = inm->in6m_ifp;
2816 is_filter_list_change = 0;
2817 m = NULL;
2818 m0 = NULL;
2819 m0srcs = 0;
2820 msrcs = 0;
2821 nbytes = 0;
2822 nims = NULL;
2823 record_has_sources = 1;
2824 pmr = NULL;
2825 type = MLD_DO_NOTHING;
2826 mode = inm->in6m_st[1].iss_fmode;
2827
2828 /*
2829 * If we did not transition out of ASM mode during t0->t1,
2830 * and there are no source nodes to process, we can skip
2831 * the generation of source records.
2832 */
2833 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2834 inm->in6m_nsrc == 0) {
2835 record_has_sources = 0;
2836 }
2837
2838 if (is_state_change) {
2839 /*
2840 * Queue a state change record.
2841 * If the mode did not change, and there are non-ASM
2842 * listeners or source filters present,
2843 * we potentially need to issue two records for the group.
2844 * If there are ASM listeners, and there was no filter
2845 * mode transition of any kind, do nothing.
2846 *
2847 * If we are transitioning to MCAST_UNDEFINED, we need
2848 * not send any sources. A transition to/from this state is
2849 * considered inclusive with some special treatment.
2850 *
2851 * If we are rewriting initial joins/leaves to use
2852 * ALLOW/BLOCK, and the group's membership is inclusive,
2853 * we need to send sources in all cases.
2854 */
2855 if (mode != inm->in6m_st[0].iss_fmode) {
2856 if (mode == MCAST_EXCLUDE) {
2857 MLD_PRINTF(("%s: change to EXCLUDE\n",
2858 __func__));
2859 type = MLD_CHANGE_TO_EXCLUDE_MODE;
2860 } else {
2861 MLD_PRINTF(("%s: change to INCLUDE\n",
2862 __func__));
2863 if (use_block_allow) {
2864 /*
2865 * XXX
2866 * Here we're interested in state
2867 * edges either direction between
2868 * MCAST_UNDEFINED and MCAST_INCLUDE.
2869 * Perhaps we should just check
2870 * the group state, rather than
2871 * the filter mode.
2872 */
2873 if (mode == MCAST_UNDEFINED) {
2874 type = MLD_BLOCK_OLD_SOURCES;
2875 } else {
2876 type = MLD_ALLOW_NEW_SOURCES;
2877 }
2878 } else {
2879 type = MLD_CHANGE_TO_INCLUDE_MODE;
2880 if (mode == MCAST_UNDEFINED) {
2881 record_has_sources = 0;
2882 }
2883 }
2884 }
2885 } else {
2886 if (record_has_sources) {
2887 is_filter_list_change = 1;
2888 } else {
2889 type = MLD_DO_NOTHING;
2890 }
2891 }
2892 } else {
2893 /*
2894 * Queue a current state record.
2895 */
2896 if (mode == MCAST_EXCLUDE) {
2897 type = MLD_MODE_IS_EXCLUDE;
2898 } else if (mode == MCAST_INCLUDE) {
2899 type = MLD_MODE_IS_INCLUDE;
2900 VERIFY(inm->in6m_st[1].iss_asm == 0);
2901 }
2902 }
2903
2904 /*
2905 * Generate the filter list changes using a separate function.
2906 */
2907 if (is_filter_list_change) {
2908 return mld_v2_enqueue_filter_change(ifq, inm);
2909 }
2910
2911 if (type == MLD_DO_NOTHING) {
2912 MLD_PRINTF(("%s: nothing to do for %s/%s\n",
2913 __func__, ip6_sprintf(&inm->in6m_addr),
2914 if_name(inm->in6m_ifp)));
2915 return 0;
2916 }
2917
2918 /*
2919 * If any sources are present, we must be able to fit at least
2920 * one in the trailing space of the tail packet's mbuf,
2921 * ideally more.
2922 */
2923 minrec0len = sizeof(struct mldv2_record);
2924 if (record_has_sources) {
2925 minrec0len += sizeof(struct in6_addr);
2926 }
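/*
 * For reference (sizes assumed from the structure layout, ignoring any
 * platform-specific padding): struct mldv2_record is 20 bytes (mr_type,
 * mr_datalen, mr_numsrc and a 16-byte mr_addr), and each source adds a
 * 16-byte struct in6_addr, so minrec0len is 20 or 36 bytes here.
 */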
2927 MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
2928 mld_rec_type_to_str(type),
2929 ip6_sprintf(&inm->in6m_addr),
2930 if_name(inm->in6m_ifp)));
2931
2932 /*
2933 * Check if we have a packet in the tail of the queue for this
2934 * group into which the first group record for this group will fit.
2935 * Otherwise allocate a new packet.
2936 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2937 * Note: Group records for G/GSR query responses MUST be sent
2938 * in their own packet.
2939 */
2940 m0 = ifq->ifq_tail;
2941 if (!is_group_query &&
2942 m0 != NULL &&
2943 (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2944 (m0->m_pkthdr.len + minrec0len) <
2945 (ifp->if_mtu - MLD_MTUSPACE)) {
2946 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2947 sizeof(struct mldv2_record)) /
2948 sizeof(struct in6_addr);
2949 m = m0;
2950 MLD_PRINTF(("%s: use existing packet\n", __func__));
2951 } else {
2952 if (IF_QFULL(ifq)) {
2953 MLD_PRINTF(("%s: outbound queue full\n", __func__));
2954 return -ENOMEM;
2955 }
2956 m = NULL;
2957 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2958 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2959 if (!is_state_change && !is_group_query) {
2960 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2961 }
2962 if (m == NULL) {
2963 m = m_gethdr(M_DONTWAIT, MT_DATA);
2964 }
2965 if (m == NULL) {
2966 return -ENOMEM;
2967 }
2968
2969 mld_save_context(m, ifp);
2970
2971 MLD_PRINTF(("%s: allocated first packet\n", __func__));
2972 }
2973
2974 /*
2975 * Append group record.
2976 * If we have sources, we don't know how many yet.
2977 */
2978 mr.mr_type = type;
2979 mr.mr_datalen = 0;
2980 mr.mr_numsrc = 0;
2981 mr.mr_addr = inm->in6m_addr;
2982 in6_clearscope(&mr.mr_addr);
2983 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2984 if (m != m0) {
2985 m_freem(m);
2986 }
2987 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2988 return -ENOMEM;
2989 }
2990 nbytes += sizeof(struct mldv2_record);
2991
2992 /*
2993 * Append as many sources as will fit in the first packet.
2994 * If we are appending to a new packet, the chain allocation
2995 * may potentially use clusters; use m_getptr() in this case.
2996 * If we are appending to an existing packet, we need to obtain
2997 * a pointer to the group record after m_append(), in case a new
2998 * mbuf was allocated.
2999 *
3000 * Only append sources which are in-mode at t1. If we are
3001 * transitioning to MCAST_UNDEFINED state on the group, and
3002 * use_block_allow is zero, do not include source entries.
3003 * Otherwise, we need to include this source in the report.
3004 *
3005 * Only report recorded sources in our filter set when responding
3006 * to a group-source query.
3007 */
3008 if (record_has_sources) {
3009 if (m == m0) {
3010 md = m_last(m);
3011 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
3012 md->m_len - nbytes);
3013 } else {
3014 md = m_getptr(m, 0, &off);
3015 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
3016 off);
3017 }
3018 msrcs = 0;
3019 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
3020 nims) {
3021 MLD_PRINTF(("%s: visit node %s\n", __func__,
3022 ip6_sprintf(&ims->im6s_addr)));
3023 now = im6s_get_mode(inm, ims, 1);
3024 MLD_PRINTF(("%s: node is %d\n", __func__, now));
3025 if ((now != mode) ||
3026 (now == mode &&
3027 (!use_block_allow && mode == MCAST_UNDEFINED))) {
3028 MLD_PRINTF(("%s: skip node\n", __func__));
3029 continue;
3030 }
3031 if (is_source_query && ims->im6s_stp == 0) {
3032 MLD_PRINTF(("%s: skip unrecorded node\n",
3033 __func__));
3034 continue;
3035 }
3036 MLD_PRINTF(("%s: append node\n", __func__));
3037 if (!m_append(m, sizeof(struct in6_addr),
3038 (void *)&ims->im6s_addr)) {
3039 if (m != m0) {
3040 m_freem(m);
3041 }
3042 MLD_PRINTF(("%s: m_append() failed.\n",
3043 __func__));
3044 return -ENOMEM;
3045 }
3046 nbytes += sizeof(struct in6_addr);
3047 ++msrcs;
3048 if (msrcs == m0srcs) {
3049 break;
3050 }
3051 }
3052 MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
3053 msrcs));
3054 pmr->mr_numsrc = htons(msrcs);
3055 nbytes += (msrcs * sizeof(struct in6_addr));
3056 }
3057
3058 if (is_source_query && msrcs == 0) {
3059 MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
3060 if (m != m0) {
3061 m_freem(m);
3062 }
3063 return 0;
3064 }
3065
3066 /*
3067 * We are good to go with first packet.
3068 */
3069 if (m != m0) {
3070 MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
3071 m->m_pkthdr.vt_nrecs = 1;
3072 IF_ENQUEUE(ifq, m);
3073 } else {
3074 m->m_pkthdr.vt_nrecs++;
3075 }
3076 /*
3077 * No further work needed if no source list in packet(s).
3078 */
3079 if (!record_has_sources) {
3080 return nbytes;
3081 }
3082
3083 /*
3084 * Whilst sources remain to be announced, we need to allocate
3085 * a new packet and fill out as many sources as will fit.
3086 * Always try for a cluster first.
3087 */
3088 while (nims != NULL) {
3089 if (IF_QFULL(ifq)) {
3090 MLD_PRINTF(("%s: outbound queue full\n", __func__));
3091 return -ENOMEM;
3092 }
3093 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3094 if (m == NULL) {
3095 m = m_gethdr(M_DONTWAIT, MT_DATA);
3096 }
3097 if (m == NULL) {
3098 return -ENOMEM;
3099 }
3100 mld_save_context(m, ifp);
3101 md = m_getptr(m, 0, &off);
3102 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
3103 MLD_PRINTF(("%s: allocated next packet\n", __func__));
3104
3105 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
3106 if (m != m0) {
3107 m_freem(m);
3108 }
3109 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
3110 return -ENOMEM;
3111 }
3112 m->m_pkthdr.vt_nrecs = 1;
3113 nbytes += sizeof(struct mldv2_record);
3114
3115 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3116 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
3117
3118 msrcs = 0;
3119 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3120 MLD_PRINTF(("%s: visit node %s\n",
3121 __func__, ip6_sprintf(&ims->im6s_addr)));
3122 now = im6s_get_mode(inm, ims, 1);
3123 if ((now != mode) ||
3124 (now == mode &&
3125 (!use_block_allow && mode == MCAST_UNDEFINED))) {
3126 MLD_PRINTF(("%s: skip node\n", __func__));
3127 continue;
3128 }
3129 if (is_source_query && ims->im6s_stp == 0) {
3130 MLD_PRINTF(("%s: skip unrecorded node\n",
3131 __func__));
3132 continue;
3133 }
3134 MLD_PRINTF(("%s: append node\n", __func__));
3135 if (!m_append(m, sizeof(struct in6_addr),
3136 (void *)&ims->im6s_addr)) {
3137 if (m != m0) {
3138 m_freem(m);
3139 }
3140 MLD_PRINTF(("%s: m_append() failed.\n",
3141 __func__));
3142 return -ENOMEM;
3143 }
3144 ++msrcs;
3145 if (msrcs == m0srcs) {
3146 break;
3147 }
3148 }
3149 pmr->mr_numsrc = htons(msrcs);
3150 nbytes += (msrcs * sizeof(struct in6_addr));
3151
3152 MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
3153 IF_ENQUEUE(ifq, m);
3154 }
3155
3156 return nbytes;
3157 }
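/*
 * Usage patterns seen elsewhere in this file, summarized for reference
 * (argument order is ifq, inm, is_state_change, is_group_query,
 * is_source_query, use_block_allow):
 *
 *	current-state record for a General Query response:
 *		mld_v2_enqueue_group_record(&mli->mli_gq, inm, 0, 0, 0, 0);
 *
 *	current-state record for a Group/Group-Source query response:
 *		mld_v2_enqueue_group_record(qrq, inm, 0, 1,
 *		    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER), 0);
 *
 *	state-change record (initial join, leave, filter change):
 *		mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
 *		    (mli->mli_flags & MLIF_USEALLOW));
 */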
3158
3159 /*
3160 * Type used to mark record pass completion.
3161 * We exploit the fact that we can cast to this easily from the
3162 * current filter modes on each ip6_msource node.
3163 */
3164 typedef enum {
3165 REC_NONE = 0x00, /* MCAST_UNDEFINED */
3166 REC_ALLOW = 0x01, /* MCAST_INCLUDE */
3167 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */
3168 REC_FULL = REC_ALLOW | REC_BLOCK
3169 } rectype_t;
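/*
 * Worked example of the cast trick above: the values line up with
 * MCAST_UNDEFINED/MCAST_INCLUDE/MCAST_EXCLUDE, so a node's t1 filter
 * mode can be cast directly to a rectype_t.  A node that went to
 * MCAST_UNDEFINED at t1 yields REC_NONE and is then reported as the
 * inverse of the group's filter mode:
 *
 *	nrt = (rectype_t)(~mode & REC_FULL);
 *
 * e.g. for an MCAST_INCLUDE group (REC_ALLOW, 0x01) this gives
 * (~0x01 & 0x03) == 0x02 == REC_BLOCK, i.e. the removed source is
 * emitted in a BLOCK_OLD_SOURCES record.
 */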
3170
3171 /*
3172 * Enqueue an MLDv2 filter list change to the given output queue.
3173 *
3174 * Source list filter state is held in an RB-tree. When the filter list
3175 * for a group is changed without changing its mode, we need to compute
3176 * the deltas between T0 and T1 for each source in the filter set,
3177 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3178 *
3179 * As we may potentially queue two record types, and the entire R-B tree
3180 * needs to be walked at once, we break this out into its own function
3181 * so we can generate a tightly packed queue of packets.
3182 *
3183 * XXX This could be written to only use one tree walk, although that makes
3184 * serializing into the mbuf chains a bit harder. For now we do two walks
3185 * which makes things easier on us, and it may or may not be harder on
3186 * the L2 cache.
3187 *
3188 * If successful the size of all data appended to the queue is returned,
3189 * otherwise an error code less than zero is returned, or zero if
3190 * no record(s) were appended.
3191 */
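/*
 * Worked example of the delta computation described above, for
 * illustration only: suppose a group is in MCAST_INCLUDE mode with
 * filter set {S1, S2} at t0 and {S2, S3} at t1.  S2 is unchanged and is
 * skipped; S1 is undefined at t1 and is emitted in a BLOCK_OLD record;
 * S3 is newly included and is emitted in an ALLOW_NEW record.  The
 * result is two group records, ALLOW_NEW{S3} and BLOCK_OLD{S1}, queued
 * in the same packet when they fit.
 */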
3192 static int
3193 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
3194 {
3195 static const int MINRECLEN =
3196 sizeof(struct mldv2_record) + sizeof(struct in6_addr);
3197 struct ifnet *ifp;
3198 struct mldv2_record mr;
3199 struct mldv2_record *pmr;
3200 struct ip6_msource *ims, *nims;
3201 struct mbuf *m, *m0, *md;
3202 int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3203 int nallow, nblock;
3204 uint8_t mode, now, then;
3205 rectype_t crt, drt, nrt;
3206
3207 IN6M_LOCK_ASSERT_HELD(inm);
3208
3209 if (inm->in6m_nsrc == 0 ||
3210 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) {
3211 return 0;
3212 }
3213
3214 ifp = inm->in6m_ifp; /* interface */
3215 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */
3216 crt = REC_NONE; /* current group record type */
3217 drt = REC_NONE; /* mask of completed group record types */
3218 nrt = REC_NONE; /* record type for current node */
3219 m0srcs = 0; /* # source which will fit in current mbuf chain */
3220 npbytes = 0; /* # of bytes appended this packet */
3221 nbytes = 0; /* # of bytes appended to group's state-change queue */
3222 rsrcs = 0; /* # sources encoded in current record */
3223 schanged = 0; /* # nodes encoded in overall filter change */
3224 nallow = 0; /* # of source entries in ALLOW_NEW */
3225 nblock = 0; /* # of source entries in BLOCK_OLD */
3226 nims = NULL; /* next tree node pointer */
3227
3228 /*
3229 * For each possible filter record mode.
3230 * The first kind of source we encounter tells us which
3231 * is the first kind of record we start appending.
3232 * If a node transitioned to UNDEFINED at t1, its mode is treated
3233 * as the inverse of the group's filter mode.
3234 */
3235 while (drt != REC_FULL) {
3236 do {
3237 m0 = ifq->ifq_tail;
3238 if (m0 != NULL &&
3239 (m0->m_pkthdr.vt_nrecs + 1 <=
3240 MLD_V2_REPORT_MAXRECS) &&
3241 (m0->m_pkthdr.len + MINRECLEN) <
3242 (ifp->if_mtu - MLD_MTUSPACE)) {
3243 m = m0;
3244 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3245 sizeof(struct mldv2_record)) /
3246 sizeof(struct in6_addr);
3247 MLD_PRINTF(("%s: use previous packet\n",
3248 __func__));
3249 } else {
3250 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3251 if (m == NULL) {
3252 m = m_gethdr(M_DONTWAIT, MT_DATA);
3253 }
3254 if (m == NULL) {
3255 MLD_PRINTF(("%s: m_get*() failed\n",
3256 __func__));
3257 return -ENOMEM;
3258 }
3259 m->m_pkthdr.vt_nrecs = 0;
3260 mld_save_context(m, ifp);
3261 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3262 sizeof(struct mldv2_record)) /
3263 sizeof(struct in6_addr);
3264 npbytes = 0;
3265 MLD_PRINTF(("%s: allocated new packet\n",
3266 __func__));
3267 }
3268 /*
3269 * Append the MLD group record header to the
3270 * current packet's data area.
3271 * Recalculate pointer to free space for next
3272 * group record, in case m_append() allocated
3273 * a new mbuf or cluster.
3274 */
3275 memset(&mr, 0, sizeof(mr));
3276 mr.mr_addr = inm->in6m_addr;
3277 in6_clearscope(&mr.mr_addr);
3278 if (!m_append(m, sizeof(mr), (void *)&mr)) {
3279 if (m != m0) {
3280 m_freem(m);
3281 }
3282 MLD_PRINTF(("%s: m_append() failed\n",
3283 __func__));
3284 return -ENOMEM;
3285 }
3286 npbytes += sizeof(struct mldv2_record);
3287 if (m != m0) {
3288 /* new packet; offset in chain */
3289 md = m_getptr(m, npbytes -
3290 sizeof(struct mldv2_record), &off);
3291 pmr = (struct mldv2_record *)(mtod(md,
3292 uint8_t *) + off);
3293 } else {
3294 /* current packet; offset from last append */
3295 md = m_last(m);
3296 pmr = (struct mldv2_record *)(mtod(md,
3297 uint8_t *) + md->m_len -
3298 sizeof(struct mldv2_record));
3299 }
3300 /*
3301 * Begin walking the tree for this record type
3302 * pass, or continue from where we left off
3303 * previously if we had to allocate a new packet.
3304 * Only report deltas in-mode at t1.
3305 * We need not report included sources as allowed
3306 * if we are in inclusive mode on the group,
3307 * however the converse is not true.
3308 */
3309 rsrcs = 0;
3310 if (nims == NULL) {
3311 nims = RB_MIN(ip6_msource_tree,
3312 &inm->in6m_srcs);
3313 }
3314 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3315 MLD_PRINTF(("%s: visit node %s\n", __func__,
3316 ip6_sprintf(&ims->im6s_addr)));
3317 now = im6s_get_mode(inm, ims, 1);
3318 then = im6s_get_mode(inm, ims, 0);
3319 MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3320 __func__, then, now));
3321 if (now == then) {
3322 MLD_PRINTF(("%s: skip unchanged\n",
3323 __func__));
3324 continue;
3325 }
3326 if (mode == MCAST_EXCLUDE &&
3327 now == MCAST_INCLUDE) {
3328 MLD_PRINTF(("%s: skip IN src on EX "
3329 "group\n", __func__));
3330 continue;
3331 }
3332 nrt = (rectype_t)now;
3333 if (nrt == REC_NONE) {
3334 nrt = (rectype_t)(~mode & REC_FULL);
3335 }
3336 if (schanged++ == 0) {
3337 crt = nrt;
3338 } else if (crt != nrt) {
3339 continue;
3340 }
3341 if (!m_append(m, sizeof(struct in6_addr),
3342 (void *)&ims->im6s_addr)) {
3343 if (m != m0) {
3344 m_freem(m);
3345 }
3346 MLD_PRINTF(("%s: m_append() failed\n",
3347 __func__));
3348 return -ENOMEM;
3349 }
3350 nallow += !!(crt == REC_ALLOW);
3351 nblock += !!(crt == REC_BLOCK);
3352 if (++rsrcs == m0srcs) {
3353 break;
3354 }
3355 }
3356 /*
3357 * If we did not append any tree nodes on this
3358 * pass, back out of allocations.
3359 */
3360 if (rsrcs == 0) {
3361 npbytes -= sizeof(struct mldv2_record);
3362 if (m != m0) {
3363 MLD_PRINTF(("%s: m_free(m)\n",
3364 __func__));
3365 m_freem(m);
3366 } else {
3367 MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3368 __func__));
3369 m_adj(m, -((int)sizeof(
3370 struct mldv2_record)));
3371 }
3372 continue;
3373 }
3374 npbytes += (rsrcs * sizeof(struct in6_addr));
3375 if (crt == REC_ALLOW) {
3376 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3377 } else if (crt == REC_BLOCK) {
3378 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3379 }
3380 pmr->mr_numsrc = htons(rsrcs);
3381 /*
3382 * Count the new group record, and enqueue this
3383 * packet if it wasn't already queued.
3384 */
3385 m->m_pkthdr.vt_nrecs++;
3386 if (m != m0) {
3387 IF_ENQUEUE(ifq, m);
3388 }
3389 nbytes += npbytes;
3390 } while (nims != NULL);
3391 drt |= crt;
3392 crt = (~crt & REC_FULL);
3393 }
3394
3395 MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3396 nallow, nblock));
3397
3398 return nbytes;
3399 }
3400
3401 static int
3402 mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3403 {
3404 struct ifqueue *gq;
3405 struct mbuf *m; /* pending state-change */
3406 struct mbuf *m0; /* copy of pending state-change */
3407 struct mbuf *mt; /* last state-change in packet */
3408 struct mbuf *n;
3409 int docopy, domerge;
3410 u_int recslen;
3411
3412 IN6M_LOCK_ASSERT_HELD(inm);
3413
3414 docopy = 0;
3415 domerge = 0;
3416 recslen = 0;
3417
3418 /*
3419 * If there are further pending retransmissions, make a writable
3420 * copy of each queued state-change message before merging.
3421 */
3422 if (inm->in6m_scrv > 0) {
3423 docopy = 1;
3424 }
3425
3426 gq = &inm->in6m_scq;
3427 #ifdef MLD_DEBUG
3428 if (gq->ifq_head == NULL) {
3429 MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
3430 __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
3431 }
3432 #endif
3433
3434 /*
3435 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3436 * packet might not always be at the head of the ifqueue.
3437 */
3438 m = gq->ifq_head;
3439 while (m != NULL) {
3440 /*
3441 * Only merge the report into the current packet if
3442 * there is sufficient space to do so; an MLDv2 report
3443 * packet may only contain 65,535 group records.
3444 * Always use a simple mbuf chain concatenation to do this,
3445 * as large state changes for single groups may have
3446 * allocated clusters.
3447 */
3448 domerge = 0;
3449 mt = ifscq->ifq_tail;
3450 if (mt != NULL) {
3451 recslen = m_length(m);
3452
3453 if ((mt->m_pkthdr.vt_nrecs +
3454 m->m_pkthdr.vt_nrecs <=
3455 MLD_V2_REPORT_MAXRECS) &&
3456 (mt->m_pkthdr.len + recslen <=
3457 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) {
3458 domerge = 1;
3459 }
3460 }
3461
3462 if (!domerge && IF_QFULL(gq)) {
3463 MLD_PRINTF(("%s: outbound queue full, skipping whole "
3464 "packet 0x%llx\n", __func__,
3465 (uint64_t)VM_KERNEL_ADDRPERM(m)));
3466 n = m->m_nextpkt;
3467 if (!docopy) {
3468 IF_REMQUEUE(gq, m);
3469 m_freem(m);
3470 }
3471 m = n;
3472 continue;
3473 }
3474
3475 if (!docopy) {
3476 MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
3477 (uint64_t)VM_KERNEL_ADDRPERM(m)));
3478 n = m->m_nextpkt;
3479 IF_REMQUEUE(gq, m);
3480 m0 = m;
3481 m = n;
3482 } else {
3483 MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
3484 (uint64_t)VM_KERNEL_ADDRPERM(m)));
3485 m0 = m_dup(m, M_NOWAIT);
3486 if (m0 == NULL) {
3487 return ENOMEM;
3488 }
3489 m0->m_nextpkt = NULL;
3490 m = m->m_nextpkt;
3491 }
3492
3493 if (!domerge) {
3494 MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx\n",
3495 __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3496 (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3497 IF_ENQUEUE(ifscq, m0);
3498 } else {
3499 struct mbuf *mtl; /* last mbuf of packet mt */
3500
3501 MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
3502 "0x%llx)\n", __func__,
3503 (uint64_t)VM_KERNEL_ADDRPERM(m0),
3504 (uint64_t)VM_KERNEL_ADDRPERM(mt)));
3505
3506 mtl = m_last(mt);
3507 m0->m_flags &= ~M_PKTHDR;
3508 mt->m_pkthdr.len += recslen;
3509 mt->m_pkthdr.vt_nrecs +=
3510 m0->m_pkthdr.vt_nrecs;
3511
3512 mtl->m_next = m0;
3513 }
3514 }
3515
3516 return 0;
3517 }
3518
3519 /*
3520 * Respond to a pending MLDv2 General Query.
3521 */
3522 static uint32_t
3523 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3524 {
3525 struct ifnet *ifp;
3526 struct in6_multi *inm;
3527 struct in6_multistep step;
3528 int retval;
3529
3530 MLI_LOCK_ASSERT_HELD(mli);
3531
3532 VERIFY(mli->mli_version == MLD_VERSION_2);
3533
3534 ifp = mli->mli_ifp;
3535 MLI_UNLOCK(mli);
3536
3537 in6_multihead_lock_shared();
3538 IN6_FIRST_MULTI(step, inm);
3539 while (inm != NULL) {
3540 IN6M_LOCK(inm);
3541 if (inm->in6m_ifp != ifp) {
3542 goto next;
3543 }
3544
3545 switch (inm->in6m_state) {
3546 case MLD_NOT_MEMBER:
3547 case MLD_SILENT_MEMBER:
3548 break;
3549 case MLD_REPORTING_MEMBER:
3550 case MLD_IDLE_MEMBER:
3551 case MLD_LAZY_MEMBER:
3552 case MLD_SLEEPING_MEMBER:
3553 case MLD_AWAKENING_MEMBER:
3554 inm->in6m_state = MLD_REPORTING_MEMBER;
3555 MLI_LOCK(mli);
3556 retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3557 inm, 0, 0, 0, 0);
3558 MLI_UNLOCK(mli);
3559 MLD_PRINTF(("%s: enqueue record = %d\n",
3560 __func__, retval));
3561 break;
3562 case MLD_G_QUERY_PENDING_MEMBER:
3563 case MLD_SG_QUERY_PENDING_MEMBER:
3564 case MLD_LEAVING_MEMBER:
3565 break;
3566 }
3567 next:
3568 IN6M_UNLOCK(inm);
3569 IN6_NEXT_MULTI(step, inm);
3570 }
3571 in6_multihead_lock_done();
3572
3573 MLI_LOCK(mli);
3574 mld_dispatch_queue_locked(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3575 MLI_LOCK_ASSERT_HELD(mli);
3576
3577 /*
3578 * Slew transmission of bursts over 1 second intervals.
3579 */
3580 if (mli->mli_gq.ifq_head != NULL) {
3581 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3582 MLD_RESPONSE_BURST_INTERVAL);
3583 }
3584
3585 return mli->mli_v2_timer;
3586 }
3587
3588 /*
3589 * Transmit the next pending message in the output queue.
3590 *
3591 * Must not be called with in6m_lock or mli_lock held.
3592 */
3593 static void
3594 mld_dispatch_packet(struct mbuf *m)
3595 {
3596 struct ip6_moptions *im6o;
3597 struct ifnet *ifp;
3598 struct ifnet *oifp = NULL;
3599 struct mbuf *m0;
3600 struct mbuf *md;
3601 struct ip6_hdr *ip6;
3602 struct mld_hdr *mld;
3603 int error;
3604 int off;
3605 int type;
3606
3607 MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
3608 (uint64_t)VM_KERNEL_ADDRPERM(m)));
3609
3610 /*
3611 * Check if the ifnet is still attached.
3612 */
3613 ifp = mld_restore_context(m);
3614 if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3615 MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
3616 __func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
3617 (u_int)if_index));
3618 m_freem(m);
3619 ip6stat.ip6s_noroute++;
3620 return;
3621 }
3622
3623 im6o = ip6_allocmoptions(M_WAITOK);
3624 if (im6o == NULL) {
3625 m_freem(m);
3626 return;
3627 }
3628
3629 im6o->im6o_multicast_hlim = 1;
3630 im6o->im6o_multicast_loop = 0;
3631 im6o->im6o_multicast_ifp = ifp;
3632
3633 if (m->m_flags & M_MLDV1) {
3634 m0 = m;
3635 } else {
3636 m0 = mld_v2_encap_report(ifp, m);
3637 if (m0 == NULL) {
3638 MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
3639 (uint64_t)VM_KERNEL_ADDRPERM(m)));
3640 /*
3641 * mld_v2_encap_report() has already freed our mbuf.
3642 */
3643 IM6O_REMREF(im6o);
3644 ip6stat.ip6s_odropped++;
3645 return;
3646 }
3647 }
3648
3649 mld_scrub_context(m0);
3650 m->m_flags &= ~(M_PROTOFLAGS);
3651 m0->m_pkthdr.rcvif = lo_ifp;
3652
3653 ip6 = mtod(m0, struct ip6_hdr *);
3654 (void)in6_setscope(&ip6->ip6_dst, ifp, NULL);
3655
3656 /*
3657 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3658 * so we can bump the stats.
3659 */
3660 md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3661 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3662 type = mld->mld_type;
3663
3664 if (ifp->if_eflags & IFEF_TXSTART) {
3665 /*
3666 * Use control service class if the outgoing
3667 * interface supports transmit-start model.
3668 */
3669 (void) m_set_service_class(m0, MBUF_SC_CTL);
3670 }
3671
3672 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3673 &oifp, NULL);
3674
3675 IM6O_REMREF(im6o);
3676
3677 if (error) {
3678 MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
3679 (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
3680 if (oifp != NULL) {
3681 ifnet_release(oifp);
3682 }
3683 return;
3684 }
3685
3686 icmp6stat.icp6s_outhist[type]++;
3687 if (oifp != NULL) {
3688 icmp6_ifstat_inc(oifp, ifs6_out_msg);
3689 switch (type) {
3690 case MLD_LISTENER_REPORT:
3691 case MLDV2_LISTENER_REPORT:
3692 icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3693 break;
3694 case MLD_LISTENER_DONE:
3695 icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3696 break;
3697 }
3698 ifnet_release(oifp);
3699 }
3700 }
3701
3702 /*
3703 * Encapsulate an MLDv2 report.
3704 *
3705 * KAME IPv6 requires that hop-by-hop options be passed separately,
3706 * and that the IPv6 header be prepended in a separate mbuf.
3707 *
3708 * Returns a pointer to the new mbuf chain head, or NULL if the
3709 * allocation failed.
3710 */
3711 static struct mbuf *
3712 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3713 {
3714 struct mbuf *mh;
3715 struct mldv2_report *mld;
3716 struct ip6_hdr *ip6;
3717 struct in6_ifaddr *ia;
3718 int mldreclen;
3719
3720 VERIFY(m->m_flags & M_PKTHDR);
3721
3722 /*
3723 * RFC3590: OK to send as :: or tentative during DAD.
3724 */
3725 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
3726 if (ia == NULL) {
3727 MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3728 }
3729
3730 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3731 if (mh == NULL) {
3732 if (ia != NULL) {
3733 IFA_REMREF(&ia->ia_ifa);
3734 }
3735 m_freem(m);
3736 return NULL;
3737 }
3738 MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3739
3740 mldreclen = m_length(m);
3741 MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3742
3743 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3744 mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3745 sizeof(struct mldv2_report) + mldreclen;
3746
3747 ip6 = mtod(mh, struct ip6_hdr *);
3748 ip6->ip6_flow = 0;
3749 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3750 ip6->ip6_vfc |= IPV6_VERSION;
3751 ip6->ip6_nxt = IPPROTO_ICMPV6;
3752 if (ia != NULL) {
3753 IFA_LOCK(&ia->ia_ifa);
3754 }
3755 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3756 if (ia != NULL) {
3757 IFA_UNLOCK(&ia->ia_ifa);
3758 IFA_REMREF(&ia->ia_ifa);
3759 ia = NULL;
3760 }
3761 ip6->ip6_dst = in6addr_linklocal_allv2routers;
3762 /* scope ID will be set in netisr */
3763
3764 mld = (struct mldv2_report *)(ip6 + 1);
3765 mld->mld_type = MLDV2_LISTENER_REPORT;
3766 mld->mld_code = 0;
3767 mld->mld_cksum = 0;
3768 mld->mld_v2_reserved = 0;
3769 mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3770 m->m_pkthdr.vt_nrecs = 0;
3771 m->m_flags &= ~M_PKTHDR;
3772
3773 mh->m_next = m;
3774 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3775 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3776 return mh;
3777 }
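/*
 * Resulting mbuf chain layout (sketch):
 *
 *	mh: [ IPv6 header | MLDv2 report header ]  -->  m: [ group records ]
 *
 * The Router Alert hop-by-hop option is not inserted here; it is handed
 * to ip6_output() separately via mld_po.ip6po_hbh (set up in mld_init()
 * below), per the KAME requirement noted above.
 */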
3778
3779 #ifdef MLD_DEBUG
3780 static const char *
3781 mld_rec_type_to_str(const int type)
3782 {
3783 switch (type) {
3784 case MLD_CHANGE_TO_EXCLUDE_MODE:
3785 return "TO_EX";
3786 case MLD_CHANGE_TO_INCLUDE_MODE:
3787 return "TO_IN";
3788 case MLD_MODE_IS_EXCLUDE:
3789 return "MODE_EX";
3790 case MLD_MODE_IS_INCLUDE:
3791 return "MODE_IN";
3792 case MLD_ALLOW_NEW_SOURCES:
3793 return "ALLOW_NEW";
3794 case MLD_BLOCK_OLD_SOURCES:
3795 return "BLOCK_OLD";
3796 default:
3797 break;
3798 }
3799 return "unknown";
3800 }
3801 #endif
3802
3803 void
3804 mld_init(void)
3805 {
3806 MLD_PRINTF(("%s: initializing\n", __func__));
3807
3808 /* Setup lock group and attribute for mld_mtx */
3809 mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3810 mld_mtx_grp = lck_grp_alloc_init("mld_mtx", mld_mtx_grp_attr);
3811 mld_mtx_attr = lck_attr_alloc_init();
3812 lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3813
3814 ip6_initpktopts(&mld_po);
3815 mld_po.ip6po_hlim = 1;
3816 mld_po.ip6po_hbh = &mld_ra.hbh;
3817 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3818 mld_po.ip6po_flags = IP6PO_DONTFRAG;
3819 LIST_INIT(&mli_head);
3820
3821 mli_size = sizeof(struct mld_ifinfo);
3822 mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3823 0, MLI_ZONE_NAME);
3824 if (mli_zone == NULL) {
3825 panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3826 /* NOTREACHED */
3827 }
3828 zone_change(mli_zone, Z_EXPAND, TRUE);
3829 zone_change(mli_zone, Z_CALLERACCT, FALSE);
3830 }