bsd/netinet6/mld6.c

   1 /*
   2  * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*-
  29  * Copyright (c) 2009 Bruce Simpson.
  30  *
  31  * Redistribution and use in source and binary forms, with or without
  32  * modification, are permitted provided that the following conditions
  33  * are met:
  34  * 1. Redistributions of source code must retain the above copyright
  35  *    notice, this list of conditions and the following disclaimer.
  36  * 2. Redistributions in binary form must reproduce the above copyright
  37  *    notice, this list of conditions and the following disclaimer in the
  38  *    documentation and/or other materials provided with the distribution.
  39  * 3. The name of the author may not be used to endorse or promote
  40  *    products derived from this software without specific prior written
  41  *    permission.
  42  *
  43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  53  * SUCH DAMAGE.
  54  */
  55
  56 /*
  57  * Copyright (c) 1988 Stephen Deering.
  58  * Copyright (c) 1992, 1993
  59  *      The Regents of the University of California.  All rights reserved.
  60  *
  61  * This code is derived from software contributed to Berkeley by
  62  * Stephen Deering of Stanford University.
  63  *
  64  * Redistribution and use in source and binary forms, with or without
  65  * modification, are permitted provided that the following conditions
  66  * are met:
  67  * 1. Redistributions of source code must retain the above copyright
  68  *    notice, this list of conditions and the following disclaimer.
  69  * 2. Redistributions in binary form must reproduce the above copyright
  70  *    notice, this list of conditions and the following disclaimer in the
  71  *    documentation and/or other materials provided with the distribution.
  72  * 3. All advertising materials mentioning features or use of this software
  73  *    must display the following acknowledgement:
  74  *      This product includes software developed by the University of
  75  *      California, Berkeley and its contributors.
  76  * 4. Neither the name of the University nor the names of its contributors
  77  *    may be used to endorse or promote products derived from this software
  78  *    without specific prior written permission.
  79  *
  80  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  81  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  82  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  83  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  84  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  85  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  86  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  87  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  88  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  89  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  90  * SUCH DAMAGE.
  91  *
  92  *      @(#)igmp.c      8.1 (Berkeley) 7/19/93
  93  */
  94 /*
  95  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  96  * support for mandatory and extensible security protections.  This notice
  97  * is included in support of clause 2.2 (b) of the Apple Public License,
  98  * Version 2.0.
  99  */
 100
 101 #include <sys/cdefs.h>
 102
 103 #include <sys/param.h>
 104 #include <sys/systm.h>
 105 #include <sys/mbuf.h>
 106 #include <sys/socket.h>
 107 #include <sys/protosw.h>
 108 #include <sys/sysctl.h>
 109 #include <sys/kernel.h>
 110 #include <sys/malloc.h>
 111 #include <sys/mcache.h>
 112
 113 #include <dev/random/randomdev.h>
 114
 115 #include <kern/zalloc.h>
 116
 117 #include <net/if.h>
 118 #include <net/route.h>
 119
 120 #include <netinet/in.h>
 121 #include <netinet/in_var.h>
 122 #include <netinet6/in6_var.h>
 123 #include <netinet/ip6.h>
 124 #include <netinet6/ip6_var.h>
 125 #include <netinet6/scope6_var.h>
 126 #include <netinet/icmp6.h>
 127 #include <netinet6/mld6.h>
 128 #include <netinet6/mld6_var.h>
 129
 130 /* Lock group and attribute for mld_mtx */
 131 static lck_attr_t       *mld_mtx_attr;
 132 static lck_grp_t        *mld_mtx_grp;
 133 static lck_grp_attr_t   *mld_mtx_grp_attr;
 134
 135 /*
 136  * Locking and reference counting:
 137  *
 138  * mld_mtx mainly protects mli_head.  In cases where both mld_mtx and
 139  * in6_multihead_lock must be held, the former must be acquired first in order
 140  * to maintain lock ordering.  It is not a requirement that mld_mtx be
 141  * acquired first before in6_multihead_lock, but in case both must be acquired
 142  * in succession, the correct lock ordering must be followed.
 143  *
 144  * Instead of walking the if_multiaddrs list at the interface and returning
 145  * the ifma_protospec value of a matching entry, we search the global list
 146  * of in6_multi records and find it that way; this is done with in6_multihead
 147  * lock held.  Doing so avoids the race condition issues that many other BSDs
 148  * suffer from (therefore in our implementation, ifma_protospec will never be
 149  * NULL for as long as the in6_multi is valid.)
 150  *
 151  * The above creates a requirement for the in6_multi to stay in in6_multihead
  152  * list even after the final MLD leave (in MLDv2 mode) until it no longer
  153  * needs to be retransmitted (this is not required for MLDv1.)  In order to handle
 154  * this, the request and reference counts of the in6_multi are bumped up when
 155  * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
 156  * handler.  Each in6_multi holds a reference to the underlying mld_ifinfo.
 157  *
 158  * Thus, the permitted lock order is:
 159  *
 160  *      mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
 161  *
 162  * Any may be taken independently, but if any are held at the same time,
 163  * the above lock order must be followed.
 164  */
 165 static decl_lck_mtx_data(, mld_mtx);
 166
 167 SLIST_HEAD(mld_in6m_relhead, in6_multi);
 168
 169 static void     mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
 170 static struct mld_ifinfo *mli_alloc(int);
 171 static void     mli_free(struct mld_ifinfo *);
 172 static void     mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
 173 static void     mld_dispatch_packet(struct mbuf *);
 174 static void     mld_final_leave(struct in6_multi *, struct mld_ifinfo *,
 175                     struct mld_tparams *);
 176 static int      mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *,
 177                     struct mld_tparams *);
 178 static int      mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
 179                     struct mld_tparams *, const int);
 180 #ifdef MLD_DEBUG
 181 static const char *     mld_rec_type_to_str(const int);
 182 #endif
 183 static uint32_t mld_set_version(struct mld_ifinfo *, const int);
 184 static void     mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
 185 static void     mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
 186 static int      mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
 187                     /*const*/ struct mld_hdr *);
 188 static int      mld_v1_input_report(struct ifnet *, struct mbuf *,
 189                     const struct ip6_hdr *, /*const*/ struct mld_hdr *);
 190 static void     mld_v1_process_group_timer(struct in6_multi *, const int);
 191 static void     mld_v1_process_querier_timers(struct mld_ifinfo *);
 192 static int      mld_v1_transmit_report(struct in6_multi *, const int);
 193 static uint32_t mld_v1_update_group(struct in6_multi *, const int);
 194 static void     mld_v2_cancel_link_timers(struct mld_ifinfo *);
 195 static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
 196 static struct mbuf *
 197                 mld_v2_encap_report(struct ifnet *, struct mbuf *);
 198 static int      mld_v2_enqueue_filter_change(struct ifqueue *,
 199                     struct in6_multi *);
 200 static int      mld_v2_enqueue_group_record(struct ifqueue *,
 201                     struct in6_multi *, const int, const int, const int,
 202                     const int);
 203 static int      mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
 204                     struct mbuf *, const int, const int);
 205 static int      mld_v2_merge_state_changes(struct in6_multi *,
 206                     struct ifqueue *);
 207 static void     mld_v2_process_group_timers(struct mld_ifinfo *,
 208                     struct ifqueue *, struct ifqueue *,
 209                     struct in6_multi *, const int);
 210 static int      mld_v2_process_group_query(struct in6_multi *,
 211                     int, struct mbuf *, const int);
 212 static int      sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
 213 static int      sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
 214 static int      sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;
 215
 216 static int mld_timeout_run;             /* MLD timer is scheduled to run */
 217 static void mld_timeout(void *);
 218 static void mld_sched_timeout(void);
 219
 220 /*
 221  * Normative references: RFC 2710, RFC 3590, RFC 3810.
 222  */
 223 static struct timeval mld_gsrdelay = {10, 0};
 224 static LIST_HEAD(, mld_ifinfo) mli_head;
 225
 226 static int querier_present_timers_running6;
 227 static int interface_timers_running6;
 228 static int state_change_timers_running6;
 229 static int current_state_timers_running6;
 230
 231 /*
 232  * Subsystem lock macros.
 233  */
 234 #define MLD_LOCK()                      \
 235         lck_mtx_lock(&mld_mtx)
 236 #define MLD_LOCK_ASSERT_HELD()          \
 237         lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
 238 #define MLD_LOCK_ASSERT_NOTHELD()       \
 239         lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
 240 #define MLD_UNLOCK()                    \
 241         lck_mtx_unlock(&mld_mtx)
 242
 243 #define MLD_ADD_DETACHED_IN6M(_head, _in6m) {                           \
 244         SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);                     \
 245 }
 246
 247 #define MLD_REMOVE_DETACHED_IN6M(_head) {                               \
 248         struct in6_multi *_in6m, *_inm_tmp;                             \
 249         SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) {         \
 250                 SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle);       \
 251                 IN6M_REMREF(_in6m);                                     \
 252         }                                                               \
 253         VERIFY(SLIST_EMPTY(_head));                                     \
 254 }
 255
 256 #define MLI_ZONE_MAX            64              /* maximum elements in zone */
 257 #define MLI_ZONE_NAME           "mld_ifinfo"    /* zone name */
 258
 259 static unsigned int mli_size;                   /* size of zone element */
 260 static struct zone *mli_zone;                   /* zone for mld_ifinfo */
 261
 262 SYSCTL_DECL(_net_inet6);        /* Note: Not in any common header. */
 263
 264 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
 265     "IPv6 Multicast Listener Discovery");
 266 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
 267     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 268     &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
 269     "Rate limit for MLDv2 Group-and-Source queries in seconds");
 270
 271 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
 272    sysctl_mld_ifinfo, "Per-interface MLDv2 state");
 273
 274 static int      mld_v1enable = 1;
 275 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
 276     &mld_v1enable, 0, "Enable fallback to MLDv1");
 277
 278 static int      mld_v2enable = 1;
 279 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
 280     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 281     &mld_v2enable, 0, sysctl_mld_v2enable, "I",
 282     "Enable MLDv2 (debug purposes only)");
 283
 284 static int      mld_use_allow = 1;
 285 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
 286     &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
 287
 288 #ifdef MLD_DEBUG
 289 int mld_debug = 0;
 290 SYSCTL_INT(_net_inet6_mld, OID_AUTO,
 291         debug, CTLFLAG_RW | CTLFLAG_LOCKED,     &mld_debug, 0, "");
 292 #endif
 293 /*
 294  * Packed Router Alert option structure declaration.
 295  */
 296 struct mld_raopt {
 297         struct ip6_hbh          hbh;
 298         struct ip6_opt          pad;
 299         struct ip6_opt_router   ra;
 300 } __packed;
 301
 302 /*
 303  * Router Alert hop-by-hop option header.
 304  */
 305 static struct mld_raopt mld_ra = {
 306         .hbh = { 0, 0 },
 307         .pad = { .ip6o_type = IP6OPT_PADN, 0 },
 308         .ra = {
 309             .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
 310             .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
 311             .ip6or_value =  {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
 312                 (IP6OPT_RTALERT_MLD & 0xFF) }
 313         }
 314 };
 315 static struct ip6_pktopts mld_po;
 316
 317 /* Store MLDv2 record count in the module private scratch space */
 318 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
 319
 320 static __inline void
 321 mld_save_context(struct mbuf *m, struct ifnet *ifp)
 322 {
 323         m->m_pkthdr.rcvif = ifp;
 324 }
 325
 326 static __inline void
 327 mld_scrub_context(struct mbuf *m)
 328 {
 329         m->m_pkthdr.rcvif = NULL;
 330 }
 331
 332 /*
 333  * Restore context from a queued output chain.
 334  * Return saved ifp.
 335  */
 336 static __inline struct ifnet *
 337 mld_restore_context(struct mbuf *m)
 338 {
 339         return (m->m_pkthdr.rcvif);
 340 }
 341
 342 /*
 343  * Retrieve or set threshold between group-source queries in seconds.
 344  */
 345 static int
 346 sysctl_mld_gsr SYSCTL_HANDLER_ARGS
 347 {
 348 #pragma unused(arg1, arg2)
 349         int error;
 350         int i;
 351
 352         MLD_LOCK();
 353
 354         i = mld_gsrdelay.tv_sec;
 355
 356         error = sysctl_handle_int(oidp, &i, 0, req);
 357         if (error || !req->newptr)
 358                 goto out_locked;
 359
 360         if (i < -1 || i >= 60) {
 361                 error = EINVAL;
 362                 goto out_locked;
 363         }
 364
 365         mld_gsrdelay.tv_sec = i;
 366
 367 out_locked:
 368         MLD_UNLOCK();
 369         return (error);
 370 }
 371 /*
 372  * Expose struct mld_ifinfo to userland, keyed by ifindex.
 373  * For use by ifmcstat(8).
 374  *
 375  */
 376 static int
 377 sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
 378 {
 379 #pragma unused(oidp)
 380         int                     *name;
 381         int                      error;
 382         u_int                    namelen;
 383         struct ifnet            *ifp;
 384         struct mld_ifinfo       *mli;
 385         struct mld_ifinfo_u     mli_u;
 386
 387         name = (int *)arg1;
 388         namelen = arg2;
 389
 390         if (req->newptr != USER_ADDR_NULL)
 391                 return (EPERM);
 392
 393         if (namelen != 1)
 394                 return (EINVAL);
 395
 396         MLD_LOCK();
 397
 398         if (name[0] <= 0 || name[0] > (u_int)if_index) {
 399                 error = ENOENT;
 400                 goto out_locked;
 401         }
 402
 403         error = ENOENT;
 404
 405         ifnet_head_lock_shared();
 406         ifp = ifindex2ifnet[name[0]];
 407         ifnet_head_done();
 408         if (ifp == NULL)
 409                 goto out_locked;
 410
 411         bzero(&mli_u, sizeof (mli_u));
 412
 413         LIST_FOREACH(mli, &mli_head, mli_link) {
 414                 MLI_LOCK(mli);
 415                 if (ifp != mli->mli_ifp) {
 416                         MLI_UNLOCK(mli);
 417                         continue;
 418                 }
 419
 420                 mli_u.mli_ifindex = mli->mli_ifp->if_index;
 421                 mli_u.mli_version = mli->mli_version;
 422                 mli_u.mli_v1_timer = mli->mli_v1_timer;
 423                 mli_u.mli_v2_timer = mli->mli_v2_timer;
 424                 mli_u.mli_flags = mli->mli_flags;
 425                 mli_u.mli_rv = mli->mli_rv;
 426                 mli_u.mli_qi = mli->mli_qi;
 427                 mli_u.mli_qri = mli->mli_qri;
 428                 mli_u.mli_uri = mli->mli_uri;
 429                 MLI_UNLOCK(mli);
 430
 431                 error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
 432                 break;
 433         }
 434
 435 out_locked:
 436         MLD_UNLOCK();
 437         return (error);
 438 }
 439
 440 static int
 441 sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
 442 {
 443 #pragma unused(arg1, arg2)
 444         int error;
 445         int i;
 446         struct mld_ifinfo *mli;
 447         struct mld_tparams mtp = { 0, 0, 0, 0 };
 448
 449         MLD_LOCK();
 450
 451         i = mld_v2enable;
 452
 453         error = sysctl_handle_int(oidp, &i, 0, req);
 454         if (error || !req->newptr)
 455                 goto out_locked;
 456
 457         if (i < 0 || i > 1) {
 458                 error = EINVAL;
 459                 goto out_locked;
 460         }
 461
 462         mld_v2enable = i;
 463         /*
 464          * If we enabled v2, the state transition will take care of upgrading
 465          * the MLD version back to v2. Otherwise, we have to explicitly
 466          * downgrade. Note that this functionality is to be used for debugging.
 467          */
 468         if (mld_v2enable == 1)
 469                 goto out_locked;
 470
 471         LIST_FOREACH(mli, &mli_head, mli_link) {
 472                 MLI_LOCK(mli);
 473                 if (mld_set_version(mli, MLD_VERSION_1) > 0)
 474                         mtp.qpt = 1;
 475                 MLI_UNLOCK(mli);
 476         }
 477
 478 out_locked:
 479         MLD_UNLOCK();
 480
 481         mld_set_timeout(&mtp);
 482
 483         return (error);
 484 }
 485
 486 /*
 487  * Dispatch an entire queue of pending packet chains.
 488  *
 489  * Must not be called with in6m_lock held.
 490  */
 491 static void
 492 mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
 493 {
 494         struct mbuf *m;
 495
 496         if (mli != NULL)
 497                 MLI_LOCK_ASSERT_HELD(mli);
 498
 499         for (;;) {
 500                 IF_DEQUEUE(ifq, m);
 501                 if (m == NULL)
 502                         break;
 503                 MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
 504                     (uint64_t)VM_KERNEL_ADDRPERM(ifq),
 505                     (uint64_t)VM_KERNEL_ADDRPERM(m)));
 506                 if (mli != NULL)
 507                         MLI_UNLOCK(mli);
 508                 mld_dispatch_packet(m);
 509                 if (mli != NULL)
 510                         MLI_LOCK(mli);
 511                 if (--limit == 0)
 512                         break;
 513         }
 514
 515         if (mli != NULL)
 516                 MLI_LOCK_ASSERT_HELD(mli);
 517 }
 518
 519 /*
 520  * Filter outgoing MLD report state by group.
 521  *
 522  * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 523  * and node-local addresses. However, kernel and socket consumers
 524  * always embed the KAME scope ID in the address provided, so strip it
 525  * when performing comparison.
 526  * Note: This is not the same as the *multicast* scope.
 527  *
 528  * Return zero if the given group is one for which MLD reports
 529  * should be suppressed, or non-zero if reports should be issued.
 530  */
 531 static __inline__ int
 532 mld_is_addr_reported(const struct in6_addr *addr)
 533 {
 534
 535         VERIFY(IN6_IS_ADDR_MULTICAST(addr));
 536
 537         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
 538                 return (0);
 539
 540         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
 541                 struct in6_addr tmp = *addr;
 542                 in6_clearscope(&tmp);
 543                 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
 544                         return (0);
 545         }
 546
 547         return (1);
 548 }
 549
 550 /*
 551  * Attach MLD when PF_INET6 is attached to an interface.
 552  */
 553 struct mld_ifinfo *
 554 mld_domifattach(struct ifnet *ifp, int how)
 555 {
 556         struct mld_ifinfo *mli;
 557
 558         MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
 559             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 560
 561         mli = mli_alloc(how);
 562         if (mli == NULL)
 563                 return (NULL);
 564
 565         MLD_LOCK();
 566
 567         MLI_LOCK(mli);
 568         mli_initvar(mli, ifp, 0);
 569         mli->mli_debug |= IFD_ATTACHED;
 570         MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
 571         MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
 572         MLI_UNLOCK(mli);
 573         ifnet_lock_shared(ifp);
 574         mld6_initsilent(ifp, mli);
 575         ifnet_lock_done(ifp);
 576
 577         LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 578
 579         MLD_UNLOCK();
 580
 581         MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
 582             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 583
 584         return (mli);
 585 }
 586
 587 /*
 588  * Attach MLD when PF_INET6 is reattached to an interface.  Caller is
 589  * expected to have an outstanding reference to the mli.
 590  */
 591 void
 592 mld_domifreattach(struct mld_ifinfo *mli)
 593 {
 594         struct ifnet *ifp;
 595
 596         MLD_LOCK();
 597
 598         MLI_LOCK(mli);
 599         VERIFY(!(mli->mli_debug & IFD_ATTACHED));
 600         ifp = mli->mli_ifp;
 601         VERIFY(ifp != NULL);
 602         mli_initvar(mli, ifp, 1);
 603         mli->mli_debug |= IFD_ATTACHED;
 604         MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
 605         MLI_UNLOCK(mli);
 606         ifnet_lock_shared(ifp);
 607         mld6_initsilent(ifp, mli);
 608         ifnet_lock_done(ifp);
 609
 610         LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 611
 612         MLD_UNLOCK();
 613
 614         MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
 615             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 616 }
 617
 618 /*
 619  * Hook for domifdetach.
 620  */
 621 void
 622 mld_domifdetach(struct ifnet *ifp)
 623 {
 624         SLIST_HEAD(, in6_multi) in6m_dthead;
 625
 626         SLIST_INIT(&in6m_dthead);
 627
 628         MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
 629             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 630
 631         MLD_LOCK();
 632         mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
 633         MLD_UNLOCK();
 634
 635         /* Now that we're dropped all locks, release detached records */
 636         MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 637 }
 638
 639 /*
 640  * Called at interface detach time.  Note that we only flush all deferred
 641  * responses and record releases; all remaining inm records and their source
 642  * entries related to this interface are left intact, in order to handle
 643  * the reattach case.
 644  */
 645 static void
 646 mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
 647 {
 648         struct mld_ifinfo *mli, *tmli;
 649
 650         MLD_LOCK_ASSERT_HELD();
 651
 652         LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
 653                 MLI_LOCK(mli);
 654                 if (mli->mli_ifp == ifp) {
 655                         /*
 656                          * Free deferred General Query responses.
 657                          */
 658                         IF_DRAIN(&mli->mli_gq);
 659                         IF_DRAIN(&mli->mli_v1q);
 660                         mld_flush_relq(mli, in6m_dthead);
 661                         VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
 662                         mli->mli_debug &= ~IFD_ATTACHED;
 663                         MLI_UNLOCK(mli);
 664
 665                         LIST_REMOVE(mli, mli_link);
 666                         MLI_REMREF(mli); /* release mli_head reference */
 667                         return;
 668                 }
 669                 MLI_UNLOCK(mli);
 670         }
 671         panic("%s: mld_ifinfo not found for ifp %p(%s)\n", __func__,
 672             ifp, ifp->if_xname);
 673 }
 674
 675 __private_extern__ void
 676 mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
 677 {
 678         ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
 679
 680         MLI_LOCK_ASSERT_NOTHELD(mli);
 681         MLI_LOCK(mli);
 682         if (!(ifp->if_flags & IFF_MULTICAST) &&
 683             (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)))
 684                 mli->mli_flags |= MLIF_SILENT;
 685         else
 686                 mli->mli_flags &= ~MLIF_SILENT;
 687         MLI_UNLOCK(mli);
 688 }
 689
 690 static void
 691 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
 692 {
 693         MLI_LOCK_ASSERT_HELD(mli);
 694
 695         mli->mli_ifp = ifp;
 696         if (mld_v2enable)
 697                 mli->mli_version = MLD_VERSION_2;
 698         else
 699                 mli->mli_version = MLD_VERSION_1;
 700         mli->mli_flags = 0;
 701         mli->mli_rv = MLD_RV_INIT;
 702         mli->mli_qi = MLD_QI_INIT;
 703         mli->mli_qri = MLD_QRI_INIT;
 704         mli->mli_uri = MLD_URI_INIT;
 705
 706         if (mld_use_allow)
 707                 mli->mli_flags |= MLIF_USEALLOW;
 708         if (!reattach)
 709                 SLIST_INIT(&mli->mli_relinmhead);
 710
 711         /*
 712          * Responses to general queries are subject to bounds.
 713          */
 714         mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
 715         mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
 716 }
 717
 718 static struct mld_ifinfo *
 719 mli_alloc(int how)
 720 {
 721         struct mld_ifinfo *mli;
 722
 723         mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
 724         if (mli != NULL) {
 725                 bzero(mli, mli_size);
 726                 lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
 727                 mli->mli_debug |= IFD_ALLOC;
 728         }
 729         return (mli);
 730 }
 731
 732 static void
 733 mli_free(struct mld_ifinfo *mli)
 734 {
 735         MLI_LOCK(mli);
 736         if (mli->mli_debug & IFD_ATTACHED) {
 737                 panic("%s: attached mli=%p is being freed", __func__, mli);
 738                 /* NOTREACHED */
 739         } else if (mli->mli_ifp != NULL) {
 740                 panic("%s: ifp not NULL for mli=%p", __func__, mli);
 741                 /* NOTREACHED */
 742         } else if (!(mli->mli_debug & IFD_ALLOC)) {
 743                 panic("%s: mli %p cannot be freed", __func__, mli);
 744                 /* NOTREACHED */
 745         } else if (mli->mli_refcnt != 0) {
 746                 panic("%s: non-zero refcnt mli=%p", __func__, mli);
 747                 /* NOTREACHED */
 748         }
 749         mli->mli_debug &= ~IFD_ALLOC;
 750         MLI_UNLOCK(mli);
 751
 752         lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
 753         zfree(mli_zone, mli);
 754 }
 755
 756 void
 757 mli_addref(struct mld_ifinfo *mli, int locked)
 758 {
 759         if (!locked)
 760                 MLI_LOCK_SPIN(mli);
 761         else
 762                 MLI_LOCK_ASSERT_HELD(mli);
 763
 764         if (++mli->mli_refcnt == 0) {
 765                 panic("%s: mli=%p wraparound refcnt", __func__, mli);
 766                 /* NOTREACHED */
 767         }
 768         if (!locked)
 769                 MLI_UNLOCK(mli);
 770 }
 771
 772 void
 773 mli_remref(struct mld_ifinfo *mli)
 774 {
 775         SLIST_HEAD(, in6_multi) in6m_dthead;
 776         struct ifnet *ifp;
 777
 778         MLI_LOCK_SPIN(mli);
 779
 780         if (mli->mli_refcnt == 0) {
 781                 panic("%s: mli=%p negative refcnt", __func__, mli);
 782                 /* NOTREACHED */
 783         }
 784
 785         --mli->mli_refcnt;
 786         if (mli->mli_refcnt > 0) {
 787                 MLI_UNLOCK(mli);
 788                 return;
 789         }
 790
 791         ifp = mli->mli_ifp;
 792         mli->mli_ifp = NULL;
 793         IF_DRAIN(&mli->mli_gq);
 794         IF_DRAIN(&mli->mli_v1q);
 795         SLIST_INIT(&in6m_dthead);
 796         mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
 797         VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
 798         MLI_UNLOCK(mli);
 799
 800         /* Now that we're dropped all locks, release detached records */
 801         MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 802
 803         MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
 804             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 805
 806         mli_free(mli);
 807 }
 808
 809 /*
 810  * Process a received MLDv1 general or address-specific query.
 811  * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 812  *
 813  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 814  * mld_addr. This is OK as we own the mbuf chain.
 815  */
 816 static int
 817 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
 818     /*const*/ struct mld_hdr *mld)
 819 {
 820         struct mld_ifinfo       *mli;
 821         struct in6_multi        *inm;
 822         int                      err = 0, is_general_query;
 823         uint16_t                 timer;
 824         struct mld_tparams       mtp = { 0, 0, 0, 0 };
 825
 826         MLD_LOCK_ASSERT_NOTHELD();
 827
 828         is_general_query = 0;
 829
 830         if (!mld_v1enable) {
 831                 MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
 832                     __func__, ip6_sprintf(&mld->mld_addr),
 833                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 834                 goto done;
 835         }
 836
 837         /*
 838          * RFC3810 Section 6.2: MLD queries must originate from
 839          * a router's link-local address.
 840          */
 841         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
 842                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
 843                     __func__, ip6_sprintf(&ip6->ip6_src),
 844                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 845                 goto done;
 846         }
 847
 848         /*
 849          * Do address field validation upfront before we accept
 850          * the query.
 851          */
 852         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
 853                 /*
 854                  * MLDv1 General Query.
 855                  * If this was not sent to the all-nodes group, ignore it.
 856                  */
 857                 struct in6_addr          dst;
 858
 859                 dst = ip6->ip6_dst;
 860                 in6_clearscope(&dst);
 861                 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
 862                         err = EINVAL;
 863                         goto done;
 864                 }
 865                 is_general_query = 1;
 866         } else {
 867                 /*
 868                  * Embed scope ID of receiving interface in MLD query for
 869                  * lookup whilst we don't hold other locks.
 870                  */
 871                 in6_setscope(&mld->mld_addr, ifp, NULL);
 872         }
 873
 874         /*
 875          * Switch to MLDv1 host compatibility mode.
 876          */
 877         mli = MLD_IFINFO(ifp);
 878         VERIFY(mli != NULL);
 879
 880         MLI_LOCK(mli);
 881         mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
 882         MLI_UNLOCK(mli);
 883
 884         timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
 885         if (timer == 0)
 886                 timer = 1;
 887
 888         if (is_general_query) {
 889                 struct in6_multistep step;
 890
 891                 MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
 892                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 893                 /*
 894                  * For each reporting group joined on this
 895                  * interface, kick the report timer.
 896                  */
 897                 in6_multihead_lock_shared();
 898                 IN6_FIRST_MULTI(step, inm);
 899                 while (inm != NULL) {
 900                         IN6M_LOCK(inm);
 901                         if (inm->in6m_ifp == ifp)
 902                                 mtp.cst += mld_v1_update_group(inm, timer);
 903                         IN6M_UNLOCK(inm);
 904                         IN6_NEXT_MULTI(step, inm);
 905                 }
 906                 in6_multihead_lock_done();
 907         } else {
 908                 /*
 909                  * MLDv1 Group-Specific Query.
 910                  * If this is a group-specific MLDv1 query, we need only
 911                  * look up the single group to process it.
 912                  */
 913                 in6_multihead_lock_shared();
 914                 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
 915                 in6_multihead_lock_done();
 916
 917                 if (inm != NULL) {
 918                         IN6M_LOCK(inm);
 919                         MLD_PRINTF(("%s: process v1 query %s on "
 920                             "ifp 0x%llx(%s)\n", __func__,
 921                             ip6_sprintf(&mld->mld_addr),
 922                             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 923                         mtp.cst = mld_v1_update_group(inm, timer);
 924                         IN6M_UNLOCK(inm);
 925                         IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
 926                 }
 927                 /* XXX Clear embedded scope ID as userland won't expect it. */
 928                 in6_clearscope(&mld->mld_addr);
 929         }
 930 done:
 931         mld_set_timeout(&mtp);
 932
 933         return (err);
 934 }
 935
 936 /*
 937  * Update the report timer on a group in response to an MLDv1 query.
 938  *
 939  * If we are becoming the reporting member for this group, start the timer.
 940  * If we already are the reporting member for this group, and timer is
 941  * below the threshold, reset it.
 942  *
 943  * We may be updating the group for the first time since we switched
 944  * to MLDv2. If we are, then we must clear any recorded source lists,
 945  * and transition to REPORTING state; the group timer is overloaded
 946  * for group and group-source query responses.
 947  *
 948  * Unlike MLDv2, the delay per group should be jittered
 949  * to avoid bursts of MLDv1 reports.
 950  */
 951 static uint32_t
 952 mld_v1_update_group(struct in6_multi *inm, const int timer)
 953 {
 954         IN6M_LOCK_ASSERT_HELD(inm);
 955
 956         MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
 957             ip6_sprintf(&inm->in6m_addr),
 958             if_name(inm->in6m_ifp), timer));
 959
 960         switch (inm->in6m_state) {
 961         case MLD_NOT_MEMBER:
 962         case MLD_SILENT_MEMBER:
 963                 break;
 964         case MLD_REPORTING_MEMBER:
 965                 if (inm->in6m_timer != 0 &&
 966                     inm->in6m_timer <= timer) {
 967                         MLD_PRINTF(("%s: REPORTING and timer running, "
 968                             "skipping.\n", __func__));
 969                         break;
 970                 }
 971                 /* FALLTHROUGH */
 972         case MLD_SG_QUERY_PENDING_MEMBER:
 973         case MLD_G_QUERY_PENDING_MEMBER:
 974         case MLD_IDLE_MEMBER:
 975         case MLD_LAZY_MEMBER:
 976         case MLD_AWAKENING_MEMBER:
 977                 MLD_PRINTF(("%s: ->REPORTING\n", __func__));
 978                 inm->in6m_state = MLD_REPORTING_MEMBER;
 979                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
 980                 break;
 981         case MLD_SLEEPING_MEMBER:
 982                 MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
 983                 inm->in6m_state = MLD_AWAKENING_MEMBER;
 984                 break;
 985         case MLD_LEAVING_MEMBER:
 986                 break;
 987         }
 988
 989         return (inm->in6m_timer);
 990 }
 991
 992 /*
 993  * Process a received MLDv2 general, group-specific or
 994  * group-and-source-specific query.
 995  *
 996  * Assumes that the query header has been pulled up to sizeof(mldv2_query).
 997  *
 998  * Return 0 if successful, otherwise an appropriate error code is returned.
 999  */
1000 static int
1001 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
1002     struct mbuf *m, const int off, const int icmp6len)
1003 {
1004         struct mld_ifinfo       *mli;
1005         struct mldv2_query      *mld;
1006         struct in6_multi        *inm;
1007         uint32_t                 maxdelay, nsrc, qqi;
1008         int                      err = 0, is_general_query;
1009         uint16_t                 timer;
1010         uint8_t                  qrv;
1011         struct mld_tparams       mtp = { 0, 0, 0, 0 };
1012
1013         MLD_LOCK_ASSERT_NOTHELD();
1014
1015         is_general_query = 0;
1016
1017         if (!mld_v2enable) {
1018                 MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
1019                     __func__, ip6_sprintf(&ip6->ip6_src),
1020                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1021                 goto done;
1022         }
1023
1024         /*
1025          * RFC3810 Section 6.2: MLD queries must originate from
1026          * a router's link-local address.
1027          */
1028         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
1029                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
1030                     __func__, ip6_sprintf(&ip6->ip6_src),
1031                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1032                 goto done;
1033         }
1034
1035         MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
1036             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1037
1038         mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
1039
1040         maxdelay = ntohs(mld->mld_maxdelay);    /* in 1/10ths of a second */
1041         if (maxdelay >= 32768) {
1042                 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
1043                            (MLD_MRC_EXP(maxdelay) + 3);
1044         }
1045         timer = maxdelay / MLD_TIMER_SCALE;
1046         if (timer == 0)
1047                 timer = 1;
1048
1049         qrv = MLD_QRV(mld->mld_misc);
1050         if (qrv < 2) {
1051                 MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1052                     qrv, MLD_RV_INIT));
1053                 qrv = MLD_RV_INIT;
1054         }
1055
1056         qqi = mld->mld_qqi;
1057         if (qqi >= 128) {
1058                 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
1059                      (MLD_QQIC_EXP(mld->mld_qqi) + 3);
1060         }
1061
1062         nsrc = ntohs(mld->mld_numsrc);
1063         if (nsrc > MLD_MAX_GS_SOURCES) {
1064                 err = EMSGSIZE;
1065                 goto done;
1066         }
1067         if (icmp6len < sizeof(struct mldv2_query) +
1068             (nsrc * sizeof(struct in6_addr))) {
1069                 err = EMSGSIZE;
1070                 goto done;
1071         }
1072
1073         /*
1074          * Do further input validation upfront to avoid resetting timers
1075          * should we need to discard this query.
1076          */
1077         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
1078                 /*
1079                  * A general query with a source list has undefined
1080                  * behaviour; discard it.
1081                  */
1082                 if (nsrc > 0) {
1083                         err = EINVAL;
1084                         goto done;
1085                 }
1086                 is_general_query = 1;
1087         } else {
1088                 /*
1089                  * Embed scope ID of receiving interface in MLD query for
1090                  * lookup whilst we don't hold other locks (due to KAME
1091                  * locking lameness). We own this mbuf chain just now.
1092                  */
1093                 in6_setscope(&mld->mld_addr, ifp, NULL);
1094         }
1095
1096         mli = MLD_IFINFO(ifp);
1097         VERIFY(mli != NULL);
1098
1099         MLI_LOCK(mli);
1100         /*
1101          * Discard the v2 query if we're in Compatibility Mode.
1102          * The RFC is pretty clear that hosts need to stay in MLDv1 mode
1103          * until the Old Version Querier Present timer expires.
1104          */
1105         if (mli->mli_version != MLD_VERSION_2) {
1106                 MLI_UNLOCK(mli);
1107                 goto done;
1108         }
1109
1110         mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
1111         mli->mli_rv = qrv;
1112         mli->mli_qi = qqi;
1113         mli->mli_qri = MAX(timer, MLD_QRI_MIN);
1114
1115         MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
1116             mli->mli_qi, mli->mli_qri));
1117
1118         if (is_general_query) {
1119                 /*
1120                  * MLDv2 General Query.
1121                  *
1122                  * Schedule a current-state report on this ifp for
1123                  * all groups, possibly containing source lists.
1124                  *
1125                  * If there is a pending General Query response
1126                  * scheduled earlier than the selected delay, do
1127                  * not schedule any other reports.
1128                  * Otherwise, reset the interface timer.
1129                  */
1130                 MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
1131                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1132                 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1133                         mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1134                 }
1135                 MLI_UNLOCK(mli);
1136         } else {
1137                 MLI_UNLOCK(mli);
1138                 /*
1139                  * MLDv2 Group-specific or Group-and-source-specific Query.
1140                  *
1141                  * Group-source-specific queries are throttled on
1142                  * a per-group basis to defeat denial-of-service attempts.
1143                  * Queries for groups we are not a member of on this
1144                  * link are simply ignored.
1145                  */
1146                 in6_multihead_lock_shared();
1147                 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1148                 in6_multihead_lock_done();
1149                 if (inm == NULL)
1150                         goto done;
1151
1152                 IN6M_LOCK(inm);
1153                 if (nsrc > 0) {
1154                         if (!ratecheck(&inm->in6m_lastgsrtv,
1155                             &mld_gsrdelay)) {
1156                                 MLD_PRINTF(("%s: GS query throttled.\n",
1157                                     __func__));
1158                                 IN6M_UNLOCK(inm);
1159                                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1160                                 goto done;
1161                         }
1162                 }
1163                 MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
1164                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1165                 /*
1166                  * If there is a pending General Query response
1167                  * scheduled sooner than the selected delay, no
1168                  * further report need be scheduled.
1169                  * Otherwise, prepare to respond to the
1170                  * group-specific or group-and-source query.
1171                  */
1172                 MLI_LOCK(mli);
1173                 mtp.it = mli->mli_v2_timer;
1174                 MLI_UNLOCK(mli);
1175                 if (mtp.it == 0 || mtp.it >= timer) {
1176                         (void) mld_v2_process_group_query(inm, timer, m, off);
1177                         mtp.cst = inm->in6m_timer;
1178                 }
1179                 IN6M_UNLOCK(inm);
1180                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1181                 /* XXX Clear embedded scope ID as userland won't expect it. */
1182                 in6_clearscope(&mld->mld_addr);
1183         }
1184 done:
1185         if (mtp.it > 0) {
1186                 MLD_PRINTF(("%s: v2 general query response scheduled in "
1187                     "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
1188                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1189         }
1190         mld_set_timeout(&mtp);
1191
1192         return (err);
1193 }
1194
1195 /*
1196  * Process a recieved MLDv2 group-specific or group-and-source-specific
1197  * query.
1198  * Return <0 if any error occured. Currently this is ignored.
1199  */
1200 static int
1201 mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1202     const int off)
1203 {
1204         struct mldv2_query      *mld;
1205         int                      retval;
1206         uint16_t                 nsrc;
1207
1208         IN6M_LOCK_ASSERT_HELD(inm);
1209
1210         retval = 0;
1211         mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1212
1213         switch (inm->in6m_state) {
1214         case MLD_NOT_MEMBER:
1215         case MLD_SILENT_MEMBER:
1216         case MLD_SLEEPING_MEMBER:
1217         case MLD_LAZY_MEMBER:
1218         case MLD_AWAKENING_MEMBER:
1219         case MLD_IDLE_MEMBER:
1220         case MLD_LEAVING_MEMBER:
1221                 return (retval);
1222         case MLD_REPORTING_MEMBER:
1223         case MLD_G_QUERY_PENDING_MEMBER:
1224         case MLD_SG_QUERY_PENDING_MEMBER:
1225                 break;
1226         }
1227
1228         nsrc = ntohs(mld->mld_numsrc);
1229
1230         /*
1231          * Deal with group-specific queries upfront.
1232          * If any group query is already pending, purge any recorded
1233          * source-list state if it exists, and schedule a query response
1234          * for this group-specific query.
1235          */
1236         if (nsrc == 0) {
1237                 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1238                     inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1239                         in6m_clear_recorded(inm);
1240                         timer = min(inm->in6m_timer, timer);
1241                 }
1242                 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1243                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1244                 return (retval);
1245         }
1246
1247         /*
1248          * Deal with the case where a group-and-source-specific query has
1249          * been received but a group-specific query is already pending.
1250          */
1251         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1252                 timer = min(inm->in6m_timer, timer);
1253                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1254                 return (retval);
1255         }
1256
1257         /*
1258          * Finally, deal with the case where a group-and-source-specific
1259          * query has been received, where a response to a previous g-s-r
1260          * query exists, or none exists.
1261          * In this case, we need to parse the source-list which the Querier
1262          * has provided us with and check if we have any source list filter
1263          * entries at T1 for these sources. If we do not, there is no need
1264          * schedule a report and the query may be dropped.
1265          * If we do, we must record them and schedule a current-state
1266          * report for those sources.
1267          */
1268         if (inm->in6m_nsrc > 0) {
1269                 struct mbuf             *m;
1270                 uint8_t                 *sp;
1271                 int                      i, nrecorded;
1272                 int                      soff;
1273
1274                 m = m0;
1275                 soff = off + sizeof(struct mldv2_query);
1276                 nrecorded = 0;
1277                 for (i = 0; i < nsrc; i++) {
1278                         sp = mtod(m, uint8_t *) + soff;
1279                         retval = in6m_record_source(inm,
1280                             (const struct in6_addr *)(void *)sp);
1281                         if (retval < 0)
1282                                 break;
1283                         nrecorded += retval;
1284                         soff += sizeof(struct in6_addr);
1285                         if (soff >= m->m_len) {
1286                                 soff = soff - m->m_len;
1287                                 m = m->m_next;
1288                                 if (m == NULL)
1289                                         break;
1290                         }
1291                 }
1292                 if (nrecorded > 0) {
1293                         MLD_PRINTF(( "%s: schedule response to SG query\n",
1294                             __func__));
1295                         inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1296                         inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1297                 }
1298         }
1299
1300         return (retval);
1301 }
1302
1303 /*
1304  * Process a received MLDv1 host membership report.
1305  * Assumes mld points to mld_hdr in pulled up mbuf chain.
1306  *
1307  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1308  * mld_addr. This is OK as we own the mbuf chain.
1309  */
1310 static int
1311 mld_v1_input_report(struct ifnet *ifp, struct mbuf *m,
1312     const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld)
1313 {
1314         struct in6_addr          src, dst;
1315         struct in6_ifaddr       *ia;
1316         struct in6_multi        *inm;
1317
1318         if (!mld_v1enable) {
1319                 MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
1320                     __func__, ip6_sprintf(&mld->mld_addr),
1321                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1322                 return (0);
1323         }
1324
1325         if ((ifp->if_flags & IFF_LOOPBACK) ||
1326             (m->m_pkthdr.pkt_flags & PKTF_LOOP))
1327                 return (0);
1328
1329         /*
1330          * MLDv1 reports must originate from a host's link-local address,
1331          * or the unspecified address (when booting).
1332          */
1333         src = ip6->ip6_src;
1334         in6_clearscope(&src);
1335         if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1336                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
1337                     __func__, ip6_sprintf(&ip6->ip6_src),
1338                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1339                 return (EINVAL);
1340         }
1341
1342         /*
1343          * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1344          * group, and must be directed to the group itself.
1345          */
1346         dst = ip6->ip6_dst;
1347         in6_clearscope(&dst);
1348         if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1349             !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1350                 MLD_PRINTF(("%s: ignore v1 query dst %s on ifp 0x%llx(%s)\n",
1351                     __func__, ip6_sprintf(&ip6->ip6_dst),
1352                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1353                 return (EINVAL);
1354         }
1355
1356         /*
1357          * Make sure we don't hear our own membership report, as fast
1358          * leave requires knowing that we are the only member of a
1359          * group. Assume we used the link-local address if available,
1360          * otherwise look for ::.
1361          *
1362          * XXX Note that scope ID comparison is needed for the address
1363          * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1364          * performed for the on-wire address.
1365          */
1366         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1367         if (ia != NULL) {
1368                 IFA_LOCK(&ia->ia_ifa);
1369                 if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))){
1370                         IFA_UNLOCK(&ia->ia_ifa);
1371                         IFA_REMREF(&ia->ia_ifa);
1372                         return (0);
1373                 }
1374                 IFA_UNLOCK(&ia->ia_ifa);
1375                 IFA_REMREF(&ia->ia_ifa);
1376         } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1377                 return (0);
1378         }
1379
1380         MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
1381             __func__, ip6_sprintf(&mld->mld_addr),
1382             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1383
1384         /*
1385          * Embed scope ID of receiving interface in MLD query for lookup
1386          * whilst we don't hold other locks (due to KAME locking lameness).
1387          */
1388         if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1389                 in6_setscope(&mld->mld_addr, ifp, NULL);
1390
1391         /*
1392          * MLDv1 report suppression.
1393          * If we are a member of this group, and our membership should be
1394          * reported, and our group timer is pending or about to be reset,
1395          * stop our group timer by transitioning to the 'lazy' state.
1396          */
1397         in6_multihead_lock_shared();
1398         IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1399         in6_multihead_lock_done();
1400
1401         if (inm != NULL) {
1402                 struct mld_ifinfo *mli;
1403
1404                 IN6M_LOCK(inm);
1405                 mli = inm->in6m_mli;
1406                 VERIFY(mli != NULL);
1407
1408                 MLI_LOCK(mli);
1409                 /*
1410                  * If we are in MLDv2 host mode, do not allow the
1411                  * other host's MLDv1 report to suppress our reports.
1412                  */
1413                 if (mli->mli_version == MLD_VERSION_2) {
1414                         MLI_UNLOCK(mli);
1415                         IN6M_UNLOCK(inm);
1416                         IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1417                         goto out;
1418                 }
1419                 MLI_UNLOCK(mli);
1420
1421                 inm->in6m_timer = 0;
1422
1423                 switch (inm->in6m_state) {
1424                 case MLD_NOT_MEMBER:
1425                 case MLD_SILENT_MEMBER:
1426                 case MLD_SLEEPING_MEMBER:
1427                         break;
1428                 case MLD_REPORTING_MEMBER:
1429                 case MLD_IDLE_MEMBER:
1430                 case MLD_AWAKENING_MEMBER:
1431                         MLD_PRINTF(("%s: report suppressed for %s on "
1432                             "ifp 0x%llx(%s)\n", __func__,
1433                             ip6_sprintf(&mld->mld_addr),
1434                             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1435                 case MLD_LAZY_MEMBER:
1436                         inm->in6m_state = MLD_LAZY_MEMBER;
1437                         break;
1438                 case MLD_G_QUERY_PENDING_MEMBER:
1439                 case MLD_SG_QUERY_PENDING_MEMBER:
1440                 case MLD_LEAVING_MEMBER:
1441                         break;
1442                 }
1443                 IN6M_UNLOCK(inm);
1444                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1445         }
1446
1447 out:
1448         /* XXX Clear embedded scope ID as userland won't expect it. */
1449         in6_clearscope(&mld->mld_addr);
1450
1451         return (0);
1452 }
1453
1454 /*
1455  * MLD input path.
1456  *
1457  * Assume query messages which fit in a single ICMPv6 message header
1458  * have been pulled up.
1459  * Assume that userland will want to see the message, even if it
1460  * otherwise fails kernel input validation; do not free it.
1461  * Pullup may however free the mbuf chain m if it fails.
1462  *
1463  * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1464  */
1465 int
1466 mld_input(struct mbuf *m, int off, int icmp6len)
1467 {
1468         struct ifnet    *ifp;
1469         struct ip6_hdr  *ip6;
1470         struct mld_hdr  *mld;
1471         int              mldlen;
1472
1473         MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
1474             (uint64_t)VM_KERNEL_ADDRPERM(m), off));
1475
1476         ifp = m->m_pkthdr.rcvif;
1477
1478         ip6 = mtod(m, struct ip6_hdr *);
1479
1480         /* Pullup to appropriate size. */
1481         mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1482         if (mld->mld_type == MLD_LISTENER_QUERY &&
1483             icmp6len >= sizeof(struct mldv2_query)) {
1484                 mldlen = sizeof(struct mldv2_query);
1485         } else {
1486                 mldlen = sizeof(struct mld_hdr);
1487         }
1488         IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1489         if (mld == NULL) {
1490                 icmp6stat.icp6s_badlen++;
1491                 return (IPPROTO_DONE);
1492         }
1493
1494         /*
1495          * Userland needs to see all of this traffic for implementing
1496          * the endpoint discovery portion of multicast routing.
1497          */
1498         switch (mld->mld_type) {
1499         case MLD_LISTENER_QUERY:
1500                 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1501                 if (icmp6len == sizeof(struct mld_hdr)) {
1502                         if (mld_v1_input_query(ifp, ip6, mld) != 0)
1503                                 return (0);
1504                 } else if (icmp6len >= sizeof(struct mldv2_query)) {
1505                         if (mld_v2_input_query(ifp, ip6, m, off,
1506                             icmp6len) != 0)
1507                                 return (0);
1508                 }
1509                 break;
1510         case MLD_LISTENER_REPORT:
1511                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1512                 if (mld_v1_input_report(ifp, m, ip6, mld) != 0)
1513                         return (0);
1514                 break;
1515         case MLDV2_LISTENER_REPORT:
1516                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1517                 break;
1518         case MLD_LISTENER_DONE:
1519                 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1520                 break;
1521         default:
1522                 break;
1523         }
1524
1525         return (0);
1526 }
1527
1528 /*
1529  * Schedule MLD timer based on various parameters; caller must ensure that
1530  * lock ordering is maintained as this routine acquires MLD global lock.
1531  */
1532 void
1533 mld_set_timeout(struct mld_tparams *mtp)
1534 {
1535         MLD_LOCK_ASSERT_NOTHELD();
1536         VERIFY(mtp != NULL);
1537
1538         if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) {
1539                 MLD_LOCK();
1540                 if (mtp->qpt != 0)
1541                         querier_present_timers_running6 = 1;
1542                 if (mtp->it != 0)
1543                         interface_timers_running6 = 1;
1544                 if (mtp->cst != 0)
1545                         current_state_timers_running6 = 1;
1546                 if (mtp->sct != 0)
1547                         state_change_timers_running6 = 1;
1548                 mld_sched_timeout();
1549                 MLD_UNLOCK();
1550         }
1551 }
1552
/*
 * MLD6 timer handler (per 1 second).
 *
 * Scheduled through timeout() by mld_sched_timeout(); the callout argument
 * is unused.  With the MLD lock held for the whole pass it, in order:
 *
 *  1. runs the MLDv1 querier-present timer of every link;
 *  2. runs the MLDv2 General Query response timer of every MLDv2 link;
 *  3. if any current-state or state-change timer is flagged as running,
 *     walks every in6_multi record of every link, runs its group timers
 *     and dispatches the packets queued as a result;
 *  4. clears mld_timeout_run and calls mld_sched_timeout(), which re-arms
 *     the callout if any of the global "timers running" flags is set.
 *
 * Each global flag is cleared before the corresponding pass and set again
 * by whichever timer is found to be still counting down.
 *
 * in6_multi records whose last reference was dropped during the pass are
 * collected on a local list and only released after the MLD lock has been
 * dropped.
 */
static void
mld_timeout(void *arg)
{
#pragma unused(arg)
        struct ifqueue           scq;   /* State-change packets */
        struct ifqueue           qrq;   /* Query response packets */
        struct ifnet            *ifp;
        struct mld_ifinfo       *mli;
        struct in6_multi        *inm;
        int                      uri_sec = 0;
        /* Detached records handed back by mld_flush_relq(); freed last. */
        SLIST_HEAD(, in6_multi) in6m_dthead;

        SLIST_INIT(&in6m_dthead);

        /*
         * Update coarse-grained networking timestamp (in sec.); the idea
         * is to piggy-back on the timeout callout to update the counter
         * returnable via net_uptime().
         */
        net_update_uptime();

        MLD_LOCK();

        MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__,
            querier_present_timers_running6, interface_timers_running6,
            current_state_timers_running6, state_change_timers_running6));

        /*
         * MLDv1 querier present timer processing.
         * The flag is re-raised if any link still has a non-zero v1 timer.
         */
        if (querier_present_timers_running6) {
                querier_present_timers_running6 = 0;
                LIST_FOREACH(mli, &mli_head, mli_link) {
                        MLI_LOCK(mli);
                        mld_v1_process_querier_timers(mli);
                        if (mli->mli_v1_timer > 0)
                                querier_present_timers_running6 = 1;
                        MLI_UNLOCK(mli);
                }
        }

        /*
         * MLDv2 General Query response timer processing.
         * Links that are not running MLDv2 are skipped.
         */
        if (interface_timers_running6) {
                MLD_PRINTF(("%s: interface timers running\n", __func__));
                interface_timers_running6 = 0;
                LIST_FOREACH(mli, &mli_head, mli_link) {
                        MLI_LOCK(mli);
                        if (mli->mli_version != MLD_VERSION_2) {
                                MLI_UNLOCK(mli);
                                continue;
                        }
                        if (mli->mli_v2_timer == 0) {
                                /* Do nothing. */
                        } else if (--mli->mli_v2_timer == 0) {
                                /*
                                 * Timer just expired: answer the General
                                 * Query.  A positive return keeps the
                                 * interface timer flag raised.
                                 */
                                if (mld_v2_dispatch_general_query(mli) > 0)
                                        interface_timers_running6 = 1;
                        } else {
                                /* Still counting down. */
                                interface_timers_running6 = 1;
                        }
                        MLI_UNLOCK(mli);
                }
        }

        /* No group timers pending: skip the per-group walk entirely. */
        if (!current_state_timers_running6 &&
            !state_change_timers_running6)
                goto out_locked;

        current_state_timers_running6 = 0;
        state_change_timers_running6 = 0;

        MLD_PRINTF(("%s: state change timers running\n", __func__));

        /* Local, initially empty queues for the MLDv2 packets built below. */
        memset(&qrq, 0, sizeof(struct ifqueue));
        qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;

        memset(&scq, 0, sizeof(struct ifqueue));
        scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;

        /*
         * MLD host report and state-change timer processing.
         * Note: Processing a v2 group timer may remove a node.
         */
        LIST_FOREACH(mli, &mli_head, mli_link) {
                struct in6_multistep step;

                /* Snapshot what the group walk needs, then drop mli_lock. */
                MLI_LOCK(mli);
                ifp = mli->mli_ifp;
                uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
                MLI_UNLOCK(mli);

                /* Walk all in6_multi records, acting on those on this ifp. */
                in6_multihead_lock_shared();
                IN6_FIRST_MULTI(step, inm);
                while (inm != NULL) {
                        IN6M_LOCK(inm);
                        if (inm->in6m_ifp != ifp)
                                goto next;

                        MLI_LOCK(mli);
                        switch (mli->mli_version) {
                        case MLD_VERSION_1:
                                mld_v1_process_group_timer(inm,
                                    mli->mli_version);
                                break;
                        case MLD_VERSION_2:
                                mld_v2_process_group_timers(mli, &qrq,
                                    &scq, inm, uri_sec);
                                break;
                        }
                        MLI_UNLOCK(mli);
next:
                        IN6M_UNLOCK(inm);
                        IN6_NEXT_MULTI(step, inm);
                }
                in6_multihead_lock_done();

                /*
                 * Send what was queued for this link: the per-link v1
                 * queue in MLDv1 mode, the two local queues in MLDv2 mode.
                 * NOTE(review): mli_lock is dropped around the dispatch of
                 * the local queues and both must come back empty;
                 * presumably the dispatch path must not be entered with
                 * mli_lock held when no mli is passed -- confirm.
                 */
                MLI_LOCK(mli);
                if (mli->mli_version == MLD_VERSION_1) {
                        mld_dispatch_queue(mli, &mli->mli_v1q, 0);
                } else if (mli->mli_version == MLD_VERSION_2) {
                        MLI_UNLOCK(mli);
                        mld_dispatch_queue(NULL, &qrq, 0);
                        mld_dispatch_queue(NULL, &scq, 0);
                        VERIFY(qrq.ifq_len == 0);
                        VERIFY(scq.ifq_len == 0);
                        MLI_LOCK(mli);
                }
                /*
                 * In case there are still any pending membership reports
                 * which didn't get drained at version change time.
                 */
                IF_DRAIN(&mli->mli_v1q);
                /*
                 * Release all deferred inm records, and drain any locally
                 * enqueued packets; do it even if the current MLD version
                 * for the link is no longer MLDv2, in order to handle the
                 * version change case.
                 */
                mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
                VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
                MLI_UNLOCK(mli);

                IF_DRAIN(&qrq);
                IF_DRAIN(&scq);
        }

out_locked:
        /* re-arm the timer if there's work to do */
        mld_timeout_run = 0;
        mld_sched_timeout();
        MLD_UNLOCK();

        /* Now that we've dropped all locks, release detached records */
        MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
}
1712
1713 static void
1714 mld_sched_timeout(void)
1715 {
1716         MLD_LOCK_ASSERT_HELD();
1717
1718         if (!mld_timeout_run &&
1719             (querier_present_timers_running6 || current_state_timers_running6 ||
1720             interface_timers_running6 || state_change_timers_running6)) {
1721                 mld_timeout_run = 1;
1722                 timeout(mld_timeout, NULL, hz);
1723         }
1724 }
1725
1726 /*
1727  * Free the in6_multi reference(s) for this MLD lifecycle.
1728  *
1729  * Caller must be holding mli_lock.
1730  */
1731 static void
1732 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1733 {
1734         struct in6_multi *inm;
1735
1736 again:
1737         MLI_LOCK_ASSERT_HELD(mli);
1738         inm = SLIST_FIRST(&mli->mli_relinmhead);
1739         if (inm != NULL) {
1740                 int lastref;
1741
1742                 SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1743                 MLI_UNLOCK(mli);
1744
1745                 in6_multihead_lock_exclusive();
1746                 IN6M_LOCK(inm);
1747                 VERIFY(inm->in6m_nrelecnt != 0);
1748                 inm->in6m_nrelecnt--;
1749                 lastref = in6_multi_detach(inm);
1750                 VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1751                     inm->in6m_reqcnt == 0));
1752                 IN6M_UNLOCK(inm);
1753                 in6_multihead_lock_done();
1754                 /* from mli_relinmhead */
1755                 IN6M_REMREF(inm);
1756                 /* from in6_multihead_list */
1757                 if (lastref) {
1758                         /*
1759                          * Defer releasing our final reference, as we
1760                          * are holding the MLD lock at this point, and
1761                          * we could end up with locking issues later on
1762                          * (while issuing SIOCDELMULTI) when this is the
1763                          * final reference count.  Let the caller do it
1764                          * when it is safe.
1765                          */
1766                         MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1767                 }
1768                 MLI_LOCK(mli);
1769                 goto again;
1770         }
1771 }
1772
1773 /*
1774  * Update host report group timer.
1775  * Will update the global pending timer flags.
1776  */
1777 static void
1778 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1779 {
1780 #pragma unused(mld_version)
1781         int report_timer_expired;
1782
1783         MLD_LOCK_ASSERT_HELD();
1784         IN6M_LOCK_ASSERT_HELD(inm);
1785         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1786
1787         if (inm->in6m_timer == 0) {
1788                 report_timer_expired = 0;
1789         } else if (--inm->in6m_timer == 0) {
1790                 report_timer_expired = 1;
1791         } else {
1792                 current_state_timers_running6 = 1;
1793                 /* caller will schedule timer */
1794                 return;
1795         }
1796
1797         switch (inm->in6m_state) {
1798         case MLD_NOT_MEMBER:
1799         case MLD_SILENT_MEMBER:
1800         case MLD_IDLE_MEMBER:
1801         case MLD_LAZY_MEMBER:
1802         case MLD_SLEEPING_MEMBER:
1803         case MLD_AWAKENING_MEMBER:
1804                 break;
1805         case MLD_REPORTING_MEMBER:
1806                 if (report_timer_expired) {
1807                         inm->in6m_state = MLD_IDLE_MEMBER;
1808                         (void) mld_v1_transmit_report(inm,
1809                              MLD_LISTENER_REPORT);
1810                         IN6M_LOCK_ASSERT_HELD(inm);
1811                         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1812                 }
1813                 break;
1814         case MLD_G_QUERY_PENDING_MEMBER:
1815         case MLD_SG_QUERY_PENDING_MEMBER:
1816         case MLD_LEAVING_MEMBER:
1817                 break;
1818         }
1819 }
1820
/*
 * Update a group's timers for MLDv2.
 * Will update the global pending timer flags.
 *
 * Parameters:
 *  mli      MLD interface info of the link; must be inm->in6m_mli.
 *  qrq      queue that receives the Current-State record built when a
 *           pending group (-and-source) query response timer expires.
 *  scq      queue that receives the merged state-change report when the
 *           state-change retransmission timer expires.
 *  inm      the group whose timers are processed.
 *  uri_sec  value the state-change timer is reloaded with while further
 *           retransmissions remain.
 *
 * Caller must hold the MLD lock, the in6_multi lock and mli_lock (all
 * three are asserted below).
 */
static void
mld_v2_process_group_timers(struct mld_ifinfo *mli,
    struct ifqueue *qrq, struct ifqueue *scq,
    struct in6_multi *inm, const int uri_sec)
{
        int query_response_timer_expired;
        int state_change_retransmit_timer_expired;

        MLD_LOCK_ASSERT_HELD();
        IN6M_LOCK_ASSERT_HELD(inm);
        MLI_LOCK_ASSERT_HELD(mli);
        VERIFY(mli == inm->in6m_mli);

        query_response_timer_expired = 0;
        state_change_retransmit_timer_expired = 0;

        /*
         * During a transition from compatibility mode back to MLDv2,
         * a group record in REPORTING state may still have its group
         * timer active. This is a no-op in this function; it is easier
         * to deal with it here than to complicate the timeout path.
         */
        if (inm->in6m_timer == 0) {
                query_response_timer_expired = 0;
        } else if (--inm->in6m_timer == 0) {
                query_response_timer_expired = 1;
        } else {
                current_state_timers_running6 = 1;
                /* caller will schedule timer */
        }

        /* Same three-way handling for the state-change retransmit timer. */
        if (inm->in6m_sctimer == 0) {
                state_change_retransmit_timer_expired = 0;
        } else if (--inm->in6m_sctimer == 0) {
                state_change_retransmit_timer_expired = 1;
        } else {
                state_change_timers_running6 = 1;
                /* caller will schedule timer */
        }

        /* We are in timer callback, so be quick about it. */
        if (!state_change_retransmit_timer_expired &&
            !query_response_timer_expired)
                return;

        switch (inm->in6m_state) {
        case MLD_NOT_MEMBER:
        case MLD_SILENT_MEMBER:
        case MLD_SLEEPING_MEMBER:
        case MLD_LAZY_MEMBER:
        case MLD_AWAKENING_MEMBER:
        case MLD_IDLE_MEMBER:
                /* Expired timers are ignored in these states. */
                break;
        case MLD_G_QUERY_PENDING_MEMBER:
        case MLD_SG_QUERY_PENDING_MEMBER:
                /*
                 * Respond to a previously pending Group-Specific
                 * or Group-and-Source-Specific query by enqueueing
                 * the appropriate Current-State report for
                 * immediate transmission.
                 */
                if (query_response_timer_expired) {
                        int retval;

                        /* retval is only reported through MLD_PRINTF. */
                        retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
                            (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
                            0);
                        MLD_PRINTF(("%s: enqueue record = %d\n",
                            __func__, retval));
                        inm->in6m_state = MLD_REPORTING_MEMBER;
                        in6m_clear_recorded(inm);
                }
                /*
                 * Deliberately continue into the state-change handling:
                 * the retransmit timer may have expired on this tick too.
                 */
                /* FALLTHROUGH */
        case MLD_REPORTING_MEMBER:
        case MLD_LEAVING_MEMBER:
                if (state_change_retransmit_timer_expired) {
                        /*
                         * State-change retransmission timer fired.
                         * If there are any further pending retransmissions,
                         * set the global pending state-change flag, and
                         * reset the timer.
                         */
                        if (--inm->in6m_scrv > 0) {
                                inm->in6m_sctimer = uri_sec;
                                state_change_timers_running6 = 1;
                                /* caller will schedule timer */
                        }
                        /*
                         * Retransmit the previously computed state-change
                         * report. If there are no further pending
                         * retransmissions, the mbuf queue will be consumed.
                         * Update T0 state to T1 as we have now sent
                         * a state-change.
                         */
                        (void) mld_v2_merge_state_changes(inm, scq);

                        in6m_commit(inm);
                        MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
                            ip6_sprintf(&inm->in6m_addr),
                            if_name(inm->in6m_ifp)));

                        /*
                         * If we are leaving the group for good, make sure
                         * we release MLD's reference to it.
                         * This release must be deferred using a SLIST,
                         * as we are called from a loop which is walking
                         * the list of in6_multi records.
                         */
                        if (inm->in6m_state == MLD_LEAVING_MEMBER &&
                            inm->in6m_scrv == 0) {
                                inm->in6m_state = MLD_NOT_MEMBER;
                                /*
                                 * A reference has already been held in
                                 * mld_final_leave() for this inm, so
                                 * no need to hold another one.  We also
                                 * bumped up its request count then, so
                                 * that it stays in in6_multihead.  Both
                                 * of them will be released when it is
                                 * dequeued later on.
                                 */
                                VERIFY(inm->in6m_nrelecnt != 0);
                                SLIST_INSERT_HEAD(&mli->mli_relinmhead,
                                    inm, in6m_nrele);
                        }
                }
                break;
        }
}
1954
1955 /*
1956  * Switch to a different version on the given interface,
1957  * as per Section 9.12.
1958  */
1959 static uint32_t
1960 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
1961 {
1962         int old_version_timer;
1963
1964         MLI_LOCK_ASSERT_HELD(mli);
1965
1966         MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
1967             mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
1968             if_name(mli->mli_ifp)));
1969
1970         if (mld_version == MLD_VERSION_1) {
1971                 /*
1972                  * Compute the "Older Version Querier Present" timer as per
1973                  * Section 9.12, in seconds.
1974                  */
1975                 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1976                 mli->mli_v1_timer = old_version_timer;
1977         }
1978
1979         if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1980                 mli->mli_version = MLD_VERSION_1;
1981                 mld_v2_cancel_link_timers(mli);
1982         }
1983
1984         MLI_LOCK_ASSERT_HELD(mli);
1985
1986         return (mli->mli_v1_timer);
1987 }
1988
/*
 * Cancel pending MLDv2 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 *
 * Only ever called on a transition from v2 to Compatibility mode. Kill
 * the timers stone dead (this may be expensive for large N groups), they
 * will be restarted if Compatibility Mode deems that they must be due to
 * query processing.
 *
 * Caller must be holding mli_lock.  The lock is released while the list
 * of in6_multi records is walked (it is briefly retaken to queue a
 * LEAVING group for deferred release) and is held again on return.
 */
static void
mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
{
        struct ifnet            *ifp;
        struct in6_multi        *inm;
        struct in6_multistep    step;

        MLI_LOCK_ASSERT_HELD(mli);

        MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
            (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));

        /*
         * Stop the v2 General Query Response on this link stone dead.
         * If timer is woken up due to interface_timers_running6,
         * the flag will be cleared if there are no pending link timers.
         */
        mli->mli_v2_timer = 0;

        /*
         * Now clear the current-state and state-change report timers
         * for all memberships scoped to this link.
         *
         * NOTE(review): mli_lock is dropped before taking the multihead
         * and in6_multi locks, presumably for lock ordering -- confirm.
         */
        ifp = mli->mli_ifp;
        MLI_UNLOCK(mli);

        in6_multihead_lock_shared();
        IN6_FIRST_MULTI(step, inm);
        while (inm != NULL) {
                IN6M_LOCK(inm);
                /* Records on other interfaces are left untouched. */
                if (inm->in6m_ifp != ifp)
                        goto next;

                switch (inm->in6m_state) {
                case MLD_NOT_MEMBER:
                case MLD_SILENT_MEMBER:
                case MLD_IDLE_MEMBER:
                case MLD_LAZY_MEMBER:
                case MLD_SLEEPING_MEMBER:
                case MLD_AWAKENING_MEMBER:
                        /*
                         * These states are either not relevant in v2 mode,
                         * or are unreported. Do nothing.
                         */
                        break;
                case MLD_LEAVING_MEMBER:
                        /*
                         * If we are leaving the group and switching
                         * version, we need to release the final
                         * reference held for issuing the INCLUDE {}.
                         * During mld_final_leave(), we bumped up both the
                         * request and reference counts.  Since we cannot
                         * call in6_multi_detach() here, defer this task to
                         * the timer routine.
                         */
                        VERIFY(inm->in6m_nrelecnt != 0);
                        MLI_LOCK(mli);
                        SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
                            in6m_nrele);
                        MLI_UNLOCK(mli);
                        /* FALLTHROUGH */
                case MLD_G_QUERY_PENDING_MEMBER:
                case MLD_SG_QUERY_PENDING_MEMBER:
                        in6m_clear_recorded(inm);
                        /* FALLTHROUGH */
                case MLD_REPORTING_MEMBER:
                        /*
                         * LEAVING, query-pending and REPORTING groups all
                         * end up in the REPORTING state.
                         */
                        inm->in6m_state = MLD_REPORTING_MEMBER;
                        break;
                }
                /*
                 * Always clear state-change and group report timers.
                 * Free any pending MLDv2 state-change records.
                 */
                inm->in6m_sctimer = 0;
                inm->in6m_timer = 0;
                IF_DRAIN(&inm->in6m_scq);
next:
                IN6M_UNLOCK(inm);
                IN6_NEXT_MULTI(step, inm);
        }
        in6_multihead_lock_done();

        /* Return with mli_lock held, as the caller expects. */
        MLI_LOCK(mli);
}
2082
2083 /*
2084  * Update the Older Version Querier Present timers for a link.
2085  * See Section 9.12 of RFC 3810.
2086  */
2087 static void
2088 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
2089 {
2090         MLI_LOCK_ASSERT_HELD(mli);
2091
2092         if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
2093             --mli->mli_v1_timer == 0) {
2094                 /*
2095                  * MLDv1 Querier Present timer expired; revert to MLDv2.
2096                  */
2097                 MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
2098                     __func__, mli->mli_version, MLD_VERSION_2,
2099                     (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2100                     if_name(mli->mli_ifp)));
2101                 mli->mli_version = MLD_VERSION_2;
2102         }
2103 }
2104
2105 /*
2106  * Transmit an MLDv1 report immediately.
2107  */
2108 static int
2109 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
2110 {
2111         struct ifnet            *ifp;
2112         struct in6_ifaddr       *ia;
2113         struct ip6_hdr          *ip6;
2114         struct mbuf             *mh, *md;
2115         struct mld_hdr          *mld;
2116         int                     error = 0;
2117
2118         IN6M_LOCK_ASSERT_HELD(in6m);
2119         MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
2120
2121         ifp = in6m->in6m_ifp;
2122         /* ia may be NULL if link-local address is tentative. */
2123         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
2124
2125         MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2126         if (mh == NULL) {
2127                 if (ia != NULL)
2128                         IFA_REMREF(&ia->ia_ifa);
2129                 return (ENOMEM);
2130         }
2131         MGET(md, M_DONTWAIT, MT_DATA);
2132         if (md == NULL) {
2133                 m_free(mh);
2134                 if (ia != NULL)
2135                         IFA_REMREF(&ia->ia_ifa);
2136                 return (ENOMEM);
2137         }
2138         mh->m_next = md;
2139
2140         /*
2141          * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
2142          * that ether_output() does not need to allocate another mbuf
2143          * for the header in the most common case.
2144          */
2145         MH_ALIGN(mh, sizeof(struct ip6_hdr));
2146         mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
2147         mh->m_len = sizeof(struct ip6_hdr);
2148
2149         ip6 = mtod(mh, struct ip6_hdr *);
2150         ip6->ip6_flow = 0;
2151         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2152         ip6->ip6_vfc |= IPV6_VERSION;
2153         ip6->ip6_nxt = IPPROTO_ICMPV6;
2154         if (ia != NULL)
2155                 IFA_LOCK(&ia->ia_ifa);
2156         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
2157         if (ia != NULL) {
2158                 IFA_UNLOCK(&ia->ia_ifa);
2159                 IFA_REMREF(&ia->ia_ifa);
2160                 ia = NULL;
2161         }
2162         ip6->ip6_dst = in6m->in6m_addr;
2163
2164         md->m_len = sizeof(struct mld_hdr);
2165         mld = mtod(md, struct mld_hdr *);
2166         mld->mld_type = type;
2167         mld->mld_code = 0;
2168         mld->mld_cksum = 0;
2169         mld->mld_maxdelay = 0;
2170         mld->mld_reserved = 0;
2171         mld->mld_addr = in6m->in6m_addr;
2172         in6_clearscope(&mld->mld_addr);
2173         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2174             sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2175
2176         mld_save_context(mh, ifp);
2177         mh->m_flags |= M_MLDV1;
2178
2179         /*
2180          * Due to the fact that at this point we are possibly holding
2181          * in6_multihead_lock in shared or exclusive mode, we can't call
2182          * mld_dispatch_packet() here since that will eventually call
2183          * ip6_output(), which will try to lock in6_multihead_lock and cause
2184          * a deadlock.
2185          * Instead we defer the work to the mld_timeout() thread, thus
2186          * avoiding unlocking in_multihead_lock here.
2187          */
2188         if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2189                 MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2190                 error = ENOMEM;
2191                 m_freem(mh);
2192         } else {
2193                 IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2194                 VERIFY(error == 0);
2195         }
2196
2197         return (error);
2198 }
2199
2200 /*
2201  * Process a state change from the upper layer for the given IPv6 group.
2202  *
2203  * Each socket holds a reference on the in6_multi in its own ip_moptions.
2204  * The socket layer will have made the necessary updates to.the group
2205  * state, it is now up to MLD to issue a state change report if there
2206  * has been any change between T0 (when the last state-change was issued)
2207  * and T1 (now).
2208  *
2209  * We use the MLDv2 state machine at group level. The MLd module
2210  * however makes the decision as to which MLD protocol version to speak.
2211  * A state change *from* INCLUDE {} always means an initial join.
2212  * A state change *to* INCLUDE {} always means a final leave.
2213  *
2214  * If delay is non-zero, and the state change is an initial multicast
2215  * join, the state change report will be delayed by 'delay' ticks
2216  * in units of seconds if MLDv1 is active on the link; otherwise
2217  * the initial MLDv2 state change report will be delayed by whichever
2218  * is sooner, a pending state-change timer or delay itself.
2219  */
2220 int
2221 mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp,
2222     const int delay)
2223 {
2224         struct mld_ifinfo *mli;
2225         struct ifnet *ifp;
2226         int error = 0;
2227
2228         VERIFY(mtp != NULL);
2229         bzero(mtp, sizeof (*mtp));
2230
2231         IN6M_LOCK_ASSERT_HELD(inm);
2232         VERIFY(inm->in6m_mli != NULL);
2233         MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2234
2235         /*
2236          * Try to detect if the upper layer just asked us to change state
2237          * for an interface which has now gone away.
2238          */
2239         VERIFY(inm->in6m_ifma != NULL);
2240         ifp = inm->in6m_ifma->ifma_ifp;
2241         /*
2242          * Sanity check that netinet6's notion of ifp is the same as net's.
2243          */
2244         VERIFY(inm->in6m_ifp == ifp);
2245
2246         mli = MLD_IFINFO(ifp);
2247         VERIFY(mli != NULL);
2248
2249         /*
2250          * If we detect a state transition to or from MCAST_UNDEFINED
2251          * for this group, then we are starting or finishing an MLD
2252          * life cycle for this group.
2253          */
2254         if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2255                 MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2256                     inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2257                 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2258                         MLD_PRINTF(("%s: initial join\n", __func__));
2259                         error = mld_initial_join(inm, mli, mtp, delay);
2260                         goto out;
2261                 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2262                         MLD_PRINTF(("%s: final leave\n", __func__));
2263                         mld_final_leave(inm, mli, mtp);
2264                         goto out;
2265                 }
2266         } else {
2267                 MLD_PRINTF(("%s: filter set change\n", __func__));
2268         }
2269
2270         error = mld_handle_state_change(inm, mli, mtp);
2271 out:
2272         return (error);
2273 }
2274
2275 /*
2276  * Perform the initial join for an MLD group.
2277  *
2278  * When joining a group:
2279  *  If the group should have its MLD traffic suppressed, do nothing.
2280  *  MLDv1 starts sending MLDv1 host membership reports.
2281  *  MLDv2 will schedule an MLDv2 state-change report containing the
2282  *  initial state of the membership.
2283  *
2284  * If the delay argument is non-zero, then we must delay sending the
2285  * initial state change for delay ticks (in units of seconds).
2286  */
2287 static int
2288 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2289     struct mld_tparams *mtp, const int delay)
2290 {
2291         struct ifnet            *ifp;
2292         struct ifqueue          *ifq;
2293         int                      error, retval, syncstates;
2294         int                      odelay;
2295
2296         IN6M_LOCK_ASSERT_HELD(inm);
2297         MLI_LOCK_ASSERT_NOTHELD(mli);
2298         VERIFY(mtp != NULL);
2299
2300         MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
2301             __func__, ip6_sprintf(&inm->in6m_addr),
2302             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2303             if_name(inm->in6m_ifp)));
2304
2305         error = 0;
2306         syncstates = 1;
2307
2308         ifp = inm->in6m_ifp;
2309
2310         MLI_LOCK(mli);
2311         VERIFY(mli->mli_ifp == ifp);
2312
2313         /*
2314          * Avoid MLD if group is :
2315          * 1. Joined on loopback, OR
2316          * 2. On a link that is marked MLIF_SILENT
2317          * 3. rdar://problem/19227650 Is link local scoped and
2318          *    on cellular interface
2319          * 4. Is a type that should not be reported (node local
2320          *    or all node link local multicast.
2321          * All other groups enter the appropriate state machine
2322          * for the version in use on this link.
2323          */
2324         if ((ifp->if_flags & IFF_LOOPBACK) ||
2325             (mli->mli_flags & MLIF_SILENT) ||
2326             (IFNET_IS_CELLULAR(ifp) &&
2327              IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr)) ||
2328             !mld_is_addr_reported(&inm->in6m_addr)) {
2329                 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2330                     __func__));
2331                 inm->in6m_state = MLD_SILENT_MEMBER;
2332                 inm->in6m_timer = 0;
2333         } else {
2334                 /*
2335                  * Deal with overlapping in6_multi lifecycle.
2336                  * If this group was LEAVING, then make sure
2337                  * we drop the reference we picked up to keep the
2338                  * group around for the final INCLUDE {} enqueue.
2339                  * Since we cannot call in6_multi_detach() here,
2340                  * defer this task to the timer routine.
2341                  */
2342                 if (mli->mli_version == MLD_VERSION_2 &&
2343                     inm->in6m_state == MLD_LEAVING_MEMBER) {
2344                         VERIFY(inm->in6m_nrelecnt != 0);
2345                         SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2346                             in6m_nrele);
2347                 }
2348
2349                 inm->in6m_state = MLD_REPORTING_MEMBER;
2350
2351                 switch (mli->mli_version) {
2352                 case MLD_VERSION_1:
2353                         /*
2354                          * If a delay was provided, only use it if
2355                          * it is greater than the delay normally
2356                          * used for an MLDv1 state change report,
2357                          * and delay sending the initial MLDv1 report
2358                          * by not transitioning to the IDLE state.
2359                          */
2360                         odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
2361                         if (delay) {
2362                                 inm->in6m_timer = max(delay, odelay);
2363                                 mtp->cst = 1;
2364                         } else {
2365                                 inm->in6m_state = MLD_IDLE_MEMBER;
2366                                 error = mld_v1_transmit_report(inm,
2367                                      MLD_LISTENER_REPORT);
2368
2369                                 IN6M_LOCK_ASSERT_HELD(inm);
2370                                 MLI_LOCK_ASSERT_HELD(mli);
2371
2372                                 if (error == 0) {
2373                                         inm->in6m_timer = odelay;
2374                                         mtp->cst = 1;
2375                                 }
2376                         }
2377                         break;
2378
2379                 case MLD_VERSION_2:
2380                         /*
2381                          * Defer update of T0 to T1, until the first copy
2382                          * of the state change has been transmitted.
2383                          */
2384                         syncstates = 0;
2385
2386                         /*
2387                          * Immediately enqueue a State-Change Report for
2388                          * this interface, freeing any previous reports.
2389                          * Don't kick the timers if there is nothing to do,
2390                          * or if an error occurred.
2391                          */
2392                         ifq = &inm->in6m_scq;
2393                         IF_DRAIN(ifq);
2394                         retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2395                             0, 0, (mli->mli_flags & MLIF_USEALLOW));
2396                         mtp->cst = (ifq->ifq_len > 0);
2397                         MLD_PRINTF(("%s: enqueue record = %d\n",
2398                             __func__, retval));
2399                         if (retval <= 0) {
2400                                 error = retval * -1;
2401                                 break;
2402                         }
2403
2404                         /*
2405                          * Schedule transmission of pending state-change
2406                          * report up to RV times for this link. The timer
2407                          * will fire at the next mld_timeout (1 second),
2408                          * giving us an opportunity to merge the reports.
2409                          *
2410                          * If a delay was provided to this function, only
2411                          * use this delay if sooner than the existing one.
2412                          */
2413                         VERIFY(mli->mli_rv > 1);
2414                         inm->in6m_scrv = mli->mli_rv;
2415                         if (delay) {
2416                                 if (inm->in6m_sctimer > 1) {
2417                                         inm->in6m_sctimer =
2418                                             min(inm->in6m_sctimer, delay);
2419                                 } else
2420                                         inm->in6m_sctimer = delay;
2421                         } else {
2422                                 inm->in6m_sctimer = 1;
2423                         }
2424                         mtp->sct = 1;
2425                         error = 0;
2426                         break;
2427                 }
2428         }
2429         MLI_UNLOCK(mli);
2430
2431         /*
2432          * Only update the T0 state if state change is atomic,
2433          * i.e. we don't need to wait for a timer to fire before we
2434          * can consider the state change to have been communicated.
2435          */
2436         if (syncstates) {
2437                 in6m_commit(inm);
2438                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2439                     ip6_sprintf(&inm->in6m_addr),
2440                     if_name(inm->in6m_ifp)));
2441         }
2442
2443         return (error);
2444 }
2445
2446 /*
2447  * Issue an intermediate state change during the life-cycle.
2448  */
2449 static int
2450 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli,
2451     struct mld_tparams *mtp)
2452 {
2453         struct ifnet            *ifp;
2454         int                      retval = 0;
2455
2456         IN6M_LOCK_ASSERT_HELD(inm);
2457         MLI_LOCK_ASSERT_NOTHELD(mli);
2458         VERIFY(mtp != NULL);
2459
2460         MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
2461             __func__, ip6_sprintf(&inm->in6m_addr),
2462             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2463             if_name(inm->in6m_ifp)));
2464
2465         ifp = inm->in6m_ifp;
2466
2467         MLI_LOCK(mli);
2468         VERIFY(mli->mli_ifp == ifp);
2469
2470         if ((ifp->if_flags & IFF_LOOPBACK) ||
2471             (mli->mli_flags & MLIF_SILENT) ||
2472             !mld_is_addr_reported(&inm->in6m_addr) ||
2473             (mli->mli_version != MLD_VERSION_2)) {
2474                 MLI_UNLOCK(mli);
2475                 if (!mld_is_addr_reported(&inm->in6m_addr)) {
2476                         MLD_PRINTF(("%s: not kicking state machine for silent "
2477                             "group\n", __func__));
2478                 }
2479                 MLD_PRINTF(("%s: nothing to do\n", __func__));
2480                 in6m_commit(inm);
2481                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2482                     ip6_sprintf(&inm->in6m_addr),
2483                     if_name(inm->in6m_ifp)));
2484                 goto done;
2485         }
2486
2487         IF_DRAIN(&inm->in6m_scq);
2488
2489         retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2490             (mli->mli_flags & MLIF_USEALLOW));
2491         mtp->cst = (inm->in6m_scq.ifq_len > 0);
2492         MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2493         if (retval <= 0) {
2494                 MLI_UNLOCK(mli);
2495                 retval *= -1;
2496                 goto done;
2497         } else {
2498                 retval = 0;
2499         }
2500
2501         /*
2502          * If record(s) were enqueued, start the state-change
2503          * report timer for this group.
2504          */
2505         inm->in6m_scrv = mli->mli_rv;
2506         inm->in6m_sctimer = 1;
2507         mtp->sct = 1;
2508         MLI_UNLOCK(mli);
2509
2510 done:
2511         return (retval);
2512 }
2513
2514 /*
2515  * Perform the final leave for a multicast address.
2516  *
2517  * When leaving a group:
2518  *  MLDv1 sends a DONE message, if and only if we are the reporter.
2519  *  MLDv2 enqueues a state-change report containing a transition
2520  *  to INCLUDE {} for immediate transmission.
2521  */
2522 static void
2523 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli,
2524     struct mld_tparams *mtp)
2525 {
2526         int syncstates = 1;
2527
2528         IN6M_LOCK_ASSERT_HELD(inm);
2529         MLI_LOCK_ASSERT_NOTHELD(mli);
2530         VERIFY(mtp != NULL);
2531
2532         MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
2533             __func__, ip6_sprintf(&inm->in6m_addr),
2534             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2535             if_name(inm->in6m_ifp)));
2536
2537         switch (inm->in6m_state) {
2538         case MLD_NOT_MEMBER:
2539         case MLD_SILENT_MEMBER:
2540         case MLD_LEAVING_MEMBER:
2541                 /* Already leaving or left; do nothing. */
2542                 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2543                     __func__));
2544                 break;
2545         case MLD_REPORTING_MEMBER:
2546         case MLD_IDLE_MEMBER:
2547         case MLD_G_QUERY_PENDING_MEMBER:
2548         case MLD_SG_QUERY_PENDING_MEMBER:
2549                 MLI_LOCK(mli);
2550                 if (mli->mli_version == MLD_VERSION_1) {
2551                         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2552                             inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2553                                 panic("%s: MLDv2 state reached, not MLDv2 "
2554                                     "mode\n", __func__);
2555                                 /* NOTREACHED */
2556                         }
2557                         /* scheduler timer if enqueue is successful */
2558                         mtp->cst = (mld_v1_transmit_report(inm,
2559                             MLD_LISTENER_DONE) == 0);
2560
2561                         IN6M_LOCK_ASSERT_HELD(inm);
2562                         MLI_LOCK_ASSERT_HELD(mli);
2563
2564                         inm->in6m_state = MLD_NOT_MEMBER;
2565                 } else if (mli->mli_version == MLD_VERSION_2) {
2566                         /*
2567                          * Stop group timer and all pending reports.
2568                          * Immediately enqueue a state-change report
2569                          * TO_IN {} to be sent on the next timeout,
2570                          * giving us an opportunity to merge reports.
2571                          */
2572                         IF_DRAIN(&inm->in6m_scq);
2573                         inm->in6m_timer = 0;
2574                         inm->in6m_scrv = mli->mli_rv;
2575                         MLD_PRINTF(("%s: Leaving %s/%s with %d "
2576                             "pending retransmissions.\n", __func__,
2577                             ip6_sprintf(&inm->in6m_addr),
2578                             if_name(inm->in6m_ifp),
2579                             inm->in6m_scrv));
2580                         if (inm->in6m_scrv == 0) {
2581                                 inm->in6m_state = MLD_NOT_MEMBER;
2582                                 inm->in6m_sctimer = 0;
2583                         } else {
2584                                 int retval;
2585                                 /*
2586                                  * Stick around in the in6_multihead list;
2587                                  * the final detach will be issued by
2588                                  * mld_v2_process_group_timers() when
2589                                  * the retransmit timer expires.
2590                                  */
2591                                 IN6M_ADDREF_LOCKED(inm);
2592                                 VERIFY(inm->in6m_debug & IFD_ATTACHED);
2593                                 inm->in6m_reqcnt++;
2594                                 VERIFY(inm->in6m_reqcnt >= 1);
2595                                 inm->in6m_nrelecnt++;
2596                                 VERIFY(inm->in6m_nrelecnt != 0);
2597
2598                                 retval = mld_v2_enqueue_group_record(
2599                                     &inm->in6m_scq, inm, 1, 0, 0,
2600                                     (mli->mli_flags & MLIF_USEALLOW));
2601                                 mtp->cst = (inm->in6m_scq.ifq_len > 0);
2602                                 KASSERT(retval != 0,
2603                                     ("%s: enqueue record = %d\n", __func__,
2604                                      retval));
2605
2606                                 inm->in6m_state = MLD_LEAVING_MEMBER;
2607                                 inm->in6m_sctimer = 1;
2608                                 mtp->sct = 1;
2609                                 syncstates = 0;
2610                         }
2611                 }
2612                 MLI_UNLOCK(mli);
2613                 break;
2614         case MLD_LAZY_MEMBER:
2615         case MLD_SLEEPING_MEMBER:
2616         case MLD_AWAKENING_MEMBER:
2617                 /* Our reports are suppressed; do nothing. */
2618                 break;
2619         }
2620
2621         if (syncstates) {
2622                 in6m_commit(inm);
2623                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2624                     ip6_sprintf(&inm->in6m_addr),
2625                     if_name(inm->in6m_ifp)));
2626                 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2627                 MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
2628                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
2629                     if_name(inm->in6m_ifp)));
2630         }
2631 }
2632
2633 /*
2634  * Enqueue an MLDv2 group record to the given output queue.
2635  *
2636  * If is_state_change is zero, a current-state record is appended.
2637  * If is_state_change is non-zero, a state-change report is appended.
2638  *
2639  * If is_group_query is non-zero, an mbuf packet chain is allocated.
2640  * If is_group_query is zero, and if there is a packet with free space
2641  * at the tail of the queue, it will be appended to providing there
2642  * is enough free space.
2643  * Otherwise a new mbuf packet chain is allocated.
2644  *
2645  * If is_source_query is non-zero, each source is checked to see if
2646  * it was recorded for a Group-Source query, and will be omitted if
2647  * it is not both in-mode and recorded.
2648  *
2649  * If use_block_allow is non-zero, state change reports for initial join
2650  * and final leave, on an inclusive mode group with a source list, will be
2651  * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2652  *
2653  * The function will attempt to allocate leading space in the packet
2654  * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2655  *
2656  * If successful the size of all data appended to the queue is returned,
2657  * otherwise an error code less than zero is returned, or zero if
2658  * no record(s) were appended.
2659  */
2660 static int
2661 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2662     const int is_state_change, const int is_group_query,
2663     const int is_source_query, const int use_block_allow)
2664 {
2665         struct mldv2_record      mr;
2666         struct mldv2_record     *pmr;
2667         struct ifnet            *ifp;
2668         struct ip6_msource      *ims, *nims;
2669         struct mbuf             *m0, *m, *md;
2670         int                      error, is_filter_list_change;
2671         int                      minrec0len, m0srcs, msrcs, nbytes, off;
2672         int                      record_has_sources;
2673         int                      now;
2674         int                      type;
2675         uint8_t                  mode;
2676
2677         IN6M_LOCK_ASSERT_HELD(inm);
2678         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2679
2680         error = 0;
2681         ifp = inm->in6m_ifp;
2682         is_filter_list_change = 0;
2683         m = NULL;
2684         m0 = NULL;
2685         m0srcs = 0;
2686         msrcs = 0;
2687         nbytes = 0;
2688         nims = NULL;
2689         record_has_sources = 1;
2690         pmr = NULL;
2691         type = MLD_DO_NOTHING;
2692         mode = inm->in6m_st[1].iss_fmode;
2693
2694         /*
2695          * If we did not transition out of ASM mode during t0->t1,
2696          * and there are no source nodes to process, we can skip
2697          * the generation of source records.
2698          */
2699         if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2700             inm->in6m_nsrc == 0)
2701                 record_has_sources = 0;
2702
2703         if (is_state_change) {
2704                 /*
2705                  * Queue a state change record.
2706                  * If the mode did not change, and there are non-ASM
2707                  * listeners or source filters present,
2708                  * we potentially need to issue two records for the group.
2709                  * If there are ASM listeners, and there was no filter
2710                  * mode transition of any kind, do nothing.
2711                  *
2712                  * If we are transitioning to MCAST_UNDEFINED, we need
2713                  * not send any sources. A transition to/from this state is
2714                  * considered inclusive with some special treatment.
2715                  *
2716                  * If we are rewriting initial joins/leaves to use
2717                  * ALLOW/BLOCK, and the group's membership is inclusive,
2718                  * we need to send sources in all cases.
2719                  */
2720                 if (mode != inm->in6m_st[0].iss_fmode) {
2721                         if (mode == MCAST_EXCLUDE) {
2722                                 MLD_PRINTF(("%s: change to EXCLUDE\n",
2723                                     __func__));
2724                                 type = MLD_CHANGE_TO_EXCLUDE_MODE;
2725                         } else {
2726                                 MLD_PRINTF(("%s: change to INCLUDE\n",
2727                                     __func__));
2728                                 if (use_block_allow) {
2729                                         /*
2730                                          * XXX
2731                                          * Here we're interested in state
2732                                          * edges either direction between
2733                                          * MCAST_UNDEFINED and MCAST_INCLUDE.
2734                                          * Perhaps we should just check
2735                                          * the group state, rather than
2736                                          * the filter mode.
2737                                          */
2738                                         if (mode == MCAST_UNDEFINED) {
2739                                                 type = MLD_BLOCK_OLD_SOURCES;
2740                                         } else {
2741                                                 type = MLD_ALLOW_NEW_SOURCES;
2742                                         }
2743                                 } else {
2744                                         type = MLD_CHANGE_TO_INCLUDE_MODE;
2745                                         if (mode == MCAST_UNDEFINED)
2746                                                 record_has_sources = 0;
2747                                 }
2748                         }
2749                 } else {
2750                         if (record_has_sources) {
2751                                 is_filter_list_change = 1;
2752                         } else {
2753                                 type = MLD_DO_NOTHING;
2754                         }
2755                 }
2756         } else {
2757                 /*
2758                  * Queue a current state record.
2759                  */
2760                 if (mode == MCAST_EXCLUDE) {
2761                         type = MLD_MODE_IS_EXCLUDE;
2762                 } else if (mode == MCAST_INCLUDE) {
2763                         type = MLD_MODE_IS_INCLUDE;
2764                         VERIFY(inm->in6m_st[1].iss_asm == 0);
2765                 }
2766         }
2767
2768         /*
2769          * Generate the filter list changes using a separate function.
2770          */
2771         if (is_filter_list_change)
2772                 return (mld_v2_enqueue_filter_change(ifq, inm));
2773
2774         if (type == MLD_DO_NOTHING) {
2775                 MLD_PRINTF(("%s: nothing to do for %s/%s\n",
2776                     __func__, ip6_sprintf(&inm->in6m_addr),
2777                     if_name(inm->in6m_ifp)));
2778                 return (0);
2779         }
2780
2781         /*
2782          * If any sources are present, we must be able to fit at least
2783          * one in the trailing space of the tail packet's mbuf,
2784          * ideally more.
2785          */
2786         minrec0len = sizeof(struct mldv2_record);
2787         if (record_has_sources)
2788                 minrec0len += sizeof(struct in6_addr);
2789         MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
2790             mld_rec_type_to_str(type),
2791             ip6_sprintf(&inm->in6m_addr),
2792             if_name(inm->in6m_ifp)));
2793
2794         /*
2795          * Check if we have a packet in the tail of the queue for this
2796          * group into which the first group record for this group will fit.
2797          * Otherwise allocate a new packet.
2798          * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2799          * Note: Group records for G/GSR query responses MUST be sent
2800          * in their own packet.
2801          */
2802         m0 = ifq->ifq_tail;
2803         if (!is_group_query &&
2804             m0 != NULL &&
2805             (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2806             (m0->m_pkthdr.len + minrec0len) <
2807              (ifp->if_mtu - MLD_MTUSPACE)) {
2808                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2809                             sizeof(struct mldv2_record)) /
2810                             sizeof(struct in6_addr);
2811                 m = m0;
2812                 MLD_PRINTF(("%s: use existing packet\n", __func__));
2813         } else {
2814                 if (IF_QFULL(ifq)) {
2815                         MLD_PRINTF(("%s: outbound queue full\n", __func__));
2816                         return (-ENOMEM);
2817                 }
2818                 m = NULL;
2819                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2820                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2821                 if (!is_state_change && !is_group_query)
2822                         m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2823                 if (m == NULL)
2824                         m = m_gethdr(M_DONTWAIT, MT_DATA);
2825                 if (m == NULL)
2826                         return (-ENOMEM);
2827
2828                 mld_save_context(m, ifp);
2829
2830                 MLD_PRINTF(("%s: allocated first packet\n", __func__));
2831         }
2832
2833         /*
2834          * Append group record.
2835          * If we have sources, we don't know how many yet.
2836          */
2837         mr.mr_type = type;
2838         mr.mr_datalen = 0;
2839         mr.mr_numsrc = 0;
2840         mr.mr_addr = inm->in6m_addr;
2841         in6_clearscope(&mr.mr_addr);
2842         if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2843                 if (m != m0)
2844                         m_freem(m);
2845                 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2846                 return (-ENOMEM);
2847         }
2848         nbytes += sizeof(struct mldv2_record);
2849
2850         /*
2851          * Append as many sources as will fit in the first packet.
2852          * If we are appending to a new packet, the chain allocation
2853          * may potentially use clusters; use m_getptr() in this case.
2854          * If we are appending to an existing packet, we need to obtain
2855          * a pointer to the group record after m_append(), in case a new
2856          * mbuf was allocated.
2857          *
2858          * Only append sources which are in-mode at t1. If we are
2859          * transitioning to MCAST_UNDEFINED state on the group, and
2860          * use_block_allow is zero, do not include source entries.
2861          * Otherwise, we need to include this source in the report.
2862          *
2863          * Only report recorded sources in our filter set when responding
2864          * to a group-source query.
2865          */
2866         if (record_has_sources) {
2867                 if (m == m0) {
2868                         md = m_last(m);
2869                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2870                             md->m_len - nbytes);
2871                 } else {
2872                         md = m_getptr(m, 0, &off);
2873                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2874                             off);
2875                 }
2876                 msrcs = 0;
2877                 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2878                     nims) {
2879                         MLD_PRINTF(("%s: visit node %s\n", __func__,
2880                             ip6_sprintf(&ims->im6s_addr)));
2881                         now = im6s_get_mode(inm, ims, 1);
2882                         MLD_PRINTF(("%s: node is %d\n", __func__, now));
2883                         if ((now != mode) ||
2884                             (now == mode &&
2885                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
2886                                 MLD_PRINTF(("%s: skip node\n", __func__));
2887                                 continue;
2888                         }
2889                         if (is_source_query && ims->im6s_stp == 0) {
2890                                 MLD_PRINTF(("%s: skip unrecorded node\n",
2891                                     __func__));
2892                                 continue;
2893                         }
2894                         MLD_PRINTF(("%s: append node\n", __func__));
2895                         if (!m_append(m, sizeof(struct in6_addr),
2896                             (void *)&ims->im6s_addr)) {
2897                                 if (m != m0)
2898                                         m_freem(m);
2899                                 MLD_PRINTF(("%s: m_append() failed.\n",
2900                                     __func__));
2901                                 return (-ENOMEM);
2902                         }
2903                         nbytes += sizeof(struct in6_addr);
2904                         ++msrcs;
2905                         if (msrcs == m0srcs)
2906                                 break;
2907                 }
2908                 MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
2909                     msrcs));
2910                 pmr->mr_numsrc = htons(msrcs);
2911                 nbytes += (msrcs * sizeof(struct in6_addr));
2912         }
2913
2914         if (is_source_query && msrcs == 0) {
2915                 MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
2916                 if (m != m0)
2917                         m_freem(m);
2918                 return (0);
2919         }
2920
2921         /*
2922          * We are good to go with first packet.
2923          */
2924         if (m != m0) {
2925                 MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
2926                 m->m_pkthdr.vt_nrecs = 1;
2927                 IF_ENQUEUE(ifq, m);
2928         } else {
2929                 m->m_pkthdr.vt_nrecs++;
2930         }
2931         /*
2932          * No further work needed if no source list in packet(s).
2933          */
2934         if (!record_has_sources)
2935                 return (nbytes);
2936
2937         /*
2938          * Whilst sources remain to be announced, we need to allocate
2939          * a new packet and fill out as many sources as will fit.
2940          * Always try for a cluster first.
2941          */
2942         while (nims != NULL) {
2943                 if (IF_QFULL(ifq)) {
2944                         MLD_PRINTF(("%s: outbound queue full\n", __func__));
2945                         return (-ENOMEM);
2946                 }
2947                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2948                 if (m == NULL)
2949                         m = m_gethdr(M_DONTWAIT, MT_DATA);
2950                 if (m == NULL)
2951                         return (-ENOMEM);
2952                 mld_save_context(m, ifp);
2953                 md = m_getptr(m, 0, &off);
2954                 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2955                 MLD_PRINTF(("%s: allocated next packet\n", __func__));
2956
2957                 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2958                         if (m != m0)
2959                                 m_freem(m);
2960                         MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2961                         return (-ENOMEM);
2962                 }
2963                 m->m_pkthdr.vt_nrecs = 1;
2964                 nbytes += sizeof(struct mldv2_record);
2965
2966                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2967                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2968
2969                 msrcs = 0;
2970                 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2971                         MLD_PRINTF(("%s: visit node %s\n",
2972                             __func__, ip6_sprintf(&ims->im6s_addr)));
2973                         now = im6s_get_mode(inm, ims, 1);
2974                         if ((now != mode) ||
2975                             (now == mode &&
2976                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
2977                                 MLD_PRINTF(("%s: skip node\n", __func__));
2978                                 continue;
2979                         }
2980                         if (is_source_query && ims->im6s_stp == 0) {
2981                                 MLD_PRINTF(("%s: skip unrecorded node\n",
2982                                     __func__));
2983                                 continue;
2984                         }
2985                         MLD_PRINTF(("%s: append node\n", __func__));
2986                         if (!m_append(m, sizeof(struct in6_addr),
2987                             (void *)&ims->im6s_addr)) {
2988                                 if (m != m0)
2989                                         m_freem(m);
2990                                 MLD_PRINTF(("%s: m_append() failed.\n",
2991                                     __func__));
2992                                 return (-ENOMEM);
2993                         }
2994                         ++msrcs;
2995                         if (msrcs == m0srcs)
2996                                 break;
2997                 }
2998                 pmr->mr_numsrc = htons(msrcs);
2999                 nbytes += (msrcs * sizeof(struct in6_addr));
3000
3001                 MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
3002                 IF_ENQUEUE(ifq, m);
3003         }
3004
3005         return (nbytes);
3006 }
3007
/*
 * Bit mask used to mark record pass completion.
 * The values are chosen so that the current filter mode of an
 * ip6_msource node can be cast directly to this type.
 */
typedef enum {
        REC_NONE  = 0,                          /* MCAST_UNDEFINED */
        REC_ALLOW = 1 << 0,                     /* MCAST_INCLUDE */
        REC_BLOCK = 1 << 1,                     /* MCAST_EXCLUDE */
        REC_FULL  = (REC_ALLOW | REC_BLOCK)     /* both passes done */
} rectype_t;
3019
3020 /*
3021  * Enqueue an MLDv2 filter list change to the given output queue.
3022  *
3023  * Source list filter state is held in an RB-tree. When the filter list
3024  * for a group is changed without changing its mode, we need to compute
3025  * the deltas between T0 and T1 for each source in the filter set,
3026  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3027  *
3028  * As we may potentially queue two record types, and the entire R-B tree
3029  * needs to be walked at once, we break this out into its own function
3030  * so we can generate a tightly packed queue of packets.
3031  *
3032  * XXX This could be written to only use one tree walk, although that makes
3033  * serializing into the mbuf chains a bit harder. For now we do two walks
3034  * which makes things easier on us, and it may or may not be harder on
3035  * the L2 cache.
3036  *
3037  * If successful the size of all data appended to the queue is returned,
3038  * otherwise an error code less than zero is returned, or zero if
3039  * no record(s) were appended.
      *
      * Parameters:
      *   ifq - output queue; its tail packet is extended when it has room,
      *         otherwise newly allocated packets are enqueued on it.
      *   inm - group whose source filter changed; the caller must hold the
      *         in6_multi lock (asserted on entry).
      *
      * The only failure code produced here is -ENOMEM, returned when an
      * mbuf allocation or m_append() fails.
3040  */
3041 static int
3042 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
3043 {
3044         static const int MINRECLEN =
3045             sizeof(struct mldv2_record) + sizeof(struct in6_addr);
3046         struct ifnet            *ifp;
3047         struct mldv2_record      mr;
3048         struct mldv2_record     *pmr;
3049         struct ip6_msource      *ims, *nims;
3050         struct mbuf             *m, *m0, *md;
3051         int                      m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3052         int                      nallow, nblock;
3053         uint8_t                  mode, now, then;
3054         rectype_t                crt, drt, nrt;
3055
3056         IN6M_LOCK_ASSERT_HELD(inm);
3057
             /*
              * Nothing to report when the group has no sources, or when
              * iss_asm is non-zero at both t0 and t1.
              * NOTE(review): iss_asm presumably counts any-source listeners;
              * confirm against the in6_multi definition.
              */
3058         if (inm->in6m_nsrc == 0 ||
3059             (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
3060                 return (0);
3061
3062         ifp = inm->in6m_ifp;                    /* interface */
3063         mode = inm->in6m_st[1].iss_fmode;       /* filter mode at t1 */
3064         crt = REC_NONE; /* current group record type */
3065         drt = REC_NONE; /* mask of completed group record types */
3066         nrt = REC_NONE; /* record type for current node */
3067         m0srcs = 0;     /* # source which will fit in current mbuf chain */
3068         npbytes = 0;    /* # of bytes appended this packet */
3069         nbytes = 0;     /* # of bytes appended to group's state-change queue */
3070         rsrcs = 0;      /* # sources encoded in current record */
3071         schanged = 0;   /* # nodes encoded in overall filter change */
3072         nallow = 0;     /* # of source entries in ALLOW_NEW */
3073         nblock = 0;     /* # of source entries in BLOCK_OLD */
3074         nims = NULL;    /* next tree node pointer */
3075
3076         /*
3077          * For each possible filter record mode.
3078          * The first kind of source we encounter tells us which
3079          * is the first kind of record we start appending.
3080          * If a node transitioned to UNDEFINED at t1, its mode is treated
3081          * as the inverse of the group's filter mode.
3082          */
3083         while (drt != REC_FULL) {
3084                 do {
                             /*
                              * Extend the packet at the tail of ifq when it can
                              * take one more record and still has room for a
                              * minimal record (header plus one source);
                              * otherwise start a fresh packet.
                              */
3085                         m0 = ifq->ifq_tail;
3086                         if (m0 != NULL &&
3087                             (m0->m_pkthdr.vt_nrecs + 1 <=
3088                              MLD_V2_REPORT_MAXRECS) &&
3089                             (m0->m_pkthdr.len + MINRECLEN) <
3090                              (ifp->if_mtu - MLD_MTUSPACE)) {
3091                                 m = m0;
                                     /*
                                      * NOTE(review): unlike the guard above and
                                      * the new-packet branch below, this capacity
                                      * figure does not subtract MLD_MTUSPACE;
                                      * confirm that is intended.
                                      */
3092                                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3093                                             sizeof(struct mldv2_record)) /
3094                                             sizeof(struct in6_addr);
3095                                 MLD_PRINTF(("%s: use previous packet\n",
3096                                     __func__));
3097                         } else {
                                     /* Try m_getcl() first, then m_gethdr(). */
3098                                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3099                                 if (m == NULL)
3100                                         m = m_gethdr(M_DONTWAIT, MT_DATA);
3101                                 if (m == NULL) {
3102                                         MLD_PRINTF(("%s: m_get*() failed\n",
3103                                             __func__));
3104                                         return (-ENOMEM);
3105                                 }
3106                                 m->m_pkthdr.vt_nrecs = 0;
3107                                 mld_save_context(m, ifp);
3108                                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3109                                     sizeof(struct mldv2_record)) /
3110                                     sizeof(struct in6_addr);
3111                                 npbytes = 0;
3112                                 MLD_PRINTF(("%s: allocated new packet\n",
3113                                     __func__));
3114                         }
3115                         /*
3116                          * Append the MLD group record header to the
3117                          * current packet's data area.
3118                          * Recalculate pointer to free space for next
3119                          * group record, in case m_append() allocated
3120                          * a new mbuf or cluster.
3121                          */
3122                         memset(&mr, 0, sizeof(mr));
3123                         mr.mr_addr = inm->in6m_addr;
3124                         in6_clearscope(&mr.mr_addr);
3125                         if (!m_append(m, sizeof(mr), (void *)&mr)) {
                                     /*
                                      * Only a packet allocated in this iteration
                                      * is freed; a reused packet (m == m0) is
                                      * still linked on ifq.
                                      */
3126                                 if (m != m0)
3127                                         m_freem(m);
3128                                 MLD_PRINTF(("%s: m_append() failed\n",
3129                                     __func__));
3130                                 return (-ENOMEM);
3131                         }
                             /*
                              * NOTE(review): npbytes is zeroed only when a new
                              * packet is allocated; in the reuse branch it keeps
                              * its value from the previous iteration. Confirm the
                              * resulting nbytes accounting is intended.
                              */
3132                         npbytes += sizeof(struct mldv2_record);
3133                         if (m != m0) {
3134                                 /* new packet; offset in chain */
3135                                 md = m_getptr(m, npbytes -
3136                                     sizeof(struct mldv2_record), &off);
3137                                 pmr = (struct mldv2_record *)(mtod(md,
3138                                     uint8_t *) + off);
3139                         } else {
3140                                 /* current packet; offset from last append */
3141                                 md = m_last(m);
3142                                 pmr = (struct mldv2_record *)(mtod(md,
3143                                     uint8_t *) + md->m_len -
3144                                     sizeof(struct mldv2_record));
3145                         }
3146                         /*
3147                          * Begin walking the tree for this record type
3148                          * pass, or continue from where we left off
3149                          * previously if we had to allocate a new packet.
3150                          * Only report deltas in-mode at t1.
3151                          * We need not report included sources as allowed
3152                          * if we are in inclusive mode on the group,
3153                          * however the converse is not true.
3154                          */
3155                         rsrcs = 0;
3156                         if (nims == NULL) {
3157                                 nims = RB_MIN(ip6_msource_tree,
3158                                     &inm->in6m_srcs);
3159                         }
                             /*
                              * NOTE(review): RB_FOREACH_FROM presumably advances
                              * nims to the successor on every step, leaving it
                              * at the resume point after a break and NULL after
                              * a complete walk; the do/while condition below
                              * relies on that. Confirm against the macro.
                              */
3160                         RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3161                                 MLD_PRINTF(("%s: visit node %s\n", __func__,
3162                                     ip6_sprintf(&ims->im6s_addr)));
3163                                 now = im6s_get_mode(inm, ims, 1);
3164                                 then = im6s_get_mode(inm, ims, 0);
3165                                 MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3166                                     __func__, then, now));
3167                                 if (now == then) {
3168                                         MLD_PRINTF(("%s: skip unchanged\n",
3169                                             __func__));
3170                                         continue;
3171                                 }
3172                                 if (mode == MCAST_EXCLUDE &&
3173                                     now == MCAST_INCLUDE) {
3174                                         MLD_PRINTF(("%s: skip IN src on EX "
3175                                             "group\n", __func__));
3176                                         continue;
3177                                 }
                                     /*
                                      * A node undefined at t1 is reported under
                                      * the inverse of the group's t1 mode.
                                      */
3178                                 nrt = (rectype_t)now;
3179                                 if (nrt == REC_NONE)
3180                                         nrt = (rectype_t)(~mode & REC_FULL);
                                     /*
                                      * The first changed node fixes the record
                                      * type for this pass; nodes of the other
                                      * type are skipped and left for the next
                                      * pass.
                                      */
3181                                 if (schanged++ == 0) {
3182                                         crt = nrt;
3183                                 } else if (crt != nrt)
3184                                         continue;
3185                                 if (!m_append(m, sizeof(struct in6_addr),
3186                                     (void *)&ims->im6s_addr)) {
3187                                         if (m != m0)
3188                                                 m_freem(m);
3189                                         MLD_PRINTF(("%s: m_append() failed\n",
3190                                             __func__));
3191                                         return (-ENOMEM);
3192                                 }
3193                                 nallow += !!(crt == REC_ALLOW);
3194                                 nblock += !!(crt == REC_BLOCK);
                                     /*
                                      * Stop once the packet holds as many sources
                                      * as were computed to fit.
                                      */
3195                                 if (++rsrcs == m0srcs)
3196                                         break;
3197                         }
3198                         /*
3199                          * If we did not append any tree nodes on this
3200                          * pass, back out of allocations.
3201                          */
3202                         if (rsrcs == 0) {
3203                                 npbytes -= sizeof(struct mldv2_record);
3204                                 if (m != m0) {
3205                                         MLD_PRINTF(("%s: m_free(m)\n",
3206                                             __func__));
3207                                         m_freem(m);
3208                                 } else {
3209                                         MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3210                                             __func__));
3211                                         m_adj(m, -((int)sizeof(
3212                                             struct mldv2_record)));
3213                                 }
                                     /*
                                      * In a do/while, continue jumps to the loop
                                      * condition (nims != NULL).
                                      */
3214                                 continue;
3215                         }
3216                         npbytes += (rsrcs * sizeof(struct in6_addr));
                             /* Fill in the record header appended above. */
3217                         if (crt == REC_ALLOW)
3218                                 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3219                         else if (crt == REC_BLOCK)
3220                                 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3221                         pmr->mr_numsrc = htons(rsrcs);
3222                         /*
3223                          * Count the new group record, and enqueue this
3224                          * packet if it wasn't already queued.
3225                          */
3226                         m->m_pkthdr.vt_nrecs++;
3227                         if (m != m0)
3228                                 IF_ENQUEUE(ifq, m);
3229                         nbytes += npbytes;
3230                 } while (nims != NULL);
                     /*
                      * Mark the record type just walked as done and switch to
                      * the other type for the next walk. If no node changed,
                      * crt is still REC_NONE here and becomes REC_FULL, so the
                      * following walk ends the outer loop.
                      */
3231                 drt |= crt;
3232                 crt = (~crt & REC_FULL);
3233         }
3234
3235         MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3236             nallow, nblock));
3237
3238         return (nbytes);
3239 }
3240
3241 static int
3242 mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3243 {
3244         struct ifqueue  *gq;
3245         struct mbuf     *m;             /* pending state-change */
3246         struct mbuf     *m0;            /* copy of pending state-change */
3247         struct mbuf     *mt;            /* last state-change in packet */
3248         struct mbuf     *n;
3249         int              docopy, domerge;
3250         u_int            recslen;
3251
3252         IN6M_LOCK_ASSERT_HELD(inm);
3253
3254         docopy = 0;
3255         domerge = 0;
3256         recslen = 0;
3257
3258         /*
3259          * If there are further pending retransmissions, make a writable
3260          * copy of each queued state-change message before merging.
3261          */
3262         if (inm->in6m_scrv > 0)
3263                 docopy = 1;
3264
3265         gq = &inm->in6m_scq;
3266 #ifdef MLD_DEBUG
3267         if (gq->ifq_head == NULL) {
3268                 MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
3269                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
3270         }
3271 #endif
3272
3273         /*
3274          * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3275          * packet might not always be at the head of the ifqueue.
3276          */
3277         m = gq->ifq_head;
3278         while (m != NULL) {
3279                 /*
3280                  * Only merge the report into the current packet if
3281                  * there is sufficient space to do so; an MLDv2 report
3282                  * packet may only contain 65,535 group records.
3283                  * Always use a simple mbuf chain concatentation to do this,
3284                  * as large state changes for single groups may have
3285                  * allocated clusters.
3286                  */
3287                 domerge = 0;
3288                 mt = ifscq->ifq_tail;
3289                 if (mt != NULL) {
3290                         recslen = m_length(m);
3291
3292                         if ((mt->m_pkthdr.vt_nrecs +
3293                             m->m_pkthdr.vt_nrecs <=
3294                             MLD_V2_REPORT_MAXRECS) &&
3295                             (mt->m_pkthdr.len + recslen <=
3296                             (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
3297                                 domerge = 1;
3298                 }
3299
3300                 if (!domerge && IF_QFULL(gq)) {
3301                         MLD_PRINTF(("%s: outbound queue full, skipping whole "
3302                             "packet 0x%llx\n", __func__,
3303                             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3304                         n = m->m_nextpkt;
3305                         if (!docopy) {
3306                                 IF_REMQUEUE(gq, m);
3307                                 m_freem(m);
3308                         }
3309                         m = n;
3310                         continue;
3311                 }
3312
3313                 if (!docopy) {
3314                         MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
3315                             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3316                         n = m->m_nextpkt;
3317                         IF_REMQUEUE(gq, m);
3318                         m0 = m;
3319                         m = n;
3320                 } else {
3321                         MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
3322                             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3323                         m0 = m_dup(m, M_NOWAIT);
3324                         if (m0 == NULL)
3325                                 return (ENOMEM);
3326                         m0->m_nextpkt = NULL;
3327                         m = m->m_nextpkt;
3328                 }
3329
3330                 if (!domerge) {
3331                         MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
3332                             __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
3333                             (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
3334                         IF_ENQUEUE(ifscq, m0);
3335                 } else {
3336                         struct mbuf *mtl;       /* last mbuf of packet mt */
3337
3338                         MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
3339                             "0x%llx)\n", __func__,
3340                             (uint64_t)VM_KERNEL_ADDRPERM(m0),
3341                             (uint64_t)VM_KERNEL_ADDRPERM(mt)));
3342
3343                         mtl = m_last(mt);
3344                         m0->m_flags &= ~M_PKTHDR;
3345                         mt->m_pkthdr.len += recslen;
3346                         mt->m_pkthdr.vt_nrecs +=
3347                             m0->m_pkthdr.vt_nrecs;
3348
3349                         mtl->m_next = m0;
3350                 }
3351         }
3352
3353         return (0);
3354 }
3355
3356 /*
3357  * Respond to a pending MLDv2 General Query.
3358  */
3359 static uint32_t
3360 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3361 {
3362         struct ifnet            *ifp;
3363         struct in6_multi        *inm;
3364         struct in6_multistep    step;
3365         int                      retval;
3366
3367         MLI_LOCK_ASSERT_HELD(mli);
3368
3369         VERIFY(mli->mli_version == MLD_VERSION_2);
3370
3371         ifp = mli->mli_ifp;
3372         MLI_UNLOCK(mli);
3373
3374         in6_multihead_lock_shared();
3375         IN6_FIRST_MULTI(step, inm);
3376         while (inm != NULL) {
3377                 IN6M_LOCK(inm);
3378                 if (inm->in6m_ifp != ifp)
3379                         goto next;
3380
3381                 switch (inm->in6m_state) {
3382                 case MLD_NOT_MEMBER:
3383                 case MLD_SILENT_MEMBER:
3384                         break;
3385                 case MLD_REPORTING_MEMBER:
3386                 case MLD_IDLE_MEMBER:
3387                 case MLD_LAZY_MEMBER:
3388                 case MLD_SLEEPING_MEMBER:
3389                 case MLD_AWAKENING_MEMBER:
3390                         inm->in6m_state = MLD_REPORTING_MEMBER;
3391                         MLI_LOCK(mli);
3392                         retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3393                             inm, 0, 0, 0, 0);
3394                         MLI_UNLOCK(mli);
3395                         MLD_PRINTF(("%s: enqueue record = %d\n",
3396                             __func__, retval));
3397                         break;
3398                 case MLD_G_QUERY_PENDING_MEMBER:
3399                 case MLD_SG_QUERY_PENDING_MEMBER:
3400                 case MLD_LEAVING_MEMBER:
3401                         break;
3402                 }
3403 next:
3404                 IN6M_UNLOCK(inm);
3405                 IN6_NEXT_MULTI(step, inm);
3406         }
3407         in6_multihead_lock_done();
3408
3409         MLI_LOCK(mli);
3410         mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3411         MLI_LOCK_ASSERT_HELD(mli);
3412
3413         /*
3414          * Slew transmission of bursts over 1 second intervals.
3415          */
3416         if (mli->mli_gq.ifq_head != NULL) {
3417                 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3418                     MLD_RESPONSE_BURST_INTERVAL);
3419         }
3420
3421         return (mli->mli_v2_timer);
3422 }
3423
3424 /*
3425  * Transmit the next pending message in the output queue.
3426  *
3427  * Must not be called with in6m_lockm or mli_lock held.
3428  */
3429 static void
3430 mld_dispatch_packet(struct mbuf *m)
3431 {
3432         struct ip6_moptions     *im6o;
3433         struct ifnet            *ifp;
3434         struct ifnet            *oifp = NULL;
3435         struct mbuf             *m0;
3436         struct mbuf             *md;
3437         struct ip6_hdr          *ip6;
3438         struct mld_hdr          *mld;
3439         int                      error;
3440         int                      off;
3441         int                      type;
3442
3443         MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
3444             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3445
3446         /*
3447          * Check if the ifnet is still attached.
3448          */
3449         ifp = mld_restore_context(m);
3450         if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3451                 MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
3452                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
3453                     (u_int)if_index));
3454                 m_freem(m);
3455                 ip6stat.ip6s_noroute++;
3456                 return;
3457         }
3458
3459         im6o = ip6_allocmoptions(M_WAITOK);
3460         if (im6o == NULL) {
3461                 m_freem(m);
3462                 return;
3463         }
3464
3465         im6o->im6o_multicast_hlim  = 1;
3466         im6o->im6o_multicast_loop = 0;
3467         im6o->im6o_multicast_ifp = ifp;
3468
3469         if (m->m_flags & M_MLDV1) {
3470                 m0 = m;
3471         } else {
3472                 m0 = mld_v2_encap_report(ifp, m);
3473                 if (m0 == NULL) {
3474                         MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
3475                             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3476                         /*
3477                          * mld_v2_encap_report() has already freed our mbuf.
3478                          */
3479                         IM6O_REMREF(im6o);
3480                         ip6stat.ip6s_odropped++;
3481                         return;
3482                 }
3483         }
3484
3485         mld_scrub_context(m0);
3486         m->m_flags &= ~(M_PROTOFLAGS);
3487         m0->m_pkthdr.rcvif = lo_ifp;
3488
3489         ip6 = mtod(m0, struct ip6_hdr *);
3490         (void) in6_setscope(&ip6->ip6_dst, ifp, NULL);
3491
3492         /*
3493          * Retrieve the ICMPv6 type before handoff to ip6_output(),
3494          * so we can bump the stats.
3495          */
3496         md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3497         mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3498         type = mld->mld_type;
3499
3500         if (ifp->if_eflags & IFEF_TXSTART) {
3501                 /*
3502                  * Use control service class if the outgoing
3503                  * interface supports transmit-start model.
3504                  */
3505                 (void) m_set_service_class(m0, MBUF_SC_CTL);
3506         }
3507
3508         error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3509             &oifp, NULL);
3510
3511         IM6O_REMREF(im6o);
3512
3513         if (error) {
3514                 MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
3515                     (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
3516                 if (oifp != NULL)
3517                         ifnet_release(oifp);
3518                 return;
3519         }
3520
3521         icmp6stat.icp6s_outhist[type]++;
3522         if (oifp != NULL) {
3523                 icmp6_ifstat_inc(oifp, ifs6_out_msg);
3524                 switch (type) {
3525                 case MLD_LISTENER_REPORT:
3526                 case MLDV2_LISTENER_REPORT:
3527                         icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3528                         break;
3529                 case MLD_LISTENER_DONE:
3530                         icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3531                         break;
3532                 }
3533                 ifnet_release(oifp);
3534         }
3535 }
3536
3537 /*
3538  * Encapsulate an MLDv2 report.
3539  *
3540  * KAME IPv6 requires that hop-by-hop options be passed separately,
3541  * and that the IPv6 header be prepended in a separate mbuf.
3542  *
3543  * Returns a pointer to the new mbuf chain head, or NULL if the
3544  * allocation failed.
3545  */
3546 static struct mbuf *
3547 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3548 {
3549         struct mbuf             *mh;
3550         struct mldv2_report     *mld;
3551         struct ip6_hdr          *ip6;
3552         struct in6_ifaddr       *ia;
3553         int                      mldreclen;
3554
3555         VERIFY(m->m_flags & M_PKTHDR);
3556
3557         /*
3558          * RFC3590: OK to send as :: or tentative during DAD.
3559          */
3560         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3561         if (ia == NULL)
3562                 MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3563
3564         MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3565         if (mh == NULL) {
3566                 if (ia != NULL)
3567                         IFA_REMREF(&ia->ia_ifa);
3568                 m_freem(m);
3569                 return (NULL);
3570         }
3571         MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3572
3573         mldreclen = m_length(m);
3574         MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3575
3576         mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3577         mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3578             sizeof(struct mldv2_report) + mldreclen;
3579
3580         ip6 = mtod(mh, struct ip6_hdr *);
3581         ip6->ip6_flow = 0;
3582         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3583         ip6->ip6_vfc |= IPV6_VERSION;
3584         ip6->ip6_nxt = IPPROTO_ICMPV6;
3585         if (ia != NULL)
3586                 IFA_LOCK(&ia->ia_ifa);
3587         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3588         if (ia != NULL) {
3589                 IFA_UNLOCK(&ia->ia_ifa);
3590                 IFA_REMREF(&ia->ia_ifa);
3591                 ia = NULL;
3592         }
3593         ip6->ip6_dst = in6addr_linklocal_allv2routers;
3594         /* scope ID will be set in netisr */
3595
3596         mld = (struct mldv2_report *)(ip6 + 1);
3597         mld->mld_type = MLDV2_LISTENER_REPORT;
3598         mld->mld_code = 0;
3599         mld->mld_cksum = 0;
3600         mld->mld_v2_reserved = 0;
3601         mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3602         m->m_pkthdr.vt_nrecs = 0;
3603         m->m_flags &= ~M_PKTHDR;
3604
3605         mh->m_next = m;
3606         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3607             sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3608         return (mh);
3609 }
3610
#ifdef MLD_DEBUG
/*
 * Translate an MLDv2 group record type into a short label for debug
 * output.  Any value that is not one of the six known record types
 * yields "unknown".
 */
static const char *
mld_rec_type_to_str(const int type)
{
	static const struct {
		int		 rt_type;
		const char	*rt_label;
	} labels[] = {
		{ MLD_CHANGE_TO_EXCLUDE_MODE,	"TO_EX" },
		{ MLD_CHANGE_TO_INCLUDE_MODE,	"TO_IN" },
		{ MLD_MODE_IS_EXCLUDE,		"MODE_EX" },
		{ MLD_MODE_IS_INCLUDE,		"MODE_IN" },
		{ MLD_ALLOW_NEW_SOURCES,	"ALLOW_NEW" },
		{ MLD_BLOCK_OLD_SOURCES,	"BLOCK_OLD" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(labels) / sizeof(labels[0]); i++) {
		if (labels[i].rt_type == type)
			return (labels[i].rt_label);
	}
	return ("unknown");
}
#endif
3634
3635 void
3636 mld_init(void)
3637 {
3638
3639         MLD_PRINTF(("%s: initializing\n", __func__));
3640
3641         /* Setup lock group and attribute for mld_mtx */
3642         mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3643         mld_mtx_grp = lck_grp_alloc_init("mld_mtx\n", mld_mtx_grp_attr);
3644         mld_mtx_attr = lck_attr_alloc_init();
3645         lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3646
3647         ip6_initpktopts(&mld_po);
3648         mld_po.ip6po_hlim = 1;
3649         mld_po.ip6po_hbh = &mld_ra.hbh;
3650         mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3651         mld_po.ip6po_flags = IP6PO_DONTFRAG;
3652         LIST_INIT(&mli_head);
3653
3654         mli_size = sizeof (struct mld_ifinfo);
3655         mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3656             0, MLI_ZONE_NAME);
3657         if (mli_zone == NULL) {
3658                 panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3659                 /* NOTREACHED */
3660         }
3661         zone_change(mli_zone, Z_EXPAND, TRUE);
3662         zone_change(mli_zone, Z_CALLERACCT, FALSE);
3663 }