bsd/netinet6/mld6.c

   1 /*
   2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*-
  29  * Copyright (c) 2009 Bruce Simpson.
  30  *
  31  * Redistribution and use in source and binary forms, with or without
  32  * modification, are permitted provided that the following conditions
  33  * are met:
  34  * 1. Redistributions of source code must retain the above copyright
  35  *    notice, this list of conditions and the following disclaimer.
  36  * 2. Redistributions in binary form must reproduce the above copyright
  37  *    notice, this list of conditions and the following disclaimer in the
  38  *    documentation and/or other materials provided with the distribution.
  39  * 3. The name of the author may not be used to endorse or promote
  40  *    products derived from this software without specific prior written
  41  *    permission.
  42  *
  43  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  53  * SUCH DAMAGE.
  54  */
  55
  56 /*
  57  * Copyright (c) 1988 Stephen Deering.
  58  * Copyright (c) 1992, 1993
  59  *      The Regents of the University of California.  All rights reserved.
  60  *
  61  * This code is derived from software contributed to Berkeley by
  62  * Stephen Deering of Stanford University.
  63  *
  64  * Redistribution and use in source and binary forms, with or without
  65  * modification, are permitted provided that the following conditions
  66  * are met:
  67  * 1. Redistributions of source code must retain the above copyright
  68  *    notice, this list of conditions and the following disclaimer.
  69  * 2. Redistributions in binary form must reproduce the above copyright
  70  *    notice, this list of conditions and the following disclaimer in the
  71  *    documentation and/or other materials provided with the distribution.
  72  * 3. All advertising materials mentioning features or use of this software
  73  *    must display the following acknowledgement:
  74  *      This product includes software developed by the University of
  75  *      California, Berkeley and its contributors.
  76  * 4. Neither the name of the University nor the names of its contributors
  77  *    may be used to endorse or promote products derived from this software
  78  *    without specific prior written permission.
  79  *
  80  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  81  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  82  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  83  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  84  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  85  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  86  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  87  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  88  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  89  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  90  * SUCH DAMAGE.
  91  *
  92  *      @(#)igmp.c      8.1 (Berkeley) 7/19/93
  93  */
  94 /*
  95  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
  96  * support for mandatory and extensible security protections.  This notice
  97  * is included in support of clause 2.2 (b) of the Apple Public License,
  98  * Version 2.0.
  99  */
 100
 101 #include <sys/cdefs.h>
 102
 103 #include <sys/param.h>
 104 #include <sys/systm.h>
 105 #include <sys/mbuf.h>
 106 #include <sys/socket.h>
 107 #include <sys/protosw.h>
 108 #include <sys/sysctl.h>
 109 #include <sys/kernel.h>
 110 #include <sys/malloc.h>
 111 #include <sys/mcache.h>
 112
 113 #include <dev/random/randomdev.h>
 114
 115 #include <kern/zalloc.h>
 116
 117 #include <net/if.h>
 118 #include <net/route.h>
 119
 120 #include <netinet/in.h>
 121 #include <netinet/in_var.h>
 122 #include <netinet6/in6_var.h>
 123 #include <netinet/ip6.h>
 124 #include <netinet6/ip6_var.h>
 125 #include <netinet6/scope6_var.h>
 126 #include <netinet/icmp6.h>
 127 #include <netinet6/mld6.h>
 128 #include <netinet6/mld6_var.h>
 129
 130 /* Lock group and attribute for mld_mtx */
 131 static lck_attr_t       *mld_mtx_attr;
 132 static lck_grp_t        *mld_mtx_grp;
 133 static lck_grp_attr_t   *mld_mtx_grp_attr;
 134
 135 /*
 136  * Locking and reference counting:
 137  *
 138  * mld_mtx mainly protects mli_head.  In cases where both mld_mtx and
 139  * in6_multihead_lock must be held, the former must be acquired first in order
 140  * to maintain lock ordering.  It is not a requirement that mld_mtx be
 141  * acquired first before in6_multihead_lock, but in case both must be acquired
 142  * in succession, the correct lock ordering must be followed.
 143  *
 144  * Instead of walking the if_multiaddrs list at the interface and returning
 145  * the ifma_protospec value of a matching entry, we search the global list
 146  * of in6_multi records and find it that way; this is done with in6_multihead
 147  * lock held.  Doing so avoids the race condition issues that many other BSDs
 148  * suffer from (therefore in our implementation, ifma_protospec will never be
 149  * NULL for as long as the in6_multi is valid.)
 150  *
 151  * The above creates a requirement for the in6_multi to stay in in6_multihead
 * list even after the final MLD leave (in MLDv2 mode) until it no longer
 * needs to be retransmitted (this is not required for MLDv1).  In order to
 * handle this, the request and reference counts of the in6_multi are bumped
 * up when the state changes to MLD_LEAVING_MEMBER, and later dropped in the
 * timeout handler.  Each in6_multi holds a reference to the underlying
 * mld_ifinfo.
 157  *
 158  * Thus, the permitted lock order is:
 159  *
 160  *      mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
 161  *
 162  * Any may be taken independently, but if any are held at the same time,
 163  * the above lock order must be followed.
 164  */
 165 static decl_lck_mtx_data(, mld_mtx);
 166
 167 SLIST_HEAD(mld_in6m_relhead, in6_multi);
 168
 169 static void     mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
 170 static struct mld_ifinfo *mli_alloc(int);
 171 static void     mli_free(struct mld_ifinfo *);
 172 static void     mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
 173 static void     mld_dispatch_packet(struct mbuf *);
 174 static void     mld_final_leave(struct in6_multi *, struct mld_ifinfo *,
 175                     struct mld_tparams *);
 176 static int      mld_handle_state_change(struct in6_multi *, struct mld_ifinfo *,
 177                     struct mld_tparams *);
 178 static int      mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
 179                     struct mld_tparams *, const int);
 180 #ifdef MLD_DEBUG
 181 static const char *     mld_rec_type_to_str(const int);
 182 #endif
 183 static uint32_t mld_set_version(struct mld_ifinfo *, const int);
 184 static void     mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
 185 static void     mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
 186 static int      mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
 187                     /*const*/ struct mld_hdr *);
 188 static int      mld_v1_input_report(struct ifnet *, struct mbuf *,
 189                     const struct ip6_hdr *, /*const*/ struct mld_hdr *);
 190 static void     mld_v1_process_group_timer(struct in6_multi *, const int);
 191 static void     mld_v1_process_querier_timers(struct mld_ifinfo *);
 192 static int      mld_v1_transmit_report(struct in6_multi *, const int);
 193 static uint32_t mld_v1_update_group(struct in6_multi *, const int);
 194 static void     mld_v2_cancel_link_timers(struct mld_ifinfo *);
 195 static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
 196 static struct mbuf *
 197                 mld_v2_encap_report(struct ifnet *, struct mbuf *);
 198 static int      mld_v2_enqueue_filter_change(struct ifqueue *,
 199                     struct in6_multi *);
 200 static int      mld_v2_enqueue_group_record(struct ifqueue *,
 201                     struct in6_multi *, const int, const int, const int,
 202                     const int);
 203 static int      mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
 204                     struct mbuf *, const int, const int);
 205 static int      mld_v2_merge_state_changes(struct in6_multi *,
 206                     struct ifqueue *);
 207 static void     mld_v2_process_group_timers(struct mld_ifinfo *,
 208                     struct ifqueue *, struct ifqueue *,
 209                     struct in6_multi *, const int);
 210 static int      mld_v2_process_group_query(struct in6_multi *,
 211                     int, struct mbuf *, const int);
 212 static int      sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
 213 static int      sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
 214 static int      sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;
 215
 216 static int mld_timeout_run;             /* MLD timer is scheduled to run */
 217 static void mld_timeout(void *);
 218 static void mld_sched_timeout(void);
 219
 220 /*
 221  * Normative references: RFC 2710, RFC 3590, RFC 3810.
 222  */
 223 static struct timeval mld_gsrdelay = {10, 0};
 224 static LIST_HEAD(, mld_ifinfo) mli_head;
 225
 226 static int querier_present_timers_running6;
 227 static int interface_timers_running6;
 228 static int state_change_timers_running6;
 229 static int current_state_timers_running6;
 230
 231 /*
 232  * Subsystem lock macros.
 233  */
 234 #define MLD_LOCK()                      \
 235         lck_mtx_lock(&mld_mtx)
 236 #define MLD_LOCK_ASSERT_HELD()          \
 237         lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
 238 #define MLD_LOCK_ASSERT_NOTHELD()       \
 239         lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
 240 #define MLD_UNLOCK()                    \
 241         lck_mtx_unlock(&mld_mtx)
 242
 243 #define MLD_ADD_DETACHED_IN6M(_head, _in6m) {                           \
 244         SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);                     \
 245 }
 246
 247 #define MLD_REMOVE_DETACHED_IN6M(_head) {                               \
 248         struct in6_multi *_in6m, *_inm_tmp;                             \
 249         SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) {         \
 250                 SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle);       \
 251                 IN6M_REMREF(_in6m);                                     \
 252         }                                                               \
 253         VERIFY(SLIST_EMPTY(_head));                                     \
 254 }
 255
 256 #define MLI_ZONE_MAX            64              /* maximum elements in zone */
 257 #define MLI_ZONE_NAME           "mld_ifinfo"    /* zone name */
 258
 259 static unsigned int mli_size;                   /* size of zone element */
 260 static struct zone *mli_zone;                   /* zone for mld_ifinfo */
 261
 262 SYSCTL_DECL(_net_inet6);        /* Note: Not in any common header. */
 263
 264 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
 265     "IPv6 Multicast Listener Discovery");
 266 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
 267     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 268     &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
 269     "Rate limit for MLDv2 Group-and-Source queries in seconds");
 270
 271 SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
 272    sysctl_mld_ifinfo, "Per-interface MLDv2 state");
 273
 274 static int      mld_v1enable = 1;
 275 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
 276     &mld_v1enable, 0, "Enable fallback to MLDv1");
 277
 278 static int      mld_v2enable = 1;
 279 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
 280     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
 281     &mld_v2enable, 0, sysctl_mld_v2enable, "I",
 282     "Enable MLDv2 (debug purposes only)");
 283
 284 static int      mld_use_allow = 1;
 285 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
 286     &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
 287
 288 #ifdef MLD_DEBUG
 289 int mld_debug = 0;
 290 SYSCTL_INT(_net_inet6_mld, OID_AUTO,
 291         debug, CTLFLAG_RW | CTLFLAG_LOCKED,     &mld_debug, 0, "");
 292 #endif
 293 /*
 294  * Packed Router Alert option structure declaration.
 295  */
 296 struct mld_raopt {
 297         struct ip6_hbh          hbh;
 298         struct ip6_opt          pad;
 299         struct ip6_opt_router   ra;
 300 } __packed;
 301
 302 /*
 303  * Router Alert hop-by-hop option header.
 304  */
 305 static struct mld_raopt mld_ra = {
 306         .hbh = { 0, 0 },
 307         .pad = { .ip6o_type = IP6OPT_PADN, 0 },
 308         .ra = {
 309             .ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
 310             .ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
 311             .ip6or_value =  {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
 312                 (IP6OPT_RTALERT_MLD & 0xFF) }
 313         }
 314 };
 315 static struct ip6_pktopts mld_po;
 316
 317 /* Store MLDv2 record count in the module private scratch space */
 318 #define vt_nrecs        pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
 319
 320 static __inline void
 321 mld_save_context(struct mbuf *m, struct ifnet *ifp)
 322 {
 323         m->m_pkthdr.rcvif = ifp;
 324 }
 325
 326 static __inline void
 327 mld_scrub_context(struct mbuf *m)
 328 {
 329         m->m_pkthdr.rcvif = NULL;
 330 }
 331
 332 /*
 333  * Restore context from a queued output chain.
 334  * Return saved ifp.
 335  */
 336 static __inline struct ifnet *
 337 mld_restore_context(struct mbuf *m)
 338 {
 339         return (m->m_pkthdr.rcvif);
 340 }
 341
 342 /*
 343  * Retrieve or set threshold between group-source queries in seconds.
 344  */
 345 static int
 346 sysctl_mld_gsr SYSCTL_HANDLER_ARGS
 347 {
 348 #pragma unused(arg1, arg2)
 349         int error;
 350         int i;
 351
 352         MLD_LOCK();
 353
 354         i = mld_gsrdelay.tv_sec;
 355
 356         error = sysctl_handle_int(oidp, &i, 0, req);
 357         if (error || !req->newptr)
 358                 goto out_locked;
 359
 360         if (i < -1 || i >= 60) {
 361                 error = EINVAL;
 362                 goto out_locked;
 363         }
 364
 365         mld_gsrdelay.tv_sec = i;
 366
 367 out_locked:
 368         MLD_UNLOCK();
 369         return (error);
 370 }
 371 /*
 372  * Expose struct mld_ifinfo to userland, keyed by ifindex.
 373  * For use by ifmcstat(8).
 374  *
 375  */
 376 static int
 377 sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
 378 {
 379 #pragma unused(oidp)
 380         int                     *name;
 381         int                      error;
 382         u_int                    namelen;
 383         struct ifnet            *ifp;
 384         struct mld_ifinfo       *mli;
 385         struct mld_ifinfo_u     mli_u;
 386
 387         name = (int *)arg1;
 388         namelen = arg2;
 389
 390         if (req->newptr != USER_ADDR_NULL)
 391                 return (EPERM);
 392
 393         if (namelen != 1)
 394                 return (EINVAL);
 395
 396         MLD_LOCK();
 397
 398         if (name[0] <= 0 || name[0] > (u_int)if_index) {
 399                 error = ENOENT;
 400                 goto out_locked;
 401         }
 402
 403         error = ENOENT;
 404
 405         ifnet_head_lock_shared();
 406         ifp = ifindex2ifnet[name[0]];
 407         ifnet_head_done();
 408         if (ifp == NULL)
 409                 goto out_locked;
 410
 411         bzero(&mli_u, sizeof (mli_u));
 412
 413         LIST_FOREACH(mli, &mli_head, mli_link) {
 414                 MLI_LOCK(mli);
 415                 if (ifp != mli->mli_ifp) {
 416                         MLI_UNLOCK(mli);
 417                         continue;
 418                 }
 419
 420                 mli_u.mli_ifindex = mli->mli_ifp->if_index;
 421                 mli_u.mli_version = mli->mli_version;
 422                 mli_u.mli_v1_timer = mli->mli_v1_timer;
 423                 mli_u.mli_v2_timer = mli->mli_v2_timer;
 424                 mli_u.mli_flags = mli->mli_flags;
 425                 mli_u.mli_rv = mli->mli_rv;
 426                 mli_u.mli_qi = mli->mli_qi;
 427                 mli_u.mli_qri = mli->mli_qri;
 428                 mli_u.mli_uri = mli->mli_uri;
 429                 MLI_UNLOCK(mli);
 430
 431                 error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
 432                 break;
 433         }
 434
 435 out_locked:
 436         MLD_UNLOCK();
 437         return (error);
 438 }
 439
 440 static int
 441 sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
 442 {
 443 #pragma unused(arg1, arg2)
 444         int error;
 445         int i;
 446         struct mld_ifinfo *mli;
 447         struct mld_tparams mtp = { 0, 0, 0, 0 };
 448
 449         MLD_LOCK();
 450
 451         i = mld_v2enable;
 452
 453         error = sysctl_handle_int(oidp, &i, 0, req);
 454         if (error || !req->newptr)
 455                 goto out_locked;
 456
 457         if (i < 0 || i > 1) {
 458                 error = EINVAL;
 459                 goto out_locked;
 460         }
 461
 462         mld_v2enable = i;
 463         /*
 464          * If we enabled v2, the state transition will take care of upgrading
 465          * the MLD version back to v2. Otherwise, we have to explicitly
 466          * downgrade. Note that this functionality is to be used for debugging.
 467          */
 468         if (mld_v2enable == 1)
 469                 goto out_locked;
 470
 471         LIST_FOREACH(mli, &mli_head, mli_link) {
 472                 MLI_LOCK(mli);
 473                 if (mld_set_version(mli, MLD_VERSION_1) > 0)
 474                         mtp.qpt = 1;
 475                 MLI_UNLOCK(mli);
 476         }
 477
 478 out_locked:
 479         MLD_UNLOCK();
 480
 481         mld_set_timeout(&mtp);
 482
 483         return (error);
 484 }
 485
 486 /*
 487  * Dispatch an entire queue of pending packet chains.
 488  *
 489  * Must not be called with in6m_lock held.
 490  */
 491 static void
 492 mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
 493 {
 494         struct mbuf *m;
 495
 496         if (mli != NULL)
 497                 MLI_LOCK_ASSERT_HELD(mli);
 498
 499         for (;;) {
 500                 IF_DEQUEUE(ifq, m);
 501                 if (m == NULL)
 502                         break;
 503                 MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
 504                     (uint64_t)VM_KERNEL_ADDRPERM(ifq),
 505                     (uint64_t)VM_KERNEL_ADDRPERM(m)));
 506                 if (mli != NULL)
 507                         MLI_UNLOCK(mli);
 508                 mld_dispatch_packet(m);
 509                 if (mli != NULL)
 510                         MLI_LOCK(mli);
 511                 if (--limit == 0)
 512                         break;
 513         }
 514
 515         if (mli != NULL)
 516                 MLI_LOCK_ASSERT_HELD(mli);
 517 }
 518
 519 /*
 520  * Filter outgoing MLD report state by group.
 521  *
 522  * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 523  * and node-local addresses. However, kernel and socket consumers
 524  * always embed the KAME scope ID in the address provided, so strip it
 525  * when performing comparison.
 526  * Note: This is not the same as the *multicast* scope.
 527  *
 528  * Return zero if the given group is one for which MLD reports
 529  * should be suppressed, or non-zero if reports should be issued.
 530  */
 531 static __inline__ int
 532 mld_is_addr_reported(const struct in6_addr *addr)
 533 {
 534
 535         VERIFY(IN6_IS_ADDR_MULTICAST(addr));
 536
 537         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
 538                 return (0);
 539
 540         if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
 541                 struct in6_addr tmp = *addr;
 542                 in6_clearscope(&tmp);
 543                 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
 544                         return (0);
 545         }
 546
 547         return (1);
 548 }
 549
 550 /*
 551  * Attach MLD when PF_INET6 is attached to an interface.
 552  */
 553 struct mld_ifinfo *
 554 mld_domifattach(struct ifnet *ifp, int how)
 555 {
 556         struct mld_ifinfo *mli;
 557
 558         MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
 559             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 560
 561         mli = mli_alloc(how);
 562         if (mli == NULL)
 563                 return (NULL);
 564
 565         MLD_LOCK();
 566
 567         MLI_LOCK(mli);
 568         mli_initvar(mli, ifp, 0);
 569         mli->mli_debug |= IFD_ATTACHED;
 570         MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
 571         MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
 572         MLI_UNLOCK(mli);
 573         ifnet_lock_shared(ifp);
 574         mld6_initsilent(ifp, mli);
 575         ifnet_lock_done(ifp);
 576
 577         LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 578
 579         MLD_UNLOCK();
 580
 581         MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
 582             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 583
 584         return (mli);
 585 }
 586
 587 /*
 588  * Attach MLD when PF_INET6 is reattached to an interface.  Caller is
 589  * expected to have an outstanding reference to the mli.
 590  */
 591 void
 592 mld_domifreattach(struct mld_ifinfo *mli)
 593 {
 594         struct ifnet *ifp;
 595
 596         MLD_LOCK();
 597
 598         MLI_LOCK(mli);
 599         VERIFY(!(mli->mli_debug & IFD_ATTACHED));
 600         ifp = mli->mli_ifp;
 601         VERIFY(ifp != NULL);
 602         mli_initvar(mli, ifp, 1);
 603         mli->mli_debug |= IFD_ATTACHED;
 604         MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
 605         MLI_UNLOCK(mli);
 606         ifnet_lock_shared(ifp);
 607         mld6_initsilent(ifp, mli);
 608         ifnet_lock_done(ifp);
 609
 610         LIST_INSERT_HEAD(&mli_head, mli, mli_link);
 611
 612         MLD_UNLOCK();
 613
 614         MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
 615             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 616 }
 617
 618 /*
 619  * Hook for domifdetach.
 620  */
 621 void
 622 mld_domifdetach(struct ifnet *ifp)
 623 {
 624         SLIST_HEAD(, in6_multi) in6m_dthead;
 625
 626         SLIST_INIT(&in6m_dthead);
 627
 628         MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
 629             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 630
 631         MLD_LOCK();
 632         mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
 633         MLD_UNLOCK();
 634
 635         /* Now that we're dropped all locks, release detached records */
 636         MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 637 }
 638
 639 /*
 640  * Called at interface detach time.  Note that we only flush all deferred
 641  * responses and record releases; all remaining inm records and their source
 642  * entries related to this interface are left intact, in order to handle
 643  * the reattach case.
 644  */
 645 static void
 646 mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
 647 {
 648         struct mld_ifinfo *mli, *tmli;
 649
 650         MLD_LOCK_ASSERT_HELD();
 651
 652         LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
 653                 MLI_LOCK(mli);
 654                 if (mli->mli_ifp == ifp) {
 655                         /*
 656                          * Free deferred General Query responses.
 657                          */
 658                         IF_DRAIN(&mli->mli_gq);
 659                         IF_DRAIN(&mli->mli_v1q);
 660                         mld_flush_relq(mli, in6m_dthead);
 661                         VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
 662                         mli->mli_debug &= ~IFD_ATTACHED;
 663                         MLI_UNLOCK(mli);
 664
 665                         LIST_REMOVE(mli, mli_link);
 666                         MLI_REMREF(mli); /* release mli_head reference */
 667                         return;
 668                 }
 669                 MLI_UNLOCK(mli);
 670         }
 671         panic("%s: mld_ifinfo not found for ifp %p(%s)\n", __func__,
 672             ifp, ifp->if_xname);
 673 }
 674
 675 __private_extern__ void
 676 mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
 677 {
 678         ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
 679
 680         MLI_LOCK_ASSERT_NOTHELD(mli);
 681         MLI_LOCK(mli);
 682         if (!(ifp->if_flags & IFF_MULTICAST) &&
 683             (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)))
 684                 mli->mli_flags |= MLIF_SILENT;
 685         else
 686                 mli->mli_flags &= ~MLIF_SILENT;
 687         MLI_UNLOCK(mli);
 688 }
 689
 690 static void
 691 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
 692 {
 693         MLI_LOCK_ASSERT_HELD(mli);
 694
 695         mli->mli_ifp = ifp;
 696         if (mld_v2enable)
 697                 mli->mli_version = MLD_VERSION_2;
 698         else
 699                 mli->mli_version = MLD_VERSION_1;
 700         mli->mli_flags = 0;
 701         mli->mli_rv = MLD_RV_INIT;
 702         mli->mli_qi = MLD_QI_INIT;
 703         mli->mli_qri = MLD_QRI_INIT;
 704         mli->mli_uri = MLD_URI_INIT;
 705
 706         if (mld_use_allow)
 707                 mli->mli_flags |= MLIF_USEALLOW;
 708         if (!reattach)
 709                 SLIST_INIT(&mli->mli_relinmhead);
 710
 711         /*
 712          * Responses to general queries are subject to bounds.
 713          */
 714         mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
 715         mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
 716 }
 717
 718 static struct mld_ifinfo *
 719 mli_alloc(int how)
 720 {
 721         struct mld_ifinfo *mli;
 722
 723         mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
 724         if (mli != NULL) {
 725                 bzero(mli, mli_size);
 726                 lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
 727                 mli->mli_debug |= IFD_ALLOC;
 728         }
 729         return (mli);
 730 }
 731
 732 static void
 733 mli_free(struct mld_ifinfo *mli)
 734 {
 735         MLI_LOCK(mli);
 736         if (mli->mli_debug & IFD_ATTACHED) {
 737                 panic("%s: attached mli=%p is being freed", __func__, mli);
 738                 /* NOTREACHED */
 739         } else if (mli->mli_ifp != NULL) {
 740                 panic("%s: ifp not NULL for mli=%p", __func__, mli);
 741                 /* NOTREACHED */
 742         } else if (!(mli->mli_debug & IFD_ALLOC)) {
 743                 panic("%s: mli %p cannot be freed", __func__, mli);
 744                 /* NOTREACHED */
 745         } else if (mli->mli_refcnt != 0) {
 746                 panic("%s: non-zero refcnt mli=%p", __func__, mli);
 747                 /* NOTREACHED */
 748         }
 749         mli->mli_debug &= ~IFD_ALLOC;
 750         MLI_UNLOCK(mli);
 751
 752         lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
 753         zfree(mli_zone, mli);
 754 }
 755
 756 void
 757 mli_addref(struct mld_ifinfo *mli, int locked)
 758 {
 759         if (!locked)
 760                 MLI_LOCK_SPIN(mli);
 761         else
 762                 MLI_LOCK_ASSERT_HELD(mli);
 763
 764         if (++mli->mli_refcnt == 0) {
 765                 panic("%s: mli=%p wraparound refcnt", __func__, mli);
 766                 /* NOTREACHED */
 767         }
 768         if (!locked)
 769                 MLI_UNLOCK(mli);
 770 }
 771
 772 void
 773 mli_remref(struct mld_ifinfo *mli)
 774 {
 775         SLIST_HEAD(, in6_multi) in6m_dthead;
 776         struct ifnet *ifp;
 777
 778         MLI_LOCK_SPIN(mli);
 779
 780         if (mli->mli_refcnt == 0) {
 781                 panic("%s: mli=%p negative refcnt", __func__, mli);
 782                 /* NOTREACHED */
 783         }
 784
 785         --mli->mli_refcnt;
 786         if (mli->mli_refcnt > 0) {
 787                 MLI_UNLOCK(mli);
 788                 return;
 789         }
 790
 791         ifp = mli->mli_ifp;
 792         mli->mli_ifp = NULL;
 793         IF_DRAIN(&mli->mli_gq);
 794         IF_DRAIN(&mli->mli_v1q);
 795         SLIST_INIT(&in6m_dthead);
 796         mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
 797         VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
 798         MLI_UNLOCK(mli);
 799
 800         /* Now that we're dropped all locks, release detached records */
 801         MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
 802
 803         MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
 804             __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 805
 806         mli_free(mli);
 807 }
 808
 809 /*
 810  * Process a received MLDv1 general or address-specific query.
 811  * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 812  *
 813  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 814  * mld_addr. This is OK as we own the mbuf chain.
 815  */
 816 static int
 817 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
 818     /*const*/ struct mld_hdr *mld)
 819 {
 820         struct mld_ifinfo       *mli;
 821         struct in6_multi        *inm;
 822         int                      err = 0, is_general_query;
 823         uint16_t                 timer;
 824         struct mld_tparams       mtp = { 0, 0, 0, 0 };
 825
 826         MLD_LOCK_ASSERT_NOTHELD();
 827
 828         is_general_query = 0;
 829
 830         if (!mld_v1enable) {
 831                 MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
 832                     __func__, ip6_sprintf(&mld->mld_addr),
 833                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 834                 goto done;
 835         }
 836
 837         /*
 838          * RFC3810 Section 6.2: MLD queries must originate from
 839          * a router's link-local address.
 840          */
 841         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
 842                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
 843                     __func__, ip6_sprintf(&ip6->ip6_src),
 844                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 845                 goto done;
 846         }
 847
 848         /*
 849          * Do address field validation upfront before we accept
 850          * the query.
 851          */
 852         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
 853                 /*
 854                  * MLDv1 General Query.
 855                  * If this was not sent to the all-nodes group, ignore it.
 856                  */
 857                 struct in6_addr          dst;
 858
 859                 dst = ip6->ip6_dst;
 860                 in6_clearscope(&dst);
 861                 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
 862                         err = EINVAL;
 863                         goto done;
 864                 }
 865                 is_general_query = 1;
 866         } else {
 867                 /*
 868                  * Embed scope ID of receiving interface in MLD query for
 869                  * lookup whilst we don't hold other locks.
 870                  */
 871                 in6_setscope(&mld->mld_addr, ifp, NULL);
 872         }
 873
 874         /*
 875          * Switch to MLDv1 host compatibility mode.
 876          */
 877         mli = MLD_IFINFO(ifp);
 878         VERIFY(mli != NULL);
 879
 880         MLI_LOCK(mli);
 881         mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
 882         MLI_UNLOCK(mli);
 883
 884         timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
 885         if (timer == 0)
 886                 timer = 1;
 887
 888         if (is_general_query) {
 889                 struct in6_multistep step;
 890
 891                 MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
 892                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 893                 /*
 894                  * For each reporting group joined on this
 895                  * interface, kick the report timer.
 896                  */
 897                 in6_multihead_lock_shared();
 898                 IN6_FIRST_MULTI(step, inm);
 899                 while (inm != NULL) {
 900                         IN6M_LOCK(inm);
 901                         if (inm->in6m_ifp == ifp)
 902                                 mtp.cst += mld_v1_update_group(inm, timer);
 903                         IN6M_UNLOCK(inm);
 904                         IN6_NEXT_MULTI(step, inm);
 905                 }
 906                 in6_multihead_lock_done();
 907         } else {
 908                 /*
 909                  * MLDv1 Group-Specific Query.
 910                  * If this is a group-specific MLDv1 query, we need only
 911                  * look up the single group to process it.
 912                  */
 913                 in6_multihead_lock_shared();
 914                 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
 915                 in6_multihead_lock_done();
 916
 917                 if (inm != NULL) {
 918                         IN6M_LOCK(inm);
 919                         MLD_PRINTF(("%s: process v1 query %s on "
 920                             "ifp 0x%llx(%s)\n", __func__,
 921                             ip6_sprintf(&mld->mld_addr),
 922                             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
 923                         mtp.cst = mld_v1_update_group(inm, timer);
 924                         IN6M_UNLOCK(inm);
 925                         IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
 926                 }
 927                 /* XXX Clear embedded scope ID as userland won't expect it. */
 928                 in6_clearscope(&mld->mld_addr);
 929         }
 930 done:
 931         mld_set_timeout(&mtp);
 932
 933         return (err);
 934 }
 935
 936 /*
 937  * Update the report timer on a group in response to an MLDv1 query.
 938  *
 939  * If we are becoming the reporting member for this group, start the timer.
 940  * If we already are the reporting member for this group, and timer is
 941  * below the threshold, reset it.
 942  *
 943  * We may be updating the group for the first time since we switched
 944  * to MLDv2. If we are, then we must clear any recorded source lists,
 945  * and transition to REPORTING state; the group timer is overloaded
 946  * for group and group-source query responses.
 947  *
 948  * Unlike MLDv2, the delay per group should be jittered
 949  * to avoid bursts of MLDv1 reports.
 950  */
 951 static uint32_t
 952 mld_v1_update_group(struct in6_multi *inm, const int timer)
 953 {
 954         IN6M_LOCK_ASSERT_HELD(inm);
 955
 956         MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
 957             ip6_sprintf(&inm->in6m_addr),
 958             if_name(inm->in6m_ifp), timer));
 959
 960         switch (inm->in6m_state) {
 961         case MLD_NOT_MEMBER:
 962         case MLD_SILENT_MEMBER:
 963                 break;
 964         case MLD_REPORTING_MEMBER:
 965                 if (inm->in6m_timer != 0 &&
 966                     inm->in6m_timer <= timer) {
 967                         MLD_PRINTF(("%s: REPORTING and timer running, "
 968                             "skipping.\n", __func__));
 969                         break;
 970                 }
 971                 /* FALLTHROUGH */
 972         case MLD_SG_QUERY_PENDING_MEMBER:
 973         case MLD_G_QUERY_PENDING_MEMBER:
 974         case MLD_IDLE_MEMBER:
 975         case MLD_LAZY_MEMBER:
 976         case MLD_AWAKENING_MEMBER:
 977                 MLD_PRINTF(("%s: ->REPORTING\n", __func__));
 978                 inm->in6m_state = MLD_REPORTING_MEMBER;
 979                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
 980                 break;
 981         case MLD_SLEEPING_MEMBER:
 982                 MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
 983                 inm->in6m_state = MLD_AWAKENING_MEMBER;
 984                 break;
 985         case MLD_LEAVING_MEMBER:
 986                 break;
 987         }
 988
 989         return (inm->in6m_timer);
 990 }
 991
 992 /*
 993  * Process a received MLDv2 general, group-specific or
 994  * group-and-source-specific query.
 995  *
 996  * Assumes that the query header has been pulled up to sizeof(mldv2_query).
 997  *
 998  * Return 0 if successful, otherwise an appropriate error code is returned.
 999  */
1000 static int
1001 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
1002     struct mbuf *m, const int off, const int icmp6len)
1003 {
1004         struct mld_ifinfo       *mli;
1005         struct mldv2_query      *mld;
1006         struct in6_multi        *inm;
1007         uint32_t                 maxdelay, nsrc, qqi;
1008         int                      err = 0, is_general_query;
1009         uint16_t                 timer;
1010         uint8_t                  qrv;
1011         struct mld_tparams       mtp = { 0, 0, 0, 0 };
1012
1013         MLD_LOCK_ASSERT_NOTHELD();
1014
1015         is_general_query = 0;
1016
1017         if (!mld_v2enable) {
1018                 MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
1019                     __func__, ip6_sprintf(&ip6->ip6_src),
1020                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1021                 goto done;
1022         }
1023
1024         /*
1025          * RFC3810 Section 6.2: MLD queries must originate from
1026          * a router's link-local address.
1027          */
1028         if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
1029                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
1030                     __func__, ip6_sprintf(&ip6->ip6_src),
1031                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1032                 goto done;
1033         }
1034
1035         MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
1036             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1037
1038         mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
1039
1040         maxdelay = ntohs(mld->mld_maxdelay);    /* in 1/10ths of a second */
1041         if (maxdelay >= 32768) {
1042                 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
1043                            (MLD_MRC_EXP(maxdelay) + 3);
1044         }
1045         timer = maxdelay / MLD_TIMER_SCALE;
1046         if (timer == 0)
1047                 timer = 1;
1048
1049         qrv = MLD_QRV(mld->mld_misc);
1050         if (qrv < 2) {
1051                 MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
1052                     qrv, MLD_RV_INIT));
1053                 qrv = MLD_RV_INIT;
1054         }
1055
1056         qqi = mld->mld_qqi;
1057         if (qqi >= 128) {
1058                 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
1059                      (MLD_QQIC_EXP(mld->mld_qqi) + 3);
1060         }
1061
1062         nsrc = ntohs(mld->mld_numsrc);
1063         if (nsrc > MLD_MAX_GS_SOURCES) {
1064                 err = EMSGSIZE;
1065                 goto done;
1066         }
1067         if (icmp6len < sizeof(struct mldv2_query) +
1068             (nsrc * sizeof(struct in6_addr))) {
1069                 err = EMSGSIZE;
1070                 goto done;
1071         }
1072
1073         /*
1074          * Do further input validation upfront to avoid resetting timers
1075          * should we need to discard this query.
1076          */
1077         if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
1078                 /*
1079                  * A general query with a source list has undefined
1080                  * behaviour; discard it.
1081                  */
1082                 if (nsrc > 0) {
1083                         err = EINVAL;
1084                         goto done;
1085                 }
1086                 is_general_query = 1;
1087         } else {
1088                 /*
1089                  * Embed scope ID of receiving interface in MLD query for
1090                  * lookup whilst we don't hold other locks (due to KAME
1091                  * locking lameness). We own this mbuf chain just now.
1092                  */
1093                 in6_setscope(&mld->mld_addr, ifp, NULL);
1094         }
1095
1096         mli = MLD_IFINFO(ifp);
1097         VERIFY(mli != NULL);
1098
1099         MLI_LOCK(mli);
1100         /*
1101          * Discard the v2 query if we're in Compatibility Mode.
1102          * The RFC is pretty clear that hosts need to stay in MLDv1 mode
1103          * until the Old Version Querier Present timer expires.
1104          */
1105         if (mli->mli_version != MLD_VERSION_2) {
1106                 MLI_UNLOCK(mli);
1107                 goto done;
1108         }
1109
1110         mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
1111         mli->mli_rv = qrv;
1112         mli->mli_qi = qqi;
1113         mli->mli_qri = MAX(timer, MLD_QRI_MIN);
1114
1115         MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
1116             mli->mli_qi, mli->mli_qri));
1117
1118         if (is_general_query) {
1119                 /*
1120                  * MLDv2 General Query.
1121                  *
1122                  * Schedule a current-state report on this ifp for
1123                  * all groups, possibly containing source lists.
1124                  *
1125                  * If there is a pending General Query response
1126                  * scheduled earlier than the selected delay, do
1127                  * not schedule any other reports.
1128                  * Otherwise, reset the interface timer.
1129                  */
1130                 MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
1131                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1132                 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1133                         mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1134                 }
1135                 MLI_UNLOCK(mli);
1136         } else {
1137                 MLI_UNLOCK(mli);
1138                 /*
1139                  * MLDv2 Group-specific or Group-and-source-specific Query.
1140                  *
1141                  * Group-source-specific queries are throttled on
1142                  * a per-group basis to defeat denial-of-service attempts.
1143                  * Queries for groups we are not a member of on this
1144                  * link are simply ignored.
1145                  */
1146                 in6_multihead_lock_shared();
1147                 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1148                 in6_multihead_lock_done();
1149                 if (inm == NULL)
1150                         goto done;
1151
1152                 IN6M_LOCK(inm);
1153                 if (nsrc > 0) {
1154                         if (!ratecheck(&inm->in6m_lastgsrtv,
1155                             &mld_gsrdelay)) {
1156                                 MLD_PRINTF(("%s: GS query throttled.\n",
1157                                     __func__));
1158                                 IN6M_UNLOCK(inm);
1159                                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1160                                 goto done;
1161                         }
1162                 }
1163                 MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
1164                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1165                 /*
1166                  * If there is a pending General Query response
1167                  * scheduled sooner than the selected delay, no
1168                  * further report need be scheduled.
1169                  * Otherwise, prepare to respond to the
1170                  * group-specific or group-and-source query.
1171                  */
1172                 MLI_LOCK(mli);
1173                 mtp.it = mli->mli_v2_timer;
1174                 MLI_UNLOCK(mli);
1175                 if (mtp.it == 0 || mtp.it >= timer) {
1176                         (void) mld_v2_process_group_query(inm, timer, m, off);
1177                         mtp.cst = inm->in6m_timer;
1178                 }
1179                 IN6M_UNLOCK(inm);
1180                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1181                 /* XXX Clear embedded scope ID as userland won't expect it. */
1182                 in6_clearscope(&mld->mld_addr);
1183         }
1184 done:
1185         if (mtp.it > 0) {
1186                 MLD_PRINTF(("%s: v2 general query response scheduled in "
1187                     "T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
1188                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1189         }
1190         mld_set_timeout(&mtp);
1191
1192         return (err);
1193 }
1194
1195 /*
1196  * Process a recieved MLDv2 group-specific or group-and-source-specific
1197  * query.
1198  * Return <0 if any error occured. Currently this is ignored.
1199  */
1200 static int
1201 mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1202     const int off)
1203 {
1204         struct mldv2_query      *mld;
1205         int                      retval;
1206         uint16_t                 nsrc;
1207
1208         IN6M_LOCK_ASSERT_HELD(inm);
1209
1210         retval = 0;
1211         mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1212
1213         switch (inm->in6m_state) {
1214         case MLD_NOT_MEMBER:
1215         case MLD_SILENT_MEMBER:
1216         case MLD_SLEEPING_MEMBER:
1217         case MLD_LAZY_MEMBER:
1218         case MLD_AWAKENING_MEMBER:
1219         case MLD_IDLE_MEMBER:
1220         case MLD_LEAVING_MEMBER:
1221                 return (retval);
1222                 break;
1223         case MLD_REPORTING_MEMBER:
1224         case MLD_G_QUERY_PENDING_MEMBER:
1225         case MLD_SG_QUERY_PENDING_MEMBER:
1226                 break;
1227         }
1228
1229         nsrc = ntohs(mld->mld_numsrc);
1230
1231         /*
1232          * Deal with group-specific queries upfront.
1233          * If any group query is already pending, purge any recorded
1234          * source-list state if it exists, and schedule a query response
1235          * for this group-specific query.
1236          */
1237         if (nsrc == 0) {
1238                 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1239                     inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1240                         in6m_clear_recorded(inm);
1241                         timer = min(inm->in6m_timer, timer);
1242                 }
1243                 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1244                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1245                 return (retval);
1246         }
1247
1248         /*
1249          * Deal with the case where a group-and-source-specific query has
1250          * been received but a group-specific query is already pending.
1251          */
1252         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1253                 timer = min(inm->in6m_timer, timer);
1254                 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1255                 return (retval);
1256         }
1257
1258         /*
1259          * Finally, deal with the case where a group-and-source-specific
1260          * query has been received, where a response to a previous g-s-r
1261          * query exists, or none exists.
1262          * In this case, we need to parse the source-list which the Querier
1263          * has provided us with and check if we have any source list filter
1264          * entries at T1 for these sources. If we do not, there is no need
1265          * schedule a report and the query may be dropped.
1266          * If we do, we must record them and schedule a current-state
1267          * report for those sources.
1268          */
1269         if (inm->in6m_nsrc > 0) {
1270                 struct mbuf             *m;
1271                 uint8_t                 *sp;
1272                 int                      i, nrecorded;
1273                 int                      soff;
1274
1275                 m = m0;
1276                 soff = off + sizeof(struct mldv2_query);
1277                 nrecorded = 0;
1278                 for (i = 0; i < nsrc; i++) {
1279                         sp = mtod(m, uint8_t *) + soff;
1280                         retval = in6m_record_source(inm,
1281                             (const struct in6_addr *)(void *)sp);
1282                         if (retval < 0)
1283                                 break;
1284                         nrecorded += retval;
1285                         soff += sizeof(struct in6_addr);
1286                         if (soff >= m->m_len) {
1287                                 soff = soff - m->m_len;
1288                                 m = m->m_next;
1289                                 if (m == NULL)
1290                                         break;
1291                         }
1292                 }
1293                 if (nrecorded > 0) {
1294                         MLD_PRINTF(( "%s: schedule response to SG query\n",
1295                             __func__));
1296                         inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1297                         inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1298                 }
1299         }
1300
1301         return (retval);
1302 }
1303
1304 /*
1305  * Process a received MLDv1 host membership report.
1306  * Assumes mld points to mld_hdr in pulled up mbuf chain.
1307  *
1308  * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1309  * mld_addr. This is OK as we own the mbuf chain.
1310  */
1311 static int
1312 mld_v1_input_report(struct ifnet *ifp, struct mbuf *m,
1313     const struct ip6_hdr *ip6, /*const*/ struct mld_hdr *mld)
1314 {
1315         struct in6_addr          src, dst;
1316         struct in6_ifaddr       *ia;
1317         struct in6_multi        *inm;
1318
1319         if (!mld_v1enable) {
1320                 MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
1321                     __func__, ip6_sprintf(&mld->mld_addr),
1322                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1323                 return (0);
1324         }
1325
1326         if ((ifp->if_flags & IFF_LOOPBACK) ||
1327             (m->m_pkthdr.pkt_flags & PKTF_LOOP))
1328                 return (0);
1329
1330         /*
1331          * MLDv1 reports must originate from a host's link-local address,
1332          * or the unspecified address (when booting).
1333          */
1334         src = ip6->ip6_src;
1335         in6_clearscope(&src);
1336         if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1337                 MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
1338                     __func__, ip6_sprintf(&ip6->ip6_src),
1339                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1340                 return (EINVAL);
1341         }
1342
1343         /*
1344          * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1345          * group, and must be directed to the group itself.
1346          */
1347         dst = ip6->ip6_dst;
1348         in6_clearscope(&dst);
1349         if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1350             !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1351                 MLD_PRINTF(("%s: ignore v1 query dst %s on ifp 0x%llx(%s)\n",
1352                     __func__, ip6_sprintf(&ip6->ip6_dst),
1353                     (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1354                 return (EINVAL);
1355         }
1356
1357         /*
1358          * Make sure we don't hear our own membership report, as fast
1359          * leave requires knowing that we are the only member of a
1360          * group. Assume we used the link-local address if available,
1361          * otherwise look for ::.
1362          *
1363          * XXX Note that scope ID comparison is needed for the address
1364          * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1365          * performed for the on-wire address.
1366          */
1367         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1368         if (ia != NULL) {
1369                 IFA_LOCK(&ia->ia_ifa);
1370                 if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))){
1371                         IFA_UNLOCK(&ia->ia_ifa);
1372                         IFA_REMREF(&ia->ia_ifa);
1373                         return (0);
1374                 }
1375                 IFA_UNLOCK(&ia->ia_ifa);
1376                 IFA_REMREF(&ia->ia_ifa);
1377         } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1378                 return (0);
1379         }
1380
1381         MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
1382             __func__, ip6_sprintf(&mld->mld_addr),
1383             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1384
1385         /*
1386          * Embed scope ID of receiving interface in MLD query for lookup
1387          * whilst we don't hold other locks (due to KAME locking lameness).
1388          */
1389         if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1390                 in6_setscope(&mld->mld_addr, ifp, NULL);
1391
1392         /*
1393          * MLDv1 report suppression.
1394          * If we are a member of this group, and our membership should be
1395          * reported, and our group timer is pending or about to be reset,
1396          * stop our group timer by transitioning to the 'lazy' state.
1397          */
1398         in6_multihead_lock_shared();
1399         IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1400         in6_multihead_lock_done();
1401
1402         if (inm != NULL) {
1403                 struct mld_ifinfo *mli;
1404
1405                 IN6M_LOCK(inm);
1406                 mli = inm->in6m_mli;
1407                 VERIFY(mli != NULL);
1408
1409                 MLI_LOCK(mli);
1410                 /*
1411                  * If we are in MLDv2 host mode, do not allow the
1412                  * other host's MLDv1 report to suppress our reports.
1413                  */
1414                 if (mli->mli_version == MLD_VERSION_2) {
1415                         MLI_UNLOCK(mli);
1416                         IN6M_UNLOCK(inm);
1417                         IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1418                         goto out;
1419                 }
1420                 MLI_UNLOCK(mli);
1421
1422                 inm->in6m_timer = 0;
1423
1424                 switch (inm->in6m_state) {
1425                 case MLD_NOT_MEMBER:
1426                 case MLD_SILENT_MEMBER:
1427                 case MLD_SLEEPING_MEMBER:
1428                         break;
1429                 case MLD_REPORTING_MEMBER:
1430                 case MLD_IDLE_MEMBER:
1431                 case MLD_AWAKENING_MEMBER:
1432                         MLD_PRINTF(("%s: report suppressed for %s on "
1433                             "ifp 0x%llx(%s)\n", __func__,
1434                             ip6_sprintf(&mld->mld_addr),
1435                             (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
1436                 case MLD_LAZY_MEMBER:
1437                         inm->in6m_state = MLD_LAZY_MEMBER;
1438                         break;
1439                 case MLD_G_QUERY_PENDING_MEMBER:
1440                 case MLD_SG_QUERY_PENDING_MEMBER:
1441                 case MLD_LEAVING_MEMBER:
1442                         break;
1443                 }
1444                 IN6M_UNLOCK(inm);
1445                 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1446         }
1447
1448 out:
1449         /* XXX Clear embedded scope ID as userland won't expect it. */
1450         in6_clearscope(&mld->mld_addr);
1451
1452         return (0);
1453 }
1454
1455 /*
1456  * MLD input path.
1457  *
1458  * Assume query messages which fit in a single ICMPv6 message header
1459  * have been pulled up.
1460  * Assume that userland will want to see the message, even if it
1461  * otherwise fails kernel input validation; do not free it.
1462  * Pullup may however free the mbuf chain m if it fails.
1463  *
1464  * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1465  */
1466 int
1467 mld_input(struct mbuf *m, int off, int icmp6len)
1468 {
1469         struct ifnet    *ifp;
1470         struct ip6_hdr  *ip6;
1471         struct mld_hdr  *mld;
1472         int              mldlen;
1473
1474         MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
1475             (uint64_t)VM_KERNEL_ADDRPERM(m), off));
1476
1477         ifp = m->m_pkthdr.rcvif;
1478
1479         ip6 = mtod(m, struct ip6_hdr *);
1480
1481         /* Pullup to appropriate size. */
1482         mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1483         if (mld->mld_type == MLD_LISTENER_QUERY &&
1484             icmp6len >= sizeof(struct mldv2_query)) {
1485                 mldlen = sizeof(struct mldv2_query);
1486         } else {
1487                 mldlen = sizeof(struct mld_hdr);
1488         }
1489         IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1490         if (mld == NULL) {
1491                 icmp6stat.icp6s_badlen++;
1492                 return (IPPROTO_DONE);
1493         }
1494
1495         /*
1496          * Userland needs to see all of this traffic for implementing
1497          * the endpoint discovery portion of multicast routing.
1498          */
1499         switch (mld->mld_type) {
1500         case MLD_LISTENER_QUERY:
1501                 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1502                 if (icmp6len == sizeof(struct mld_hdr)) {
1503                         if (mld_v1_input_query(ifp, ip6, mld) != 0)
1504                                 return (0);
1505                 } else if (icmp6len >= sizeof(struct mldv2_query)) {
1506                         if (mld_v2_input_query(ifp, ip6, m, off,
1507                             icmp6len) != 0)
1508                                 return (0);
1509                 }
1510                 break;
1511         case MLD_LISTENER_REPORT:
1512                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1513                 if (mld_v1_input_report(ifp, m, ip6, mld) != 0)
1514                         return (0);
1515                 break;
1516         case MLDV2_LISTENER_REPORT:
1517                 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1518                 break;
1519         case MLD_LISTENER_DONE:
1520                 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1521                 break;
1522         default:
1523                 break;
1524         }
1525
1526         return (0);
1527 }
1528
1529 /*
1530  * Schedule MLD timer based on various parameters; caller must ensure that
1531  * lock ordering is maintained as this routine acquires MLD global lock.
1532  */
1533 void
1534 mld_set_timeout(struct mld_tparams *mtp)
1535 {
1536         MLD_LOCK_ASSERT_NOTHELD();
1537         VERIFY(mtp != NULL);
1538
1539         if (mtp->qpt != 0 || mtp->it != 0 || mtp->cst != 0 || mtp->sct != 0) {
1540                 MLD_LOCK();
1541                 if (mtp->qpt != 0)
1542                         querier_present_timers_running6 = 1;
1543                 if (mtp->it != 0)
1544                         interface_timers_running6 = 1;
1545                 if (mtp->cst != 0)
1546                         current_state_timers_running6 = 1;
1547                 if (mtp->sct != 0)
1548                         state_change_timers_running6 = 1;
1549                 mld_sched_timeout();
1550                 MLD_UNLOCK();
1551         }
1552 }
1553
1554 /*
1555  * MLD6 timer handler (per 1 second).
1556  */
1557 static void
1558 mld_timeout(void *arg)
1559 {
1560 #pragma unused(arg)
1561         struct ifqueue           scq;   /* State-change packets */
1562         struct ifqueue           qrq;   /* Query response packets */
1563         struct ifnet            *ifp;
1564         struct mld_ifinfo       *mli;
1565         struct in6_multi        *inm;
1566         int                      uri_sec = 0;
1567         SLIST_HEAD(, in6_multi) in6m_dthead;
1568
1569         SLIST_INIT(&in6m_dthead);
1570
1571         /*
1572          * Update coarse-grained networking timestamp (in sec.); the idea
1573          * is to piggy-back on the timeout callout to update the counter
1574          * returnable via net_uptime().
1575          */
1576         net_update_uptime();
1577
1578         MLD_LOCK();
1579
1580         MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__,
1581             querier_present_timers_running6, interface_timers_running6,
1582             current_state_timers_running6, state_change_timers_running6));
1583
1584         /*
1585          * MLDv1 querier present timer processing.
1586          */
1587         if (querier_present_timers_running6) {
1588                 querier_present_timers_running6 = 0;
1589                 LIST_FOREACH(mli, &mli_head, mli_link) {
1590                         MLI_LOCK(mli);
1591                         mld_v1_process_querier_timers(mli);
1592                         if (mli->mli_v1_timer > 0)
1593                                 querier_present_timers_running6 = 1;
1594                         MLI_UNLOCK(mli);
1595                 }
1596         }
1597
1598         /*
1599          * MLDv2 General Query response timer processing.
1600          */
1601         if (interface_timers_running6) {
1602                 MLD_PRINTF(("%s: interface timers running\n", __func__));
1603                 interface_timers_running6 = 0;
1604                 LIST_FOREACH(mli, &mli_head, mli_link) {
1605                         MLI_LOCK(mli);
1606                         if (mli->mli_v2_timer == 0) {
1607                                 /* Do nothing. */
1608                         } else if (--mli->mli_v2_timer == 0) {
1609                                 if (mld_v2_dispatch_general_query(mli) > 0)
1610                                         interface_timers_running6 = 1;
1611                         } else {
1612                                 interface_timers_running6 = 1;
1613                         }
1614                         MLI_UNLOCK(mli);
1615                 }
1616         }
1617
1618         if (!current_state_timers_running6 &&
1619             !state_change_timers_running6)
1620                 goto out_locked;
1621
1622         current_state_timers_running6 = 0;
1623         state_change_timers_running6 = 0;
1624
1625         MLD_PRINTF(("%s: state change timers running\n", __func__));
1626
1627         memset(&qrq, 0, sizeof(struct ifqueue));
1628         qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1629
1630         memset(&scq, 0, sizeof(struct ifqueue));
1631         scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1632
1633         /*
1634          * MLD host report and state-change timer processing.
1635          * Note: Processing a v2 group timer may remove a node.
1636          */
1637         LIST_FOREACH(mli, &mli_head, mli_link) {
1638                 struct in6_multistep step;
1639
1640                 MLI_LOCK(mli);
1641                 ifp = mli->mli_ifp;
1642                 uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
1643                 MLI_UNLOCK(mli);
1644
1645                 in6_multihead_lock_shared();
1646                 IN6_FIRST_MULTI(step, inm);
1647                 while (inm != NULL) {
1648                         IN6M_LOCK(inm);
1649                         if (inm->in6m_ifp != ifp)
1650                                 goto next;
1651
1652                         MLI_LOCK(mli);
1653                         switch (mli->mli_version) {
1654                         case MLD_VERSION_1:
1655                                 mld_v1_process_group_timer(inm,
1656                                     mli->mli_version);
1657                                 break;
1658                         case MLD_VERSION_2:
1659                                 mld_v2_process_group_timers(mli, &qrq,
1660                                     &scq, inm, uri_sec);
1661                                 break;
1662                         }
1663                         MLI_UNLOCK(mli);
1664 next:
1665                         IN6M_UNLOCK(inm);
1666                         IN6_NEXT_MULTI(step, inm);
1667                 }
1668                 in6_multihead_lock_done();
1669
1670                 MLI_LOCK(mli);
1671                 if (mli->mli_version == MLD_VERSION_1) {
1672                         mld_dispatch_queue(mli, &mli->mli_v1q, 0);
1673                 } else if (mli->mli_version == MLD_VERSION_2) {
1674                         MLI_UNLOCK(mli);
1675                         mld_dispatch_queue(NULL, &qrq, 0);
1676                         mld_dispatch_queue(NULL, &scq, 0);
1677                         VERIFY(qrq.ifq_len == 0);
1678                         VERIFY(scq.ifq_len == 0);
1679                         MLI_LOCK(mli);
1680                 }
1681                 /*
1682                  * In case there are still any pending membership reports
1683                  * which didn't get drained at version change time.
1684                  */
1685                 IF_DRAIN(&mli->mli_v1q);
1686                 /*
1687                  * Release all deferred inm records, and drain any locally
1688                  * enqueued packets; do it even if the current MLD version
1689                  * for the link is no longer MLDv2, in order to handle the
1690                  * version change case.
1691                  */
1692                 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1693                 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1694                 MLI_UNLOCK(mli);
1695
1696                 IF_DRAIN(&qrq);
1697                 IF_DRAIN(&scq);
1698         }
1699
1700 out_locked:
1701         /* re-arm the timer if there's work to do */
1702         mld_timeout_run = 0;
1703         mld_sched_timeout();
1704         MLD_UNLOCK();
1705
1706         /* Now that we're dropped all locks, release detached records */
1707         MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1708 }
1709
1710 static void
1711 mld_sched_timeout(void)
1712 {
1713         MLD_LOCK_ASSERT_HELD();
1714
1715         if (!mld_timeout_run &&
1716             (querier_present_timers_running6 || current_state_timers_running6 ||
1717             interface_timers_running6 || state_change_timers_running6)) {
1718                 mld_timeout_run = 1;
1719                 timeout(mld_timeout, NULL, hz);
1720         }
1721 }
1722
1723 /*
1724  * Free the in6_multi reference(s) for this MLD lifecycle.
1725  *
1726  * Caller must be holding mli_lock.
1727  */
1728 static void
1729 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1730 {
1731         struct in6_multi *inm;
1732
1733 again:
1734         MLI_LOCK_ASSERT_HELD(mli);
1735         inm = SLIST_FIRST(&mli->mli_relinmhead);
1736         if (inm != NULL) {
1737                 int lastref;
1738
1739                 SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1740                 MLI_UNLOCK(mli);
1741
1742                 in6_multihead_lock_exclusive();
1743                 IN6M_LOCK(inm);
1744                 VERIFY(inm->in6m_nrelecnt != 0);
1745                 inm->in6m_nrelecnt--;
1746                 lastref = in6_multi_detach(inm);
1747                 VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1748                     inm->in6m_reqcnt == 0));
1749                 IN6M_UNLOCK(inm);
1750                 in6_multihead_lock_done();
1751                 /* from mli_relinmhead */
1752                 IN6M_REMREF(inm);
1753                 /* from in6_multihead_list */
1754                 if (lastref) {
1755                         /*
1756                          * Defer releasing our final reference, as we
1757                          * are holding the MLD lock at this point, and
1758                          * we could end up with locking issues later on
1759                          * (while issuing SIOCDELMULTI) when this is the
1760                          * final reference count.  Let the caller do it
1761                          * when it is safe.
1762                          */
1763                         MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1764                 }
1765                 MLI_LOCK(mli);
1766                 goto again;
1767         }
1768 }
1769
1770 /*
1771  * Update host report group timer.
1772  * Will update the global pending timer flags.
1773  */
1774 static void
1775 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1776 {
1777 #pragma unused(mld_version)
1778         int report_timer_expired;
1779
1780         MLD_LOCK_ASSERT_HELD();
1781         IN6M_LOCK_ASSERT_HELD(inm);
1782         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1783
1784         if (inm->in6m_timer == 0) {
1785                 report_timer_expired = 0;
1786         } else if (--inm->in6m_timer == 0) {
1787                 report_timer_expired = 1;
1788         } else {
1789                 current_state_timers_running6 = 1;
1790                 /* caller will schedule timer */
1791                 return;
1792         }
1793
1794         switch (inm->in6m_state) {
1795         case MLD_NOT_MEMBER:
1796         case MLD_SILENT_MEMBER:
1797         case MLD_IDLE_MEMBER:
1798         case MLD_LAZY_MEMBER:
1799         case MLD_SLEEPING_MEMBER:
1800         case MLD_AWAKENING_MEMBER:
1801                 break;
1802         case MLD_REPORTING_MEMBER:
1803                 if (report_timer_expired) {
1804                         inm->in6m_state = MLD_IDLE_MEMBER;
1805                         (void) mld_v1_transmit_report(inm,
1806                              MLD_LISTENER_REPORT);
1807                         IN6M_LOCK_ASSERT_HELD(inm);
1808                         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1809                 }
1810                 break;
1811         case MLD_G_QUERY_PENDING_MEMBER:
1812         case MLD_SG_QUERY_PENDING_MEMBER:
1813         case MLD_LEAVING_MEMBER:
1814                 break;
1815         }
1816 }
1817
1818 /*
1819  * Update a group's timers for MLDv2.
1820  * Will update the global pending timer flags.
1821  * Note: Unlocked read from mli.
1822  */
1823 static void
1824 mld_v2_process_group_timers(struct mld_ifinfo *mli,
1825     struct ifqueue *qrq, struct ifqueue *scq,
1826     struct in6_multi *inm, const int uri_sec)
1827 {
1828         int query_response_timer_expired;
1829         int state_change_retransmit_timer_expired;
1830
1831         MLD_LOCK_ASSERT_HELD();
1832         IN6M_LOCK_ASSERT_HELD(inm);
1833         MLI_LOCK_ASSERT_HELD(mli);
1834         VERIFY(mli == inm->in6m_mli);
1835
1836         query_response_timer_expired = 0;
1837         state_change_retransmit_timer_expired = 0;
1838
1839         /*
1840          * During a transition from compatibility mode back to MLDv2,
1841          * a group record in REPORTING state may still have its group
1842          * timer active. This is a no-op in this function; it is easier
1843          * to deal with it here than to complicate the timeout path.
1844          */
1845         if (inm->in6m_timer == 0) {
1846                 query_response_timer_expired = 0;
1847         } else if (--inm->in6m_timer == 0) {
1848                 query_response_timer_expired = 1;
1849         } else {
1850                 current_state_timers_running6 = 1;
1851                 /* caller will schedule timer */
1852         }
1853
1854         if (inm->in6m_sctimer == 0) {
1855                 state_change_retransmit_timer_expired = 0;
1856         } else if (--inm->in6m_sctimer == 0) {
1857                 state_change_retransmit_timer_expired = 1;
1858         } else {
1859                 state_change_timers_running6 = 1;
1860                 /* caller will schedule timer */
1861         }
1862
1863         /* We are in timer callback, so be quick about it. */
1864         if (!state_change_retransmit_timer_expired &&
1865             !query_response_timer_expired)
1866                 return;
1867
1868         switch (inm->in6m_state) {
1869         case MLD_NOT_MEMBER:
1870         case MLD_SILENT_MEMBER:
1871         case MLD_SLEEPING_MEMBER:
1872         case MLD_LAZY_MEMBER:
1873         case MLD_AWAKENING_MEMBER:
1874         case MLD_IDLE_MEMBER:
1875                 break;
1876         case MLD_G_QUERY_PENDING_MEMBER:
1877         case MLD_SG_QUERY_PENDING_MEMBER:
1878                 /*
1879                  * Respond to a previously pending Group-Specific
1880                  * or Group-and-Source-Specific query by enqueueing
1881                  * the appropriate Current-State report for
1882                  * immediate transmission.
1883                  */
1884                 if (query_response_timer_expired) {
1885                         int retval;
1886
1887                         retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1888                             (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
1889                             0);
1890                         MLD_PRINTF(("%s: enqueue record = %d\n",
1891                             __func__, retval));
1892                         inm->in6m_state = MLD_REPORTING_MEMBER;
1893                         in6m_clear_recorded(inm);
1894                 }
1895                 /* FALLTHROUGH */
1896         case MLD_REPORTING_MEMBER:
1897         case MLD_LEAVING_MEMBER:
1898                 if (state_change_retransmit_timer_expired) {
1899                         /*
1900                          * State-change retransmission timer fired.
1901                          * If there are any further pending retransmissions,
1902                          * set the global pending state-change flag, and
1903                          * reset the timer.
1904                          */
1905                         if (--inm->in6m_scrv > 0) {
1906                                 inm->in6m_sctimer = uri_sec;
1907                                 state_change_timers_running6 = 1;
1908                                 /* caller will schedule timer */
1909                         }
1910                         /*
1911                          * Retransmit the previously computed state-change
1912                          * report. If there are no further pending
1913                          * retransmissions, the mbuf queue will be consumed.
1914                          * Update T0 state to T1 as we have now sent
1915                          * a state-change.
1916                          */
1917                         (void) mld_v2_merge_state_changes(inm, scq);
1918
1919                         in6m_commit(inm);
1920                         MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
1921                             ip6_sprintf(&inm->in6m_addr),
1922                             if_name(inm->in6m_ifp)));
1923
1924                         /*
1925                          * If we are leaving the group for good, make sure
1926                          * we release MLD's reference to it.
1927                          * This release must be deferred using a SLIST,
1928                          * as we are called from a loop which traverses
1929                          * the in_ifmultiaddr TAILQ.
1930                          */
1931                         if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1932                             inm->in6m_scrv == 0) {
1933                                 inm->in6m_state = MLD_NOT_MEMBER;
1934                                 /*
1935                                  * A reference has already been held in
1936                                  * mld_final_leave() for this inm, so
1937                                  * no need to hold another one.  We also
1938                                  * bumped up its request count then, so
1939                                  * that it stays in in6_multihead.  Both
1940                                  * of them will be released when it is
1941                                  * dequeued later on.
1942                                  */
1943                                 VERIFY(inm->in6m_nrelecnt != 0);
1944                                 SLIST_INSERT_HEAD(&mli->mli_relinmhead,
1945                                     inm, in6m_nrele);
1946                         }
1947                 }
1948                 break;
1949         }
1950 }
1951
1952 /*
1953  * Switch to a different version on the given interface,
1954  * as per Section 9.12.
1955  */
1956 static uint32_t
1957 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
1958 {
1959         int old_version_timer;
1960
1961         MLI_LOCK_ASSERT_HELD(mli);
1962
1963         MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
1964             mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
1965             if_name(mli->mli_ifp)));
1966
1967         if (mld_version == MLD_VERSION_1) {
1968                 /*
1969                  * Compute the "Older Version Querier Present" timer as per
1970                  * Section 9.12, in seconds.
1971                  */
1972                 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1973                 mli->mli_v1_timer = old_version_timer;
1974         }
1975
1976         if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1977                 mli->mli_version = MLD_VERSION_1;
1978                 mld_v2_cancel_link_timers(mli);
1979         }
1980
1981         MLI_LOCK_ASSERT_HELD(mli);
1982
1983         return (mli->mli_v1_timer);
1984 }
1985
1986 /*
1987  * Cancel pending MLDv2 timers for the given link and all groups
1988  * joined on it; state-change, general-query, and group-query timers.
1989  *
1990  * Only ever called on a transition from v2 to Compatibility mode. Kill
1991  * the timers stone dead (this may be expensive for large N groups), they
1992  * will be restarted if Compatibility Mode deems that they must be due to
1993  * query processing.
1994  */
1995 static void
1996 mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
1997 {
1998         struct ifnet            *ifp;
1999         struct in6_multi        *inm;
2000         struct in6_multistep    step;
2001
2002         MLI_LOCK_ASSERT_HELD(mli);
2003
2004         MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
2005             (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));
2006
2007         /*
2008          * Stop the v2 General Query Response on this link stone dead.
2009          * If timer is woken up due to interface_timers_running6,
2010          * the flag will be cleared if there are no pending link timers.
2011          */
2012         mli->mli_v2_timer = 0;
2013
2014         /*
2015          * Now clear the current-state and state-change report timers
2016          * for all memberships scoped to this link.
2017          */
2018         ifp = mli->mli_ifp;
2019         MLI_UNLOCK(mli);
2020
2021         in6_multihead_lock_shared();
2022         IN6_FIRST_MULTI(step, inm);
2023         while (inm != NULL) {
2024                 IN6M_LOCK(inm);
2025                 if (inm->in6m_ifp != ifp)
2026                         goto next;
2027
2028                 switch (inm->in6m_state) {
2029                 case MLD_NOT_MEMBER:
2030                 case MLD_SILENT_MEMBER:
2031                 case MLD_IDLE_MEMBER:
2032                 case MLD_LAZY_MEMBER:
2033                 case MLD_SLEEPING_MEMBER:
2034                 case MLD_AWAKENING_MEMBER:
2035                         /*
2036                          * These states are either not relevant in v2 mode,
2037                          * or are unreported. Do nothing.
2038                          */
2039                         break;
2040                 case MLD_LEAVING_MEMBER:
2041                         /*
2042                          * If we are leaving the group and switching
2043                          * version, we need to release the final
2044                          * reference held for issuing the INCLUDE {}.
2045                          * During mld_final_leave(), we bumped up both the
2046                          * request and reference counts.  Since we cannot
2047                          * call in6_multi_detach() here, defer this task to
2048                          * the timer routine.
2049                          */
2050                         VERIFY(inm->in6m_nrelecnt != 0);
2051                         MLI_LOCK(mli);
2052                         SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2053                             in6m_nrele);
2054                         MLI_UNLOCK(mli);
2055                         /* FALLTHROUGH */
2056                 case MLD_G_QUERY_PENDING_MEMBER:
2057                 case MLD_SG_QUERY_PENDING_MEMBER:
2058                         in6m_clear_recorded(inm);
2059                         /* FALLTHROUGH */
2060                 case MLD_REPORTING_MEMBER:
2061                         inm->in6m_state = MLD_REPORTING_MEMBER;
2062                         break;
2063                 }
2064                 /*
2065                  * Always clear state-change and group report timers.
2066                  * Free any pending MLDv2 state-change records.
2067                  */
2068                 inm->in6m_sctimer = 0;
2069                 inm->in6m_timer = 0;
2070                 IF_DRAIN(&inm->in6m_scq);
2071 next:
2072                 IN6M_UNLOCK(inm);
2073                 IN6_NEXT_MULTI(step, inm);
2074         }
2075         in6_multihead_lock_done();
2076
2077         MLI_LOCK(mli);
2078 }
2079
2080 /*
2081  * Update the Older Version Querier Present timers for a link.
2082  * See Section 9.12 of RFC 3810.
2083  */
2084 static void
2085 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
2086 {
2087         MLI_LOCK_ASSERT_HELD(mli);
2088
2089         if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
2090             --mli->mli_v1_timer == 0) {
2091                 /*
2092                  * MLDv1 Querier Present timer expired; revert to MLDv2.
2093                  */
2094                 MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
2095                     __func__, mli->mli_version, MLD_VERSION_2,
2096                     (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
2097                     if_name(mli->mli_ifp)));
2098                 mli->mli_version = MLD_VERSION_2;
2099         }
2100 }
2101
2102 /*
2103  * Transmit an MLDv1 report immediately.
2104  */
2105 static int
2106 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
2107 {
2108         struct ifnet            *ifp;
2109         struct in6_ifaddr       *ia;
2110         struct ip6_hdr          *ip6;
2111         struct mbuf             *mh, *md;
2112         struct mld_hdr          *mld;
2113         int                     error = 0;
2114
2115         IN6M_LOCK_ASSERT_HELD(in6m);
2116         MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
2117
2118         ifp = in6m->in6m_ifp;
2119         /* ia may be NULL if link-local address is tentative. */
2120         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
2121
2122         MGETHDR(mh, M_DONTWAIT, MT_HEADER);
2123         if (mh == NULL) {
2124                 if (ia != NULL)
2125                         IFA_REMREF(&ia->ia_ifa);
2126                 return (ENOMEM);
2127         }
2128         MGET(md, M_DONTWAIT, MT_DATA);
2129         if (md == NULL) {
2130                 m_free(mh);
2131                 if (ia != NULL)
2132                         IFA_REMREF(&ia->ia_ifa);
2133                 return (ENOMEM);
2134         }
2135         mh->m_next = md;
2136
2137         /*
2138          * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
2139          * that ether_output() does not need to allocate another mbuf
2140          * for the header in the most common case.
2141          */
2142         MH_ALIGN(mh, sizeof(struct ip6_hdr));
2143         mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
2144         mh->m_len = sizeof(struct ip6_hdr);
2145
2146         ip6 = mtod(mh, struct ip6_hdr *);
2147         ip6->ip6_flow = 0;
2148         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
2149         ip6->ip6_vfc |= IPV6_VERSION;
2150         ip6->ip6_nxt = IPPROTO_ICMPV6;
2151         if (ia != NULL)
2152                 IFA_LOCK(&ia->ia_ifa);
2153         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
2154         if (ia != NULL) {
2155                 IFA_UNLOCK(&ia->ia_ifa);
2156                 IFA_REMREF(&ia->ia_ifa);
2157                 ia = NULL;
2158         }
2159         ip6->ip6_dst = in6m->in6m_addr;
2160
2161         md->m_len = sizeof(struct mld_hdr);
2162         mld = mtod(md, struct mld_hdr *);
2163         mld->mld_type = type;
2164         mld->mld_code = 0;
2165         mld->mld_cksum = 0;
2166         mld->mld_maxdelay = 0;
2167         mld->mld_reserved = 0;
2168         mld->mld_addr = in6m->in6m_addr;
2169         in6_clearscope(&mld->mld_addr);
2170         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2171             sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2172
2173         mld_save_context(mh, ifp);
2174         mh->m_flags |= M_MLDV1;
2175
2176         /*
2177          * Due to the fact that at this point we are possibly holding
2178          * in6_multihead_lock in shared or exclusive mode, we can't call
2179          * mld_dispatch_packet() here since that will eventually call
2180          * ip6_output(), which will try to lock in6_multihead_lock and cause
2181          * a deadlock.
2182          * Instead we defer the work to the mld_timeout() thread, thus
2183          * avoiding unlocking in_multihead_lock here.
2184          */
2185         if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2186                 MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2187                 error = ENOMEM;
2188                 m_freem(mh);
2189         } else {
2190                 IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2191                 VERIFY(error == 0);
2192         }
2193
2194         return (error);
2195 }
2196
2197 /*
2198  * Process a state change from the upper layer for the given IPv6 group.
2199  *
2200  * Each socket holds a reference on the in6_multi in its own ip_moptions.
2201  * The socket layer will have made the necessary updates to.the group
2202  * state, it is now up to MLD to issue a state change report if there
2203  * has been any change between T0 (when the last state-change was issued)
2204  * and T1 (now).
2205  *
2206  * We use the MLDv2 state machine at group level. The MLd module
2207  * however makes the decision as to which MLD protocol version to speak.
2208  * A state change *from* INCLUDE {} always means an initial join.
2209  * A state change *to* INCLUDE {} always means a final leave.
2210  *
2211  * If delay is non-zero, and the state change is an initial multicast
2212  * join, the state change report will be delayed by 'delay' ticks
2213  * in units of seconds if MLDv1 is active on the link; otherwise
2214  * the initial MLDv2 state change report will be delayed by whichever
2215  * is sooner, a pending state-change timer or delay itself.
2216  */
2217 int
2218 mld_change_state(struct in6_multi *inm, struct mld_tparams *mtp,
2219     const int delay)
2220 {
2221         struct mld_ifinfo *mli;
2222         struct ifnet *ifp;
2223         int error = 0;
2224
2225         VERIFY(mtp != NULL);
2226         bzero(mtp, sizeof (*mtp));
2227
2228         IN6M_LOCK_ASSERT_HELD(inm);
2229         VERIFY(inm->in6m_mli != NULL);
2230         MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2231
2232         /*
2233          * Try to detect if the upper layer just asked us to change state
2234          * for an interface which has now gone away.
2235          */
2236         VERIFY(inm->in6m_ifma != NULL);
2237         ifp = inm->in6m_ifma->ifma_ifp;
2238         /*
2239          * Sanity check that netinet6's notion of ifp is the same as net's.
2240          */
2241         VERIFY(inm->in6m_ifp == ifp);
2242
2243         mli = MLD_IFINFO(ifp);
2244         VERIFY(mli != NULL);
2245
2246         /*
2247          * If we detect a state transition to or from MCAST_UNDEFINED
2248          * for this group, then we are starting or finishing an MLD
2249          * life cycle for this group.
2250          */
2251         if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2252                 MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2253                     inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2254                 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2255                         MLD_PRINTF(("%s: initial join\n", __func__));
2256                         error = mld_initial_join(inm, mli, mtp, delay);
2257                         goto out;
2258                 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2259                         MLD_PRINTF(("%s: final leave\n", __func__));
2260                         mld_final_leave(inm, mli, mtp);
2261                         goto out;
2262                 }
2263         } else {
2264                 MLD_PRINTF(("%s: filter set change\n", __func__));
2265         }
2266
2267         error = mld_handle_state_change(inm, mli, mtp);
2268 out:
2269         return (error);
2270 }
2271
2272 /*
2273  * Perform the initial join for an MLD group.
2274  *
2275  * When joining a group:
2276  *  If the group should have its MLD traffic suppressed, do nothing.
2277  *  MLDv1 starts sending MLDv1 host membership reports.
2278  *  MLDv2 will schedule an MLDv2 state-change report containing the
2279  *  initial state of the membership.
2280  *
2281  * If the delay argument is non-zero, then we must delay sending the
2282  * initial state change for delay ticks (in units of seconds).
2283  */
2284 static int
2285 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2286     struct mld_tparams *mtp, const int delay)
2287 {
2288         struct ifnet            *ifp;
2289         struct ifqueue          *ifq;
2290         int                      error, retval, syncstates;
2291         int                      odelay;
2292
2293         IN6M_LOCK_ASSERT_HELD(inm);
2294         MLI_LOCK_ASSERT_NOTHELD(mli);
2295         VERIFY(mtp != NULL);
2296
2297         MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
2298             __func__, ip6_sprintf(&inm->in6m_addr),
2299             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2300             if_name(inm->in6m_ifp)));
2301
2302         error = 0;
2303         syncstates = 1;
2304
2305         ifp = inm->in6m_ifp;
2306
2307         MLI_LOCK(mli);
2308         VERIFY(mli->mli_ifp == ifp);
2309
2310         /*
2311          * Groups joined on loopback or marked as 'not reported',
2312          * enter the MLD_SILENT_MEMBER state and
2313          * are never reported in any protocol exchanges.
2314          * All other groups enter the appropriate state machine
2315          * for the version in use on this link.
2316          * A link marked as MLIF_SILENT causes MLD to be completely
2317          * disabled for the link.
2318          */
2319         if ((ifp->if_flags & IFF_LOOPBACK) ||
2320             (mli->mli_flags & MLIF_SILENT) ||
2321             !mld_is_addr_reported(&inm->in6m_addr)) {
2322                 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2323                     __func__));
2324                 inm->in6m_state = MLD_SILENT_MEMBER;
2325                 inm->in6m_timer = 0;
2326         } else {
2327                 /*
2328                  * Deal with overlapping in6_multi lifecycle.
2329                  * If this group was LEAVING, then make sure
2330                  * we drop the reference we picked up to keep the
2331                  * group around for the final INCLUDE {} enqueue.
2332                  * Since we cannot call in6_multi_detach() here,
2333                  * defer this task to the timer routine.
2334                  */
2335                 if (mli->mli_version == MLD_VERSION_2 &&
2336                     inm->in6m_state == MLD_LEAVING_MEMBER) {
2337                         VERIFY(inm->in6m_nrelecnt != 0);
2338                         SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2339                             in6m_nrele);
2340                 }
2341
2342                 inm->in6m_state = MLD_REPORTING_MEMBER;
2343
2344                 switch (mli->mli_version) {
2345                 case MLD_VERSION_1:
2346                         /*
2347                          * If a delay was provided, only use it if
2348                          * it is greater than the delay normally
2349                          * used for an MLDv1 state change report,
2350                          * and delay sending the initial MLDv1 report
2351                          * by not transitioning to the IDLE state.
2352                          */
2353                         odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
2354                         if (delay) {
2355                                 inm->in6m_timer = max(delay, odelay);
2356                                 mtp->cst = 1;
2357                         } else {
2358                                 inm->in6m_state = MLD_IDLE_MEMBER;
2359                                 error = mld_v1_transmit_report(inm,
2360                                      MLD_LISTENER_REPORT);
2361
2362                                 IN6M_LOCK_ASSERT_HELD(inm);
2363                                 MLI_LOCK_ASSERT_HELD(mli);
2364
2365                                 if (error == 0) {
2366                                         inm->in6m_timer = odelay;
2367                                         mtp->cst = 1;
2368                                 }
2369                         }
2370                         break;
2371
2372                 case MLD_VERSION_2:
2373                         /*
2374                          * Defer update of T0 to T1, until the first copy
2375                          * of the state change has been transmitted.
2376                          */
2377                         syncstates = 0;
2378
2379                         /*
2380                          * Immediately enqueue a State-Change Report for
2381                          * this interface, freeing any previous reports.
2382                          * Don't kick the timers if there is nothing to do,
2383                          * or if an error occurred.
2384                          */
2385                         ifq = &inm->in6m_scq;
2386                         IF_DRAIN(ifq);
2387                         retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2388                             0, 0, (mli->mli_flags & MLIF_USEALLOW));
2389                         mtp->cst = (ifq->ifq_len > 0);
2390                         MLD_PRINTF(("%s: enqueue record = %d\n",
2391                             __func__, retval));
2392                         if (retval <= 0) {
2393                                 error = retval * -1;
2394                                 break;
2395                         }
2396
2397                         /*
2398                          * Schedule transmission of pending state-change
2399                          * report up to RV times for this link. The timer
2400                          * will fire at the next mld_timeout (1 second)),
2401                          * giving us an opportunity to merge the reports.
2402                          *
2403                          * If a delay was provided to this function, only
2404                          * use this delay if sooner than the existing one.
2405                          */
2406                         VERIFY(mli->mli_rv > 1);
2407                         inm->in6m_scrv = mli->mli_rv;
2408                         if (delay) {
2409                                 if (inm->in6m_sctimer > 1) {
2410                                         inm->in6m_sctimer =
2411                                             min(inm->in6m_sctimer, delay);
2412                                 } else
2413                                         inm->in6m_sctimer = delay;
2414                         } else {
2415                                 inm->in6m_sctimer = 1;
2416                         }
2417                         mtp->sct = 1;
2418                         error = 0;
2419                         break;
2420                 }
2421         }
2422         MLI_UNLOCK(mli);
2423
2424         /*
2425          * Only update the T0 state if state change is atomic,
2426          * i.e. we don't need to wait for a timer to fire before we
2427          * can consider the state change to have been communicated.
2428          */
2429         if (syncstates) {
2430                 in6m_commit(inm);
2431                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2432                     ip6_sprintf(&inm->in6m_addr),
2433                     if_name(inm->in6m_ifp)));
2434         }
2435
2436         return (error);
2437 }
2438
2439 /*
2440  * Issue an intermediate state change during the life-cycle.
2441  */
2442 static int
2443 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli,
2444     struct mld_tparams *mtp)
2445 {
2446         struct ifnet            *ifp;
2447         int                      retval = 0;
2448
2449         IN6M_LOCK_ASSERT_HELD(inm);
2450         MLI_LOCK_ASSERT_NOTHELD(mli);
2451         VERIFY(mtp != NULL);
2452
2453         MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
2454             __func__, ip6_sprintf(&inm->in6m_addr),
2455             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2456             if_name(inm->in6m_ifp)));
2457
2458         ifp = inm->in6m_ifp;
2459
2460         MLI_LOCK(mli);
2461         VERIFY(mli->mli_ifp == ifp);
2462
2463         if ((ifp->if_flags & IFF_LOOPBACK) ||
2464             (mli->mli_flags & MLIF_SILENT) ||
2465             !mld_is_addr_reported(&inm->in6m_addr) ||
2466             (mli->mli_version != MLD_VERSION_2)) {
2467                 MLI_UNLOCK(mli);
2468                 if (!mld_is_addr_reported(&inm->in6m_addr)) {
2469                         MLD_PRINTF(("%s: not kicking state machine for silent "
2470                             "group\n", __func__));
2471                 }
2472                 MLD_PRINTF(("%s: nothing to do\n", __func__));
2473                 in6m_commit(inm);
2474                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2475                     ip6_sprintf(&inm->in6m_addr),
2476                     if_name(inm->in6m_ifp)));
2477                 goto done;
2478         }
2479
2480         IF_DRAIN(&inm->in6m_scq);
2481
2482         retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2483             (mli->mli_flags & MLIF_USEALLOW));
2484         mtp->cst = (inm->in6m_scq.ifq_len > 0);
2485         MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2486         if (retval <= 0) {
2487                 MLI_UNLOCK(mli);
2488                 retval *= -1;
2489                 goto done;
2490         }
2491         /*
2492          * If record(s) were enqueued, start the state-change
2493          * report timer for this group.
2494          */
2495         inm->in6m_scrv = mli->mli_rv;
2496         inm->in6m_sctimer = 1;
2497         mtp->sct = 1;
2498         MLI_UNLOCK(mli);
2499
2500 done:
2501         return (retval);
2502 }
2503
2504 /*
2505  * Perform the final leave for a multicast address.
2506  *
2507  * When leaving a group:
2508  *  MLDv1 sends a DONE message, if and only if we are the reporter.
2509  *  MLDv2 enqueues a state-change report containing a transition
2510  *  to INCLUDE {} for immediate transmission.
2511  */
2512 static void
2513 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli,
2514     struct mld_tparams *mtp)
2515 {
2516         int syncstates = 1;
2517
2518         IN6M_LOCK_ASSERT_HELD(inm);
2519         MLI_LOCK_ASSERT_NOTHELD(mli);
2520         VERIFY(mtp != NULL);
2521
2522         MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
2523             __func__, ip6_sprintf(&inm->in6m_addr),
2524             (uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
2525             if_name(inm->in6m_ifp)));
2526
2527         switch (inm->in6m_state) {
2528         case MLD_NOT_MEMBER:
2529         case MLD_SILENT_MEMBER:
2530         case MLD_LEAVING_MEMBER:
2531                 /* Already leaving or left; do nothing. */
2532                 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2533                     __func__));
2534                 break;
2535         case MLD_REPORTING_MEMBER:
2536         case MLD_IDLE_MEMBER:
2537         case MLD_G_QUERY_PENDING_MEMBER:
2538         case MLD_SG_QUERY_PENDING_MEMBER:
2539                 MLI_LOCK(mli);
2540                 if (mli->mli_version == MLD_VERSION_1) {
2541                         if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2542                             inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2543                                 panic("%s: MLDv2 state reached, not MLDv2 "
2544                                     "mode\n", __func__);
2545                                 /* NOTREACHED */
2546                         }
2547                         /* scheduler timer if enqueue is successful */
2548                         mtp->cst = (mld_v1_transmit_report(inm,
2549                             MLD_LISTENER_DONE) == 0);
2550
2551                         IN6M_LOCK_ASSERT_HELD(inm);
2552                         MLI_LOCK_ASSERT_HELD(mli);
2553
2554                         inm->in6m_state = MLD_NOT_MEMBER;
2555                 } else if (mli->mli_version == MLD_VERSION_2) {
2556                         /*
2557                          * Stop group timer and all pending reports.
2558                          * Immediately enqueue a state-change report
2559                          * TO_IN {} to be sent on the next timeout,
2560                          * giving us an opportunity to merge reports.
2561                          */
2562                         IF_DRAIN(&inm->in6m_scq);
2563                         inm->in6m_timer = 0;
2564                         inm->in6m_scrv = mli->mli_rv;
2565                         MLD_PRINTF(("%s: Leaving %s/%s with %d "
2566                             "pending retransmissions.\n", __func__,
2567                             ip6_sprintf(&inm->in6m_addr),
2568                             if_name(inm->in6m_ifp),
2569                             inm->in6m_scrv));
2570                         if (inm->in6m_scrv == 0) {
2571                                 inm->in6m_state = MLD_NOT_MEMBER;
2572                                 inm->in6m_sctimer = 0;
2573                         } else {
2574                                 int retval;
2575                                 /*
2576                                  * Stick around in the in6_multihead list;
2577                                  * the final detach will be issued by
2578                                  * mld_v2_process_group_timers() when
2579                                  * the retransmit timer expires.
2580                                  */
2581                                 IN6M_ADDREF_LOCKED(inm);
2582                                 VERIFY(inm->in6m_debug & IFD_ATTACHED);
2583                                 inm->in6m_reqcnt++;
2584                                 VERIFY(inm->in6m_reqcnt >= 1);
2585                                 inm->in6m_nrelecnt++;
2586                                 VERIFY(inm->in6m_nrelecnt != 0);
2587
2588                                 retval = mld_v2_enqueue_group_record(
2589                                     &inm->in6m_scq, inm, 1, 0, 0,
2590                                     (mli->mli_flags & MLIF_USEALLOW));
2591                                 mtp->cst = (inm->in6m_scq.ifq_len > 0);
2592                                 KASSERT(retval != 0,
2593                                     ("%s: enqueue record = %d\n", __func__,
2594                                      retval));
2595
2596                                 inm->in6m_state = MLD_LEAVING_MEMBER;
2597                                 inm->in6m_sctimer = 1;
2598                                 mtp->sct = 1;
2599                                 syncstates = 0;
2600                         }
2601                 }
2602                 MLI_UNLOCK(mli);
2603                 break;
2604         case MLD_LAZY_MEMBER:
2605         case MLD_SLEEPING_MEMBER:
2606         case MLD_AWAKENING_MEMBER:
2607                 /* Our reports are suppressed; do nothing. */
2608                 break;
2609         }
2610
2611         if (syncstates) {
2612                 in6m_commit(inm);
2613                 MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
2614                     ip6_sprintf(&inm->in6m_addr),
2615                     if_name(inm->in6m_ifp)));
2616                 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2617                 MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
2618                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
2619                     if_name(inm->in6m_ifp)));
2620         }
2621 }
2622
2623 /*
2624  * Enqueue an MLDv2 group record to the given output queue.
2625  *
2626  * If is_state_change is zero, a current-state record is appended.
2627  * If is_state_change is non-zero, a state-change report is appended.
2628  *
2629  * If is_group_query is non-zero, an mbuf packet chain is allocated.
2630  * If is_group_query is zero, and if there is a packet with free space
2631  * at the tail of the queue, it will be appended to providing there
2632  * is enough free space.
2633  * Otherwise a new mbuf packet chain is allocated.
2634  *
2635  * If is_source_query is non-zero, each source is checked to see if
2636  * it was recorded for a Group-Source query, and will be omitted if
2637  * it is not both in-mode and recorded.
2638  *
2639  * If use_block_allow is non-zero, state change reports for initial join
2640  * and final leave, on an inclusive mode group with a source list, will be
2641  * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2642  *
2643  * The function will attempt to allocate leading space in the packet
2644  * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2645  *
2646  * If successful the size of all data appended to the queue is returned,
2647  * otherwise an error code less than zero is returned, or zero if
2648  * no record(s) were appended.
2649  */
2650 static int
2651 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2652     const int is_state_change, const int is_group_query,
2653     const int is_source_query, const int use_block_allow)
2654 {
2655         struct mldv2_record      mr;
2656         struct mldv2_record     *pmr;
2657         struct ifnet            *ifp;
2658         struct ip6_msource      *ims, *nims;
2659         struct mbuf             *m0, *m, *md;
2660         int                      error, is_filter_list_change;
2661         int                      minrec0len, m0srcs, msrcs, nbytes, off;
2662         int                      record_has_sources;
2663         int                      now;
2664         int                      type;
2665         uint8_t                  mode;
2666
2667         IN6M_LOCK_ASSERT_HELD(inm);
2668         MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2669
2670         error = 0;
2671         ifp = inm->in6m_ifp;
2672         is_filter_list_change = 0;
2673         m = NULL;
2674         m0 = NULL;
2675         m0srcs = 0;
2676         msrcs = 0;
2677         nbytes = 0;
2678         nims = NULL;
2679         record_has_sources = 1;
2680         pmr = NULL;
2681         type = MLD_DO_NOTHING;
2682         mode = inm->in6m_st[1].iss_fmode;
2683
2684         /*
2685          * If we did not transition out of ASM mode during t0->t1,
2686          * and there are no source nodes to process, we can skip
2687          * the generation of source records.
2688          */
2689         if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2690             inm->in6m_nsrc == 0)
2691                 record_has_sources = 0;
2692
2693         if (is_state_change) {
2694                 /*
2695                  * Queue a state change record.
2696                  * If the mode did not change, and there are non-ASM
2697                  * listeners or source filters present,
2698                  * we potentially need to issue two records for the group.
2699                  * If there are ASM listeners, and there was no filter
2700                  * mode transition of any kind, do nothing.
2701                  *
2702                  * If we are transitioning to MCAST_UNDEFINED, we need
2703                  * not send any sources. A transition to/from this state is
2704                  * considered inclusive with some special treatment.
2705                  *
2706                  * If we are rewriting initial joins/leaves to use
2707                  * ALLOW/BLOCK, and the group's membership is inclusive,
2708                  * we need to send sources in all cases.
2709                  */
2710                 if (mode != inm->in6m_st[0].iss_fmode) {
2711                         if (mode == MCAST_EXCLUDE) {
2712                                 MLD_PRINTF(("%s: change to EXCLUDE\n",
2713                                     __func__));
2714                                 type = MLD_CHANGE_TO_EXCLUDE_MODE;
2715                         } else {
2716                                 MLD_PRINTF(("%s: change to INCLUDE\n",
2717                                     __func__));
2718                                 if (use_block_allow) {
2719                                         /*
2720                                          * XXX
2721                                          * Here we're interested in state
2722                                          * edges either direction between
2723                                          * MCAST_UNDEFINED and MCAST_INCLUDE.
2724                                          * Perhaps we should just check
2725                                          * the group state, rather than
2726                                          * the filter mode.
2727                                          */
2728                                         if (mode == MCAST_UNDEFINED) {
2729                                                 type = MLD_BLOCK_OLD_SOURCES;
2730                                         } else {
2731                                                 type = MLD_ALLOW_NEW_SOURCES;
2732                                         }
2733                                 } else {
2734                                         type = MLD_CHANGE_TO_INCLUDE_MODE;
2735                                         if (mode == MCAST_UNDEFINED)
2736                                                 record_has_sources = 0;
2737                                 }
2738                         }
2739                 } else {
2740                         if (record_has_sources) {
2741                                 is_filter_list_change = 1;
2742                         } else {
2743                                 type = MLD_DO_NOTHING;
2744                         }
2745                 }
2746         } else {
2747                 /*
2748                  * Queue a current state record.
2749                  */
2750                 if (mode == MCAST_EXCLUDE) {
2751                         type = MLD_MODE_IS_EXCLUDE;
2752                 } else if (mode == MCAST_INCLUDE) {
2753                         type = MLD_MODE_IS_INCLUDE;
2754                         VERIFY(inm->in6m_st[1].iss_asm == 0);
2755                 }
2756         }
2757
2758         /*
2759          * Generate the filter list changes using a separate function.
2760          */
2761         if (is_filter_list_change)
2762                 return (mld_v2_enqueue_filter_change(ifq, inm));
2763
2764         if (type == MLD_DO_NOTHING) {
2765                 MLD_PRINTF(("%s: nothing to do for %s/%s\n",
2766                     __func__, ip6_sprintf(&inm->in6m_addr),
2767                     if_name(inm->in6m_ifp)));
2768                 return (0);
2769         }
2770
2771         /*
2772          * If any sources are present, we must be able to fit at least
2773          * one in the trailing space of the tail packet's mbuf,
2774          * ideally more.
2775          */
2776         minrec0len = sizeof(struct mldv2_record);
2777         if (record_has_sources)
2778                 minrec0len += sizeof(struct in6_addr);
2779         MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
2780             mld_rec_type_to_str(type),
2781             ip6_sprintf(&inm->in6m_addr),
2782             if_name(inm->in6m_ifp)));
2783
2784         /*
2785          * Check if we have a packet in the tail of the queue for this
2786          * group into which the first group record for this group will fit.
2787          * Otherwise allocate a new packet.
2788          * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2789          * Note: Group records for G/GSR query responses MUST be sent
2790          * in their own packet.
2791          */
2792         m0 = ifq->ifq_tail;
2793         if (!is_group_query &&
2794             m0 != NULL &&
2795             (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2796             (m0->m_pkthdr.len + minrec0len) <
2797              (ifp->if_mtu - MLD_MTUSPACE)) {
2798                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2799                             sizeof(struct mldv2_record)) /
2800                             sizeof(struct in6_addr);
2801                 m = m0;
2802                 MLD_PRINTF(("%s: use existing packet\n", __func__));
2803         } else {
2804                 if (IF_QFULL(ifq)) {
2805                         MLD_PRINTF(("%s: outbound queue full\n", __func__));
2806                         return (-ENOMEM);
2807                 }
2808                 m = NULL;
2809                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2810                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2811                 if (!is_state_change && !is_group_query)
2812                         m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2813                 if (m == NULL)
2814                         m = m_gethdr(M_DONTWAIT, MT_DATA);
2815                 if (m == NULL)
2816                         return (-ENOMEM);
2817
2818                 mld_save_context(m, ifp);
2819
2820                 MLD_PRINTF(("%s: allocated first packet\n", __func__));
2821         }
2822
2823         /*
2824          * Append group record.
2825          * If we have sources, we don't know how many yet.
2826          */
2827         mr.mr_type = type;
2828         mr.mr_datalen = 0;
2829         mr.mr_numsrc = 0;
2830         mr.mr_addr = inm->in6m_addr;
2831         in6_clearscope(&mr.mr_addr);
2832         if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2833                 if (m != m0)
2834                         m_freem(m);
2835                 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2836                 return (-ENOMEM);
2837         }
2838         nbytes += sizeof(struct mldv2_record);
2839
2840         /*
2841          * Append as many sources as will fit in the first packet.
2842          * If we are appending to a new packet, the chain allocation
2843          * may potentially use clusters; use m_getptr() in this case.
2844          * If we are appending to an existing packet, we need to obtain
2845          * a pointer to the group record after m_append(), in case a new
2846          * mbuf was allocated.
2847          *
2848          * Only append sources which are in-mode at t1. If we are
2849          * transitioning to MCAST_UNDEFINED state on the group, and
2850          * use_block_allow is zero, do not include source entries.
2851          * Otherwise, we need to include this source in the report.
2852          *
2853          * Only report recorded sources in our filter set when responding
2854          * to a group-source query.
2855          */
2856         if (record_has_sources) {
2857                 if (m == m0) {
2858                         md = m_last(m);
2859                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2860                             md->m_len - nbytes);
2861                 } else {
2862                         md = m_getptr(m, 0, &off);
2863                         pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2864                             off);
2865                 }
2866                 msrcs = 0;
2867                 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2868                     nims) {
2869                         MLD_PRINTF(("%s: visit node %s\n", __func__,
2870                             ip6_sprintf(&ims->im6s_addr)));
2871                         now = im6s_get_mode(inm, ims, 1);
2872                         MLD_PRINTF(("%s: node is %d\n", __func__, now));
2873                         if ((now != mode) ||
2874                             (now == mode &&
2875                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
2876                                 MLD_PRINTF(("%s: skip node\n", __func__));
2877                                 continue;
2878                         }
2879                         if (is_source_query && ims->im6s_stp == 0) {
2880                                 MLD_PRINTF(("%s: skip unrecorded node\n",
2881                                     __func__));
2882                                 continue;
2883                         }
2884                         MLD_PRINTF(("%s: append node\n", __func__));
2885                         if (!m_append(m, sizeof(struct in6_addr),
2886                             (void *)&ims->im6s_addr)) {
2887                                 if (m != m0)
2888                                         m_freem(m);
2889                                 MLD_PRINTF(("%s: m_append() failed.\n",
2890                                     __func__));
2891                                 return (-ENOMEM);
2892                         }
2893                         nbytes += sizeof(struct in6_addr);
2894                         ++msrcs;
2895                         if (msrcs == m0srcs)
2896                                 break;
2897                 }
2898                 MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
2899                     msrcs));
2900                 pmr->mr_numsrc = htons(msrcs);
2901                 nbytes += (msrcs * sizeof(struct in6_addr));
2902         }
2903
2904         if (is_source_query && msrcs == 0) {
2905                 MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
2906                 if (m != m0)
2907                         m_freem(m);
2908                 return (0);
2909         }
2910
2911         /*
2912          * We are good to go with first packet.
2913          */
2914         if (m != m0) {
2915                 MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
2916                 m->m_pkthdr.vt_nrecs = 1;
2917                 IF_ENQUEUE(ifq, m);
2918         } else {
2919                 m->m_pkthdr.vt_nrecs++;
2920         }
2921         /*
2922          * No further work needed if no source list in packet(s).
2923          */
2924         if (!record_has_sources)
2925                 return (nbytes);
2926
2927         /*
2928          * Whilst sources remain to be announced, we need to allocate
2929          * a new packet and fill out as many sources as will fit.
2930          * Always try for a cluster first.
2931          */
2932         while (nims != NULL) {
2933                 if (IF_QFULL(ifq)) {
2934                         MLD_PRINTF(("%s: outbound queue full\n", __func__));
2935                         return (-ENOMEM);
2936                 }
2937                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2938                 if (m == NULL)
2939                         m = m_gethdr(M_DONTWAIT, MT_DATA);
2940                 if (m == NULL)
2941                         return (-ENOMEM);
2942                 mld_save_context(m, ifp);
2943                 md = m_getptr(m, 0, &off);
2944                 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2945                 MLD_PRINTF(("%s: allocated next packet\n", __func__));
2946
2947                 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2948                         if (m != m0)
2949                                 m_freem(m);
2950                         MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2951                         return (-ENOMEM);
2952                 }
2953                 m->m_pkthdr.vt_nrecs = 1;
2954                 nbytes += sizeof(struct mldv2_record);
2955
2956                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2957                     sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2958
2959                 msrcs = 0;
2960                 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2961                         MLD_PRINTF(("%s: visit node %s\n",
2962                             __func__, ip6_sprintf(&ims->im6s_addr)));
2963                         now = im6s_get_mode(inm, ims, 1);
2964                         if ((now != mode) ||
2965                             (now == mode &&
2966                              (!use_block_allow && mode == MCAST_UNDEFINED))) {
2967                                 MLD_PRINTF(("%s: skip node\n", __func__));
2968                                 continue;
2969                         }
2970                         if (is_source_query && ims->im6s_stp == 0) {
2971                                 MLD_PRINTF(("%s: skip unrecorded node\n",
2972                                     __func__));
2973                                 continue;
2974                         }
2975                         MLD_PRINTF(("%s: append node\n", __func__));
2976                         if (!m_append(m, sizeof(struct in6_addr),
2977                             (void *)&ims->im6s_addr)) {
2978                                 if (m != m0)
2979                                         m_freem(m);
2980                                 MLD_PRINTF(("%s: m_append() failed.\n",
2981                                     __func__));
2982                                 return (-ENOMEM);
2983                         }
2984                         ++msrcs;
2985                         if (msrcs == m0srcs)
2986                                 break;
2987                 }
2988                 pmr->mr_numsrc = htons(msrcs);
2989                 nbytes += (msrcs * sizeof(struct in6_addr));
2990
2991                 MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
2992                 IF_ENQUEUE(ifq, m);
2993         }
2994
2995         return (nbytes);
2996 }
2997
/*
 * Markers for which record passes have been completed.
 * Each value lines up with the filter mode named beside it, so the
 * current filter mode of an ip6_msource node can be cast to this type
 * directly.
 */
typedef enum {
        REC_NONE  = 0,                          /* MCAST_UNDEFINED */
        REC_ALLOW = 1,                          /* MCAST_INCLUDE */
        REC_BLOCK = 2,                          /* MCAST_EXCLUDE */
        REC_FULL  = (REC_ALLOW | REC_BLOCK)     /* both record types */
} rectype_t;
3009
3010 /*
3011  * Enqueue an MLDv2 filter list change to the given output queue.
3012  *
3013  * Source list filter state is held in an RB-tree. When the filter list
3014  * for a group is changed without changing its mode, we need to compute
3015  * the deltas between T0 and T1 for each source in the filter set,
3016  * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
3017  *
3018  * As we may potentially queue two record types, and the entire R-B tree
3019  * needs to be walked at once, we break this out into its own function
3020  * so we can generate a tightly packed queue of packets.
3021  *
3022  * XXX This could be written to only use one tree walk, although that makes
3023  * serializing into the mbuf chains a bit harder. For now we do two walks
3024  * which makes things easier on us, and it may or may not be harder on
3025  * the L2 cache.
3026  *
3027  * If successful the size of all data appended to the queue is returned,
3028  * otherwise an error code less than zero is returned, or zero if
3029  * no record(s) were appended.
3030  */
3031 static int
3032 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
3033 {
3034         static const int MINRECLEN =
3035             sizeof(struct mldv2_record) + sizeof(struct in6_addr);
3036         struct ifnet            *ifp;
3037         struct mldv2_record      mr;
3038         struct mldv2_record     *pmr;
3039         struct ip6_msource      *ims, *nims;
3040         struct mbuf             *m, *m0, *md;
3041         int                      m0srcs, nbytes, npbytes, off, rsrcs, schanged;
3042         int                      nallow, nblock;
3043         uint8_t                  mode, now, then;
3044         rectype_t                crt, drt, nrt;
3045
3046         IN6M_LOCK_ASSERT_HELD(inm);
3047
3048         if (inm->in6m_nsrc == 0 ||
3049             (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
3050                 return (0);
3051
3052         ifp = inm->in6m_ifp;                    /* interface */
3053         mode = inm->in6m_st[1].iss_fmode;       /* filter mode at t1 */
3054         crt = REC_NONE; /* current group record type */
3055         drt = REC_NONE; /* mask of completed group record types */
3056         nrt = REC_NONE; /* record type for current node */
3057         m0srcs = 0;     /* # source which will fit in current mbuf chain */
3058         npbytes = 0;    /* # of bytes appended this packet */
3059         nbytes = 0;     /* # of bytes appended to group's state-change queue */
3060         rsrcs = 0;      /* # sources encoded in current record */
3061         schanged = 0;   /* # nodes encoded in overall filter change */
3062         nallow = 0;     /* # of source entries in ALLOW_NEW */
3063         nblock = 0;     /* # of source entries in BLOCK_OLD */
3064         nims = NULL;    /* next tree node pointer */
3065
3066         /*
3067          * For each possible filter record mode.
3068          * The first kind of source we encounter tells us which
3069          * is the first kind of record we start appending.
3070          * If a node transitioned to UNDEFINED at t1, its mode is treated
3071          * as the inverse of the group's filter mode.
3072          */
3073         while (drt != REC_FULL) {
3074                 do {
3075                         m0 = ifq->ifq_tail;
3076                         if (m0 != NULL &&
3077                             (m0->m_pkthdr.vt_nrecs + 1 <=
3078                              MLD_V2_REPORT_MAXRECS) &&
3079                             (m0->m_pkthdr.len + MINRECLEN) <
3080                              (ifp->if_mtu - MLD_MTUSPACE)) {
3081                                 m = m0;
3082                                 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
3083                                             sizeof(struct mldv2_record)) /
3084                                             sizeof(struct in6_addr);
3085                                 MLD_PRINTF(("%s: use previous packet\n",
3086                                     __func__));
3087                         } else {
3088                                 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
3089                                 if (m == NULL)
3090                                         m = m_gethdr(M_DONTWAIT, MT_DATA);
3091                                 if (m == NULL) {
3092                                         MLD_PRINTF(("%s: m_get*() failed\n",
3093                                             __func__));
3094                                         return (-ENOMEM);
3095                                 }
3096                                 m->m_pkthdr.vt_nrecs = 0;
3097                                 mld_save_context(m, ifp);
3098                                 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
3099                                     sizeof(struct mldv2_record)) /
3100                                     sizeof(struct in6_addr);
3101                                 npbytes = 0;
3102                                 MLD_PRINTF(("%s: allocated new packet\n",
3103                                     __func__));
3104                         }
3105                         /*
3106                          * Append the MLD group record header to the
3107                          * current packet's data area.
3108                          * Recalculate pointer to free space for next
3109                          * group record, in case m_append() allocated
3110                          * a new mbuf or cluster.
3111                          */
3112                         memset(&mr, 0, sizeof(mr));
3113                         mr.mr_addr = inm->in6m_addr;
3114                         in6_clearscope(&mr.mr_addr);
3115                         if (!m_append(m, sizeof(mr), (void *)&mr)) {
3116                                 if (m != m0)
3117                                         m_freem(m);
3118                                 MLD_PRINTF(("%s: m_append() failed\n",
3119                                     __func__));
3120                                 return (-ENOMEM);
3121                         }
3122                         npbytes += sizeof(struct mldv2_record);
3123                         if (m != m0) {
3124                                 /* new packet; offset in chain */
3125                                 md = m_getptr(m, npbytes -
3126                                     sizeof(struct mldv2_record), &off);
3127                                 pmr = (struct mldv2_record *)(mtod(md,
3128                                     uint8_t *) + off);
3129                         } else {
3130                                 /* current packet; offset from last append */
3131                                 md = m_last(m);
3132                                 pmr = (struct mldv2_record *)(mtod(md,
3133                                     uint8_t *) + md->m_len -
3134                                     sizeof(struct mldv2_record));
3135                         }
3136                         /*
3137                          * Begin walking the tree for this record type
3138                          * pass, or continue from where we left off
3139                          * previously if we had to allocate a new packet.
3140                          * Only report deltas in-mode at t1.
3141                          * We need not report included sources as allowed
3142                          * if we are in inclusive mode on the group,
3143                          * however the converse is not true.
3144                          */
3145                         rsrcs = 0;
3146                         if (nims == NULL) {
3147                                 nims = RB_MIN(ip6_msource_tree,
3148                                     &inm->in6m_srcs);
3149                         }
3150                         RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
3151                                 MLD_PRINTF(("%s: visit node %s\n", __func__,
3152                                     ip6_sprintf(&ims->im6s_addr)));
3153                                 now = im6s_get_mode(inm, ims, 1);
3154                                 then = im6s_get_mode(inm, ims, 0);
3155                                 MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
3156                                     __func__, then, now));
3157                                 if (now == then) {
3158                                         MLD_PRINTF(("%s: skip unchanged\n",
3159                                             __func__));
3160                                         continue;
3161                                 }
3162                                 if (mode == MCAST_EXCLUDE &&
3163                                     now == MCAST_INCLUDE) {
3164                                         MLD_PRINTF(("%s: skip IN src on EX "
3165                                             "group\n", __func__));
3166                                         continue;
3167                                 }
3168                                 nrt = (rectype_t)now;
3169                                 if (nrt == REC_NONE)
3170                                         nrt = (rectype_t)(~mode & REC_FULL);
3171                                 if (schanged++ == 0) {
3172                                         crt = nrt;
3173                                 } else if (crt != nrt)
3174                                         continue;
3175                                 if (!m_append(m, sizeof(struct in6_addr),
3176                                     (void *)&ims->im6s_addr)) {
3177                                         if (m != m0)
3178                                                 m_freem(m);
3179                                         MLD_PRINTF(("%s: m_append() failed\n",
3180                                             __func__));
3181                                         return (-ENOMEM);
3182                                 }
3183                                 nallow += !!(crt == REC_ALLOW);
3184                                 nblock += !!(crt == REC_BLOCK);
3185                                 if (++rsrcs == m0srcs)
3186                                         break;
3187                         }
3188                         /*
3189                          * If we did not append any tree nodes on this
3190                          * pass, back out of allocations.
3191                          */
3192                         if (rsrcs == 0) {
3193                                 npbytes -= sizeof(struct mldv2_record);
3194                                 if (m != m0) {
3195                                         MLD_PRINTF(("%s: m_free(m)\n",
3196                                             __func__));
3197                                         m_freem(m);
3198                                 } else {
3199                                         MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3200                                             __func__));
3201                                         m_adj(m, -((int)sizeof(
3202                                             struct mldv2_record)));
3203                                 }
3204                                 continue;
3205                         }
3206                         npbytes += (rsrcs * sizeof(struct in6_addr));
3207                         if (crt == REC_ALLOW)
3208                                 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3209                         else if (crt == REC_BLOCK)
3210                                 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3211                         pmr->mr_numsrc = htons(rsrcs);
3212                         /*
3213                          * Count the new group record, and enqueue this
3214                          * packet if it wasn't already queued.
3215                          */
3216                         m->m_pkthdr.vt_nrecs++;
3217                         if (m != m0)
3218                                 IF_ENQUEUE(ifq, m);
3219                         nbytes += npbytes;
3220                 } while (nims != NULL);
3221                 drt |= crt;
3222                 crt = (~crt & REC_FULL);
3223         }
3224
3225         MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3226             nallow, nblock));
3227
3228         return (nbytes);
3229 }
3230
/*
 * Move the state-change packets pending on a group's queue
 * (inm->in6m_scq) onto the queue ifscq.
 *
 * Each pending packet is either appended to ifscq as a packet of its
 * own, or, when the record count and length limits allow, chained onto
 * the end of the packet currently at the tail of ifscq.
 *
 * If inm->in6m_scrv is greater than zero the pending packets are left on
 * the group's queue and duplicates are merged instead; otherwise the
 * packets themselves are removed from the group's queue.
 *
 * The caller must hold the in6_multi lock for inm.
 *
 * Returns 0 on success, or ENOMEM (a positive value) if a packet could
 * not be duplicated; packets processed before the failure stay merged.
 */
static int
mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
{
	struct ifqueue	*gq;
	struct mbuf	*m;		/* pending state-change */
	struct mbuf	*m0;		/* copy of pending state-change */
	struct mbuf	*mt;		/* last state-change in packet */
	struct mbuf	*n;
	int		 docopy, domerge;
	u_int		 recslen;

	IN6M_LOCK_ASSERT_HELD(inm);

	docopy = 0;
	domerge = 0;
	recslen = 0;

	/*
	 * If there are further pending retransmissions, make a writable
	 * copy of each queued state-change message before merging.
	 */
	if (inm->in6m_scrv > 0)
		docopy = 1;

	gq = &inm->in6m_scq;
#ifdef MLD_DEBUG
	if (gq->ifq_head == NULL) {
		MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
		    __func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	}
#endif

	/*
	 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	 * packet might not always be at the head of the ifqueue.
	 */
	m = gq->ifq_head;
	while (m != NULL) {
		/*
		 * Only merge the report into the current packet if
		 * there is sufficient space to do so; an MLDv2 report
		 * packet may only contain 65,535 group records.
		 * Always use a simple mbuf chain concatenation to do this,
		 * as large state changes for single groups may have
		 * allocated clusters.
		 */
		domerge = 0;
		mt = ifscq->ifq_tail;
		if (mt != NULL) {
			recslen = m_length(m);

			if ((mt->m_pkthdr.vt_nrecs +
			    m->m_pkthdr.vt_nrecs <=
			    MLD_V2_REPORT_MAXRECS) &&
			    (mt->m_pkthdr.len + recslen <=
			    (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
				domerge = 1;
		}

		/*
		 * NOTE(review): the fullness test is applied to gq, the
		 * group's own queue being drained, not to ifscq, the queue
		 * being appended to -- confirm this is intended.
		 */
		if (!domerge && IF_QFULL(gq)) {
			MLD_PRINTF(("%s: outbound queue full, skipping whole "
			    "packet 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			/* Save the successor before m is possibly freed. */
			n = m->m_nextpkt;
			if (!docopy) {
				IF_REMQUEUE(gq, m);
				m_freem(m);
			}
			m = n;
			continue;
		}

		if (!docopy) {
			/* Take the packet itself off the group's queue. */
			MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			n = m->m_nextpkt;
			IF_REMQUEUE(gq, m);
			m0 = m;
			m = n;
		} else {
			/* Leave the original queued; work on a duplicate. */
			MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m)));
			m0 = m_dup(m, M_NOWAIT);
			if (m0 == NULL)
				return (ENOMEM);
			m0->m_nextpkt = NULL;
			m = m->m_nextpkt;
		}

		if (!domerge) {
			MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
			    __func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
			IF_ENQUEUE(ifscq, m0);
		} else {
			struct mbuf *mtl;	/* last mbuf of packet mt */

			MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
			    "0x%llx)\n", __func__,
			    (uint64_t)VM_KERNEL_ADDRPERM(m0),
			    (uint64_t)VM_KERNEL_ADDRPERM(mt)));

			/*
			 * Chain m0 onto the end of mt and fold its length
			 * and record count into mt's packet header.  Note
			 * that m0's vt_nrecs is read after M_PKTHDR has
			 * been cleared on it.
			 */
			mtl = m_last(mt);
			m0->m_flags &= ~M_PKTHDR;
			mt->m_pkthdr.len += recslen;
			mt->m_pkthdr.vt_nrecs +=
			    m0->m_pkthdr.vt_nrecs;

			mtl->m_next = m0;
		}
	}

	return (0);
}
3345
3346 /*
3347  * Respond to a pending MLDv2 General Query.
3348  */
3349 static uint32_t
3350 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3351 {
3352         struct ifnet            *ifp;
3353         struct in6_multi        *inm;
3354         struct in6_multistep    step;
3355         int                      retval;
3356
3357         MLI_LOCK_ASSERT_HELD(mli);
3358
3359         VERIFY(mli->mli_version == MLD_VERSION_2);
3360
3361         ifp = mli->mli_ifp;
3362         MLI_UNLOCK(mli);
3363
3364         in6_multihead_lock_shared();
3365         IN6_FIRST_MULTI(step, inm);
3366         while (inm != NULL) {
3367                 IN6M_LOCK(inm);
3368                 if (inm->in6m_ifp != ifp)
3369                         goto next;
3370
3371                 switch (inm->in6m_state) {
3372                 case MLD_NOT_MEMBER:
3373                 case MLD_SILENT_MEMBER:
3374                         break;
3375                 case MLD_REPORTING_MEMBER:
3376                 case MLD_IDLE_MEMBER:
3377                 case MLD_LAZY_MEMBER:
3378                 case MLD_SLEEPING_MEMBER:
3379                 case MLD_AWAKENING_MEMBER:
3380                         inm->in6m_state = MLD_REPORTING_MEMBER;
3381                         MLI_LOCK(mli);
3382                         retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3383                             inm, 0, 0, 0, 0);
3384                         MLI_UNLOCK(mli);
3385                         MLD_PRINTF(("%s: enqueue record = %d\n",
3386                             __func__, retval));
3387                         break;
3388                 case MLD_G_QUERY_PENDING_MEMBER:
3389                 case MLD_SG_QUERY_PENDING_MEMBER:
3390                 case MLD_LEAVING_MEMBER:
3391                         break;
3392                 }
3393 next:
3394                 IN6M_UNLOCK(inm);
3395                 IN6_NEXT_MULTI(step, inm);
3396         }
3397         in6_multihead_lock_done();
3398
3399         MLI_LOCK(mli);
3400         mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3401         MLI_LOCK_ASSERT_HELD(mli);
3402
3403         /*
3404          * Slew transmission of bursts over 1 second intervals.
3405          */
3406         if (mli->mli_gq.ifq_head != NULL) {
3407                 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3408                     MLD_RESPONSE_BURST_INTERVAL);
3409         }
3410
3411         return (mli->mli_v2_timer);
3412 }
3413
3414 /*
3415  * Transmit the next pending message in the output queue.
3416  *
3417  * Must not be called with in6m_lockm or mli_lock held.
3418  */
3419 static void
3420 mld_dispatch_packet(struct mbuf *m)
3421 {
3422         struct ip6_moptions     *im6o;
3423         struct ifnet            *ifp;
3424         struct ifnet            *oifp = NULL;
3425         struct mbuf             *m0;
3426         struct mbuf             *md;
3427         struct ip6_hdr          *ip6;
3428         struct mld_hdr          *mld;
3429         int                      error;
3430         int                      off;
3431         int                      type;
3432
3433         MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
3434             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3435
3436         /*
3437          * Check if the ifnet is still attached.
3438          */
3439         ifp = mld_restore_context(m);
3440         if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3441                 MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
3442                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
3443                     (u_int)if_index));
3444                 m_freem(m);
3445                 ip6stat.ip6s_noroute++;
3446                 return;
3447         }
3448
3449         im6o = ip6_allocmoptions(M_WAITOK);
3450         if (im6o == NULL) {
3451                 m_freem(m);
3452                 return;
3453         }
3454
3455         im6o->im6o_multicast_hlim  = 1;
3456 #if MROUTING
3457         im6o->im6o_multicast_loop = (ip6_mrouter != NULL);
3458 #else
3459         im6o->im6o_multicast_loop = 0;
3460 #endif
3461         im6o->im6o_multicast_ifp = ifp;
3462
3463         if (m->m_flags & M_MLDV1) {
3464                 m0 = m;
3465         } else {
3466                 m0 = mld_v2_encap_report(ifp, m);
3467                 if (m0 == NULL) {
3468                         MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
3469                             (uint64_t)VM_KERNEL_ADDRPERM(m)));
3470                         /*
3471                          * mld_v2_encap_report() has already freed our mbuf.
3472                          */
3473                         IM6O_REMREF(im6o);
3474                         ip6stat.ip6s_odropped++;
3475                         return;
3476                 }
3477         }
3478
3479         mld_scrub_context(m0);
3480         m->m_flags &= ~(M_PROTOFLAGS);
3481         m0->m_pkthdr.rcvif = lo_ifp;
3482
3483         ip6 = mtod(m0, struct ip6_hdr *);
3484         (void) in6_setscope(&ip6->ip6_dst, ifp, NULL);
3485
3486         /*
3487          * Retrieve the ICMPv6 type before handoff to ip6_output(),
3488          * so we can bump the stats.
3489          */
3490         md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3491         mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3492         type = mld->mld_type;
3493
3494         if (ifp->if_eflags & IFEF_TXSTART) {
3495                 /*
3496                  * Use control service class if the outgoing
3497                  * interface supports transmit-start model.
3498                  */
3499                 (void) m_set_service_class(m0, MBUF_SC_CTL);
3500         }
3501
3502         error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3503             &oifp, NULL);
3504
3505         IM6O_REMREF(im6o);
3506
3507         if (error) {
3508                 MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
3509                     (uint64_t)VM_KERNEL_ADDRPERM(m0), error));
3510                 if (oifp != NULL)
3511                         ifnet_release(oifp);
3512                 return;
3513         }
3514
3515         icmp6stat.icp6s_outhist[type]++;
3516         if (oifp != NULL) {
3517                 icmp6_ifstat_inc(oifp, ifs6_out_msg);
3518                 switch (type) {
3519                 case MLD_LISTENER_REPORT:
3520                 case MLDV2_LISTENER_REPORT:
3521                         icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3522                         break;
3523                 case MLD_LISTENER_DONE:
3524                         icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3525                         break;
3526                 }
3527                 ifnet_release(oifp);
3528         }
3529 }
3530
3531 /*
3532  * Encapsulate an MLDv2 report.
3533  *
3534  * KAME IPv6 requires that hop-by-hop options be passed separately,
3535  * and that the IPv6 header be prepended in a separate mbuf.
3536  *
3537  * Returns a pointer to the new mbuf chain head, or NULL if the
3538  * allocation failed.
3539  */
3540 static struct mbuf *
3541 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3542 {
3543         struct mbuf             *mh;
3544         struct mldv2_report     *mld;
3545         struct ip6_hdr          *ip6;
3546         struct in6_ifaddr       *ia;
3547         int                      mldreclen;
3548
3549         VERIFY(m->m_flags & M_PKTHDR);
3550
3551         /*
3552          * RFC3590: OK to send as :: or tentative during DAD.
3553          */
3554         ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3555         if (ia == NULL)
3556                 MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3557
3558         MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3559         if (mh == NULL) {
3560                 if (ia != NULL)
3561                         IFA_REMREF(&ia->ia_ifa);
3562                 m_freem(m);
3563                 return (NULL);
3564         }
3565         MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3566
3567         mldreclen = m_length(m);
3568         MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3569
3570         mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3571         mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3572             sizeof(struct mldv2_report) + mldreclen;
3573
3574         ip6 = mtod(mh, struct ip6_hdr *);
3575         ip6->ip6_flow = 0;
3576         ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3577         ip6->ip6_vfc |= IPV6_VERSION;
3578         ip6->ip6_nxt = IPPROTO_ICMPV6;
3579         if (ia != NULL)
3580                 IFA_LOCK(&ia->ia_ifa);
3581         ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3582         if (ia != NULL) {
3583                 IFA_UNLOCK(&ia->ia_ifa);
3584                 IFA_REMREF(&ia->ia_ifa);
3585                 ia = NULL;
3586         }
3587         ip6->ip6_dst = in6addr_linklocal_allv2routers;
3588         /* scope ID will be set in netisr */
3589
3590         mld = (struct mldv2_report *)(ip6 + 1);
3591         mld->mld_type = MLDV2_LISTENER_REPORT;
3592         mld->mld_code = 0;
3593         mld->mld_cksum = 0;
3594         mld->mld_v2_reserved = 0;
3595         mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3596         m->m_pkthdr.vt_nrecs = 0;
3597         m->m_flags &= ~M_PKTHDR;
3598
3599         mh->m_next = m;
3600         mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3601             sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3602         return (mh);
3603 }
3604
3605 #ifdef MLD_DEBUG
3606 static const char *
3607 mld_rec_type_to_str(const int type)
3608 {
3609         switch (type) {
3610                 case MLD_CHANGE_TO_EXCLUDE_MODE:
3611                         return "TO_EX";
3612                         break;
3613                 case MLD_CHANGE_TO_INCLUDE_MODE:
3614                         return "TO_IN";
3615                         break;
3616                 case MLD_MODE_IS_EXCLUDE:
3617                         return "MODE_EX";
3618                         break;
3619                 case MLD_MODE_IS_INCLUDE:
3620                         return "MODE_IN";
3621                         break;
3622                 case MLD_ALLOW_NEW_SOURCES:
3623                         return "ALLOW_NEW";
3624                         break;
3625                 case MLD_BLOCK_OLD_SOURCES:
3626                         return "BLOCK_OLD";
3627                         break;
3628                 default:
3629                         break;
3630         }
3631         return "unknown";
3632 }
3633 #endif
3634
3635 void
3636 mld_init(void)
3637 {
3638
3639         MLD_PRINTF(("%s: initializing\n", __func__));
3640
3641         /* Setup lock group and attribute for mld_mtx */
3642         mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3643         mld_mtx_grp = lck_grp_alloc_init("mld_mtx\n", mld_mtx_grp_attr);
3644         mld_mtx_attr = lck_attr_alloc_init();
3645         lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3646
3647         ip6_initpktopts(&mld_po);
3648         mld_po.ip6po_hlim = 1;
3649         mld_po.ip6po_hbh = &mld_ra.hbh;
3650         mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3651         mld_po.ip6po_flags = IP6PO_DONTFRAG;
3652         LIST_INIT(&mli_head);
3653
3654         mli_size = sizeof (struct mld_ifinfo);
3655         mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3656             0, MLI_ZONE_NAME);
3657         if (mli_zone == NULL) {
3658                 panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3659                 /* NOTREACHED */
3660         }
3661         zone_change(mli_zone, Z_EXPAND, TRUE);
3662         zone_change(mli_zone, Z_CALLERACCT, FALSE);
3663 }