git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2015 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*-
	29	* Copyright (c) 2009 Bruce Simpson.
	30	*
	31	* Redistribution and use in source and binary forms, with or without
	32	* modification, are permitted provided that the following conditions
	33	* are met:
	34	* 1. Redistributions of source code must retain the above copyright
	35	* notice, this list of conditions and the following disclaimer.
	36	* 2. Redistributions in binary form must reproduce the above copyright
	37	* notice, this list of conditions and the following disclaimer in the
	38	* documentation and/or other materials provided with the distribution.
	39	* 3. The name of the author may not be used to endorse or promote
	40	* products derived from this software without specific prior written
	41	* permission.
	42	*
	43	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
	44	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	45	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	46	* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
	47	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	48	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	49	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	50	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	51	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	52	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	53	* SUCH DAMAGE.
	54	*/
	55
	56	/*
	57	* Copyright (c) 1988 Stephen Deering.
	58	* Copyright (c) 1992, 1993
	59	* The Regents of the University of California. All rights reserved.
	60	*
	61	* This code is derived from software contributed to Berkeley by
	62	* Stephen Deering of Stanford University.
	63	*
	64	* Redistribution and use in source and binary forms, with or without
	65	* modification, are permitted provided that the following conditions
	66	* are met:
	67	* 1. Redistributions of source code must retain the above copyright
	68	* notice, this list of conditions and the following disclaimer.
	69	* 2. Redistributions in binary form must reproduce the above copyright
	70	* notice, this list of conditions and the following disclaimer in the
	71	* documentation and/or other materials provided with the distribution.
	72	* 3. All advertising materials mentioning features or use of this software
	73	* must display the following acknowledgement:
	74	* This product includes software developed by the University of
	75	* California, Berkeley and its contributors.
	76	* 4. Neither the name of the University nor the names of its contributors
	77	* may be used to endorse or promote products derived from this software
	78	* without specific prior written permission.
	79	*
	80	* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
	81	* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	82	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	83	* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
	84	* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	85	* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
	86	* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
	87	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
	88	* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
	89	* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
	90	* SUCH DAMAGE.
	91	*
	92	* @(#)igmp.c 8.1 (Berkeley) 7/19/93
	93	*/
	94	/*
	95	* NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
	96	* support for mandatory and extensible security protections. This notice
	97	* is included in support of clause 2.2 (b) of the Apple Public License,
	98	* Version 2.0.
	99	*/
	100
	101	#include <sys/cdefs.h>
	102
	103	#include <sys/param.h>
	104	#include <sys/systm.h>
	105	#include <sys/mbuf.h>
	106	#include <sys/socket.h>
	107	#include <sys/protosw.h>
	108	#include <sys/sysctl.h>
	109	#include <sys/kernel.h>
	110	#include <sys/malloc.h>
	111	#include <sys/mcache.h>
	112
	113	#include <dev/random/randomdev.h>
	114
	115	#include <kern/zalloc.h>
	116
	117	#include <net/if.h>
	118	#include <net/route.h>
	119
	120	#include <netinet/in.h>
	121	#include <netinet/in_var.h>
	122	#include <netinet6/in6_var.h>
	123	#include <netinet/ip6.h>
	124	#include <netinet6/ip6_var.h>
	125	#include <netinet6/scope6_var.h>
	126	#include <netinet/icmp6.h>
	127	#include <netinet6/mld6.h>
	128	#include <netinet6/mld6_var.h>
	129
	130	/* Lock group and attribute for mld_mtx */
	131	static lck_attr_t *mld_mtx_attr;
	132	static lck_grp_t *mld_mtx_grp;
	133	static lck_grp_attr_t *mld_mtx_grp_attr;
	134
	135	/*
	136	* Locking and reference counting:
	137	*
	138	* mld_mtx mainly protects mli_head. In cases where both mld_mtx and
	139	* in6_multihead_lock must be held, the former must be acquired first in order
	140	* to maintain lock ordering. It is not a requirement that mld_mtx be
	141	* acquired first before in6_multihead_lock, but in case both must be acquired
	142	* in succession, the correct lock ordering must be followed.
	143	*
	144	* Instead of walking the if_multiaddrs list at the interface and returning
	145	* the ifma_protospec value of a matching entry, we search the global list
	146	* of in6_multi records and find it that way; this is done with in6_multihead
	147	* lock held. Doing so avoids the race condition issues that many other BSDs
	148	* suffer from (therefore in our implementation, ifma_protospec will never be
	149	* NULL for as long as the in6_multi is valid.)
	150	*
	151	* The above creates a requirement for the in6_multi to stay in in6_multihead
	152	* list even after the final MLD leave (in MLDv2 mode) until no longer needs
	153	* be retransmitted (this is not required for MLDv1.) In order to handle
	154	* this, the request and reference counts of the in6_multi are bumped up when
	155	* the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
	156	* handler. Each in6_multi holds a reference to the underlying mld_ifinfo.
	157	*
	158	* Thus, the permitted lock order is:
	159	*
	160	* mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
	161	*
	162	* Any may be taken independently, but if any are held at the same time,
	163	* the above lock order must be followed.
	164	*/
	165	static decl_lck_mtx_data(, mld_mtx);
	166
	167	SLIST_HEAD(mld_in6m_relhead, in6_multi);
	168
	169	static void mli_initvar(struct mld_ifinfo , struct ifnet , int);
	170	static struct mld_ifinfo *mli_alloc(int);
	171	static void mli_free(struct mld_ifinfo *);
	172	static void mli_delete(const struct ifnet , struct mld_in6m_relhead );
	173	static void mld_dispatch_packet(struct mbuf *);
	174	static void mld_final_leave(struct in6_multi , struct mld_ifinfo ,
	175	struct mld_tparams *);
	176	static int mld_handle_state_change(struct in6_multi , struct mld_ifinfo ,
	177	struct mld_tparams *);
	178	static int mld_initial_join(struct in6_multi , struct mld_ifinfo ,
	179	struct mld_tparams *, const int);
	180	#ifdef MLD_DEBUG
	181	static const char * mld_rec_type_to_str(const int);
	182	#endif
	183	static uint32_t mld_set_version(struct mld_ifinfo *, const int);
	184	static void mld_flush_relq(struct mld_ifinfo , struct mld_in6m_relhead );
	185	static void mld_dispatch_queue(struct mld_ifinfo , struct ifqueue , int);
	186	static int mld_v1_input_query(struct ifnet , const struct ip6_hdr ,
	187	/const/ struct mld_hdr *);
	188	static int mld_v1_input_report(struct ifnet , struct mbuf ,
	189	const struct ip6_hdr , /const/ struct mld_hdr );
	190	static void mld_v1_process_group_timer(struct in6_multi *, const int);
	191	static void mld_v1_process_querier_timers(struct mld_ifinfo *);
	192	static int mld_v1_transmit_report(struct in6_multi *, const int);
	193	static uint32_t mld_v1_update_group(struct in6_multi *, const int);
	194	static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
	195	static uint32_t mld_v2_dispatch_general_query(struct mld_ifinfo *);
	196	static struct mbuf *
	197	mld_v2_encap_report(struct ifnet , struct mbuf );
	198	static int mld_v2_enqueue_filter_change(struct ifqueue *,
	199	struct in6_multi *);
	200	static int mld_v2_enqueue_group_record(struct ifqueue *,
	201	struct in6_multi *, const int, const int, const int,
	202	const int);
	203	static int mld_v2_input_query(struct ifnet , const struct ip6_hdr ,
	204	struct mbuf *, const int, const int);
	205	static int mld_v2_merge_state_changes(struct in6_multi *,
	206	struct ifqueue *);
	207	static void mld_v2_process_group_timers(struct mld_ifinfo *,
	208	struct ifqueue , struct ifqueue ,
	209	struct in6_multi *, const int);
	210	static int mld_v2_process_group_query(struct in6_multi *,
	211	int, struct mbuf *, const int);
	212	static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
	213	static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
	214	static int sysctl_mld_v2enable SYSCTL_HANDLER_ARGS;
	215
	216	static int mld_timeout_run; /* MLD timer is scheduled to run */
	217	static void mld_timeout(void *);
	218	static void mld_sched_timeout(void);
	219
	220	/*
	221	* Normative references: RFC 2710, RFC 3590, RFC 3810.
	222	*/
	223	static struct timeval mld_gsrdelay = {10, 0};
	224	static LIST_HEAD(, mld_ifinfo) mli_head;
	225
	226	static int querier_present_timers_running6;
	227	static int interface_timers_running6;
	228	static int state_change_timers_running6;
	229	static int current_state_timers_running6;
	230
	/*
	 * Subsystem lock macros: thin wrappers around the lck_mtx primitives
	 * operating on the global mld_mtx.
	 */
	#define	MLD_LOCK()			lck_mtx_lock(&mld_mtx)
	#define	MLD_LOCK_ASSERT_HELD()		\
		lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
	#define	MLD_LOCK_ASSERT_NOTHELD()	\
		lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
	#define	MLD_UNLOCK()			lck_mtx_unlock(&mld_mtx)
	242
	/*
	 * Queue an in6_multi record on a list of detached records (linked
	 * through in6m_dtle) so that it can be released later.
	 */
	#define	MLD_ADD_DETACHED_IN6M(_head, _in6m) {				\
		SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle);			\
	}

	/*
	 * Unlink every record from a detached list and drop one reference on
	 * each via IN6M_REMREF(); the list must be empty afterwards.  The
	 * iteration cursors are pointers, as required by SLIST_FOREACH_SAFE.
	 */
	#define	MLD_REMOVE_DETACHED_IN6M(_head) {				\
		struct in6_multi *_in6m, *_inm_tmp;				\
		SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) {		\
			SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle);	\
			IN6M_REMREF(_in6m);					\
		}								\
		VERIFY(SLIST_EMPTY(_head));					\
	}
	255
	#define	MLI_ZONE_MAX		64		/* maximum elements in zone */
	#define	MLI_ZONE_NAME		"mld_ifinfo"	/* zone name */

	static unsigned int mli_size;			/* size of zone element */
	static struct zone *mli_zone;			/* zone for mld_ifinfo */
	261
	262	SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */
	263
	264	SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW \| CTLFLAG_LOCKED, 0,
	265	"IPv6 Multicast Listener Discovery");
	266	SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
	267	CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	268	&mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
	269	"Rate limit for MLDv2 Group-and-Source queries in seconds");
	270
	271	SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD \| CTLFLAG_LOCKED,
	272	sysctl_mld_ifinfo, "Per-interface MLDv2 state");
	273
	274	static int mld_v1enable = 1;
	275	SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW \| CTLFLAG_LOCKED,
	276	&mld_v1enable, 0, "Enable fallback to MLDv1");
	277
	278	static int mld_v2enable = 1;
	279	SYSCTL_PROC(_net_inet6_mld, OID_AUTO, v2enable,
	280	CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	281	&mld_v2enable, 0, sysctl_mld_v2enable, "I",
	282	"Enable MLDv2 (debug purposes only)");
	283
	284	static int mld_use_allow = 1;
	285	SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW \| CTLFLAG_LOCKED,
	286	&mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");
	287
	288	#ifdef MLD_DEBUG
	289	int mld_debug = 0;
	290	SYSCTL_INT(_net_inet6_mld, OID_AUTO,
	291	debug, CTLFLAG_RW \| CTLFLAG_LOCKED, &mld_debug, 0, "");
	292	#endif
	293	/*
	294	* Packed Router Alert option structure declaration.
	295	*/
	296	struct mld_raopt {
	297	struct ip6_hbh hbh;
	298	struct ip6_opt pad;
	299	struct ip6_opt_router ra;
	300	} __packed;
	301
	302	/*
	303	* Router Alert hop-by-hop option header.
	304	*/
	305	static struct mld_raopt mld_ra = {
	306	.hbh = { 0, 0 },
	307	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
	308	.ra = {
	309	.ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
	310	.ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
	311	.ip6or_value = {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
	312	(IP6OPT_RTALERT_MLD & 0xFF) }
	313	}
	314	};
	315	static struct ip6_pktopts mld_po;
	316
	317	/* Store MLDv2 record count in the module private scratch space */
	318	#define vt_nrecs pkt_mpriv.__mpriv_u.__mpriv32[0].__mpriv32_u.__val16[0]
	319
	320	static __inline void
	321	mld_save_context(struct mbuf m, struct ifnet ifp)
	322	{
	323	m->m_pkthdr.rcvif = ifp;
	324	}
	325
	326	static __inline void
	327	mld_scrub_context(struct mbuf *m)
	328	{
	329	m->m_pkthdr.rcvif = NULL;
	330	}
	331
	332	/*
	333	* Restore context from a queued output chain.
	334	* Return saved ifp.
	335	*/
	336	static __inline struct ifnet *
	337	mld_restore_context(struct mbuf *m)
	338	{
	339	return (m->m_pkthdr.rcvif);
	340	}
	341
	342	/*
	343	* Retrieve or set threshold between group-source queries in seconds.
	344	*/
	345	static int
	346	sysctl_mld_gsr SYSCTL_HANDLER_ARGS
	347	{
	348	#pragma unused(arg1, arg2)
	349	int error;
	350	int i;
	351
	352	MLD_LOCK();
	353
	354	i = mld_gsrdelay.tv_sec;
	355
	356	error = sysctl_handle_int(oidp, &i, 0, req);
	357	if (error \|\| !req->newptr)
	358	goto out_locked;
	359
	360	if (i < -1 \|\| i >= 60) {
	361	error = EINVAL;
	362	goto out_locked;
	363	}
	364
	365	mld_gsrdelay.tv_sec = i;
	366
	367	out_locked:
	368	MLD_UNLOCK();
	369	return (error);
	370	}
	371	/*
	372	* Expose struct mld_ifinfo to userland, keyed by ifindex.
	373	* For use by ifmcstat(8).
	374	*
	375	*/
	376	static int
	377	sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
	378	{
	379	#pragma unused(oidp)
	380	int *name;
	381	int error;
	382	u_int namelen;
	383	struct ifnet *ifp;
	384	struct mld_ifinfo *mli;
	385	struct mld_ifinfo_u mli_u;
	386
	387	name = (int *)arg1;
	388	namelen = arg2;
	389
	390	if (req->newptr != USER_ADDR_NULL)
	391	return (EPERM);
	392
	393	if (namelen != 1)
	394	return (EINVAL);
	395
	396	MLD_LOCK();
	397
	398	if (name[0] <= 0 \|\| name[0] > (u_int)if_index) {
	399	error = ENOENT;
	400	goto out_locked;
	401	}
	402
	403	error = ENOENT;
	404
	405	ifnet_head_lock_shared();
	406	ifp = ifindex2ifnet[name[0]];
	407	ifnet_head_done();
	408	if (ifp == NULL)
	409	goto out_locked;
	410
	411	bzero(&mli_u, sizeof (mli_u));
	412
	413	LIST_FOREACH(mli, &mli_head, mli_link) {
	414	MLI_LOCK(mli);
	415	if (ifp != mli->mli_ifp) {
	416	MLI_UNLOCK(mli);
	417	continue;
	418	}
	419
	420	mli_u.mli_ifindex = mli->mli_ifp->if_index;
	421	mli_u.mli_version = mli->mli_version;
	422	mli_u.mli_v1_timer = mli->mli_v1_timer;
	423	mli_u.mli_v2_timer = mli->mli_v2_timer;
	424	mli_u.mli_flags = mli->mli_flags;
	425	mli_u.mli_rv = mli->mli_rv;
	426	mli_u.mli_qi = mli->mli_qi;
	427	mli_u.mli_qri = mli->mli_qri;
	428	mli_u.mli_uri = mli->mli_uri;
	429	MLI_UNLOCK(mli);
	430
	431	error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
	432	break;
	433	}
	434
	435	out_locked:
	436	MLD_UNLOCK();
	437	return (error);
	438	}
	439
	440	static int
	441	sysctl_mld_v2enable SYSCTL_HANDLER_ARGS
	442	{
	443	#pragma unused(arg1, arg2)
	444	int error;
	445	int i;
	446	struct mld_ifinfo *mli;
	447	struct mld_tparams mtp = { 0, 0, 0, 0 };
	448
	449	MLD_LOCK();
	450
	451	i = mld_v2enable;
	452
	453	error = sysctl_handle_int(oidp, &i, 0, req);
	454	if (error \|\| !req->newptr)
	455	goto out_locked;
	456
	457	if (i < 0 \|\| i > 1) {
	458	error = EINVAL;
	459	goto out_locked;
	460	}
	461
	462	mld_v2enable = i;
	463	/*
	464	* If we enabled v2, the state transition will take care of upgrading
	465	* the MLD version back to v2. Otherwise, we have to explicitly
	466	* downgrade. Note that this functionality is to be used for debugging.
	467	*/
	468	if (mld_v2enable == 1)
	469	goto out_locked;
	470
	471	LIST_FOREACH(mli, &mli_head, mli_link) {
	472	MLI_LOCK(mli);
	473	if (mld_set_version(mli, MLD_VERSION_1) > 0)
	474	mtp.qpt = 1;
	475	MLI_UNLOCK(mli);
	476	}
	477
	478	out_locked:
	479	MLD_UNLOCK();
	480
	481	mld_set_timeout(&mtp);
	482
	483	return (error);
	484	}
	485
	486	/*
	487	* Dispatch an entire queue of pending packet chains.
	488	*
	489	* Must not be called with in6m_lock held.
	490	*/
	491	static void
	492	mld_dispatch_queue(struct mld_ifinfo mli, struct ifqueue ifq, int limit)
	493	{
	494	struct mbuf *m;
	495
	496	if (mli != NULL)
	497	MLI_LOCK_ASSERT_HELD(mli);
	498
	499	for (;;) {
	500	IF_DEQUEUE(ifq, m);
	501	if (m == NULL)
	502	break;
	503	MLD_PRINTF(("%s: dispatch 0x%llx from 0x%llx\n", __func__,
	504	(uint64_t)VM_KERNEL_ADDRPERM(ifq),
	505	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	506	if (mli != NULL)
	507	MLI_UNLOCK(mli);
	508	mld_dispatch_packet(m);
	509	if (mli != NULL)
	510	MLI_LOCK(mli);
	511	if (--limit == 0)
	512	break;
	513	}
	514
	515	if (mli != NULL)
	516	MLI_LOCK_ASSERT_HELD(mli);
	517	}
	518
	519	/*
	520	* Filter outgoing MLD report state by group.
	521	*
	522	* Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
	523	* and node-local addresses. However, kernel and socket consumers
	524	* always embed the KAME scope ID in the address provided, so strip it
	525	* when performing comparison.
	526	* Note: This is not the same as the multicast scope.
	527	*
	528	* Return zero if the given group is one for which MLD reports
	529	* should be suppressed, or non-zero if reports should be issued.
	530	*/
	531	static __inline__ int
	532	mld_is_addr_reported(const struct in6_addr *addr)
	533	{
	534
	535	VERIFY(IN6_IS_ADDR_MULTICAST(addr));
	536
	537	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
	538	return (0);
	539
	540	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
	541	struct in6_addr tmp = *addr;
	542	in6_clearscope(&tmp);
	543	if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
	544	return (0);
	545	}
	546
	547	return (1);
	548	}
	549
	550	/*
	551	* Attach MLD when PF_INET6 is attached to an interface.
	552	*/
	553	struct mld_ifinfo *
	554	mld_domifattach(struct ifnet *ifp, int how)
	555	{
	556	struct mld_ifinfo *mli;
	557
	558	MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
	559	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	560
	561	mli = mli_alloc(how);
	562	if (mli == NULL)
	563	return (NULL);
	564
	565	MLD_LOCK();
	566
	567	MLI_LOCK(mli);
	568	mli_initvar(mli, ifp, 0);
	569	mli->mli_debug \|= IFD_ATTACHED;
	570	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
	571	MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
	572	MLI_UNLOCK(mli);
	573	ifnet_lock_shared(ifp);
	574	mld6_initsilent(ifp, mli);
	575	ifnet_lock_done(ifp);
	576
	577	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
	578
	579	MLD_UNLOCK();
	580
	581	MLD_PRINTF(("%s: allocate mld_ifinfo for ifp 0x%llx(%s)\n",
	582	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	583
	584	return (mli);
	585	}
	586
	587	/*
	588	* Attach MLD when PF_INET6 is reattached to an interface. Caller is
	589	* expected to have an outstanding reference to the mli.
	590	*/
	591	void
	592	mld_domifreattach(struct mld_ifinfo *mli)
	593	{
	594	struct ifnet *ifp;
	595
	596	MLD_LOCK();
	597
	598	MLI_LOCK(mli);
	599	VERIFY(!(mli->mli_debug & IFD_ATTACHED));
	600	ifp = mli->mli_ifp;
	601	VERIFY(ifp != NULL);
	602	mli_initvar(mli, ifp, 1);
	603	mli->mli_debug \|= IFD_ATTACHED;
	604	MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
	605	MLI_UNLOCK(mli);
	606	ifnet_lock_shared(ifp);
	607	mld6_initsilent(ifp, mli);
	608	ifnet_lock_done(ifp);
	609
	610	LIST_INSERT_HEAD(&mli_head, mli, mli_link);
	611
	612	MLD_UNLOCK();
	613
	614	MLD_PRINTF(("%s: reattached mld_ifinfo for ifp 0x%llx(%s)\n",
	615	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	616	}
	617
	618	/*
	619	* Hook for domifdetach.
	620	*/
	621	void
	622	mld_domifdetach(struct ifnet *ifp)
	623	{
	624	SLIST_HEAD(, in6_multi) in6m_dthead;
	625
	626	SLIST_INIT(&in6m_dthead);
	627
	628	MLD_PRINTF(("%s: called for ifp 0x%llx(%s)\n", __func__,
	629	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	630
	631	MLD_LOCK();
	632	mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
	633	MLD_UNLOCK();
	634
	635	/* Now that we're dropped all locks, release detached records */
	636	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
	637	}
	638
	639	/*
	640	* Called at interface detach time. Note that we only flush all deferred
	641	* responses and record releases; all remaining inm records and their source
	642	* entries related to this interface are left intact, in order to handle
	643	* the reattach case.
	644	*/
	645	static void
	646	mli_delete(const struct ifnet ifp, struct mld_in6m_relhead in6m_dthead)
	647	{
	648	struct mld_ifinfo mli, tmli;
	649
	650	MLD_LOCK_ASSERT_HELD();
	651
	652	LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
	653	MLI_LOCK(mli);
	654	if (mli->mli_ifp == ifp) {
	655	/*
	656	* Free deferred General Query responses.
	657	*/
	658	IF_DRAIN(&mli->mli_gq);
	659	IF_DRAIN(&mli->mli_v1q);
	660	mld_flush_relq(mli, in6m_dthead);
	661	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
	662	mli->mli_debug &= ~IFD_ATTACHED;
	663	MLI_UNLOCK(mli);
	664
	665	LIST_REMOVE(mli, mli_link);
	666	MLI_REMREF(mli); /* release mli_head reference */
	667	return;
	668	}
	669	MLI_UNLOCK(mli);
	670	}
	671	panic("%s: mld_ifinfo not found for ifp %p(%s)\n", __func__,
	672	ifp, ifp->if_xname);
	673	}
	674
	675	__private_extern__ void
	676	mld6_initsilent(struct ifnet ifp, struct mld_ifinfo mli)
	677	{
	678	ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
	679
	680	MLI_LOCK_ASSERT_NOTHELD(mli);
	681	MLI_LOCK(mli);
	682	if (!(ifp->if_flags & IFF_MULTICAST) &&
	683	(ifp->if_eflags & (IFEF_IPV6_ND6ALT\|IFEF_LOCALNET_PRIVATE)))
	684	mli->mli_flags \|= MLIF_SILENT;
	685	else
	686	mli->mli_flags &= ~MLIF_SILENT;
	687	MLI_UNLOCK(mli);
	688	}
	689
	690	static void
	691	mli_initvar(struct mld_ifinfo mli, struct ifnet ifp, int reattach)
	692	{
	693	MLI_LOCK_ASSERT_HELD(mli);
	694
	695	mli->mli_ifp = ifp;
	696	if (mld_v2enable)
	697	mli->mli_version = MLD_VERSION_2;
	698	else
	699	mli->mli_version = MLD_VERSION_1;
	700	mli->mli_flags = 0;
	701	mli->mli_rv = MLD_RV_INIT;
	702	mli->mli_qi = MLD_QI_INIT;
	703	mli->mli_qri = MLD_QRI_INIT;
	704	mli->mli_uri = MLD_URI_INIT;
	705
	706	if (mld_use_allow)
	707	mli->mli_flags \|= MLIF_USEALLOW;
	708	if (!reattach)
	709	SLIST_INIT(&mli->mli_relinmhead);
	710
	711	/*
	712	* Responses to general queries are subject to bounds.
	713	*/
	714	mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
	715	mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
	716	}
	717
	718	static struct mld_ifinfo *
	719	mli_alloc(int how)
	720	{
	721	struct mld_ifinfo *mli;
	722
	723	mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
	724	if (mli != NULL) {
	725	bzero(mli, mli_size);
	726	lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
	727	mli->mli_debug \|= IFD_ALLOC;
	728	}
	729	return (mli);
	730	}
	731
	732	static void
	733	mli_free(struct mld_ifinfo *mli)
	734	{
	735	MLI_LOCK(mli);
	736	if (mli->mli_debug & IFD_ATTACHED) {
	737	panic("%s: attached mli=%p is being freed", __func__, mli);
	738	/* NOTREACHED */
	739	} else if (mli->mli_ifp != NULL) {
	740	panic("%s: ifp not NULL for mli=%p", __func__, mli);
	741	/* NOTREACHED */
	742	} else if (!(mli->mli_debug & IFD_ALLOC)) {
	743	panic("%s: mli %p cannot be freed", __func__, mli);
	744	/* NOTREACHED */
	745	} else if (mli->mli_refcnt != 0) {
	746	panic("%s: non-zero refcnt mli=%p", __func__, mli);
	747	/* NOTREACHED */
	748	}
	749	mli->mli_debug &= ~IFD_ALLOC;
	750	MLI_UNLOCK(mli);
	751
	752	lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
	753	zfree(mli_zone, mli);
	754	}
	755
	756	void
	757	mli_addref(struct mld_ifinfo *mli, int locked)
	758	{
	759	if (!locked)
	760	MLI_LOCK_SPIN(mli);
	761	else
	762	MLI_LOCK_ASSERT_HELD(mli);
	763
	764	if (++mli->mli_refcnt == 0) {
	765	panic("%s: mli=%p wraparound refcnt", __func__, mli);
	766	/* NOTREACHED */
	767	}
	768	if (!locked)
	769	MLI_UNLOCK(mli);
	770	}
	771
	772	void
	773	mli_remref(struct mld_ifinfo *mli)
	774	{
	775	SLIST_HEAD(, in6_multi) in6m_dthead;
	776	struct ifnet *ifp;
	777
	778	MLI_LOCK_SPIN(mli);
	779
	780	if (mli->mli_refcnt == 0) {
	781	panic("%s: mli=%p negative refcnt", __func__, mli);
	782	/* NOTREACHED */
	783	}
	784
	785	--mli->mli_refcnt;
	786	if (mli->mli_refcnt > 0) {
	787	MLI_UNLOCK(mli);
	788	return;
	789	}
	790
	791	ifp = mli->mli_ifp;
	792	mli->mli_ifp = NULL;
	793	IF_DRAIN(&mli->mli_gq);
	794	IF_DRAIN(&mli->mli_v1q);
	795	SLIST_INIT(&in6m_dthead);
	796	mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
	797	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
	798	MLI_UNLOCK(mli);
	799
	800	/* Now that we're dropped all locks, release detached records */
	801	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
	802
	803	MLD_PRINTF(("%s: freeing mld_ifinfo for ifp 0x%llx(%s)\n",
	804	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	805
	806	mli_free(mli);
	807	}
	808
	809	/*
	810	* Process a received MLDv1 general or address-specific query.
	811	* Assumes that the query header has been pulled up to sizeof(mld_hdr).
	812	*
	813	* NOTE: Can't be fully const correct as we temporarily embed scope ID in
	814	* mld_addr. This is OK as we own the mbuf chain.
	815	*/
	816	static int
	817	mld_v1_input_query(struct ifnet ifp, const struct ip6_hdr ip6,
	818	/const/ struct mld_hdr *mld)
	819	{
	820	struct mld_ifinfo *mli;
	821	struct in6_multi *inm;
	822	int err = 0, is_general_query;
	823	uint16_t timer;
	824	struct mld_tparams mtp = { 0, 0, 0, 0 };
	825
	826	MLD_LOCK_ASSERT_NOTHELD();
	827
	828	is_general_query = 0;
	829
	830	if (!mld_v1enable) {
	831	MLD_PRINTF(("%s: ignore v1 query %s on ifp 0x%llx(%s)\n",
	832	__func__, ip6_sprintf(&mld->mld_addr),
	833	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	834	goto done;
	835	}
	836
	837	/*
	838	* RFC3810 Section 6.2: MLD queries must originate from
	839	* a router's link-local address.
	840	*/
	841	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
	842	MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
	843	__func__, ip6_sprintf(&ip6->ip6_src),
	844	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	845	goto done;
	846	}
	847
	848	/*
	849	* Do address field validation upfront before we accept
	850	* the query.
	851	*/
	852	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
	853	/*
	854	* MLDv1 General Query.
	855	* If this was not sent to the all-nodes group, ignore it.
	856	*/
	857	struct in6_addr dst;
	858
	859	dst = ip6->ip6_dst;
	860	in6_clearscope(&dst);
	861	if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) {
	862	err = EINVAL;
	863	goto done;
	864	}
	865	is_general_query = 1;
	866	} else {
	867	/*
	868	* Embed scope ID of receiving interface in MLD query for
	869	* lookup whilst we don't hold other locks.
	870	*/
	871	in6_setscope(&mld->mld_addr, ifp, NULL);
	872	}
	873
	874	/*
	875	* Switch to MLDv1 host compatibility mode.
	876	*/
	877	mli = MLD_IFINFO(ifp);
	878	VERIFY(mli != NULL);
	879
	880	MLI_LOCK(mli);
	881	mtp.qpt = mld_set_version(mli, MLD_VERSION_1);
	882	MLI_UNLOCK(mli);
	883
	884	timer = ntohs(mld->mld_maxdelay) / MLD_TIMER_SCALE;
	885	if (timer == 0)
	886	timer = 1;
	887
	888	if (is_general_query) {
	889	struct in6_multistep step;
	890
	891	MLD_PRINTF(("%s: process v1 general query on ifp 0x%llx(%s)\n",
	892	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	893	/*
	894	* For each reporting group joined on this
	895	* interface, kick the report timer.
	896	*/
	897	in6_multihead_lock_shared();
	898	IN6_FIRST_MULTI(step, inm);
	899	while (inm != NULL) {
	900	IN6M_LOCK(inm);
	901	if (inm->in6m_ifp == ifp)
	902	mtp.cst += mld_v1_update_group(inm, timer);
	903	IN6M_UNLOCK(inm);
	904	IN6_NEXT_MULTI(step, inm);
	905	}
	906	in6_multihead_lock_done();
	907	} else {
	908	/*
	909	* MLDv1 Group-Specific Query.
	910	* If this is a group-specific MLDv1 query, we need only
	911	* look up the single group to process it.
	912	*/
	913	in6_multihead_lock_shared();
	914	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
	915	in6_multihead_lock_done();
	916
	917	if (inm != NULL) {
	918	IN6M_LOCK(inm);
	919	MLD_PRINTF(("%s: process v1 query %s on "
	920	"ifp 0x%llx(%s)\n", __func__,
	921	ip6_sprintf(&mld->mld_addr),
	922	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	923	mtp.cst = mld_v1_update_group(inm, timer);
	924	IN6M_UNLOCK(inm);
	925	IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	926	}
	927	/* XXX Clear embedded scope ID as userland won't expect it. */
	928	in6_clearscope(&mld->mld_addr);
	929	}
	930	done:
	931	mld_set_timeout(&mtp);
	932
	933	return (err);
	934	}
	935
	936	/*
	937	* Update the report timer on a group in response to an MLDv1 query.
	938	*
	939	* If we are becoming the reporting member for this group, start the timer.
	940	* If we already are the reporting member for this group, and timer is
	941	* below the threshold, reset it.
	942	*
	943	* We may be updating the group for the first time since we switched
	944	* to MLDv2. If we are, then we must clear any recorded source lists,
	945	* and transition to REPORTING state; the group timer is overloaded
	946	* for group and group-source query responses.
	947	*
	948	* Unlike MLDv2, the delay per group should be jittered
	949	* to avoid bursts of MLDv1 reports.
	950	*/
	951	static uint32_t
	952	mld_v1_update_group(struct in6_multi *inm, const int timer)
	953	{
	954	IN6M_LOCK_ASSERT_HELD(inm);
	955
	956	MLD_PRINTF(("%s: %s/%s timer=%d\n", __func__,
	957	ip6_sprintf(&inm->in6m_addr),
	958	if_name(inm->in6m_ifp), timer));
	959
	960	switch (inm->in6m_state) {
	961	case MLD_NOT_MEMBER:
	962	case MLD_SILENT_MEMBER:
	963	break;
	964	case MLD_REPORTING_MEMBER:
	965	if (inm->in6m_timer != 0 &&
	966	inm->in6m_timer <= timer) {
	967	MLD_PRINTF(("%s: REPORTING and timer running, "
	968	"skipping.\n", __func__));
	969	break;
	970	}
	971	/* FALLTHROUGH */
	972	case MLD_SG_QUERY_PENDING_MEMBER:
	973	case MLD_G_QUERY_PENDING_MEMBER:
	974	case MLD_IDLE_MEMBER:
	975	case MLD_LAZY_MEMBER:
	976	case MLD_AWAKENING_MEMBER:
	977	MLD_PRINTF(("%s: ->REPORTING\n", __func__));
	978	inm->in6m_state = MLD_REPORTING_MEMBER;
	979	inm->in6m_timer = MLD_RANDOM_DELAY(timer);
	980	break;
	981	case MLD_SLEEPING_MEMBER:
	982	MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
	983	inm->in6m_state = MLD_AWAKENING_MEMBER;
	984	break;
	985	case MLD_LEAVING_MEMBER:
	986	break;
	987	}
	988
	989	return (inm->in6m_timer);
	990	}
	991
	992	/*
	993	* Process a received MLDv2 general, group-specific or
	994	* group-and-source-specific query.
	995	*
	996	* Assumes that the query header has been pulled up to sizeof(mldv2_query).
	997	*
	998	* Return 0 if successful, otherwise an appropriate error code is returned.
	999	*/
	1000	static int
	1001	mld_v2_input_query(struct ifnet ifp, const struct ip6_hdr ip6,
	1002	struct mbuf *m, const int off, const int icmp6len)
	1003	{
	1004	struct mld_ifinfo *mli;
	1005	struct mldv2_query *mld;
	1006	struct in6_multi *inm;
	1007	uint32_t maxdelay, nsrc, qqi;
	1008	int err = 0, is_general_query;
	1009	uint16_t timer;
	1010	uint8_t qrv;
	1011	struct mld_tparams mtp = { 0, 0, 0, 0 };
	1012
	1013	MLD_LOCK_ASSERT_NOTHELD();
	1014
	1015	is_general_query = 0;
	1016
	1017	if (!mld_v2enable) {
	1018	MLD_PRINTF(("%s: ignore v2 query %s on ifp 0x%llx(%s)\n",
	1019	__func__, ip6_sprintf(&ip6->ip6_src),
	1020	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1021	goto done;
	1022	}
	1023
	1024	/*
	1025	* RFC3810 Section 6.2: MLD queries must originate from
	1026	* a router's link-local address.
	1027	*/
	1028	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
	1029	MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
	1030	__func__, ip6_sprintf(&ip6->ip6_src),
	1031	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1032	goto done;
	1033	}
	1034
	1035	MLD_PRINTF(("%s: input v2 query on ifp 0x%llx(%s)\n", __func__,
	1036	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1037
	1038	mld = (struct mldv2_query )(mtod(m, uint8_t ) + off);
	1039
	1040	maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
	1041	if (maxdelay >= 32768) {
	1042	maxdelay = (MLD_MRC_MANT(maxdelay) \| 0x1000) <<
	1043	(MLD_MRC_EXP(maxdelay) + 3);
	1044	}
	1045	timer = maxdelay / MLD_TIMER_SCALE;
	1046	if (timer == 0)
	1047	timer = 1;
	1048
	1049	qrv = MLD_QRV(mld->mld_misc);
	1050	if (qrv < 2) {
	1051	MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
	1052	qrv, MLD_RV_INIT));
	1053	qrv = MLD_RV_INIT;
	1054	}
	1055
	1056	qqi = mld->mld_qqi;
	1057	if (qqi >= 128) {
	1058	qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
	1059	(MLD_QQIC_EXP(mld->mld_qqi) + 3);
	1060	}
	1061
	1062	nsrc = ntohs(mld->mld_numsrc);
	1063	if (nsrc > MLD_MAX_GS_SOURCES) {
	1064	err = EMSGSIZE;
	1065	goto done;
	1066	}
	1067	if (icmp6len < sizeof(struct mldv2_query) +
	1068	(nsrc * sizeof(struct in6_addr))) {
	1069	err = EMSGSIZE;
	1070	goto done;
	1071	}
	1072
	1073	/*
	1074	* Do further input validation upfront to avoid resetting timers
	1075	* should we need to discard this query.
	1076	*/
	1077	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
	1078	/*
	1079	* A general query with a source list has undefined
	1080	* behaviour; discard it.
	1081	*/
	1082	if (nsrc > 0) {
	1083	err = EINVAL;
	1084	goto done;
	1085	}
	1086	is_general_query = 1;
	1087	} else {
	1088	/*
	1089	* Embed scope ID of receiving interface in MLD query for
	1090	* lookup whilst we don't hold other locks (due to KAME
	1091	* locking lameness). We own this mbuf chain just now.
	1092	*/
	1093	in6_setscope(&mld->mld_addr, ifp, NULL);
	1094	}
	1095
	1096	mli = MLD_IFINFO(ifp);
	1097	VERIFY(mli != NULL);
	1098
	1099	MLI_LOCK(mli);
	1100	/*
	1101	* Discard the v2 query if we're in Compatibility Mode.
	1102	* The RFC is pretty clear that hosts need to stay in MLDv1 mode
	1103	* until the Old Version Querier Present timer expires.
	1104	*/
	1105	if (mli->mli_version != MLD_VERSION_2) {
	1106	MLI_UNLOCK(mli);
	1107	goto done;
	1108	}
	1109
	1110	mtp.qpt = mld_set_version(mli, MLD_VERSION_2);
	1111	mli->mli_rv = qrv;
	1112	mli->mli_qi = qqi;
	1113	mli->mli_qri = MAX(timer, MLD_QRI_MIN);
	1114
	1115	MLD_PRINTF(("%s: qrv %d qi %d qri %d\n", __func__, mli->mli_rv,
	1116	mli->mli_qi, mli->mli_qri));
	1117
	1118	if (is_general_query) {
	1119	/*
	1120	* MLDv2 General Query.
	1121	*
	1122	* Schedule a current-state report on this ifp for
	1123	* all groups, possibly containing source lists.
	1124	*
	1125	* If there is a pending General Query response
	1126	* scheduled earlier than the selected delay, do
	1127	* not schedule any other reports.
	1128	* Otherwise, reset the interface timer.
	1129	*/
	1130	MLD_PRINTF(("%s: process v2 general query on ifp 0x%llx(%s)\n",
	1131	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1132	if (mli->mli_v2_timer == 0 \|\| mli->mli_v2_timer >= timer) {
	1133	mtp.it = mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
	1134	}
	1135	MLI_UNLOCK(mli);
	1136	} else {
	1137	MLI_UNLOCK(mli);
	1138	/*
	1139	* MLDv2 Group-specific or Group-and-source-specific Query.
	1140	*
	1141	* Group-source-specific queries are throttled on
	1142	* a per-group basis to defeat denial-of-service attempts.
	1143	* Queries for groups we are not a member of on this
	1144	* link are simply ignored.
	1145	*/
	1146	in6_multihead_lock_shared();
	1147	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
	1148	in6_multihead_lock_done();
	1149	if (inm == NULL)
	1150	goto done;
	1151
	1152	IN6M_LOCK(inm);
	1153	if (nsrc > 0) {
	1154	if (!ratecheck(&inm->in6m_lastgsrtv,
	1155	&mld_gsrdelay)) {
	1156	MLD_PRINTF(("%s: GS query throttled.\n",
	1157	__func__));
	1158	IN6M_UNLOCK(inm);
	1159	IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	1160	goto done;
	1161	}
	1162	}
	1163	MLD_PRINTF(("%s: process v2 group query on ifp 0x%llx(%s)\n",
	1164	__func__, (uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1165	/*
	1166	* If there is a pending General Query response
	1167	* scheduled sooner than the selected delay, no
	1168	* further report need be scheduled.
	1169	* Otherwise, prepare to respond to the
	1170	* group-specific or group-and-source query.
	1171	*/
	1172	MLI_LOCK(mli);
	1173	mtp.it = mli->mli_v2_timer;
	1174	MLI_UNLOCK(mli);
	1175	if (mtp.it == 0 \|\| mtp.it >= timer) {
	1176	(void) mld_v2_process_group_query(inm, timer, m, off);
	1177	mtp.cst = inm->in6m_timer;
	1178	}
	1179	IN6M_UNLOCK(inm);
	1180	IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	1181	/* XXX Clear embedded scope ID as userland won't expect it. */
	1182	in6_clearscope(&mld->mld_addr);
	1183	}
	1184	done:
	1185	if (mtp.it > 0) {
	1186	MLD_PRINTF(("%s: v2 general query response scheduled in "
	1187	"T+%d seconds on ifp 0x%llx(%s)\n", __func__, mtp.it,
	1188	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1189	}
	1190	mld_set_timeout(&mtp);
	1191
	1192	return (err);
	1193	}
	1194
	1195	/*
	1196	* Process a recieved MLDv2 group-specific or group-and-source-specific
	1197	* query.
	1198	* Return <0 if any error occured. Currently this is ignored.
	1199	*/
	1200	static int
	1201	mld_v2_process_group_query(struct in6_multi inm, int timer, struct mbuf m0,
	1202	const int off)
	1203	{
	1204	struct mldv2_query *mld;
	1205	int retval;
	1206	uint16_t nsrc;
	1207
	1208	IN6M_LOCK_ASSERT_HELD(inm);
	1209
	1210	retval = 0;
	1211	mld = (struct mldv2_query )(mtod(m0, uint8_t ) + off);
	1212
	1213	switch (inm->in6m_state) {
	1214	case MLD_NOT_MEMBER:
	1215	case MLD_SILENT_MEMBER:
	1216	case MLD_SLEEPING_MEMBER:
	1217	case MLD_LAZY_MEMBER:
	1218	case MLD_AWAKENING_MEMBER:
	1219	case MLD_IDLE_MEMBER:
	1220	case MLD_LEAVING_MEMBER:
	1221	return (retval);
	1222	break;
	1223	case MLD_REPORTING_MEMBER:
	1224	case MLD_G_QUERY_PENDING_MEMBER:
	1225	case MLD_SG_QUERY_PENDING_MEMBER:
	1226	break;
	1227	}
	1228
	1229	nsrc = ntohs(mld->mld_numsrc);
	1230
	1231	/*
	1232	* Deal with group-specific queries upfront.
	1233	* If any group query is already pending, purge any recorded
	1234	* source-list state if it exists, and schedule a query response
	1235	* for this group-specific query.
	1236	*/
	1237	if (nsrc == 0) {
	1238	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER \|\|
	1239	inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
	1240	in6m_clear_recorded(inm);
	1241	timer = min(inm->in6m_timer, timer);
	1242	}
	1243	inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
	1244	inm->in6m_timer = MLD_RANDOM_DELAY(timer);
	1245	return (retval);
	1246	}
	1247
	1248	/*
	1249	* Deal with the case where a group-and-source-specific query has
	1250	* been received but a group-specific query is already pending.
	1251	*/
	1252	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
	1253	timer = min(inm->in6m_timer, timer);
	1254	inm->in6m_timer = MLD_RANDOM_DELAY(timer);
	1255	return (retval);
	1256	}
	1257
	1258	/*
	1259	* Finally, deal with the case where a group-and-source-specific
	1260	* query has been received, where a response to a previous g-s-r
	1261	* query exists, or none exists.
	1262	* In this case, we need to parse the source-list which the Querier
	1263	* has provided us with and check if we have any source list filter
	1264	* entries at T1 for these sources. If we do not, there is no need
	1265	* schedule a report and the query may be dropped.
	1266	* If we do, we must record them and schedule a current-state
	1267	* report for those sources.
	1268	*/
	1269	if (inm->in6m_nsrc > 0) {
	1270	struct mbuf *m;
	1271	uint8_t *sp;
	1272	int i, nrecorded;
	1273	int soff;
	1274
	1275	m = m0;
	1276	soff = off + sizeof(struct mldv2_query);
	1277	nrecorded = 0;
	1278	for (i = 0; i < nsrc; i++) {
	1279	sp = mtod(m, uint8_t *) + soff;
	1280	retval = in6m_record_source(inm,
	1281	(const struct in6_addr )(void )sp);
	1282	if (retval < 0)
	1283	break;
	1284	nrecorded += retval;
	1285	soff += sizeof(struct in6_addr);
	1286	if (soff >= m->m_len) {
	1287	soff = soff - m->m_len;
	1288	m = m->m_next;
	1289	if (m == NULL)
	1290	break;
	1291	}
	1292	}
	1293	if (nrecorded > 0) {
	1294	MLD_PRINTF(( "%s: schedule response to SG query\n",
	1295	__func__));
	1296	inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
	1297	inm->in6m_timer = MLD_RANDOM_DELAY(timer);
	1298	}
	1299	}
	1300
	1301	return (retval);
	1302	}
	1303
	1304	/*
	1305	* Process a received MLDv1 host membership report.
	1306	* Assumes mld points to mld_hdr in pulled up mbuf chain.
	1307	*
	1308	* NOTE: Can't be fully const correct as we temporarily embed scope ID in
	1309	* mld_addr. This is OK as we own the mbuf chain.
	1310	*/
	1311	static int
	1312	mld_v1_input_report(struct ifnet ifp, struct mbuf m,
	1313	const struct ip6_hdr ip6, /const/ struct mld_hdr mld)
	1314	{
	1315	struct in6_addr src, dst;
	1316	struct in6_ifaddr *ia;
	1317	struct in6_multi *inm;
	1318
	1319	if (!mld_v1enable) {
	1320	MLD_PRINTF(("%s: ignore v1 report %s on ifp 0x%llx(%s)\n",
	1321	__func__, ip6_sprintf(&mld->mld_addr),
	1322	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1323	return (0);
	1324	}
	1325
	1326	if ((ifp->if_flags & IFF_LOOPBACK) \|\|
	1327	(m->m_pkthdr.pkt_flags & PKTF_LOOP))
	1328	return (0);
	1329
	1330	/*
	1331	* MLDv1 reports must originate from a host's link-local address,
	1332	* or the unspecified address (when booting).
	1333	*/
	1334	src = ip6->ip6_src;
	1335	in6_clearscope(&src);
	1336	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
	1337	MLD_PRINTF(("%s: ignore v1 query src %s on ifp 0x%llx(%s)\n",
	1338	__func__, ip6_sprintf(&ip6->ip6_src),
	1339	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1340	return (EINVAL);
	1341	}
	1342
	1343	/*
	1344	* RFC2710 Section 4: MLDv1 reports must pertain to a multicast
	1345	* group, and must be directed to the group itself.
	1346	*/
	1347	dst = ip6->ip6_dst;
	1348	in6_clearscope(&dst);
	1349	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) \|\|
	1350	!IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
	1351	MLD_PRINTF(("%s: ignore v1 query dst %s on ifp 0x%llx(%s)\n",
	1352	__func__, ip6_sprintf(&ip6->ip6_dst),
	1353	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1354	return (EINVAL);
	1355	}
	1356
	1357	/*
	1358	* Make sure we don't hear our own membership report, as fast
	1359	* leave requires knowing that we are the only member of a
	1360	* group. Assume we used the link-local address if available,
	1361	* otherwise look for ::.
	1362	*
	1363	* XXX Note that scope ID comparison is needed for the address
	1364	* returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
	1365	* performed for the on-wire address.
	1366	*/
	1367	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY\|IN6_IFF_ANYCAST);
	1368	if (ia != NULL) {
	1369	IFA_LOCK(&ia->ia_ifa);
	1370	if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))){
	1371	IFA_UNLOCK(&ia->ia_ifa);
	1372	IFA_REMREF(&ia->ia_ifa);
	1373	return (0);
	1374	}
	1375	IFA_UNLOCK(&ia->ia_ifa);
	1376	IFA_REMREF(&ia->ia_ifa);
	1377	} else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
	1378	return (0);
	1379	}
	1380
	1381	MLD_PRINTF(("%s: process v1 report %s on ifp 0x%llx(%s)\n",
	1382	__func__, ip6_sprintf(&mld->mld_addr),
	1383	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1384
	1385	/*
	1386	* Embed scope ID of receiving interface in MLD query for lookup
	1387	* whilst we don't hold other locks (due to KAME locking lameness).
	1388	*/
	1389	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
	1390	in6_setscope(&mld->mld_addr, ifp, NULL);
	1391
	1392	/*
	1393	* MLDv1 report suppression.
	1394	* If we are a member of this group, and our membership should be
	1395	* reported, and our group timer is pending or about to be reset,
	1396	* stop our group timer by transitioning to the 'lazy' state.
	1397	*/
	1398	in6_multihead_lock_shared();
	1399	IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
	1400	in6_multihead_lock_done();
	1401
	1402	if (inm != NULL) {
	1403	struct mld_ifinfo *mli;
	1404
	1405	IN6M_LOCK(inm);
	1406	mli = inm->in6m_mli;
	1407	VERIFY(mli != NULL);
	1408
	1409	MLI_LOCK(mli);
	1410	/*
	1411	* If we are in MLDv2 host mode, do not allow the
	1412	* other host's MLDv1 report to suppress our reports.
	1413	*/
	1414	if (mli->mli_version == MLD_VERSION_2) {
	1415	MLI_UNLOCK(mli);
	1416	IN6M_UNLOCK(inm);
	1417	IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	1418	goto out;
	1419	}
	1420	MLI_UNLOCK(mli);
	1421
	1422	inm->in6m_timer = 0;
	1423
	1424	switch (inm->in6m_state) {
	1425	case MLD_NOT_MEMBER:
	1426	case MLD_SILENT_MEMBER:
	1427	case MLD_SLEEPING_MEMBER:
	1428	break;
	1429	case MLD_REPORTING_MEMBER:
	1430	case MLD_IDLE_MEMBER:
	1431	case MLD_AWAKENING_MEMBER:
	1432	MLD_PRINTF(("%s: report suppressed for %s on "
	1433	"ifp 0x%llx(%s)\n", __func__,
	1434	ip6_sprintf(&mld->mld_addr),
	1435	(uint64_t)VM_KERNEL_ADDRPERM(ifp), if_name(ifp)));
	1436	case MLD_LAZY_MEMBER:
	1437	inm->in6m_state = MLD_LAZY_MEMBER;
	1438	break;
	1439	case MLD_G_QUERY_PENDING_MEMBER:
	1440	case MLD_SG_QUERY_PENDING_MEMBER:
	1441	case MLD_LEAVING_MEMBER:
	1442	break;
	1443	}
	1444	IN6M_UNLOCK(inm);
	1445	IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
	1446	}
	1447
	1448	out:
	1449	/* XXX Clear embedded scope ID as userland won't expect it. */
	1450	in6_clearscope(&mld->mld_addr);
	1451
	1452	return (0);
	1453	}
	1454
	1455	/*
	1456	* MLD input path.
	1457	*
	1458	* Assume query messages which fit in a single ICMPv6 message header
	1459	* have been pulled up.
	1460	* Assume that userland will want to see the message, even if it
	1461	* otherwise fails kernel input validation; do not free it.
	1462	* Pullup may however free the mbuf chain m if it fails.
	1463	*
	1464	* Return IPPROTO_DONE if we freed m. Otherwise, return 0.
	1465	*/
	1466	int
	1467	mld_input(struct mbuf *m, int off, int icmp6len)
	1468	{
	1469	struct ifnet *ifp;
	1470	struct ip6_hdr *ip6;
	1471	struct mld_hdr *mld;
	1472	int mldlen;
	1473
	1474	MLD_PRINTF(("%s: called w/mbuf (0x%llx,%d)\n", __func__,
	1475	(uint64_t)VM_KERNEL_ADDRPERM(m), off));
	1476
	1477	ifp = m->m_pkthdr.rcvif;
	1478
	1479	ip6 = mtod(m, struct ip6_hdr *);
	1480
	1481	/* Pullup to appropriate size. */
	1482	mld = (struct mld_hdr )(mtod(m, uint8_t ) + off);
	1483	if (mld->mld_type == MLD_LISTENER_QUERY &&
	1484	icmp6len >= sizeof(struct mldv2_query)) {
	1485	mldlen = sizeof(struct mldv2_query);
	1486	} else {
	1487	mldlen = sizeof(struct mld_hdr);
	1488	}
	1489	IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
	1490	if (mld == NULL) {
	1491	icmp6stat.icp6s_badlen++;
	1492	return (IPPROTO_DONE);
	1493	}
	1494
	1495	/*
	1496	* Userland needs to see all of this traffic for implementing
	1497	* the endpoint discovery portion of multicast routing.
	1498	*/
	1499	switch (mld->mld_type) {
	1500	case MLD_LISTENER_QUERY:
	1501	icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
	1502	if (icmp6len == sizeof(struct mld_hdr)) {
	1503	if (mld_v1_input_query(ifp, ip6, mld) != 0)
	1504	return (0);
	1505	} else if (icmp6len >= sizeof(struct mldv2_query)) {
	1506	if (mld_v2_input_query(ifp, ip6, m, off,
	1507	icmp6len) != 0)
	1508	return (0);
	1509	}
	1510	break;
	1511	case MLD_LISTENER_REPORT:
	1512	icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
	1513	if (mld_v1_input_report(ifp, m, ip6, mld) != 0)
	1514	return (0);
	1515	break;
	1516	case MLDV2_LISTENER_REPORT:
	1517	icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
	1518	break;
	1519	case MLD_LISTENER_DONE:
	1520	icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
	1521	break;
	1522	default:
	1523	break;
	1524	}
	1525
	1526	return (0);
	1527	}
	1528
	1529	/*
	1530	* Schedule MLD timer based on various parameters; caller must ensure that
	1531	* lock ordering is maintained as this routine acquires MLD global lock.
	1532	*/
	1533	void
	1534	mld_set_timeout(struct mld_tparams *mtp)
	1535	{
	1536	MLD_LOCK_ASSERT_NOTHELD();
	1537	VERIFY(mtp != NULL);
	1538
	1539	if (mtp->qpt != 0 \|\| mtp->it != 0 \|\| mtp->cst != 0 \|\| mtp->sct != 0) {
	1540	MLD_LOCK();
	1541	if (mtp->qpt != 0)
	1542	querier_present_timers_running6 = 1;
	1543	if (mtp->it != 0)
	1544	interface_timers_running6 = 1;
	1545	if (mtp->cst != 0)
	1546	current_state_timers_running6 = 1;
	1547	if (mtp->sct != 0)
	1548	state_change_timers_running6 = 1;
	1549	mld_sched_timeout();
	1550	MLD_UNLOCK();
	1551	}
	1552	}
	1553
	1554	/*
	1555	* MLD6 timer handler (per 1 second).
	1556	*/
	1557	static void
	1558	mld_timeout(void *arg)
	1559	{
	1560	#pragma unused(arg)
	1561	struct ifqueue scq; /* State-change packets */
	1562	struct ifqueue qrq; /* Query response packets */
	1563	struct ifnet *ifp;
	1564	struct mld_ifinfo *mli;
	1565	struct in6_multi *inm;
	1566	int uri_sec = 0;
	1567	SLIST_HEAD(, in6_multi) in6m_dthead;
	1568
	1569	SLIST_INIT(&in6m_dthead);
	1570
	1571	/*
	1572	* Update coarse-grained networking timestamp (in sec.); the idea
	1573	* is to piggy-back on the timeout callout to update the counter
	1574	* returnable via net_uptime().
	1575	*/
	1576	net_update_uptime();
	1577
	1578	MLD_LOCK();
	1579
	1580	MLD_PRINTF(("%s: qpt %d, it %d, cst %d, sct %d\n", __func__,
	1581	querier_present_timers_running6, interface_timers_running6,
	1582	current_state_timers_running6, state_change_timers_running6));
	1583
	1584	/*
	1585	* MLDv1 querier present timer processing.
	1586	*/
	1587	if (querier_present_timers_running6) {
	1588	querier_present_timers_running6 = 0;
	1589	LIST_FOREACH(mli, &mli_head, mli_link) {
	1590	MLI_LOCK(mli);
	1591	mld_v1_process_querier_timers(mli);
	1592	if (mli->mli_v1_timer > 0)
	1593	querier_present_timers_running6 = 1;
	1594	MLI_UNLOCK(mli);
	1595	}
	1596	}
	1597
	1598	/*
	1599	* MLDv2 General Query response timer processing.
	1600	*/
	1601	if (interface_timers_running6) {
	1602	MLD_PRINTF(("%s: interface timers running\n", __func__));
	1603	interface_timers_running6 = 0;
	1604	LIST_FOREACH(mli, &mli_head, mli_link) {
	1605	MLI_LOCK(mli);
	1606	if (mli->mli_version != MLD_VERSION_2) {
	1607	MLI_UNLOCK(mli);
	1608	continue;
	1609	}
	1610	if (mli->mli_v2_timer == 0) {
	1611	/* Do nothing. */
	1612	} else if (--mli->mli_v2_timer == 0) {
	1613	if (mld_v2_dispatch_general_query(mli) > 0)
	1614	interface_timers_running6 = 1;
	1615	} else {
	1616	interface_timers_running6 = 1;
	1617	}
	1618	MLI_UNLOCK(mli);
	1619	}
	1620	}
	1621
	1622	if (!current_state_timers_running6 &&
	1623	!state_change_timers_running6)
	1624	goto out_locked;
	1625
	1626	current_state_timers_running6 = 0;
	1627	state_change_timers_running6 = 0;
	1628
	1629	MLD_PRINTF(("%s: state change timers running\n", __func__));
	1630
	1631	memset(&qrq, 0, sizeof(struct ifqueue));
	1632	qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
	1633
	1634	memset(&scq, 0, sizeof(struct ifqueue));
	1635	scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
	1636
	1637	/*
	1638	* MLD host report and state-change timer processing.
	1639	* Note: Processing a v2 group timer may remove a node.
	1640	*/
	1641	LIST_FOREACH(mli, &mli_head, mli_link) {
	1642	struct in6_multistep step;
	1643
	1644	MLI_LOCK(mli);
	1645	ifp = mli->mli_ifp;
	1646	uri_sec = MLD_RANDOM_DELAY(mli->mli_uri);
	1647	MLI_UNLOCK(mli);
	1648
	1649	in6_multihead_lock_shared();
	1650	IN6_FIRST_MULTI(step, inm);
	1651	while (inm != NULL) {
	1652	IN6M_LOCK(inm);
	1653	if (inm->in6m_ifp != ifp)
	1654	goto next;
	1655
	1656	MLI_LOCK(mli);
	1657	switch (mli->mli_version) {
	1658	case MLD_VERSION_1:
	1659	mld_v1_process_group_timer(inm,
	1660	mli->mli_version);
	1661	break;
	1662	case MLD_VERSION_2:
	1663	mld_v2_process_group_timers(mli, &qrq,
	1664	&scq, inm, uri_sec);
	1665	break;
	1666	}
	1667	MLI_UNLOCK(mli);
	1668	next:
	1669	IN6M_UNLOCK(inm);
	1670	IN6_NEXT_MULTI(step, inm);
	1671	}
	1672	in6_multihead_lock_done();
	1673
	1674	MLI_LOCK(mli);
	1675	if (mli->mli_version == MLD_VERSION_1) {
	1676	mld_dispatch_queue(mli, &mli->mli_v1q, 0);
	1677	} else if (mli->mli_version == MLD_VERSION_2) {
	1678	MLI_UNLOCK(mli);
	1679	mld_dispatch_queue(NULL, &qrq, 0);
	1680	mld_dispatch_queue(NULL, &scq, 0);
	1681	VERIFY(qrq.ifq_len == 0);
	1682	VERIFY(scq.ifq_len == 0);
	1683	MLI_LOCK(mli);
	1684	}
	1685	/*
	1686	* In case there are still any pending membership reports
	1687	* which didn't get drained at version change time.
	1688	*/
	1689	IF_DRAIN(&mli->mli_v1q);
	1690	/*
	1691	* Release all deferred inm records, and drain any locally
	1692	* enqueued packets; do it even if the current MLD version
	1693	* for the link is no longer MLDv2, in order to handle the
	1694	* version change case.
	1695	*/
	1696	mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
	1697	VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
	1698	MLI_UNLOCK(mli);
	1699
	1700	IF_DRAIN(&qrq);
	1701	IF_DRAIN(&scq);
	1702	}
	1703
	1704	out_locked:
	1705	/* re-arm the timer if there's work to do */
	1706	mld_timeout_run = 0;
	1707	mld_sched_timeout();
	1708	MLD_UNLOCK();
	1709
	1710	/* Now that we're dropped all locks, release detached records */
	1711	MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
	1712	}
	1713
	1714	static void
	1715	mld_sched_timeout(void)
	1716	{
	1717	MLD_LOCK_ASSERT_HELD();
	1718
	1719	if (!mld_timeout_run &&
	1720	(querier_present_timers_running6 \|\| current_state_timers_running6 \|\|
	1721	interface_timers_running6 \|\| state_change_timers_running6)) {
	1722	mld_timeout_run = 1;
	1723	timeout(mld_timeout, NULL, hz);
	1724	}
	1725	}
	1726
	1727	/*
	1728	* Free the in6_multi reference(s) for this MLD lifecycle.
	1729	*
	1730	* Caller must be holding mli_lock.
	1731	*/
	1732	static void
	1733	mld_flush_relq(struct mld_ifinfo mli, struct mld_in6m_relhead in6m_dthead)
	1734	{
	1735	struct in6_multi *inm;
	1736
	1737	again:
	1738	MLI_LOCK_ASSERT_HELD(mli);
	1739	inm = SLIST_FIRST(&mli->mli_relinmhead);
	1740	if (inm != NULL) {
	1741	int lastref;
	1742
	1743	SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
	1744	MLI_UNLOCK(mli);
	1745
	1746	in6_multihead_lock_exclusive();
	1747	IN6M_LOCK(inm);
	1748	VERIFY(inm->in6m_nrelecnt != 0);
	1749	inm->in6m_nrelecnt--;
	1750	lastref = in6_multi_detach(inm);
	1751	VERIFY(!lastref \|\| (!(inm->in6m_debug & IFD_ATTACHED) &&
	1752	inm->in6m_reqcnt == 0));
	1753	IN6M_UNLOCK(inm);
	1754	in6_multihead_lock_done();
	1755	/* from mli_relinmhead */
	1756	IN6M_REMREF(inm);
	1757	/* from in6_multihead_list */
	1758	if (lastref) {
	1759	/*
	1760	* Defer releasing our final reference, as we
	1761	* are holding the MLD lock at this point, and
	1762	* we could end up with locking issues later on
	1763	* (while issuing SIOCDELMULTI) when this is the
	1764	* final reference count. Let the caller do it
	1765	* when it is safe.
	1766	*/
	1767	MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
	1768	}
	1769	MLI_LOCK(mli);
	1770	goto again;
	1771	}
	1772	}
	1773
	1774	/*
	1775	* Update host report group timer.
	1776	* Will update the global pending timer flags.
	1777	*/
	1778	static void
	1779	mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
	1780	{
	1781	#pragma unused(mld_version)
	1782	int report_timer_expired;
	1783
	1784	MLD_LOCK_ASSERT_HELD();
	1785	IN6M_LOCK_ASSERT_HELD(inm);
	1786	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
	1787
	1788	if (inm->in6m_timer == 0) {
	1789	report_timer_expired = 0;
	1790	} else if (--inm->in6m_timer == 0) {
	1791	report_timer_expired = 1;
	1792	} else {
	1793	current_state_timers_running6 = 1;
	1794	/* caller will schedule timer */
	1795	return;
	1796	}
	1797
	1798	switch (inm->in6m_state) {
	1799	case MLD_NOT_MEMBER:
	1800	case MLD_SILENT_MEMBER:
	1801	case MLD_IDLE_MEMBER:
	1802	case MLD_LAZY_MEMBER:
	1803	case MLD_SLEEPING_MEMBER:
	1804	case MLD_AWAKENING_MEMBER:
	1805	break;
	1806	case MLD_REPORTING_MEMBER:
	1807	if (report_timer_expired) {
	1808	inm->in6m_state = MLD_IDLE_MEMBER;
	1809	(void) mld_v1_transmit_report(inm,
	1810	MLD_LISTENER_REPORT);
	1811	IN6M_LOCK_ASSERT_HELD(inm);
	1812	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
	1813	}
	1814	break;
	1815	case MLD_G_QUERY_PENDING_MEMBER:
	1816	case MLD_SG_QUERY_PENDING_MEMBER:
	1817	case MLD_LEAVING_MEMBER:
	1818	break;
	1819	}
	1820	}
	1821
	1822	/*
	1823	* Update a group's timers for MLDv2.
	1824	* Will update the global pending timer flags.
	1825	* Note: Unlocked read from mli.
	1826	*/
	1827	static void
	1828	mld_v2_process_group_timers(struct mld_ifinfo *mli,
	1829	struct ifqueue qrq, struct ifqueue scq,
	1830	struct in6_multi *inm, const int uri_sec)
	1831	{
	1832	int query_response_timer_expired;
	1833	int state_change_retransmit_timer_expired;
	1834
	1835	MLD_LOCK_ASSERT_HELD();
	1836	IN6M_LOCK_ASSERT_HELD(inm);
	1837	MLI_LOCK_ASSERT_HELD(mli);
	1838	VERIFY(mli == inm->in6m_mli);
	1839
	1840	query_response_timer_expired = 0;
	1841	state_change_retransmit_timer_expired = 0;
	1842
	1843	/*
	1844	* During a transition from compatibility mode back to MLDv2,
	1845	* a group record in REPORTING state may still have its group
	1846	* timer active. This is a no-op in this function; it is easier
	1847	* to deal with it here than to complicate the timeout path.
	1848	*/
	1849	if (inm->in6m_timer == 0) {
	1850	query_response_timer_expired = 0;
	1851	} else if (--inm->in6m_timer == 0) {
	1852	query_response_timer_expired = 1;
	1853	} else {
	1854	current_state_timers_running6 = 1;
	1855	/* caller will schedule timer */
	1856	}
	1857
	1858	if (inm->in6m_sctimer == 0) {
	1859	state_change_retransmit_timer_expired = 0;
	1860	} else if (--inm->in6m_sctimer == 0) {
	1861	state_change_retransmit_timer_expired = 1;
	1862	} else {
	1863	state_change_timers_running6 = 1;
	1864	/* caller will schedule timer */
	1865	}
	1866
	1867	/* We are in timer callback, so be quick about it. */
	1868	if (!state_change_retransmit_timer_expired &&
	1869	!query_response_timer_expired)
	1870	return;
	1871
	1872	switch (inm->in6m_state) {
	1873	case MLD_NOT_MEMBER:
	1874	case MLD_SILENT_MEMBER:
	1875	case MLD_SLEEPING_MEMBER:
	1876	case MLD_LAZY_MEMBER:
	1877	case MLD_AWAKENING_MEMBER:
	1878	case MLD_IDLE_MEMBER:
	1879	break;
	1880	case MLD_G_QUERY_PENDING_MEMBER:
	1881	case MLD_SG_QUERY_PENDING_MEMBER:
	1882	/*
	1883	* Respond to a previously pending Group-Specific
	1884	* or Group-and-Source-Specific query by enqueueing
	1885	* the appropriate Current-State report for
	1886	* immediate transmission.
	1887	*/
	1888	if (query_response_timer_expired) {
	1889	int retval;
	1890
	1891	retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
	1892	(inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
	1893	0);
	1894	MLD_PRINTF(("%s: enqueue record = %d\n",
	1895	__func__, retval));
	1896	inm->in6m_state = MLD_REPORTING_MEMBER;
	1897	in6m_clear_recorded(inm);
	1898	}
	1899	/* FALLTHROUGH */
	1900	case MLD_REPORTING_MEMBER:
	1901	case MLD_LEAVING_MEMBER:
	1902	if (state_change_retransmit_timer_expired) {
	1903	/*
	1904	* State-change retransmission timer fired.
	1905	* If there are any further pending retransmissions,
	1906	* set the global pending state-change flag, and
	1907	* reset the timer.
	1908	*/
	1909	if (--inm->in6m_scrv > 0) {
	1910	inm->in6m_sctimer = uri_sec;
	1911	state_change_timers_running6 = 1;
	1912	/* caller will schedule timer */
	1913	}
	1914	/*
	1915	* Retransmit the previously computed state-change
	1916	* report. If there are no further pending
	1917	* retransmissions, the mbuf queue will be consumed.
	1918	* Update T0 state to T1 as we have now sent
	1919	* a state-change.
	1920	*/
	1921	(void) mld_v2_merge_state_changes(inm, scq);
	1922
	1923	in6m_commit(inm);
	1924	MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
	1925	ip6_sprintf(&inm->in6m_addr),
	1926	if_name(inm->in6m_ifp)));
	1927
	1928	/*
	1929	* If we are leaving the group for good, make sure
	1930	* we release MLD's reference to it.
	1931	* This release must be deferred using a SLIST,
	1932	* as we are called from a loop which traverses
	1933	* the in_ifmultiaddr TAILQ.
	1934	*/
	1935	if (inm->in6m_state == MLD_LEAVING_MEMBER &&
	1936	inm->in6m_scrv == 0) {
	1937	inm->in6m_state = MLD_NOT_MEMBER;
	1938	/*
	1939	* A reference has already been held in
	1940	* mld_final_leave() for this inm, so
	1941	* no need to hold another one. We also
	1942	* bumped up its request count then, so
	1943	* that it stays in in6_multihead. Both
	1944	* of them will be released when it is
	1945	* dequeued later on.
	1946	*/
	1947	VERIFY(inm->in6m_nrelecnt != 0);
	1948	SLIST_INSERT_HEAD(&mli->mli_relinmhead,
	1949	inm, in6m_nrele);
	1950	}
	1951	}
	1952	break;
	1953	}
	1954	}
	1955
	1956	/*
	1957	* Switch to a different version on the given interface,
	1958	* as per Section 9.12.
	1959	*/
	1960	static uint32_t
	1961	mld_set_version(struct mld_ifinfo *mli, const int mld_version)
	1962	{
	1963	int old_version_timer;
	1964
	1965	MLI_LOCK_ASSERT_HELD(mli);
	1966
	1967	MLD_PRINTF(("%s: switching to v%d on ifp 0x%llx(%s)\n", __func__,
	1968	mld_version, (uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
	1969	if_name(mli->mli_ifp)));
	1970
	1971	if (mld_version == MLD_VERSION_1) {
	1972	/*
	1973	* Compute the "Older Version Querier Present" timer as per
	1974	* Section 9.12, in seconds.
	1975	*/
	1976	old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
	1977	mli->mli_v1_timer = old_version_timer;
	1978	}
	1979
	1980	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
	1981	mli->mli_version = MLD_VERSION_1;
	1982	mld_v2_cancel_link_timers(mli);
	1983	}
	1984
	1985	MLI_LOCK_ASSERT_HELD(mli);
	1986
	1987	return (mli->mli_v1_timer);
	1988	}
	1989
	1990	/*
	1991	* Cancel pending MLDv2 timers for the given link and all groups
	1992	* joined on it; state-change, general-query, and group-query timers.
	1993	*
	1994	* Only ever called on a transition from v2 to Compatibility mode. Kill
	1995	* the timers stone dead (this may be expensive for large N groups), they
	1996	* will be restarted if Compatibility Mode deems that they must be due to
	1997	* query processing.
	1998	*/
	1999	static void
	2000	mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
	2001	{
	2002	struct ifnet *ifp;
	2003	struct in6_multi *inm;
	2004	struct in6_multistep step;
	2005
	2006	MLI_LOCK_ASSERT_HELD(mli);
	2007
	2008	MLD_PRINTF(("%s: cancel v2 timers on ifp 0x%llx(%s)\n", __func__,
	2009	(uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp), if_name(mli->mli_ifp)));
	2010
	2011	/*
	2012	* Stop the v2 General Query Response on this link stone dead.
	2013	* If timer is woken up due to interface_timers_running6,
	2014	* the flag will be cleared if there are no pending link timers.
	2015	*/
	2016	mli->mli_v2_timer = 0;
	2017
	2018	/*
	2019	* Now clear the current-state and state-change report timers
	2020	* for all memberships scoped to this link.
	2021	*/
	2022	ifp = mli->mli_ifp;
	2023	MLI_UNLOCK(mli);
	2024
	2025	in6_multihead_lock_shared();
	2026	IN6_FIRST_MULTI(step, inm);
	2027	while (inm != NULL) {
	2028	IN6M_LOCK(inm);
	2029	if (inm->in6m_ifp != ifp)
	2030	goto next;
	2031
	2032	switch (inm->in6m_state) {
	2033	case MLD_NOT_MEMBER:
	2034	case MLD_SILENT_MEMBER:
	2035	case MLD_IDLE_MEMBER:
	2036	case MLD_LAZY_MEMBER:
	2037	case MLD_SLEEPING_MEMBER:
	2038	case MLD_AWAKENING_MEMBER:
	2039	/*
	2040	* These states are either not relevant in v2 mode,
	2041	* or are unreported. Do nothing.
	2042	*/
	2043	break;
	2044	case MLD_LEAVING_MEMBER:
	2045	/*
	2046	* If we are leaving the group and switching
	2047	* version, we need to release the final
	2048	* reference held for issuing the INCLUDE {}.
	2049	* During mld_final_leave(), we bumped up both the
	2050	* request and reference counts. Since we cannot
	2051	* call in6_multi_detach() here, defer this task to
	2052	* the timer routine.
	2053	*/
	2054	VERIFY(inm->in6m_nrelecnt != 0);
	2055	MLI_LOCK(mli);
	2056	SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
	2057	in6m_nrele);
	2058	MLI_UNLOCK(mli);
	2059	/* FALLTHROUGH */
	2060	case MLD_G_QUERY_PENDING_MEMBER:
	2061	case MLD_SG_QUERY_PENDING_MEMBER:
	2062	in6m_clear_recorded(inm);
	2063	/* FALLTHROUGH */
	2064	case MLD_REPORTING_MEMBER:
	2065	inm->in6m_state = MLD_REPORTING_MEMBER;
	2066	break;
	2067	}
	2068	/*
	2069	* Always clear state-change and group report timers.
	2070	* Free any pending MLDv2 state-change records.
	2071	*/
	2072	inm->in6m_sctimer = 0;
	2073	inm->in6m_timer = 0;
	2074	IF_DRAIN(&inm->in6m_scq);
	2075	next:
	2076	IN6M_UNLOCK(inm);
	2077	IN6_NEXT_MULTI(step, inm);
	2078	}
	2079	in6_multihead_lock_done();
	2080
	2081	MLI_LOCK(mli);
	2082	}
	2083
	2084	/*
	2085	* Update the Older Version Querier Present timers for a link.
	2086	* See Section 9.12 of RFC 3810.
	2087	*/
	2088	static void
	2089	mld_v1_process_querier_timers(struct mld_ifinfo *mli)
	2090	{
	2091	MLI_LOCK_ASSERT_HELD(mli);
	2092
	2093	if (mld_v2enable && mli->mli_version != MLD_VERSION_2 &&
	2094	--mli->mli_v1_timer == 0) {
	2095	/*
	2096	* MLDv1 Querier Present timer expired; revert to MLDv2.
	2097	*/
	2098	MLD_PRINTF(("%s: transition from v%d -> v%d on 0x%llx(%s)\n",
	2099	__func__, mli->mli_version, MLD_VERSION_2,
	2100	(uint64_t)VM_KERNEL_ADDRPERM(mli->mli_ifp),
	2101	if_name(mli->mli_ifp)));
	2102	mli->mli_version = MLD_VERSION_2;
	2103	}
	2104	}
	2105
	2106	/*
	2107	* Transmit an MLDv1 report immediately.
	2108	*/
	2109	static int
	2110	mld_v1_transmit_report(struct in6_multi *in6m, const int type)
	2111	{
	2112	struct ifnet *ifp;
	2113	struct in6_ifaddr *ia;
	2114	struct ip6_hdr *ip6;
	2115	struct mbuf mh, md;
	2116	struct mld_hdr *mld;
	2117	int error = 0;
	2118
	2119	IN6M_LOCK_ASSERT_HELD(in6m);
	2120	MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
	2121
	2122	ifp = in6m->in6m_ifp;
	2123	/* ia may be NULL if link-local address is tentative. */
	2124	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY\|IN6_IFF_ANYCAST);
	2125
	2126	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
	2127	if (mh == NULL) {
	2128	if (ia != NULL)
	2129	IFA_REMREF(&ia->ia_ifa);
	2130	return (ENOMEM);
	2131	}
	2132	MGET(md, M_DONTWAIT, MT_DATA);
	2133	if (md == NULL) {
	2134	m_free(mh);
	2135	if (ia != NULL)
	2136	IFA_REMREF(&ia->ia_ifa);
	2137	return (ENOMEM);
	2138	}
	2139	mh->m_next = md;
	2140
	2141	/*
	2142	* FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	2143	* that ether_output() does not need to allocate another mbuf
	2144	* for the header in the most common case.
	2145	*/
	2146	MH_ALIGN(mh, sizeof(struct ip6_hdr));
	2147	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	2148	mh->m_len = sizeof(struct ip6_hdr);
	2149
	2150	ip6 = mtod(mh, struct ip6_hdr *);
	2151	ip6->ip6_flow = 0;
	2152	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	2153	ip6->ip6_vfc \|= IPV6_VERSION;
	2154	ip6->ip6_nxt = IPPROTO_ICMPV6;
	2155	if (ia != NULL)
	2156	IFA_LOCK(&ia->ia_ifa);
	2157	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	2158	if (ia != NULL) {
	2159	IFA_UNLOCK(&ia->ia_ifa);
	2160	IFA_REMREF(&ia->ia_ifa);
	2161	ia = NULL;
	2162	}
	2163	ip6->ip6_dst = in6m->in6m_addr;
	2164
	2165	md->m_len = sizeof(struct mld_hdr);
	2166	mld = mtod(md, struct mld_hdr *);
	2167	mld->mld_type = type;
	2168	mld->mld_code = 0;
	2169	mld->mld_cksum = 0;
	2170	mld->mld_maxdelay = 0;
	2171	mld->mld_reserved = 0;
	2172	mld->mld_addr = in6m->in6m_addr;
	2173	in6_clearscope(&mld->mld_addr);
	2174	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	2175	sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
	2176
	2177	mld_save_context(mh, ifp);
	2178	mh->m_flags \|= M_MLDV1;
	2179
	2180	/*
	2181	* Due to the fact that at this point we are possibly holding
	2182	* in6_multihead_lock in shared or exclusive mode, we can't call
	2183	* mld_dispatch_packet() here since that will eventually call
	2184	* ip6_output(), which will try to lock in6_multihead_lock and cause
	2185	* a deadlock.
	2186	* Instead we defer the work to the mld_timeout() thread, thus
	2187	* avoiding unlocking in_multihead_lock here.
	2188	*/
	2189	if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
	2190	MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
	2191	error = ENOMEM;
	2192	m_freem(mh);
	2193	} else {
	2194	IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
	2195	VERIFY(error == 0);
	2196	}
	2197
	2198	return (error);
	2199	}
	2200
	2201	/*
	2202	* Process a state change from the upper layer for the given IPv6 group.
	2203	*
	2204	* Each socket holds a reference on the in6_multi in its own ip_moptions.
	2205	* The socket layer will have made the necessary updates to.the group
	2206	* state, it is now up to MLD to issue a state change report if there
	2207	* has been any change between T0 (when the last state-change was issued)
	2208	* and T1 (now).
	2209	*
	2210	* We use the MLDv2 state machine at group level. The MLd module
	2211	* however makes the decision as to which MLD protocol version to speak.
	2212	* A state change from INCLUDE {} always means an initial join.
	2213	* A state change to INCLUDE {} always means a final leave.
	2214	*
	2215	* If delay is non-zero, and the state change is an initial multicast
	2216	* join, the state change report will be delayed by 'delay' ticks
	2217	* in units of seconds if MLDv1 is active on the link; otherwise
	2218	* the initial MLDv2 state change report will be delayed by whichever
	2219	* is sooner, a pending state-change timer or delay itself.
	2220	*/
	2221	int
	2222	mld_change_state(struct in6_multi inm, struct mld_tparams mtp,
	2223	const int delay)
	2224	{
	2225	struct mld_ifinfo *mli;
	2226	struct ifnet *ifp;
	2227	int error = 0;
	2228
	2229	VERIFY(mtp != NULL);
	2230	bzero(mtp, sizeof (*mtp));
	2231
	2232	IN6M_LOCK_ASSERT_HELD(inm);
	2233	VERIFY(inm->in6m_mli != NULL);
	2234	MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
	2235
	2236	/*
	2237	* Try to detect if the upper layer just asked us to change state
	2238	* for an interface which has now gone away.
	2239	*/
	2240	VERIFY(inm->in6m_ifma != NULL);
	2241	ifp = inm->in6m_ifma->ifma_ifp;
	2242	/*
	2243	* Sanity check that netinet6's notion of ifp is the same as net's.
	2244	*/
	2245	VERIFY(inm->in6m_ifp == ifp);
	2246
	2247	mli = MLD_IFINFO(ifp);
	2248	VERIFY(mli != NULL);
	2249
	2250	/*
	2251	* If we detect a state transition to or from MCAST_UNDEFINED
	2252	* for this group, then we are starting or finishing an MLD
	2253	* life cycle for this group.
	2254	*/
	2255	if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
	2256	MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
	2257	inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
	2258	if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
	2259	MLD_PRINTF(("%s: initial join\n", __func__));
	2260	error = mld_initial_join(inm, mli, mtp, delay);
	2261	goto out;
	2262	} else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
	2263	MLD_PRINTF(("%s: final leave\n", __func__));
	2264	mld_final_leave(inm, mli, mtp);
	2265	goto out;
	2266	}
	2267	} else {
	2268	MLD_PRINTF(("%s: filter set change\n", __func__));
	2269	}
	2270
	2271	error = mld_handle_state_change(inm, mli, mtp);
	2272	out:
	2273	return (error);
	2274	}
	2275
	2276	/*
	2277	* Perform the initial join for an MLD group.
	2278	*
	2279	* When joining a group:
	2280	* If the group should have its MLD traffic suppressed, do nothing.
	2281	* MLDv1 starts sending MLDv1 host membership reports.
	2282	* MLDv2 will schedule an MLDv2 state-change report containing the
	2283	* initial state of the membership.
	2284	*
	2285	* If the delay argument is non-zero, then we must delay sending the
	2286	* initial state change for delay ticks (in units of seconds).
	2287	*/
	2288	static int
	2289	mld_initial_join(struct in6_multi inm, struct mld_ifinfo mli,
	2290	struct mld_tparams *mtp, const int delay)
	2291	{
	2292	struct ifnet *ifp;
	2293	struct ifqueue *ifq;
	2294	int error, retval, syncstates;
	2295	int odelay;
	2296
	2297	IN6M_LOCK_ASSERT_HELD(inm);
	2298	MLI_LOCK_ASSERT_NOTHELD(mli);
	2299	VERIFY(mtp != NULL);
	2300
	2301	MLD_PRINTF(("%s: initial join %s on ifp 0x%llx(%s)\n",
	2302	__func__, ip6_sprintf(&inm->in6m_addr),
	2303	(uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	2304	if_name(inm->in6m_ifp)));
	2305
	2306	error = 0;
	2307	syncstates = 1;
	2308
	2309	ifp = inm->in6m_ifp;
	2310
	2311	MLI_LOCK(mli);
	2312	VERIFY(mli->mli_ifp == ifp);
	2313
	2314	/*
	2315	* Avoid MLD if group is :
	2316	* 1. Joined on loopback, OR
	2317	* 2. On a link that is marked MLIF_SILENT
	2318	* 3. rdar://problem/19227650 Is link local scoped and
	2319	* on cellular interface
	2320	* 4. Is a type that should not be reported (node local
	2321	* or all node link local multicast.
	2322	* All other groups enter the appropriate state machine
	2323	* for the version in use on this link.
	2324	*/
	2325	if ((ifp->if_flags & IFF_LOOPBACK) \|\|
	2326	(mli->mli_flags & MLIF_SILENT) \|\|
	2327	(IFNET_IS_CELLULAR(ifp) &&
	2328	IN6_IS_ADDR_MC_LINKLOCAL(&inm->in6m_addr)) \|\|
	2329	!mld_is_addr_reported(&inm->in6m_addr)) {
	2330	MLD_PRINTF(("%s: not kicking state machine for silent group\n",
	2331	__func__));
	2332	inm->in6m_state = MLD_SILENT_MEMBER;
	2333	inm->in6m_timer = 0;
	2334	} else {
	2335	/*
	2336	* Deal with overlapping in6_multi lifecycle.
	2337	* If this group was LEAVING, then make sure
	2338	* we drop the reference we picked up to keep the
	2339	* group around for the final INCLUDE {} enqueue.
	2340	* Since we cannot call in6_multi_detach() here,
	2341	* defer this task to the timer routine.
	2342	*/
	2343	if (mli->mli_version == MLD_VERSION_2 &&
	2344	inm->in6m_state == MLD_LEAVING_MEMBER) {
	2345	VERIFY(inm->in6m_nrelecnt != 0);
	2346	SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
	2347	in6m_nrele);
	2348	}
	2349
	2350	inm->in6m_state = MLD_REPORTING_MEMBER;
	2351
	2352	switch (mli->mli_version) {
	2353	case MLD_VERSION_1:
	2354	/*
	2355	* If a delay was provided, only use it if
	2356	* it is greater than the delay normally
	2357	* used for an MLDv1 state change report,
	2358	* and delay sending the initial MLDv1 report
	2359	* by not transitioning to the IDLE state.
	2360	*/
	2361	odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI);
	2362	if (delay) {
	2363	inm->in6m_timer = max(delay, odelay);
	2364	mtp->cst = 1;
	2365	} else {
	2366	inm->in6m_state = MLD_IDLE_MEMBER;
	2367	error = mld_v1_transmit_report(inm,
	2368	MLD_LISTENER_REPORT);
	2369
	2370	IN6M_LOCK_ASSERT_HELD(inm);
	2371	MLI_LOCK_ASSERT_HELD(mli);
	2372
	2373	if (error == 0) {
	2374	inm->in6m_timer = odelay;
	2375	mtp->cst = 1;
	2376	}
	2377	}
	2378	break;
	2379
	2380	case MLD_VERSION_2:
	2381	/*
	2382	* Defer update of T0 to T1, until the first copy
	2383	* of the state change has been transmitted.
	2384	*/
	2385	syncstates = 0;
	2386
	2387	/*
	2388	* Immediately enqueue a State-Change Report for
	2389	* this interface, freeing any previous reports.
	2390	* Don't kick the timers if there is nothing to do,
	2391	* or if an error occurred.
	2392	*/
	2393	ifq = &inm->in6m_scq;
	2394	IF_DRAIN(ifq);
	2395	retval = mld_v2_enqueue_group_record(ifq, inm, 1,
	2396	0, 0, (mli->mli_flags & MLIF_USEALLOW));
	2397	mtp->cst = (ifq->ifq_len > 0);
	2398	MLD_PRINTF(("%s: enqueue record = %d\n",
	2399	__func__, retval));
	2400	if (retval <= 0) {
	2401	error = retval * -1;
	2402	break;
	2403	}
	2404
	2405	/*
	2406	* Schedule transmission of pending state-change
	2407	* report up to RV times for this link. The timer
	2408	* will fire at the next mld_timeout (1 second)),
	2409	* giving us an opportunity to merge the reports.
	2410	*
	2411	* If a delay was provided to this function, only
	2412	* use this delay if sooner than the existing one.
	2413	*/
	2414	VERIFY(mli->mli_rv > 1);
	2415	inm->in6m_scrv = mli->mli_rv;
	2416	if (delay) {
	2417	if (inm->in6m_sctimer > 1) {
	2418	inm->in6m_sctimer =
	2419	min(inm->in6m_sctimer, delay);
	2420	} else
	2421	inm->in6m_sctimer = delay;
	2422	} else {
	2423	inm->in6m_sctimer = 1;
	2424	}
	2425	mtp->sct = 1;
	2426	error = 0;
	2427	break;
	2428	}
	2429	}
	2430	MLI_UNLOCK(mli);
	2431
	2432	/*
	2433	* Only update the T0 state if state change is atomic,
	2434	* i.e. we don't need to wait for a timer to fire before we
	2435	* can consider the state change to have been communicated.
	2436	*/
	2437	if (syncstates) {
	2438	in6m_commit(inm);
	2439	MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
	2440	ip6_sprintf(&inm->in6m_addr),
	2441	if_name(inm->in6m_ifp)));
	2442	}
	2443
	2444	return (error);
	2445	}
	2446
	2447	/*
	2448	* Issue an intermediate state change during the life-cycle.
	2449	*/
	2450	static int
	2451	mld_handle_state_change(struct in6_multi inm, struct mld_ifinfo mli,
	2452	struct mld_tparams *mtp)
	2453	{
	2454	struct ifnet *ifp;
	2455	int retval = 0;
	2456
	2457	IN6M_LOCK_ASSERT_HELD(inm);
	2458	MLI_LOCK_ASSERT_NOTHELD(mli);
	2459	VERIFY(mtp != NULL);
	2460
	2461	MLD_PRINTF(("%s: state change for %s on ifp 0x%llx(%s)\n",
	2462	__func__, ip6_sprintf(&inm->in6m_addr),
	2463	(uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	2464	if_name(inm->in6m_ifp)));
	2465
	2466	ifp = inm->in6m_ifp;
	2467
	2468	MLI_LOCK(mli);
	2469	VERIFY(mli->mli_ifp == ifp);
	2470
	2471	if ((ifp->if_flags & IFF_LOOPBACK) \|\|
	2472	(mli->mli_flags & MLIF_SILENT) \|\|
	2473	!mld_is_addr_reported(&inm->in6m_addr) \|\|
	2474	(mli->mli_version != MLD_VERSION_2)) {
	2475	MLI_UNLOCK(mli);
	2476	if (!mld_is_addr_reported(&inm->in6m_addr)) {
	2477	MLD_PRINTF(("%s: not kicking state machine for silent "
	2478	"group\n", __func__));
	2479	}
	2480	MLD_PRINTF(("%s: nothing to do\n", __func__));
	2481	in6m_commit(inm);
	2482	MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
	2483	ip6_sprintf(&inm->in6m_addr),
	2484	if_name(inm->in6m_ifp)));
	2485	goto done;
	2486	}
	2487
	2488	IF_DRAIN(&inm->in6m_scq);
	2489
	2490	retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
	2491	(mli->mli_flags & MLIF_USEALLOW));
	2492	mtp->cst = (inm->in6m_scq.ifq_len > 0);
	2493	MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
	2494	if (retval <= 0) {
	2495	MLI_UNLOCK(mli);
	2496	retval *= -1;
	2497	goto done;
	2498	} else {
	2499	retval = 0;
	2500	}
	2501
	2502	/*
	2503	* If record(s) were enqueued, start the state-change
	2504	* report timer for this group.
	2505	*/
	2506	inm->in6m_scrv = mli->mli_rv;
	2507	inm->in6m_sctimer = 1;
	2508	mtp->sct = 1;
	2509	MLI_UNLOCK(mli);
	2510
	2511	done:
	2512	return (retval);
	2513	}
	2514
	2515	/*
	2516	* Perform the final leave for a multicast address.
	2517	*
	2518	* When leaving a group:
	2519	* MLDv1 sends a DONE message, if and only if we are the reporter.
	2520	* MLDv2 enqueues a state-change report containing a transition
	2521	* to INCLUDE {} for immediate transmission.
	2522	*/
	2523	static void
	2524	mld_final_leave(struct in6_multi inm, struct mld_ifinfo mli,
	2525	struct mld_tparams *mtp)
	2526	{
	2527	int syncstates = 1;
	2528
	2529	IN6M_LOCK_ASSERT_HELD(inm);
	2530	MLI_LOCK_ASSERT_NOTHELD(mli);
	2531	VERIFY(mtp != NULL);
	2532
	2533	MLD_PRINTF(("%s: final leave %s on ifp 0x%llx(%s)\n",
	2534	__func__, ip6_sprintf(&inm->in6m_addr),
	2535	(uint64_t)VM_KERNEL_ADDRPERM(inm->in6m_ifp),
	2536	if_name(inm->in6m_ifp)));
	2537
	2538	switch (inm->in6m_state) {
	2539	case MLD_NOT_MEMBER:
	2540	case MLD_SILENT_MEMBER:
	2541	case MLD_LEAVING_MEMBER:
	2542	/* Already leaving or left; do nothing. */
	2543	MLD_PRINTF(("%s: not kicking state machine for silent group\n",
	2544	__func__));
	2545	break;
	2546	case MLD_REPORTING_MEMBER:
	2547	case MLD_IDLE_MEMBER:
	2548	case MLD_G_QUERY_PENDING_MEMBER:
	2549	case MLD_SG_QUERY_PENDING_MEMBER:
	2550	MLI_LOCK(mli);
	2551	if (mli->mli_version == MLD_VERSION_1) {
	2552	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER \|\|
	2553	inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
	2554	panic("%s: MLDv2 state reached, not MLDv2 "
	2555	"mode\n", __func__);
	2556	/* NOTREACHED */
	2557	}
	2558	/* scheduler timer if enqueue is successful */
	2559	mtp->cst = (mld_v1_transmit_report(inm,
	2560	MLD_LISTENER_DONE) == 0);
	2561
	2562	IN6M_LOCK_ASSERT_HELD(inm);
	2563	MLI_LOCK_ASSERT_HELD(mli);
	2564
	2565	inm->in6m_state = MLD_NOT_MEMBER;
	2566	} else if (mli->mli_version == MLD_VERSION_2) {
	2567	/*
	2568	* Stop group timer and all pending reports.
	2569	* Immediately enqueue a state-change report
	2570	* TO_IN {} to be sent on the next timeout,
	2571	* giving us an opportunity to merge reports.
	2572	*/
	2573	IF_DRAIN(&inm->in6m_scq);
	2574	inm->in6m_timer = 0;
	2575	inm->in6m_scrv = mli->mli_rv;
	2576	MLD_PRINTF(("%s: Leaving %s/%s with %d "
	2577	"pending retransmissions.\n", __func__,
	2578	ip6_sprintf(&inm->in6m_addr),
	2579	if_name(inm->in6m_ifp),
	2580	inm->in6m_scrv));
	2581	if (inm->in6m_scrv == 0) {
	2582	inm->in6m_state = MLD_NOT_MEMBER;
	2583	inm->in6m_sctimer = 0;
	2584	} else {
	2585	int retval;
	2586	/*
	2587	* Stick around in the in6_multihead list;
	2588	* the final detach will be issued by
	2589	* mld_v2_process_group_timers() when
	2590	* the retransmit timer expires.
	2591	*/
	2592	IN6M_ADDREF_LOCKED(inm);
	2593	VERIFY(inm->in6m_debug & IFD_ATTACHED);
	2594	inm->in6m_reqcnt++;
	2595	VERIFY(inm->in6m_reqcnt >= 1);
	2596	inm->in6m_nrelecnt++;
	2597	VERIFY(inm->in6m_nrelecnt != 0);
	2598
	2599	retval = mld_v2_enqueue_group_record(
	2600	&inm->in6m_scq, inm, 1, 0, 0,
	2601	(mli->mli_flags & MLIF_USEALLOW));
	2602	mtp->cst = (inm->in6m_scq.ifq_len > 0);
	2603	KASSERT(retval != 0,
	2604	("%s: enqueue record = %d\n", __func__,
	2605	retval));
	2606
	2607	inm->in6m_state = MLD_LEAVING_MEMBER;
	2608	inm->in6m_sctimer = 1;
	2609	mtp->sct = 1;
	2610	syncstates = 0;
	2611	}
	2612	}
	2613	MLI_UNLOCK(mli);
	2614	break;
	2615	case MLD_LAZY_MEMBER:
	2616	case MLD_SLEEPING_MEMBER:
	2617	case MLD_AWAKENING_MEMBER:
	2618	/* Our reports are suppressed; do nothing. */
	2619	break;
	2620	}
	2621
	2622	if (syncstates) {
	2623	in6m_commit(inm);
	2624	MLD_PRINTF(("%s: T1 -> T0 for %s/%s\n", __func__,
	2625	ip6_sprintf(&inm->in6m_addr),
	2626	if_name(inm->in6m_ifp)));
	2627	inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
	2628	MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for 0x%llx/%s\n",
	2629	__func__, (uint64_t)VM_KERNEL_ADDRPERM(&inm->in6m_addr),
	2630	if_name(inm->in6m_ifp)));
	2631	}
	2632	}
	2633
	2634	/*
	2635	* Enqueue an MLDv2 group record to the given output queue.
	2636	*
	2637	* If is_state_change is zero, a current-state record is appended.
	2638	* If is_state_change is non-zero, a state-change report is appended.
	2639	*
	2640	* If is_group_query is non-zero, an mbuf packet chain is allocated.
	2641	* If is_group_query is zero, and if there is a packet with free space
	2642	* at the tail of the queue, it will be appended to providing there
	2643	* is enough free space.
	2644	* Otherwise a new mbuf packet chain is allocated.
	2645	*
	2646	* If is_source_query is non-zero, each source is checked to see if
	2647	* it was recorded for a Group-Source query, and will be omitted if
	2648	* it is not both in-mode and recorded.
	2649	*
	2650	* If use_block_allow is non-zero, state change reports for initial join
	2651	* and final leave, on an inclusive mode group with a source list, will be
	2652	* rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
	2653	*
	2654	* The function will attempt to allocate leading space in the packet
	2655	* for the IPv6+ICMP headers to be prepended without fragmenting the chain.
	2656	*
	2657	* If successful the size of all data appended to the queue is returned,
	2658	* otherwise an error code less than zero is returned, or zero if
	2659	* no record(s) were appended.
	2660	*/
	2661	static int
	2662	mld_v2_enqueue_group_record(struct ifqueue ifq, struct in6_multi inm,
	2663	const int is_state_change, const int is_group_query,
	2664	const int is_source_query, const int use_block_allow)
	2665	{
	2666	struct mldv2_record mr;
	2667	struct mldv2_record *pmr;
	2668	struct ifnet *ifp;
	2669	struct ip6_msource ims, nims;
	2670	struct mbuf m0, m, *md;
	2671	int error, is_filter_list_change;
	2672	int minrec0len, m0srcs, msrcs, nbytes, off;
	2673	int record_has_sources;
	2674	int now;
	2675	int type;
	2676	uint8_t mode;
	2677
	2678	IN6M_LOCK_ASSERT_HELD(inm);
	2679	MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
	2680
	2681	error = 0;
	2682	ifp = inm->in6m_ifp;
	2683	is_filter_list_change = 0;
	2684	m = NULL;
	2685	m0 = NULL;
	2686	m0srcs = 0;
	2687	msrcs = 0;
	2688	nbytes = 0;
	2689	nims = NULL;
	2690	record_has_sources = 1;
	2691	pmr = NULL;
	2692	type = MLD_DO_NOTHING;
	2693	mode = inm->in6m_st[1].iss_fmode;
	2694
	2695	/*
	2696	* If we did not transition out of ASM mode during t0->t1,
	2697	* and there are no source nodes to process, we can skip
	2698	* the generation of source records.
	2699	*/
	2700	if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
	2701	inm->in6m_nsrc == 0)
	2702	record_has_sources = 0;
	2703
	2704	if (is_state_change) {
	2705	/*
	2706	* Queue a state change record.
	2707	* If the mode did not change, and there are non-ASM
	2708	* listeners or source filters present,
	2709	* we potentially need to issue two records for the group.
	2710	* If there are ASM listeners, and there was no filter
	2711	* mode transition of any kind, do nothing.
	2712	*
	2713	* If we are transitioning to MCAST_UNDEFINED, we need
	2714	* not send any sources. A transition to/from this state is
	2715	* considered inclusive with some special treatment.
	2716	*
	2717	* If we are rewriting initial joins/leaves to use
	2718	* ALLOW/BLOCK, and the group's membership is inclusive,
	2719	* we need to send sources in all cases.
	2720	*/
	2721	if (mode != inm->in6m_st[0].iss_fmode) {
	2722	if (mode == MCAST_EXCLUDE) {
	2723	MLD_PRINTF(("%s: change to EXCLUDE\n",
	2724	__func__));
	2725	type = MLD_CHANGE_TO_EXCLUDE_MODE;
	2726	} else {
	2727	MLD_PRINTF(("%s: change to INCLUDE\n",
	2728	__func__));
	2729	if (use_block_allow) {
	2730	/*
	2731	* XXX
	2732	* Here we're interested in state
	2733	* edges either direction between
	2734	* MCAST_UNDEFINED and MCAST_INCLUDE.
	2735	* Perhaps we should just check
	2736	* the group state, rather than
	2737	* the filter mode.
	2738	*/
	2739	if (mode == MCAST_UNDEFINED) {
	2740	type = MLD_BLOCK_OLD_SOURCES;
	2741	} else {
	2742	type = MLD_ALLOW_NEW_SOURCES;
	2743	}
	2744	} else {
	2745	type = MLD_CHANGE_TO_INCLUDE_MODE;
	2746	if (mode == MCAST_UNDEFINED)
	2747	record_has_sources = 0;
	2748	}
	2749	}
	2750	} else {
	2751	if (record_has_sources) {
	2752	is_filter_list_change = 1;
	2753	} else {
	2754	type = MLD_DO_NOTHING;
	2755	}
	2756	}
	2757	} else {
	2758	/*
	2759	* Queue a current state record.
	2760	*/
	2761	if (mode == MCAST_EXCLUDE) {
	2762	type = MLD_MODE_IS_EXCLUDE;
	2763	} else if (mode == MCAST_INCLUDE) {
	2764	type = MLD_MODE_IS_INCLUDE;
	2765	VERIFY(inm->in6m_st[1].iss_asm == 0);
	2766	}
	2767	}
	2768
	2769	/*
	2770	* Generate the filter list changes using a separate function.
	2771	*/
	2772	if (is_filter_list_change)
	2773	return (mld_v2_enqueue_filter_change(ifq, inm));
	2774
	2775	if (type == MLD_DO_NOTHING) {
	2776	MLD_PRINTF(("%s: nothing to do for %s/%s\n",
	2777	__func__, ip6_sprintf(&inm->in6m_addr),
	2778	if_name(inm->in6m_ifp)));
	2779	return (0);
	2780	}
	2781
	2782	/*
	2783	* If any sources are present, we must be able to fit at least
	2784	* one in the trailing space of the tail packet's mbuf,
	2785	* ideally more.
	2786	*/
	2787	minrec0len = sizeof(struct mldv2_record);
	2788	if (record_has_sources)
	2789	minrec0len += sizeof(struct in6_addr);
	2790	MLD_PRINTF(("%s: queueing %s for %s/%s\n", __func__,
	2791	mld_rec_type_to_str(type),
	2792	ip6_sprintf(&inm->in6m_addr),
	2793	if_name(inm->in6m_ifp)));
	2794
	2795	/*
	2796	* Check if we have a packet in the tail of the queue for this
	2797	* group into which the first group record for this group will fit.
	2798	* Otherwise allocate a new packet.
	2799	* Always allocate leading space for IP6+RA+ICMPV6+REPORT.
	2800	* Note: Group records for G/GSR query responses MUST be sent
	2801	* in their own packet.
	2802	*/
	2803	m0 = ifq->ifq_tail;
	2804	if (!is_group_query &&
	2805	m0 != NULL &&
	2806	(m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
	2807	(m0->m_pkthdr.len + minrec0len) <
	2808	(ifp->if_mtu - MLD_MTUSPACE)) {
	2809	m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
	2810	sizeof(struct mldv2_record)) /
	2811	sizeof(struct in6_addr);
	2812	m = m0;
	2813	MLD_PRINTF(("%s: use existing packet\n", __func__));
	2814	} else {
	2815	if (IF_QFULL(ifq)) {
	2816	MLD_PRINTF(("%s: outbound queue full\n", __func__));
	2817	return (-ENOMEM);
	2818	}
	2819	m = NULL;
	2820	m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
	2821	sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
	2822	if (!is_state_change && !is_group_query)
	2823	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	2824	if (m == NULL)
	2825	m = m_gethdr(M_DONTWAIT, MT_DATA);
	2826	if (m == NULL)
	2827	return (-ENOMEM);
	2828
	2829	mld_save_context(m, ifp);
	2830
	2831	MLD_PRINTF(("%s: allocated first packet\n", __func__));
	2832	}
	2833
	2834	/*
	2835	* Append group record.
	2836	* If we have sources, we don't know how many yet.
	2837	*/
	2838	mr.mr_type = type;
	2839	mr.mr_datalen = 0;
	2840	mr.mr_numsrc = 0;
	2841	mr.mr_addr = inm->in6m_addr;
	2842	in6_clearscope(&mr.mr_addr);
	2843	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
	2844	if (m != m0)
	2845	m_freem(m);
	2846	MLD_PRINTF(("%s: m_append() failed.\n", __func__));
	2847	return (-ENOMEM);
	2848	}
	2849	nbytes += sizeof(struct mldv2_record);
	2850
	2851	/*
	2852	* Append as many sources as will fit in the first packet.
	2853	* If we are appending to a new packet, the chain allocation
	2854	* may potentially use clusters; use m_getptr() in this case.
	2855	* If we are appending to an existing packet, we need to obtain
	2856	* a pointer to the group record after m_append(), in case a new
	2857	* mbuf was allocated.
	2858	*
	2859	* Only append sources which are in-mode at t1. If we are
	2860	* transitioning to MCAST_UNDEFINED state on the group, and
	2861	* use_block_allow is zero, do not include source entries.
	2862	* Otherwise, we need to include this source in the report.
	2863	*
	2864	* Only report recorded sources in our filter set when responding
	2865	* to a group-source query.
	2866	*/
	2867	if (record_has_sources) {
	2868	if (m == m0) {
	2869	md = m_last(m);
	2870	pmr = (struct mldv2_record )(mtod(md, uint8_t ) +
	2871	md->m_len - nbytes);
	2872	} else {
	2873	md = m_getptr(m, 0, &off);
	2874	pmr = (struct mldv2_record )(mtod(md, uint8_t ) +
	2875	off);
	2876	}
	2877	msrcs = 0;
	2878	RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
	2879	nims) {
	2880	MLD_PRINTF(("%s: visit node %s\n", __func__,
	2881	ip6_sprintf(&ims->im6s_addr)));
	2882	now = im6s_get_mode(inm, ims, 1);
	2883	MLD_PRINTF(("%s: node is %d\n", __func__, now));
	2884	if ((now != mode) \|\|
	2885	(now == mode &&
	2886	(!use_block_allow && mode == MCAST_UNDEFINED))) {
	2887	MLD_PRINTF(("%s: skip node\n", __func__));
	2888	continue;
	2889	}
	2890	if (is_source_query && ims->im6s_stp == 0) {
	2891	MLD_PRINTF(("%s: skip unrecorded node\n",
	2892	__func__));
	2893	continue;
	2894	}
	2895	MLD_PRINTF(("%s: append node\n", __func__));
	2896	if (!m_append(m, sizeof(struct in6_addr),
	2897	(void *)&ims->im6s_addr)) {
	2898	if (m != m0)
	2899	m_freem(m);
	2900	MLD_PRINTF(("%s: m_append() failed.\n",
	2901	__func__));
	2902	return (-ENOMEM);
	2903	}
	2904	nbytes += sizeof(struct in6_addr);
	2905	++msrcs;
	2906	if (msrcs == m0srcs)
	2907	break;
	2908	}
	2909	MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
	2910	msrcs));
	2911	pmr->mr_numsrc = htons(msrcs);
	2912	nbytes += (msrcs * sizeof(struct in6_addr));
	2913	}
	2914
	2915	if (is_source_query && msrcs == 0) {
	2916	MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
	2917	if (m != m0)
	2918	m_freem(m);
	2919	return (0);
	2920	}
	2921
	2922	/*
	2923	* We are good to go with first packet.
	2924	*/
	2925	if (m != m0) {
	2926	MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
	2927	m->m_pkthdr.vt_nrecs = 1;
	2928	IF_ENQUEUE(ifq, m);
	2929	} else {
	2930	m->m_pkthdr.vt_nrecs++;
	2931	}
	2932	/*
	2933	* No further work needed if no source list in packet(s).
	2934	*/
	2935	if (!record_has_sources)
	2936	return (nbytes);
	2937
	2938	/*
	2939	* Whilst sources remain to be announced, we need to allocate
	2940	* a new packet and fill out as many sources as will fit.
	2941	* Always try for a cluster first.
	2942	*/
	2943	while (nims != NULL) {
	2944	if (IF_QFULL(ifq)) {
	2945	MLD_PRINTF(("%s: outbound queue full\n", __func__));
	2946	return (-ENOMEM);
	2947	}
	2948	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	2949	if (m == NULL)
	2950	m = m_gethdr(M_DONTWAIT, MT_DATA);
	2951	if (m == NULL)
	2952	return (-ENOMEM);
	2953	mld_save_context(m, ifp);
	2954	md = m_getptr(m, 0, &off);
	2955	pmr = (struct mldv2_record )(mtod(md, uint8_t ) + off);
	2956	MLD_PRINTF(("%s: allocated next packet\n", __func__));
	2957
	2958	if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
	2959	if (m != m0)
	2960	m_freem(m);
	2961	MLD_PRINTF(("%s: m_append() failed.\n", __func__));
	2962	return (-ENOMEM);
	2963	}
	2964	m->m_pkthdr.vt_nrecs = 1;
	2965	nbytes += sizeof(struct mldv2_record);
	2966
	2967	m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
	2968	sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
	2969
	2970	msrcs = 0;
	2971	RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
	2972	MLD_PRINTF(("%s: visit node %s\n",
	2973	__func__, ip6_sprintf(&ims->im6s_addr)));
	2974	now = im6s_get_mode(inm, ims, 1);
	2975	if ((now != mode) \|\|
	2976	(now == mode &&
	2977	(!use_block_allow && mode == MCAST_UNDEFINED))) {
	2978	MLD_PRINTF(("%s: skip node\n", __func__));
	2979	continue;
	2980	}
	2981	if (is_source_query && ims->im6s_stp == 0) {
	2982	MLD_PRINTF(("%s: skip unrecorded node\n",
	2983	__func__));
	2984	continue;
	2985	}
	2986	MLD_PRINTF(("%s: append node\n", __func__));
	2987	if (!m_append(m, sizeof(struct in6_addr),
	2988	(void *)&ims->im6s_addr)) {
	2989	if (m != m0)
	2990	m_freem(m);
	2991	MLD_PRINTF(("%s: m_append() failed.\n",
	2992	__func__));
	2993	return (-ENOMEM);
	2994	}
	2995	++msrcs;
	2996	if (msrcs == m0srcs)
	2997	break;
	2998	}
	2999	pmr->mr_numsrc = htons(msrcs);
	3000	nbytes += (msrcs * sizeof(struct in6_addr));
	3001
	3002	MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
	3003	IF_ENQUEUE(ifq, m);
	3004	}
	3005
	3006	return (nbytes);
	3007	}
	3008
	/*
	 * Type used to mark record pass completion.
	 * We exploit the fact we can cast to this easily from the
	 * current filter modes on each ip6_msource node.
	 * REC_FULL is the bitwise OR of REC_ALLOW and REC_BLOCK, so the
	 * values can be combined as a mask of completed record types.
	 */
	typedef enum {
		REC_NONE = 0x00,	/* MCAST_UNDEFINED */
		REC_ALLOW = 0x01,	/* MCAST_INCLUDE */
		REC_BLOCK = 0x02,	/* MCAST_EXCLUDE */
		REC_FULL = REC_ALLOW | REC_BLOCK
	} rectype_t;
	3020
	3021	/*
	3022	* Enqueue an MLDv2 filter list change to the given output queue.
	3023	*
	3024	* Source list filter state is held in an RB-tree. When the filter list
	3025	* for a group is changed without changing its mode, we need to compute
	3026	* the deltas between T0 and T1 for each source in the filter set,
	3027	* and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
	3028	*
	3029	* As we may potentially queue two record types, and the entire R-B tree
	3030	* needs to be walked at once, we break this out into its own function
	3031	* so we can generate a tightly packed queue of packets.
	3032	*
	3033	* XXX This could be written to only use one tree walk, although that makes
	3034	* serializing into the mbuf chains a bit harder. For now we do two walks
	3035	* which makes things easier on us, and it may or may not be harder on
	3036	* the L2 cache.
	3037	*
	3038	* If successful the size of all data appended to the queue is returned,
	3039	* otherwise an error code less than zero is returned, or zero if
	3040	* no record(s) were appended.
	3041	*/
	3042	static int
	3043	mld_v2_enqueue_filter_change(struct ifqueue ifq, struct in6_multi inm)
	3044	{
	3045	static const int MINRECLEN =
	3046	sizeof(struct mldv2_record) + sizeof(struct in6_addr);
	3047	struct ifnet *ifp;
	3048	struct mldv2_record mr;
	3049	struct mldv2_record *pmr;
	3050	struct ip6_msource ims, nims;
	3051	struct mbuf m, m0, *md;
	3052	int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
	3053	int nallow, nblock;
	3054	uint8_t mode, now, then;
	3055	rectype_t crt, drt, nrt;
	3056
	3057	IN6M_LOCK_ASSERT_HELD(inm);
	3058
	3059	if (inm->in6m_nsrc == 0 \|\|
	3060	(inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
	3061	return (0);
	3062
	3063	ifp = inm->in6m_ifp; /* interface */
	3064	mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */
	3065	crt = REC_NONE; /* current group record type */
	3066	drt = REC_NONE; /* mask of completed group record types */
	3067	nrt = REC_NONE; /* record type for current node */
	3068	m0srcs = 0; /* # source which will fit in current mbuf chain */
	3069	npbytes = 0; /* # of bytes appended this packet */
	3070	nbytes = 0; /* # of bytes appended to group's state-change queue */
	3071	rsrcs = 0; /* # sources encoded in current record */
	3072	schanged = 0; /* # nodes encoded in overall filter change */
	3073	nallow = 0; /* # of source entries in ALLOW_NEW */
	3074	nblock = 0; /* # of source entries in BLOCK_OLD */
	3075	nims = NULL; /* next tree node pointer */
	3076
	3077	/*
	3078	* For each possible filter record mode.
	3079	* The first kind of source we encounter tells us which
	3080	* is the first kind of record we start appending.
	3081	* If a node transitioned to UNDEFINED at t1, its mode is treated
	3082	* as the inverse of the group's filter mode.
	3083	*/
	3084	while (drt != REC_FULL) {
	3085	do {
	3086	m0 = ifq->ifq_tail;
	3087	if (m0 != NULL &&
	3088	(m0->m_pkthdr.vt_nrecs + 1 <=
	3089	MLD_V2_REPORT_MAXRECS) &&
	3090	(m0->m_pkthdr.len + MINRECLEN) <
	3091	(ifp->if_mtu - MLD_MTUSPACE)) {
	3092	m = m0;
	3093	m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
	3094	sizeof(struct mldv2_record)) /
	3095	sizeof(struct in6_addr);
	3096	MLD_PRINTF(("%s: use previous packet\n",
	3097	__func__));
	3098	} else {
	3099	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	3100	if (m == NULL)
	3101	m = m_gethdr(M_DONTWAIT, MT_DATA);
	3102	if (m == NULL) {
	3103	MLD_PRINTF(("%s: m_get*() failed\n",
	3104	__func__));
	3105	return (-ENOMEM);
	3106	}
	3107	m->m_pkthdr.vt_nrecs = 0;
	3108	mld_save_context(m, ifp);
	3109	m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
	3110	sizeof(struct mldv2_record)) /
	3111	sizeof(struct in6_addr);
	3112	npbytes = 0;
	3113	MLD_PRINTF(("%s: allocated new packet\n",
	3114	__func__));
	3115	}
	3116	/*
	3117	* Append the MLD group record header to the
	3118	* current packet's data area.
	3119	* Recalculate pointer to free space for next
	3120	* group record, in case m_append() allocated
	3121	* a new mbuf or cluster.
	3122	*/
	3123	memset(&mr, 0, sizeof(mr));
	3124	mr.mr_addr = inm->in6m_addr;
	3125	in6_clearscope(&mr.mr_addr);
	3126	if (!m_append(m, sizeof(mr), (void *)&mr)) {
	3127	if (m != m0)
	3128	m_freem(m);
	3129	MLD_PRINTF(("%s: m_append() failed\n",
	3130	__func__));
	3131	return (-ENOMEM);
	3132	}
	3133	npbytes += sizeof(struct mldv2_record);
	3134	if (m != m0) {
	3135	/* new packet; offset in chain */
	3136	md = m_getptr(m, npbytes -
	3137	sizeof(struct mldv2_record), &off);
	3138	pmr = (struct mldv2_record *)(mtod(md,
	3139	uint8_t *) + off);
	3140	} else {
	3141	/* current packet; offset from last append */
	3142	md = m_last(m);
	3143	pmr = (struct mldv2_record *)(mtod(md,
	3144	uint8_t *) + md->m_len -
	3145	sizeof(struct mldv2_record));
	3146	}
	3147	/*
	3148	* Begin walking the tree for this record type
	3149	* pass, or continue from where we left off
	3150	* previously if we had to allocate a new packet.
	3151	* Only report deltas in-mode at t1.
	3152	* We need not report included sources as allowed
	3153	* if we are in inclusive mode on the group,
	3154	* however the converse is not true.
	3155	*/
	3156	rsrcs = 0;
	3157	if (nims == NULL) {
	3158	nims = RB_MIN(ip6_msource_tree,
	3159	&inm->in6m_srcs);
	3160	}
	3161	RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
	3162	MLD_PRINTF(("%s: visit node %s\n", __func__,
	3163	ip6_sprintf(&ims->im6s_addr)));
	3164	now = im6s_get_mode(inm, ims, 1);
	3165	then = im6s_get_mode(inm, ims, 0);
	3166	MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
	3167	__func__, then, now));
	3168	if (now == then) {
	3169	MLD_PRINTF(("%s: skip unchanged\n",
	3170	__func__));
	3171	continue;
	3172	}
	3173	if (mode == MCAST_EXCLUDE &&
	3174	now == MCAST_INCLUDE) {
	3175	MLD_PRINTF(("%s: skip IN src on EX "
	3176	"group\n", __func__));
	3177	continue;
	3178	}
	3179	nrt = (rectype_t)now;
	3180	if (nrt == REC_NONE)
	3181	nrt = (rectype_t)(~mode & REC_FULL);
	3182	if (schanged++ == 0) {
	3183	crt = nrt;
	3184	} else if (crt != nrt)
	3185	continue;
	3186	if (!m_append(m, sizeof(struct in6_addr),
	3187	(void *)&ims->im6s_addr)) {
	3188	if (m != m0)
	3189	m_freem(m);
	3190	MLD_PRINTF(("%s: m_append() failed\n",
	3191	__func__));
	3192	return (-ENOMEM);
	3193	}
	3194	nallow += !!(crt == REC_ALLOW);
	3195	nblock += !!(crt == REC_BLOCK);
	3196	if (++rsrcs == m0srcs)
	3197	break;
	3198	}
	3199	/*
	3200	* If we did not append any tree nodes on this
	3201	* pass, back out of allocations.
	3202	*/
	3203	if (rsrcs == 0) {
	3204	npbytes -= sizeof(struct mldv2_record);
	3205	if (m != m0) {
	3206	MLD_PRINTF(("%s: m_free(m)\n",
	3207	__func__));
	3208	m_freem(m);
	3209	} else {
	3210	MLD_PRINTF(("%s: m_adj(m, -mr)\n",
	3211	__func__));
	3212	m_adj(m, -((int)sizeof(
	3213	struct mldv2_record)));
	3214	}
	3215	continue;
	3216	}
	3217	npbytes += (rsrcs * sizeof(struct in6_addr));
	3218	if (crt == REC_ALLOW)
	3219	pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
	3220	else if (crt == REC_BLOCK)
	3221	pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
	3222	pmr->mr_numsrc = htons(rsrcs);
	3223	/*
	3224	* Count the new group record, and enqueue this
	3225	* packet if it wasn't already queued.
	3226	*/
	3227	m->m_pkthdr.vt_nrecs++;
	3228	if (m != m0)
	3229	IF_ENQUEUE(ifq, m);
	3230	nbytes += npbytes;
	3231	} while (nims != NULL);
	3232	drt \|= crt;
	3233	crt = (~crt & REC_FULL);
	3234	}
	3235
	3236	MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
	3237	nallow, nblock));
	3238
	3239	return (nbytes);
	3240	}
	3241
	3242	static int
	3243	mld_v2_merge_state_changes(struct in6_multi inm, struct ifqueue ifscq)
	3244	{
	3245	struct ifqueue *gq;
	3246	struct mbuf m; / pending state-change */
	3247	struct mbuf m0; / copy of pending state-change */
	3248	struct mbuf mt; / last state-change in packet */
	3249	struct mbuf *n;
	3250	int docopy, domerge;
	3251	u_int recslen;
	3252
	3253	IN6M_LOCK_ASSERT_HELD(inm);
	3254
	3255	docopy = 0;
	3256	domerge = 0;
	3257	recslen = 0;
	3258
	3259	/*
	3260	* If there are further pending retransmissions, make a writable
	3261	* copy of each queued state-change message before merging.
	3262	*/
	3263	if (inm->in6m_scrv > 0)
	3264	docopy = 1;
	3265
	3266	gq = &inm->in6m_scq;
	3267	#ifdef MLD_DEBUG
	3268	if (gq->ifq_head == NULL) {
	3269	MLD_PRINTF(("%s: WARNING: queue for inm 0x%llx is empty\n",
	3270	__func__, (uint64_t)VM_KERNEL_ADDRPERM(inm)));
	3271	}
	3272	#endif
	3273
	3274	/*
	3275	* Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
	3276	* packet might not always be at the head of the ifqueue.
	3277	*/
	3278	m = gq->ifq_head;
	3279	while (m != NULL) {
	3280	/*
	3281	* Only merge the report into the current packet if
	3282	* there is sufficient space to do so; an MLDv2 report
	3283	* packet may only contain 65,535 group records.
	3284	* Always use a simple mbuf chain concatentation to do this,
	3285	* as large state changes for single groups may have
	3286	* allocated clusters.
	3287	*/
	3288	domerge = 0;
	3289	mt = ifscq->ifq_tail;
	3290	if (mt != NULL) {
	3291	recslen = m_length(m);
	3292
	3293	if ((mt->m_pkthdr.vt_nrecs +
	3294	m->m_pkthdr.vt_nrecs <=
	3295	MLD_V2_REPORT_MAXRECS) &&
	3296	(mt->m_pkthdr.len + recslen <=
	3297	(inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
	3298	domerge = 1;
	3299	}
	3300
	3301	if (!domerge && IF_QFULL(gq)) {
	3302	MLD_PRINTF(("%s: outbound queue full, skipping whole "
	3303	"packet 0x%llx\n", __func__,
	3304	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	3305	n = m->m_nextpkt;
	3306	if (!docopy) {
	3307	IF_REMQUEUE(gq, m);
	3308	m_freem(m);
	3309	}
	3310	m = n;
	3311	continue;
	3312	}
	3313
	3314	if (!docopy) {
	3315	MLD_PRINTF(("%s: dequeueing 0x%llx\n", __func__,
	3316	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	3317	n = m->m_nextpkt;
	3318	IF_REMQUEUE(gq, m);
	3319	m0 = m;
	3320	m = n;
	3321	} else {
	3322	MLD_PRINTF(("%s: copying 0x%llx\n", __func__,
	3323	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	3324	m0 = m_dup(m, M_NOWAIT);
	3325	if (m0 == NULL)
	3326	return (ENOMEM);
	3327	m0->m_nextpkt = NULL;
	3328	m = m->m_nextpkt;
	3329	}
	3330
	3331	if (!domerge) {
	3332	MLD_PRINTF(("%s: queueing 0x%llx to ifscq 0x%llx)\n",
	3333	__func__, (uint64_t)VM_KERNEL_ADDRPERM(m0),
	3334	(uint64_t)VM_KERNEL_ADDRPERM(ifscq)));
	3335	IF_ENQUEUE(ifscq, m0);
	3336	} else {
	3337	struct mbuf mtl; / last mbuf of packet mt */
	3338
	3339	MLD_PRINTF(("%s: merging 0x%llx with ifscq tail "
	3340	"0x%llx)\n", __func__,
	3341	(uint64_t)VM_KERNEL_ADDRPERM(m0),
	3342	(uint64_t)VM_KERNEL_ADDRPERM(mt)));
	3343
	3344	mtl = m_last(mt);
	3345	m0->m_flags &= ~M_PKTHDR;
	3346	mt->m_pkthdr.len += recslen;
	3347	mt->m_pkthdr.vt_nrecs +=
	3348	m0->m_pkthdr.vt_nrecs;
	3349
	3350	mtl->m_next = m0;
	3351	}
	3352	}
	3353
	3354	return (0);
	3355	}
	3356
	3357	/*
	3358	* Respond to a pending MLDv2 General Query.
	3359	*/
	3360	static uint32_t
	3361	mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
	3362	{
	3363	struct ifnet *ifp;
	3364	struct in6_multi *inm;
	3365	struct in6_multistep step;
	3366	int retval;
	3367
	3368	MLI_LOCK_ASSERT_HELD(mli);
	3369
	3370	VERIFY(mli->mli_version == MLD_VERSION_2);
	3371
	3372	ifp = mli->mli_ifp;
	3373	MLI_UNLOCK(mli);
	3374
	3375	in6_multihead_lock_shared();
	3376	IN6_FIRST_MULTI(step, inm);
	3377	while (inm != NULL) {
	3378	IN6M_LOCK(inm);
	3379	if (inm->in6m_ifp != ifp)
	3380	goto next;
	3381
	3382	switch (inm->in6m_state) {
	3383	case MLD_NOT_MEMBER:
	3384	case MLD_SILENT_MEMBER:
	3385	break;
	3386	case MLD_REPORTING_MEMBER:
	3387	case MLD_IDLE_MEMBER:
	3388	case MLD_LAZY_MEMBER:
	3389	case MLD_SLEEPING_MEMBER:
	3390	case MLD_AWAKENING_MEMBER:
	3391	inm->in6m_state = MLD_REPORTING_MEMBER;
	3392	MLI_LOCK(mli);
	3393	retval = mld_v2_enqueue_group_record(&mli->mli_gq,
	3394	inm, 0, 0, 0, 0);
	3395	MLI_UNLOCK(mli);
	3396	MLD_PRINTF(("%s: enqueue record = %d\n",
	3397	__func__, retval));
	3398	break;
	3399	case MLD_G_QUERY_PENDING_MEMBER:
	3400	case MLD_SG_QUERY_PENDING_MEMBER:
	3401	case MLD_LEAVING_MEMBER:
	3402	break;
	3403	}
	3404	next:
	3405	IN6M_UNLOCK(inm);
	3406	IN6_NEXT_MULTI(step, inm);
	3407	}
	3408	in6_multihead_lock_done();
	3409
	3410	MLI_LOCK(mli);
	3411	mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
	3412	MLI_LOCK_ASSERT_HELD(mli);
	3413
	3414	/*
	3415	* Slew transmission of bursts over 1 second intervals.
	3416	*/
	3417	if (mli->mli_gq.ifq_head != NULL) {
	3418	mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
	3419	MLD_RESPONSE_BURST_INTERVAL);
	3420	}
	3421
	3422	return (mli->mli_v2_timer);
	3423	}
	3424
	3425	/*
	3426	* Transmit the next pending message in the output queue.
	3427	*
	3428	* Must not be called with in6m_lockm or mli_lock held.
	3429	*/
	3430	static void
	3431	mld_dispatch_packet(struct mbuf *m)
	3432	{
	3433	struct ip6_moptions *im6o;
	3434	struct ifnet *ifp;
	3435	struct ifnet *oifp = NULL;
	3436	struct mbuf *m0;
	3437	struct mbuf *md;
	3438	struct ip6_hdr *ip6;
	3439	struct mld_hdr *mld;
	3440	int error;
	3441	int off;
	3442	int type;
	3443
	3444	MLD_PRINTF(("%s: transmit 0x%llx\n", __func__,
	3445	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	3446
	3447	/*
	3448	* Check if the ifnet is still attached.
	3449	*/
	3450	ifp = mld_restore_context(m);
	3451	if (ifp == NULL \|\| !ifnet_is_attached(ifp, 0)) {
	3452	MLD_PRINTF(("%s: dropped 0x%llx as ifindex %u went away.\n",
	3453	__func__, (uint64_t)VM_KERNEL_ADDRPERM(m),
	3454	(u_int)if_index));
	3455	m_freem(m);
	3456	ip6stat.ip6s_noroute++;
	3457	return;
	3458	}
	3459
	3460	im6o = ip6_allocmoptions(M_WAITOK);
	3461	if (im6o == NULL) {
	3462	m_freem(m);
	3463	return;
	3464	}
	3465
	3466	im6o->im6o_multicast_hlim = 1;
	3467	im6o->im6o_multicast_loop = 0;
	3468	im6o->im6o_multicast_ifp = ifp;
	3469
	3470	if (m->m_flags & M_MLDV1) {
	3471	m0 = m;
	3472	} else {
	3473	m0 = mld_v2_encap_report(ifp, m);
	3474	if (m0 == NULL) {
	3475	MLD_PRINTF(("%s: dropped 0x%llx\n", __func__,
	3476	(uint64_t)VM_KERNEL_ADDRPERM(m)));
	3477	/*
	3478	* mld_v2_encap_report() has already freed our mbuf.
	3479	*/
	3480	IM6O_REMREF(im6o);
	3481	ip6stat.ip6s_odropped++;
	3482	return;
	3483	}
	3484	}
	3485
	3486	mld_scrub_context(m0);
	3487	m->m_flags &= ~(M_PROTOFLAGS);
	3488	m0->m_pkthdr.rcvif = lo_ifp;
	3489
	3490	ip6 = mtod(m0, struct ip6_hdr *);
	3491	(void) in6_setscope(&ip6->ip6_dst, ifp, NULL);
	3492
	3493	/*
	3494	* Retrieve the ICMPv6 type before handoff to ip6_output(),
	3495	* so we can bump the stats.
	3496	*/
	3497	md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
	3498	mld = (struct mld_hdr )(mtod(md, uint8_t ) + off);
	3499	type = mld->mld_type;
	3500
	3501	if (ifp->if_eflags & IFEF_TXSTART) {
	3502	/*
	3503	* Use control service class if the outgoing
	3504	* interface supports transmit-start model.
	3505	*/
	3506	(void) m_set_service_class(m0, MBUF_SC_CTL);
	3507	}
	3508
	3509	error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
	3510	&oifp, NULL);
	3511
	3512	IM6O_REMREF(im6o);
	3513
	3514	if (error) {
	3515	MLD_PRINTF(("%s: ip6_output(0x%llx) = %d\n", __func__,
	3516	(uint64_t)VM_KERNEL_ADDRPERM(m0), error));
	3517	if (oifp != NULL)
	3518	ifnet_release(oifp);
	3519	return;
	3520	}
	3521
	3522	icmp6stat.icp6s_outhist[type]++;
	3523	if (oifp != NULL) {
	3524	icmp6_ifstat_inc(oifp, ifs6_out_msg);
	3525	switch (type) {
	3526	case MLD_LISTENER_REPORT:
	3527	case MLDV2_LISTENER_REPORT:
	3528	icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
	3529	break;
	3530	case MLD_LISTENER_DONE:
	3531	icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
	3532	break;
	3533	}
	3534	ifnet_release(oifp);
	3535	}
	3536	}
	3537
	3538	/*
	3539	* Encapsulate an MLDv2 report.
	3540	*
	3541	* KAME IPv6 requires that hop-by-hop options be passed separately,
	3542	* and that the IPv6 header be prepended in a separate mbuf.
	3543	*
	3544	* Returns a pointer to the new mbuf chain head, or NULL if the
	3545	* allocation failed.
	3546	*/
	3547	static struct mbuf *
	3548	mld_v2_encap_report(struct ifnet ifp, struct mbuf m)
	3549	{
	3550	struct mbuf *mh;
	3551	struct mldv2_report *mld;
	3552	struct ip6_hdr *ip6;
	3553	struct in6_ifaddr *ia;
	3554	int mldreclen;
	3555
	3556	VERIFY(m->m_flags & M_PKTHDR);
	3557
	3558	/*
	3559	* RFC3590: OK to send as :: or tentative during DAD.
	3560	*/
	3561	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY\|IN6_IFF_ANYCAST);
	3562	if (ia == NULL)
	3563	MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
	3564
	3565	MGETHDR(mh, M_DONTWAIT, MT_HEADER);
	3566	if (mh == NULL) {
	3567	if (ia != NULL)
	3568	IFA_REMREF(&ia->ia_ifa);
	3569	m_freem(m);
	3570	return (NULL);
	3571	}
	3572	MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
	3573
	3574	mldreclen = m_length(m);
	3575	MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
	3576
	3577	mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
	3578	mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
	3579	sizeof(struct mldv2_report) + mldreclen;
	3580
	3581	ip6 = mtod(mh, struct ip6_hdr *);
	3582	ip6->ip6_flow = 0;
	3583	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	3584	ip6->ip6_vfc \|= IPV6_VERSION;
	3585	ip6->ip6_nxt = IPPROTO_ICMPV6;
	3586	if (ia != NULL)
	3587	IFA_LOCK(&ia->ia_ifa);
	3588	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	3589	if (ia != NULL) {
	3590	IFA_UNLOCK(&ia->ia_ifa);
	3591	IFA_REMREF(&ia->ia_ifa);
	3592	ia = NULL;
	3593	}
	3594	ip6->ip6_dst = in6addr_linklocal_allv2routers;
	3595	/* scope ID will be set in netisr */
	3596
	3597	mld = (struct mldv2_report *)(ip6 + 1);
	3598	mld->mld_type = MLDV2_LISTENER_REPORT;
	3599	mld->mld_code = 0;
	3600	mld->mld_cksum = 0;
	3601	mld->mld_v2_reserved = 0;
	3602	mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
	3603	m->m_pkthdr.vt_nrecs = 0;
	3604	m->m_flags &= ~M_PKTHDR;
	3605
	3606	mh->m_next = m;
	3607	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	3608	sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
	3609	return (mh);
	3610	}
	3611
	#ifdef MLD_DEBUG
	/*
	 * Map an MLDv2 group record type to a short name for debug output.
	 * Any value that is not one of the six record types listed below
	 * yields "unknown".
	 */
	static const char *
	mld_rec_type_to_str(const int type)
	{
		switch (type) {
		case MLD_CHANGE_TO_EXCLUDE_MODE:
			return "TO_EX";
		case MLD_CHANGE_TO_INCLUDE_MODE:
			return "TO_IN";
		case MLD_MODE_IS_EXCLUDE:
			return "MODE_EX";
		case MLD_MODE_IS_INCLUDE:
			return "MODE_IN";
		case MLD_ALLOW_NEW_SOURCES:
			return "ALLOW_NEW";
		case MLD_BLOCK_OLD_SOURCES:
			return "BLOCK_OLD";
		default:
			return "unknown";
		}
	}
	#endif
	3641
	3642	void
	3643	mld_init(void)
	3644	{
	3645
	3646	MLD_PRINTF(("%s: initializing\n", __func__));
	3647
	3648	/* Setup lock group and attribute for mld_mtx */
	3649	mld_mtx_grp_attr = lck_grp_attr_alloc_init();
	3650	mld_mtx_grp = lck_grp_alloc_init("mld_mtx\n", mld_mtx_grp_attr);
	3651	mld_mtx_attr = lck_attr_alloc_init();
	3652	lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
	3653
	3654	ip6_initpktopts(&mld_po);
	3655	mld_po.ip6po_hlim = 1;
	3656	mld_po.ip6po_hbh = &mld_ra.hbh;
	3657	mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
	3658	mld_po.ip6po_flags = IP6PO_DONTFRAG;
	3659	LIST_INIT(&mli_head);
	3660
	3661	mli_size = sizeof (struct mld_ifinfo);
	3662	mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
	3663	0, MLI_ZONE_NAME);
	3664	if (mli_zone == NULL) {
	3665	panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
	3666	/* NOTREACHED */
	3667	}
	3668	zone_change(mli_zone, Z_EXPAND, TRUE);
	3669	zone_change(mli_zone, Z_CALLERACCT, FALSE);
	3670	}