]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/mld6.c
7cedd2e22163f08207acadec834d3a448c2c7f30
[apple/xnu.git] / bsd / netinet6 / mld6.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*-
29 * Copyright (c) 2009 Bruce Simpson.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 * 3. The name of the author may not be used to endorse or promote
40 * products derived from this software without specific prior written
41 * permission.
42 *
43 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
44 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
47 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53 * SUCH DAMAGE.
54 */
55
56 /*
57 * Copyright (c) 1988 Stephen Deering.
58 * Copyright (c) 1992, 1993
59 * The Regents of the University of California. All rights reserved.
60 *
61 * This code is derived from software contributed to Berkeley by
62 * Stephen Deering of Stanford University.
63 *
64 * Redistribution and use in source and binary forms, with or without
65 * modification, are permitted provided that the following conditions
66 * are met:
67 * 1. Redistributions of source code must retain the above copyright
68 * notice, this list of conditions and the following disclaimer.
69 * 2. Redistributions in binary form must reproduce the above copyright
70 * notice, this list of conditions and the following disclaimer in the
71 * documentation and/or other materials provided with the distribution.
72 * 3. All advertising materials mentioning features or use of this software
73 * must display the following acknowledgement:
74 * This product includes software developed by the University of
75 * California, Berkeley and its contributors.
76 * 4. Neither the name of the University nor the names of its contributors
77 * may be used to endorse or promote products derived from this software
78 * without specific prior written permission.
79 *
80 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
81 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
82 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
83 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
84 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
85 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
86 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
87 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
88 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
89 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
90 * SUCH DAMAGE.
91 *
92 * @(#)igmp.c 8.1 (Berkeley) 7/19/93
93 */
94 /*
95 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
96 * support for mandatory and extensible security protections. This notice
97 * is included in support of clause 2.2 (b) of the Apple Public License,
98 * Version 2.0.
99 */
100
101 #include <sys/cdefs.h>
102
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/mbuf.h>
106 #include <sys/socket.h>
107 #include <sys/protosw.h>
108 #include <sys/sysctl.h>
109 #include <sys/kernel.h>
110 #include <sys/malloc.h>
111 #include <sys/mcache.h>
112
113 #include <kern/zalloc.h>
114
115 #include <net/if.h>
116 #include <net/route.h>
117
118 #include <netinet/in.h>
119 #include <netinet/in_var.h>
120 #include <netinet6/in6_var.h>
121 #include <netinet/ip6.h>
122 #include <netinet6/ip6_var.h>
123 #include <netinet6/scope6_var.h>
124 #include <netinet/icmp6.h>
125 #include <netinet6/mld6.h>
126 #include <netinet6/mld6_var.h>
127
/*
 * Lock attribute, lock group and lock-group attribute for mld_mtx.
 * mld_mtx_grp and mld_mtx_attr are also used for every per-interface
 * mli_lock (see mli_alloc() and mli_free()).
 * NOTE(review): the code that allocates these objects is not part of this
 * excerpt.
 */
static lck_attr_t *mld_mtx_attr;
static lck_grp_t *mld_mtx_grp;
static lck_grp_attr_t *mld_mtx_grp_attr;
132
133 /*
134 * Locking and reference counting:
135 *
136 * mld_mtx mainly protects mli_head. In cases where both mld_mtx and
137 * in6_multihead_lock must be held, the former must be acquired first in order
138 * to maintain lock ordering. It is not a requirement that mld_mtx be
139 * acquired first before in6_multihead_lock, but in case both must be acquired
140 * in succession, the correct lock ordering must be followed.
141 *
142 * Instead of walking the if_multiaddrs list at the interface and returning
143 * the ifma_protospec value of a matching entry, we search the global list
144 * of in6_multi records and find it that way; this is done with in6_multihead
145 * lock held. Doing so avoids the race condition issues that many other BSDs
146 * suffer from (therefore in our implementation, ifma_protospec will never be
147 * NULL for as long as the in6_multi is valid.)
148 *
149 * The above creates a requirement for the in6_multi to stay in in6_multihead
 * list even after the final MLD leave (in MLDv2 mode) until it no longer needs
 * to be retransmitted (this is not required for MLDv1.) In order to handle
152 * this, the request and reference counts of the in6_multi are bumped up when
153 * the state changes to MLD_LEAVING_MEMBER, and later dropped in the timeout
154 * handler. Each in6_multi holds a reference to the underlying mld_ifinfo.
155 *
 * Thus, the permitted lock order is:
157 *
158 * mld_mtx, in6_multihead_lock, inm6_lock, mli_lock
159 *
160 * Any may be taken independently, but if any are held at the same time,
161 * the above lock order must be followed.
162 */
163 static decl_lck_mtx_data(, mld_mtx);
164
165 SLIST_HEAD(mld_in6m_relhead, in6_multi);
166
167 static void mli_initvar(struct mld_ifinfo *, struct ifnet *, int);
168 static struct mld_ifinfo *mli_alloc(int);
169 static void mli_free(struct mld_ifinfo *);
170 static void mli_delete(const struct ifnet *, struct mld_in6m_relhead *);
171 static void mld_dispatch_packet(struct mbuf *);
172 static void mld_final_leave(struct in6_multi *, struct mld_ifinfo *);
173 static int mld_handle_state_change(struct in6_multi *,
174 struct mld_ifinfo *);
175 static int mld_initial_join(struct in6_multi *, struct mld_ifinfo *,
176 const int);
177 #ifdef MLD_DEBUG
178 static const char * mld_rec_type_to_str(const int);
179 #endif
180 static void mld_set_version(struct mld_ifinfo *, const int);
181 static void mld_flush_relq(struct mld_ifinfo *, struct mld_in6m_relhead *);
182 static void mld_dispatch_queue(struct mld_ifinfo *, struct ifqueue *, int);
183 static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
184 /*const*/ struct mld_hdr *);
185 static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
186 /*const*/ struct mld_hdr *);
187 static void mld_v1_process_group_timer(struct in6_multi *, const int);
188 static void mld_v1_process_querier_timers(struct mld_ifinfo *);
189 static int mld_v1_transmit_report(struct in6_multi *, const int);
190 static void mld_v1_update_group(struct in6_multi *, const int);
191 static void mld_v2_cancel_link_timers(struct mld_ifinfo *);
192 static void mld_v2_dispatch_general_query(struct mld_ifinfo *);
193 static struct mbuf *
194 mld_v2_encap_report(struct ifnet *, struct mbuf *);
195 static int mld_v2_enqueue_filter_change(struct ifqueue *,
196 struct in6_multi *);
197 static int mld_v2_enqueue_group_record(struct ifqueue *,
198 struct in6_multi *, const int, const int, const int,
199 const int);
200 static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
201 struct mbuf *, const int, const int);
202 static int mld_v2_merge_state_changes(struct in6_multi *,
203 struct ifqueue *);
204 static void mld_v2_process_group_timers(struct mld_ifinfo *,
205 struct ifqueue *, struct ifqueue *,
206 struct in6_multi *, const int);
207 static int mld_v2_process_group_query(struct in6_multi *,
208 int, struct mbuf *, const int);
209 static int sysctl_mld_gsr SYSCTL_HANDLER_ARGS;
210 static int sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS;
211
212 /*
213 * Normative references: RFC 2710, RFC 3590, RFC 3810.
214 *
215 * XXX LOR PREVENTION
216 * A special case for IPv6 is the in6_setscope() routine. ip6_output()
217 * will not accept an ifp; it wants an embedded scope ID, unlike
218 * ip_output(), which happily takes the ifp given to it. The embedded
219 * scope ID is only used by MLD to select the outgoing interface.
220 *
221 * As such, we exploit the fact that the scope ID is just the interface
222 * index, and embed it in the IPv6 destination address accordingly.
223 * This is potentially NOT VALID for MLDv1 reports, as they
224 * are always sent to the multicast group itself; as MLDv2
225 * reports are always sent to ff02::16, this is not an issue
226 * when MLDv2 is in use.
227 */
228
/*
 * Store the low 16 bits of zoneid, in network byte order, into the second
 * 16-bit word of the IPv6 address pointed to by pin6.
 */
#define MLD_EMBEDSCOPE(pin6, zoneid) \
	(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)

/*
 * Group-and-source query rate limit; the seconds field is readable and
 * writable through the gsrdelay sysctl (see sysctl_mld_gsr()).
 */
static struct timeval mld_gsrdelay = {10, 0};
/* List of attached mld_ifinfo records; protected by mld_mtx. */
static LIST_HEAD(, mld_ifinfo) mli_head;

/*
 * Flags set to 1 when the corresponding kind of timer is armed.
 * NOTE(review): the code that tests and clears them is outside this excerpt.
 */
static int interface_timers_running6;
static int state_change_timers_running6;
static int current_state_timers_running6;

/* Wrappers around the global MLD mutex. */
#define MLD_LOCK() \
	lck_mtx_lock(&mld_mtx)
#define MLD_LOCK_ASSERT_HELD() \
	lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_OWNED)
#define MLD_LOCK_ASSERT_NOTHELD() \
	lck_mtx_assert(&mld_mtx, LCK_MTX_ASSERT_NOTOWNED)
#define MLD_UNLOCK() \
	lck_mtx_unlock(&mld_mtx)

/*
 * Insert _in6m at the head of the list _head, linked through in6m_dtle.
 * Expands to a brace-enclosed block, not a single expression.
 */
#define MLD_ADD_DETACHED_IN6M(_head, _in6m) { \
	SLIST_INSERT_HEAD(_head, _in6m, in6m_dtle); \
}

/*
 * Unlink every record on the list _head and drop one reference on each
 * with IN6M_REMREF(); the list must be empty afterwards.  Expands to a
 * brace-enclosed block that declares the locals _in6m and _inm_tmp.
 */
#define MLD_REMOVE_DETACHED_IN6M(_head) { \
	struct in6_multi *_in6m, *_inm_tmp; \
	SLIST_FOREACH_SAFE(_in6m, _head, in6m_dtle, _inm_tmp) { \
		SLIST_REMOVE(_head, _in6m, in6_multi, in6m_dtle); \
		IN6M_REMREF(_in6m); \
	} \
	VERIFY(SLIST_EMPTY(_head)); \
}

/*
 * Zone parameters for mld_ifinfo allocations.
 * NOTE(review): the zone itself is created outside this excerpt.
 */
#define MLI_ZONE_MAX 64 /* maximum elements in zone */
#define MLI_ZONE_NAME "mld_ifinfo" /* zone name */

/* mli_alloc() clears mli_size bytes of each element taken from mli_zone. */
static unsigned int mli_size; /* size of zone element */
static struct zone *mli_zone; /* zone for mld_ifinfo */
266
SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */

/* Parent node for all MLD sysctls, declared under _net_inet6. */
SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_LOCKED, 0,
    "IPv6 Multicast Listener Discovery");
/* gsrdelay: read and validated through sysctl_mld_gsr(). */
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_gsrdelay.tv_sec, 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

/* ifinfo: read-only node served by sysctl_mld_ifinfo(), keyed by ifindex. */
SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_LOCKED,
    sysctl_mld_ifinfo, "Per-interface MLDv2 state");

/* v1enable: when zero, mld_v1_input_query() ignores MLDv1 queries. */
static int mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

/* use_allow: when non-zero, mli_initvar() sets MLIF_USEALLOW. */
static int mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

#ifdef MLD_DEBUG
/* debug: run-time debug knob, only built when MLD_DEBUG is defined. */
int mld_debug = 0;
SYSCTL_INT(_net_inet6_mld, OID_AUTO,
    debug, CTLFLAG_RW | CTLFLAG_LOCKED, &mld_debug, 0, "");
#endif
/*
 * Packed Router Alert option structure declaration: a hop-by-hop options
 * header followed by a padding option and a Router Alert option.
 */
struct mld_raopt {
	struct ip6_hbh hbh;
	struct ip6_opt pad;
	struct ip6_opt_router ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 *
 * The hop-by-hop header fields are initialised to zero, the padding option
 * is a PadN with a zero second field, and the Router Alert value holds
 * IP6OPT_RTALERT_MLD with its high byte first.
 */
static struct mld_raopt mld_ra = {
	.hbh = { 0, 0 },
	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
	.ra = {
		.ip6or_type = (u_int8_t)IP6OPT_ROUTER_ALERT,
		.ip6or_len = (u_int8_t)(IP6OPT_RTALERT_LEN - 2),
		.ip6or_value = {((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
		    (IP6OPT_RTALERT_MLD & 0xFF) }
	}
};
/*
 * IPv6 packet options for MLD.
 * NOTE(review): set up and used outside this excerpt; presumably it carries
 * mld_ra on outgoing packets -- confirm.
 */
static struct ip6_pktopts mld_po;
315
316 /*
317 * Retrieve or set threshold between group-source queries in seconds.
318 */
319 static int
320 sysctl_mld_gsr SYSCTL_HANDLER_ARGS
321 {
322 #pragma unused(arg1, arg2)
323 int error;
324 int i;
325
326 MLD_LOCK();
327
328 i = mld_gsrdelay.tv_sec;
329
330 error = sysctl_handle_int(oidp, &i, 0, req);
331 if (error || !req->newptr)
332 goto out_locked;
333
334 if (i < -1 || i >= 60) {
335 error = EINVAL;
336 goto out_locked;
337 }
338
339 mld_gsrdelay.tv_sec = i;
340
341 out_locked:
342 MLD_UNLOCK();
343 return (error);
344 }
345 /*
346 * Expose struct mld_ifinfo to userland, keyed by ifindex.
347 * For use by ifmcstat(8).
348 *
349 */
350 static int
351 sysctl_mld_ifinfo SYSCTL_HANDLER_ARGS
352 {
353 #pragma unused(oidp)
354 int *name;
355 int error;
356 u_int namelen;
357 struct ifnet *ifp;
358 struct mld_ifinfo *mli;
359 struct mld_ifinfo_u mli_u;
360
361 name = (int *)arg1;
362 namelen = arg2;
363
364 if (req->newptr != USER_ADDR_NULL)
365 return (EPERM);
366
367 if (namelen != 1)
368 return (EINVAL);
369
370 MLD_LOCK();
371
372 if (name[0] <= 0 || name[0] > (u_int)if_index) {
373 error = ENOENT;
374 goto out_locked;
375 }
376
377 error = ENOENT;
378
379 ifnet_head_lock_shared();
380 ifp = ifindex2ifnet[name[0]];
381 ifnet_head_done();
382 if (ifp == NULL)
383 goto out_locked;
384
385 bzero(&mli_u, sizeof (mli_u));
386
387 LIST_FOREACH(mli, &mli_head, mli_link) {
388 MLI_LOCK(mli);
389 if (ifp != mli->mli_ifp) {
390 MLI_UNLOCK(mli);
391 continue;
392 }
393
394 mli_u.mli_ifindex = mli->mli_ifp->if_index;
395 mli_u.mli_version = mli->mli_version;
396 mli_u.mli_v1_timer = mli->mli_v1_timer;
397 mli_u.mli_v2_timer = mli->mli_v2_timer;
398 mli_u.mli_flags = mli->mli_flags;
399 mli_u.mli_rv = mli->mli_rv;
400 mli_u.mli_qi = mli->mli_qi;
401 mli_u.mli_qri = mli->mli_qri;
402 mli_u.mli_uri = mli->mli_uri;
403 MLI_UNLOCK(mli);
404
405 error = SYSCTL_OUT(req, &mli_u, sizeof (mli_u));
406 break;
407 }
408
409 out_locked:
410 MLD_UNLOCK();
411 return (error);
412 }
413
414 /*
415 * Dispatch an entire queue of pending packet chains.
416 *
417 * Must not be called with in6m_lock held.
418 */
419 static void
420 mld_dispatch_queue(struct mld_ifinfo *mli, struct ifqueue *ifq, int limit)
421 {
422 struct mbuf *m;
423
424 if (mli != NULL)
425 MLI_LOCK_ASSERT_HELD(mli);
426
427 for (;;) {
428 IF_DEQUEUE(ifq, m);
429 if (m == NULL)
430 break;
431 MLD_PRINTF(("%s: dispatch %p from %p\n", __func__, ifq, m));
432 if (mli != NULL)
433 MLI_UNLOCK(mli);
434 mld_dispatch_packet(m);
435 if (mli != NULL)
436 MLI_LOCK(mli);
437 if (--limit == 0)
438 break;
439 }
440
441 if (mli != NULL)
442 MLI_LOCK_ASSERT_HELD(mli);
443 }
444
445 /*
446 * Filter outgoing MLD report state by group.
447 *
448 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
449 * and node-local addresses. However, kernel and socket consumers
450 * always embed the KAME scope ID in the address provided, so strip it
451 * when performing comparison.
452 * Note: This is not the same as the *multicast* scope.
453 *
454 * Return zero if the given group is one for which MLD reports
455 * should be suppressed, or non-zero if reports should be issued.
456 */
457 static __inline__ int
458 mld_is_addr_reported(const struct in6_addr *addr)
459 {
460
461 VERIFY(IN6_IS_ADDR_MULTICAST(addr));
462
463 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
464 return (0);
465
466 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
467 struct in6_addr tmp = *addr;
468 in6_clearscope(&tmp);
469 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
470 return (0);
471 }
472
473 return (1);
474 }
475
476 /*
477 * Attach MLD when PF_INET6 is attached to an interface.
478 */
479 struct mld_ifinfo *
480 mld_domifattach(struct ifnet *ifp, int how)
481 {
482 struct mld_ifinfo *mli;
483
484 MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
485 __func__, ifp, ifp->if_name, ifp->if_unit));
486
487 mli = mli_alloc(how);
488 if (mli == NULL)
489 return (NULL);
490
491 MLD_LOCK();
492
493 MLI_LOCK(mli);
494 mli_initvar(mli, ifp, 0);
495 mli->mli_debug |= IFD_ATTACHED;
496 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
497 MLI_ADDREF_LOCKED(mli); /* hold a reference for caller */
498 MLI_UNLOCK(mli);
499 ifnet_lock_shared(ifp);
500 mld6_initsilent(ifp, mli);
501 ifnet_lock_done(ifp);
502
503 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
504
505 MLD_UNLOCK();
506
507 MLD_PRINTF(("allocate mld_ifinfo for ifp %p(%s%d)\n",
508 ifp, ifp->if_name, ifp->if_unit));
509
510 return (mli);
511 }
512
513 /*
514 * Attach MLD when PF_INET6 is reattached to an interface. Caller is
515 * expected to have an outstanding reference to the mli.
516 */
517 void
518 mld_domifreattach(struct mld_ifinfo *mli)
519 {
520 struct ifnet *ifp;
521
522 MLD_LOCK();
523
524 MLI_LOCK(mli);
525 VERIFY(!(mli->mli_debug & IFD_ATTACHED));
526 ifp = mli->mli_ifp;
527 VERIFY(ifp != NULL);
528 mli_initvar(mli, ifp, 1);
529 mli->mli_debug |= IFD_ATTACHED;
530 MLI_ADDREF_LOCKED(mli); /* hold a reference for mli_head */
531 MLI_UNLOCK(mli);
532 ifnet_lock_shared(ifp);
533 mld6_initsilent(ifp, mli);
534 ifnet_lock_done(ifp);
535
536 LIST_INSERT_HEAD(&mli_head, mli, mli_link);
537
538 MLD_UNLOCK();
539
540 MLD_PRINTF(("reattached mld_ifinfo for ifp %p(%s%d)\n",
541 ifp, ifp->if_name, ifp->if_unit));
542 }
543
544 /*
545 * Hook for domifdetach.
546 */
547 void
548 mld_domifdetach(struct ifnet *ifp)
549 {
550 SLIST_HEAD(, in6_multi) in6m_dthead;
551
552 SLIST_INIT(&in6m_dthead);
553
554 MLD_PRINTF(("%s: called for ifp %p(%s%d)\n",
555 __func__, ifp, ifp->if_name, ifp->if_unit));
556
557 MLD_LOCK();
558 mli_delete(ifp, (struct mld_in6m_relhead *)&in6m_dthead);
559 MLD_UNLOCK();
560
561 /* Now that we're dropped all locks, release detached records */
562 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
563 }
564
565 /*
566 * Called at interface detach time. Note that we only flush all deferred
567 * responses and record releases; all remaining inm records and their source
568 * entries related to this interface are left intact, in order to handle
569 * the reattach case.
570 */
571 static void
572 mli_delete(const struct ifnet *ifp, struct mld_in6m_relhead *in6m_dthead)
573 {
574 struct mld_ifinfo *mli, *tmli;
575
576 MLD_LOCK_ASSERT_HELD();
577
578 LIST_FOREACH_SAFE(mli, &mli_head, mli_link, tmli) {
579 MLI_LOCK(mli);
580 if (mli->mli_ifp == ifp) {
581 /*
582 * Free deferred General Query responses.
583 */
584 IF_DRAIN(&mli->mli_gq);
585 IF_DRAIN(&mli->mli_v1q);
586 mld_flush_relq(mli, in6m_dthead);
587 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
588 mli->mli_debug &= ~IFD_ATTACHED;
589 MLI_UNLOCK(mli);
590
591 LIST_REMOVE(mli, mli_link);
592 MLI_REMREF(mli); /* release mli_head reference */
593 return;
594 }
595 MLI_UNLOCK(mli);
596 }
597 panic("%s: mld_ifinfo not found for ifp %p\n", __func__, ifp);
598 }
599
600 __private_extern__ void
601 mld6_initsilent(struct ifnet *ifp, struct mld_ifinfo *mli)
602 {
603 ifnet_lock_assert(ifp, IFNET_LCK_ASSERT_OWNED);
604
605 MLI_LOCK_ASSERT_NOTHELD(mli);
606 MLI_LOCK(mli);
607 if (!(ifp->if_flags & IFF_MULTICAST) &&
608 (ifp->if_eflags & (IFEF_IPV6_ND6ALT|IFEF_LOCALNET_PRIVATE)))
609 mli->mli_flags |= MLIF_SILENT;
610 else
611 mli->mli_flags &= ~MLIF_SILENT;
612 MLI_UNLOCK(mli);
613 }
614
615 static void
616 mli_initvar(struct mld_ifinfo *mli, struct ifnet *ifp, int reattach)
617 {
618 MLI_LOCK_ASSERT_HELD(mli);
619
620 mli->mli_ifp = ifp;
621 mli->mli_version = MLD_VERSION_2;
622 mli->mli_flags = 0;
623 mli->mli_rv = MLD_RV_INIT;
624 mli->mli_qi = MLD_QI_INIT;
625 mli->mli_qri = MLD_QRI_INIT;
626 mli->mli_uri = MLD_URI_INIT;
627
628 if (mld_use_allow)
629 mli->mli_flags |= MLIF_USEALLOW;
630 if (!reattach)
631 SLIST_INIT(&mli->mli_relinmhead);
632
633 /*
634 * Responses to general queries are subject to bounds.
635 */
636 mli->mli_gq.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
637 mli->mli_v1q.ifq_maxlen = MLD_MAX_RESPONSE_PACKETS;
638 }
639
640 static struct mld_ifinfo *
641 mli_alloc(int how)
642 {
643 struct mld_ifinfo *mli;
644
645 mli = (how == M_WAITOK) ? zalloc(mli_zone) : zalloc_noblock(mli_zone);
646 if (mli != NULL) {
647 bzero(mli, mli_size);
648 lck_mtx_init(&mli->mli_lock, mld_mtx_grp, mld_mtx_attr);
649 mli->mli_debug |= IFD_ALLOC;
650 }
651 return (mli);
652 }
653
654 static void
655 mli_free(struct mld_ifinfo *mli)
656 {
657 MLI_LOCK(mli);
658 if (mli->mli_debug & IFD_ATTACHED) {
659 panic("%s: attached mli=%p is being freed", __func__, mli);
660 /* NOTREACHED */
661 } else if (mli->mli_ifp != NULL) {
662 panic("%s: ifp not NULL for mli=%p", __func__, mli);
663 /* NOTREACHED */
664 } else if (!(mli->mli_debug & IFD_ALLOC)) {
665 panic("%s: mli %p cannot be freed", __func__, mli);
666 /* NOTREACHED */
667 } else if (mli->mli_refcnt != 0) {
668 panic("%s: non-zero refcnt mli=%p", __func__, mli);
669 /* NOTREACHED */
670 }
671 mli->mli_debug &= ~IFD_ALLOC;
672 MLI_UNLOCK(mli);
673
674 lck_mtx_destroy(&mli->mli_lock, mld_mtx_grp);
675 zfree(mli_zone, mli);
676 }
677
678 void
679 mli_addref(struct mld_ifinfo *mli, int locked)
680 {
681 if (!locked)
682 MLI_LOCK_SPIN(mli);
683 else
684 MLI_LOCK_ASSERT_HELD(mli);
685
686 if (++mli->mli_refcnt == 0) {
687 panic("%s: mli=%p wraparound refcnt", __func__, mli);
688 /* NOTREACHED */
689 }
690 if (!locked)
691 MLI_UNLOCK(mli);
692 }
693
694 void
695 mli_remref(struct mld_ifinfo *mli)
696 {
697 SLIST_HEAD(, in6_multi) in6m_dthead;
698 struct ifnet *ifp;
699
700 MLI_LOCK_SPIN(mli);
701
702 if (mli->mli_refcnt == 0) {
703 panic("%s: mli=%p negative refcnt", __func__, mli);
704 /* NOTREACHED */
705 }
706
707 --mli->mli_refcnt;
708 if (mli->mli_refcnt > 0) {
709 MLI_UNLOCK(mli);
710 return;
711 }
712
713 ifp = mli->mli_ifp;
714 mli->mli_ifp = NULL;
715 IF_DRAIN(&mli->mli_gq);
716 IF_DRAIN(&mli->mli_v1q);
717 SLIST_INIT(&in6m_dthead);
718 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
719 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
720 MLI_UNLOCK(mli);
721
722 /* Now that we're dropped all locks, release detached records */
723 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
724
725 MLD_PRINTF(("%s: freeing mld_ifinfo for ifp %p(%s%d)\n",
726 __func__, ifp, ifp->if_name, ifp->if_unit));
727
728 mli_free(mli);
729 }
730
731 /*
732 * Process a received MLDv1 general or address-specific query.
733 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
734 *
735 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
736 * mld_addr. This is OK as we own the mbuf chain.
737 */
738 static int
739 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
740 /*const*/ struct mld_hdr *mld)
741 {
742 struct mld_ifinfo *mli;
743 struct in6_multi *inm;
744 int is_general_query;
745 uint16_t timer;
746
747 is_general_query = 0;
748
749 if (!mld_v1enable) {
750 MLD_PRINTF(("ignore v1 query %s on ifp %p(%s%d)\n",
751 ip6_sprintf(&mld->mld_addr),
752 ifp, ifp->if_name, ifp->if_unit));
753 return (0);
754 }
755
756 /*
757 * RFC3810 Section 6.2: MLD queries must originate from
758 * a router's link-local address.
759 */
760 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
761 MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
762 ip6_sprintf(&ip6->ip6_src),
763 ifp, ifp->if_name, ifp->if_unit));
764 return (0);
765 }
766
767 /*
768 * Do address field validation upfront before we accept
769 * the query.
770 */
771 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
772 /*
773 * MLDv1 General Query.
774 * If this was not sent to the all-nodes group, ignore it.
775 */
776 struct in6_addr dst;
777
778 dst = ip6->ip6_dst;
779 in6_clearscope(&dst);
780 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
781 return (EINVAL);
782 is_general_query = 1;
783 } else {
784 /*
785 * Embed scope ID of receiving interface in MLD query for
786 * lookup whilst we don't hold other locks.
787 */
788 in6_setscope(&mld->mld_addr, ifp, NULL);
789 }
790
791 /*
792 * Switch to MLDv1 host compatibility mode.
793 */
794 mli = MLD_IFINFO(ifp);
795 VERIFY(mli != NULL);
796
797 MLI_LOCK(mli);
798 mld_set_version(mli, MLD_VERSION_1);
799 MLI_UNLOCK(mli);
800
801 timer = (ntohs(mld->mld_maxdelay) * PR_SLOWHZ) / MLD_TIMER_SCALE;
802 if (timer == 0)
803 timer = 1;
804
805 if (is_general_query) {
806 struct in6_multistep step;
807
808 MLD_PRINTF(("process v1 general query on ifp %p(%s%d)\n",
809 ifp, ifp->if_name, ifp->if_unit));
810 /*
811 * For each reporting group joined on this
812 * interface, kick the report timer.
813 */
814 in6_multihead_lock_shared();
815 IN6_FIRST_MULTI(step, inm);
816 while (inm != NULL) {
817 IN6M_LOCK(inm);
818 if (inm->in6m_ifp == ifp)
819 mld_v1_update_group(inm, timer);
820 IN6M_UNLOCK(inm);
821 IN6_NEXT_MULTI(step, inm);
822 }
823 in6_multihead_lock_done();
824 } else {
825 /*
826 * MLDv1 Group-Specific Query.
827 * If this is a group-specific MLDv1 query, we need only
828 * look up the single group to process it.
829 */
830 in6_multihead_lock_shared();
831 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
832 in6_multihead_lock_done();
833
834 if (inm != NULL) {
835 IN6M_LOCK(inm);
836 MLD_PRINTF(("process v1 query %s on ifp %p(%s%d)\n",
837 ip6_sprintf(&mld->mld_addr),
838 ifp, ifp->if_name, ifp->if_unit));
839 mld_v1_update_group(inm, timer);
840 IN6M_UNLOCK(inm);
841 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
842 }
843 /* XXX Clear embedded scope ID as userland won't expect it. */
844 in6_clearscope(&mld->mld_addr);
845 }
846
847 return (0);
848 }
849
850 /*
851 * Update the report timer on a group in response to an MLDv1 query.
852 *
853 * If we are becoming the reporting member for this group, start the timer.
854 * If we already are the reporting member for this group, and timer is
855 * below the threshold, reset it.
856 *
857 * We may be updating the group for the first time since we switched
858 * to MLDv2. If we are, then we must clear any recorded source lists,
859 * and transition to REPORTING state; the group timer is overloaded
860 * for group and group-source query responses.
861 *
862 * Unlike MLDv2, the delay per group should be jittered
863 * to avoid bursts of MLDv1 reports.
864 */
865 static void
866 mld_v1_update_group(struct in6_multi *inm, const int timer)
867 {
868 IN6M_LOCK_ASSERT_HELD(inm);
869
870 MLD_PRINTF(("%s: %s/%s%d timer=%d\n", __func__,
871 ip6_sprintf(&inm->in6m_addr),
872 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit, timer));
873
874 switch (inm->in6m_state) {
875 case MLD_NOT_MEMBER:
876 case MLD_SILENT_MEMBER:
877 break;
878 case MLD_REPORTING_MEMBER:
879 if (inm->in6m_timer != 0 &&
880 inm->in6m_timer <= timer) {
881 MLD_PRINTF(("%s: REPORTING and timer running, "
882 "skipping.\n", __func__));
883 break;
884 }
885 /* FALLTHROUGH */
886 case MLD_SG_QUERY_PENDING_MEMBER:
887 case MLD_G_QUERY_PENDING_MEMBER:
888 case MLD_IDLE_MEMBER:
889 case MLD_LAZY_MEMBER:
890 case MLD_AWAKENING_MEMBER:
891 MLD_PRINTF(("%s: ->REPORTING\n", __func__));
892 inm->in6m_state = MLD_REPORTING_MEMBER;
893 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
894 current_state_timers_running6 = 1;
895 break;
896 case MLD_SLEEPING_MEMBER:
897 MLD_PRINTF(("%s: ->AWAKENING\n", __func__));
898 inm->in6m_state = MLD_AWAKENING_MEMBER;
899 break;
900 case MLD_LEAVING_MEMBER:
901 break;
902 }
903 }
904
905 /*
906 * Process a received MLDv2 general, group-specific or
907 * group-and-source-specific query.
908 *
909 * Assumes that the query header has been pulled up to sizeof(mldv2_query).
910 *
911 * Return 0 if successful, otherwise an appropriate error code is returned.
912 */
913 static int
914 mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
915 struct mbuf *m, const int off, const int icmp6len)
916 {
917 struct mld_ifinfo *mli;
918 struct mldv2_query *mld;
919 struct in6_multi *inm;
920 uint32_t maxdelay, nsrc, qqi;
921 int is_general_query;
922 uint16_t timer;
923 uint8_t qrv;
924
925 is_general_query = 0;
926
927 /*
928 * RFC3810 Section 6.2: MLD queries must originate from
929 * a router's link-local address.
930 */
931 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
932 MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
933 ip6_sprintf(&ip6->ip6_src),
934 ifp, ifp->if_name, ifp->if_unit));
935 return (0);
936 }
937
938 MLD_PRINTF(("input v2 query on ifp %p(%s%d)\n", ifp, ifp->if_name,
939 ifp->if_unit));
940
941 mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off);
942
943 maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */
944 if (maxdelay >= 32678) {
945 maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
946 (MLD_MRC_EXP(maxdelay) + 3);
947 }
948 timer = (maxdelay * PR_SLOWHZ) / MLD_TIMER_SCALE;
949 if (timer == 0)
950 timer = 1;
951
952 qrv = MLD_QRV(mld->mld_misc);
953 if (qrv < 2) {
954 MLD_PRINTF(("%s: clamping qrv %d to %d\n", __func__,
955 qrv, MLD_RV_INIT));
956 qrv = MLD_RV_INIT;
957 }
958
959 qqi = mld->mld_qqi;
960 if (qqi >= 128) {
961 qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
962 (MLD_QQIC_EXP(mld->mld_qqi) + 3);
963 }
964
965 nsrc = ntohs(mld->mld_numsrc);
966 if (nsrc > MLD_MAX_GS_SOURCES)
967 return (EMSGSIZE);
968 if (icmp6len < sizeof(struct mldv2_query) +
969 (nsrc * sizeof(struct in6_addr)))
970 return (EMSGSIZE);
971
972 /*
973 * Do further input validation upfront to avoid resetting timers
974 * should we need to discard this query.
975 */
976 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
977 /*
978 * General Queries SHOULD be directed to ff02::1.
979 * A general query with a source list has undefined
980 * behaviour; discard it.
981 */
982 struct in6_addr dst;
983
984 dst = ip6->ip6_dst;
985 in6_clearscope(&dst);
986 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) ||
987 nsrc > 0)
988 return (EINVAL);
989 is_general_query = 1;
990 } else {
991 /*
992 * Embed scope ID of receiving interface in MLD query for
993 * lookup whilst we don't hold other locks (due to KAME
994 * locking lameness). We own this mbuf chain just now.
995 */
996 in6_setscope(&mld->mld_addr, ifp, NULL);
997 }
998
999 mli = MLD_IFINFO(ifp);
1000 VERIFY(mli != NULL);
1001
1002 MLI_LOCK(mli);
1003 /*
1004 * Discard the v2 query if we're in Compatibility Mode.
1005 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
1006 * until the Old Version Querier Present timer expires.
1007 */
1008 if (mli->mli_version != MLD_VERSION_2) {
1009 MLI_UNLOCK(mli);
1010 return (0);
1011 }
1012
1013 mld_set_version(mli, MLD_VERSION_2);
1014 mli->mli_rv = qrv;
1015 mli->mli_qi = qqi;
1016 mli->mli_qri = maxdelay;
1017
1018 MLD_PRINTF(("%s: qrv %d qi %d maxdelay %d\n", __func__, qrv, qqi,
1019 maxdelay));
1020
1021 if (is_general_query) {
1022 /*
1023 * MLDv2 General Query.
1024 *
1025 * Schedule a current-state report on this ifp for
1026 * all groups, possibly containing source lists.
1027 *
1028 * If there is a pending General Query response
1029 * scheduled earlier than the selected delay, do
1030 * not schedule any other reports.
1031 * Otherwise, reset the interface timer.
1032 */
1033 MLD_PRINTF(("process v2 general query on ifp %p(%s%d)\n",
1034 ifp, ifp->if_name, ifp->if_unit));
1035 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1036 mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
1037 interface_timers_running6 = 1;
1038 }
1039 MLI_UNLOCK(mli);
1040 } else {
1041 MLI_UNLOCK(mli);
1042 /*
1043 * MLDv2 Group-specific or Group-and-source-specific Query.
1044 *
1045 * Group-source-specific queries are throttled on
1046 * a per-group basis to defeat denial-of-service attempts.
1047 * Queries for groups we are not a member of on this
1048 * link are simply ignored.
1049 */
1050 in6_multihead_lock_shared();
1051 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1052 in6_multihead_lock_done();
1053 if (inm == NULL)
1054 return (0);
1055
1056 IN6M_LOCK(inm);
1057 #ifndef __APPLE__
1058 /* TODO: need ratecheck equivalent */
1059 if (nsrc > 0) {
1060 if (!ratecheck(&inm->in6m_lastgsrtv,
1061 &mld_gsrdelay)) {
1062 MLD_PRINTF(("%s: GS query throttled.\n",
1063 __func__));
1064 IN6M_UNLOCK(inm);
1065 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1066 return (0);
1067 }
1068 }
1069 #endif
1070 MLD_PRINTF(("process v2 group query on ifp %p(%s%d)\n",
1071 ifp, ifp->if_name, ifp->if_unit));
1072 /*
1073 * If there is a pending General Query response
1074 * scheduled sooner than the selected delay, no
1075 * further report need be scheduled.
1076 * Otherwise, prepare to respond to the
1077 * group-specific or group-and-source query.
1078 */
1079 MLI_LOCK(mli);
1080 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
1081 MLI_UNLOCK(mli);
1082 mld_v2_process_group_query(inm, timer, m, off);
1083 } else {
1084 MLI_UNLOCK(mli);
1085 }
1086 IN6M_UNLOCK(inm);
1087 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1088 /* XXX Clear embedded scope ID as userland won't expect it. */
1089 in6_clearscope(&mld->mld_addr);
1090 }
1091
1092 return (0);
1093 }
1094
1095 /*
1096 * Process a recieved MLDv2 group-specific or group-and-source-specific
1097 * query.
1098 * Return <0 if any error occured. Currently this is ignored.
1099 */
1100 static int
1101 mld_v2_process_group_query(struct in6_multi *inm, int timer, struct mbuf *m0,
1102 const int off)
1103 {
1104 struct mldv2_query *mld;
1105 int retval;
1106 uint16_t nsrc;
1107
1108 IN6M_LOCK_ASSERT_HELD(inm);
1109
1110 retval = 0;
1111 mld = (struct mldv2_query *)(mtod(m0, uint8_t *) + off);
1112
1113 switch (inm->in6m_state) {
1114 case MLD_NOT_MEMBER:
1115 case MLD_SILENT_MEMBER:
1116 case MLD_SLEEPING_MEMBER:
1117 case MLD_LAZY_MEMBER:
1118 case MLD_AWAKENING_MEMBER:
1119 case MLD_IDLE_MEMBER:
1120 case MLD_LEAVING_MEMBER:
1121 return (retval);
1122 break;
1123 case MLD_REPORTING_MEMBER:
1124 case MLD_G_QUERY_PENDING_MEMBER:
1125 case MLD_SG_QUERY_PENDING_MEMBER:
1126 break;
1127 }
1128
1129 nsrc = ntohs(mld->mld_numsrc);
1130
1131 /*
1132 * Deal with group-specific queries upfront.
1133 * If any group query is already pending, purge any recorded
1134 * source-list state if it exists, and schedule a query response
1135 * for this group-specific query.
1136 */
1137 if (nsrc == 0) {
1138 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
1139 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
1140 in6m_clear_recorded(inm);
1141 timer = min(inm->in6m_timer, timer);
1142 }
1143 inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
1144 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1145 current_state_timers_running6 = 1;
1146 return (retval);
1147 }
1148
1149 /*
1150 * Deal with the case where a group-and-source-specific query has
1151 * been received but a group-specific query is already pending.
1152 */
1153 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
1154 timer = min(inm->in6m_timer, timer);
1155 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1156 current_state_timers_running6 = 1;
1157 return (retval);
1158 }
1159
1160 /*
1161 * Finally, deal with the case where a group-and-source-specific
1162 * query has been received, where a response to a previous g-s-r
1163 * query exists, or none exists.
1164 * In this case, we need to parse the source-list which the Querier
1165 * has provided us with and check if we have any source list filter
1166 * entries at T1 for these sources. If we do not, there is no need
1167 * schedule a report and the query may be dropped.
1168 * If we do, we must record them and schedule a current-state
1169 * report for those sources.
1170 */
1171 if (inm->in6m_nsrc > 0) {
1172 struct mbuf *m;
1173 uint8_t *sp;
1174 int i, nrecorded;
1175 int soff;
1176
1177 m = m0;
1178 soff = off + sizeof(struct mldv2_query);
1179 nrecorded = 0;
1180 for (i = 0; i < nsrc; i++) {
1181 sp = mtod(m, uint8_t *) + soff;
1182 retval = in6m_record_source(inm,
1183 (const struct in6_addr *)(void *)sp);
1184 if (retval < 0)
1185 break;
1186 nrecorded += retval;
1187 soff += sizeof(struct in6_addr);
1188 if (soff >= m->m_len) {
1189 soff = soff - m->m_len;
1190 m = m->m_next;
1191 if (m == NULL)
1192 break;
1193 }
1194 }
1195 if (nrecorded > 0) {
1196 MLD_PRINTF(( "%s: schedule response to SG query\n",
1197 __func__));
1198 inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
1199 inm->in6m_timer = MLD_RANDOM_DELAY(timer);
1200 current_state_timers_running6 = 1;
1201 }
1202 }
1203
1204 return (retval);
1205 }
1206
1207 /*
1208 * Process a received MLDv1 host membership report.
1209 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1210 *
1211 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
1212 * mld_addr. This is OK as we own the mbuf chain.
1213 */
1214 static int
1215 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
1216 /*const*/ struct mld_hdr *mld)
1217 {
1218 struct in6_addr src, dst;
1219 struct in6_ifaddr *ia;
1220 struct in6_multi *inm;
1221
1222 if (!mld_v1enable) {
1223 MLD_PRINTF(("ignore v1 report %s on ifp %p(%s%d)\n",
1224 ip6_sprintf(&mld->mld_addr),
1225 ifp, ifp->if_name, ifp->if_unit));
1226 return (0);
1227 }
1228
1229 if (ifp->if_flags & IFF_LOOPBACK)
1230 return (0);
1231
1232 /*
1233 * MLDv1 reports must originate from a host's link-local address,
1234 * or the unspecified address (when booting).
1235 */
1236 src = ip6->ip6_src;
1237 in6_clearscope(&src);
1238 if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
1239 MLD_PRINTF(("ignore v1 query src %s on ifp %p(%s%d)\n",
1240 ip6_sprintf(&ip6->ip6_src),
1241 ifp, ifp->if_name, ifp->if_unit));
1242 return (EINVAL);
1243 }
1244
1245 /*
1246 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
1247 * group, and must be directed to the group itself.
1248 */
1249 dst = ip6->ip6_dst;
1250 in6_clearscope(&dst);
1251 if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
1252 !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
1253 MLD_PRINTF(("ignore v1 query dst %s on ifp %p(%s%d)\n",
1254 ip6_sprintf(&ip6->ip6_dst),
1255 ifp, ifp->if_name, ifp->if_unit));
1256 return (EINVAL);
1257 }
1258
1259 /*
1260 * Make sure we don't hear our own membership report, as fast
1261 * leave requires knowing that we are the only member of a
1262 * group. Assume we used the link-local address if available,
1263 * otherwise look for ::.
1264 *
1265 * XXX Note that scope ID comparison is needed for the address
1266 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
1267 * performed for the on-wire address.
1268 */
1269 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1270 if (ia != NULL) {
1271 IFA_LOCK(&ia->ia_ifa);
1272 if ((IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia)))){
1273 IFA_UNLOCK(&ia->ia_ifa);
1274 IFA_REMREF(&ia->ia_ifa);
1275 return (0);
1276 }
1277 IFA_UNLOCK(&ia->ia_ifa);
1278 IFA_REMREF(&ia->ia_ifa);
1279 } else if (IN6_IS_ADDR_UNSPECIFIED(&src)) {
1280 return (0);
1281 }
1282
1283 MLD_PRINTF(("process v1 report %s on ifp %p(%s%d)\n",
1284 ip6_sprintf(&mld->mld_addr), ifp, ifp->if_name, ifp->if_unit));
1285
1286 /*
1287 * Embed scope ID of receiving interface in MLD query for lookup
1288 * whilst we don't hold other locks (due to KAME locking lameness).
1289 */
1290 if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
1291 in6_setscope(&mld->mld_addr, ifp, NULL);
1292
1293 /*
1294 * MLDv1 report suppression.
1295 * If we are a member of this group, and our membership should be
1296 * reported, and our group timer is pending or about to be reset,
1297 * stop our group timer by transitioning to the 'lazy' state.
1298 */
1299 in6_multihead_lock_shared();
1300 IN6_LOOKUP_MULTI(&mld->mld_addr, ifp, inm);
1301 in6_multihead_lock_done();
1302
1303 if (inm != NULL) {
1304 struct mld_ifinfo *mli;
1305
1306 IN6M_LOCK(inm);
1307 mli = inm->in6m_mli;
1308 VERIFY(mli != NULL);
1309
1310 MLI_LOCK(mli);
1311 /*
1312 * If we are in MLDv2 host mode, do not allow the
1313 * other host's MLDv1 report to suppress our reports.
1314 */
1315 if (mli->mli_version == MLD_VERSION_2) {
1316 MLI_UNLOCK(mli);
1317 IN6M_UNLOCK(inm);
1318 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1319 goto out;
1320 }
1321 MLI_UNLOCK(mli);
1322
1323 inm->in6m_timer = 0;
1324
1325 switch (inm->in6m_state) {
1326 case MLD_NOT_MEMBER:
1327 case MLD_SILENT_MEMBER:
1328 case MLD_SLEEPING_MEMBER:
1329 break;
1330 case MLD_REPORTING_MEMBER:
1331 case MLD_IDLE_MEMBER:
1332 case MLD_AWAKENING_MEMBER:
1333 MLD_PRINTF(("report suppressed for %s on ifp %p(%s%d)\n",
1334 ip6_sprintf(&mld->mld_addr),
1335 ifp, ifp->if_name, ifp->if_unit));
1336 case MLD_LAZY_MEMBER:
1337 inm->in6m_state = MLD_LAZY_MEMBER;
1338 break;
1339 case MLD_G_QUERY_PENDING_MEMBER:
1340 case MLD_SG_QUERY_PENDING_MEMBER:
1341 case MLD_LEAVING_MEMBER:
1342 break;
1343 }
1344 IN6M_UNLOCK(inm);
1345 IN6M_REMREF(inm); /* from IN6_LOOKUP_MULTI */
1346 }
1347
1348 out:
1349 /* XXX Clear embedded scope ID as userland won't expect it. */
1350 in6_clearscope(&mld->mld_addr);
1351
1352 return (0);
1353 }
1354
1355 /*
1356 * MLD input path.
1357 *
1358 * Assume query messages which fit in a single ICMPv6 message header
1359 * have been pulled up.
1360 * Assume that userland will want to see the message, even if it
1361 * otherwise fails kernel input validation; do not free it.
1362 * Pullup may however free the mbuf chain m if it fails.
1363 *
1364 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
1365 */
1366 int
1367 mld_input(struct mbuf *m, int off, int icmp6len)
1368 {
1369 struct ifnet *ifp;
1370 struct ip6_hdr *ip6;
1371 struct mld_hdr *mld;
1372 int mldlen;
1373
1374 MLD_PRINTF(("%s: called w/mbuf (%p,%d)\n", __func__, m, off));
1375
1376 ifp = m->m_pkthdr.rcvif;
1377
1378 ip6 = mtod(m, struct ip6_hdr *);
1379
1380 /* Pullup to appropriate size. */
1381 mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
1382 if (mld->mld_type == MLD_LISTENER_QUERY &&
1383 icmp6len >= sizeof(struct mldv2_query)) {
1384 mldlen = sizeof(struct mldv2_query);
1385 } else {
1386 mldlen = sizeof(struct mld_hdr);
1387 }
1388 IP6_EXTHDR_GET(mld, struct mld_hdr *, m, off, mldlen);
1389 if (mld == NULL) {
1390 icmp6stat.icp6s_badlen++;
1391 return (IPPROTO_DONE);
1392 }
1393
1394 /*
1395 * Userland needs to see all of this traffic for implementing
1396 * the endpoint discovery portion of multicast routing.
1397 */
1398 switch (mld->mld_type) {
1399 case MLD_LISTENER_QUERY:
1400 icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
1401 if (icmp6len == sizeof(struct mld_hdr)) {
1402 if (mld_v1_input_query(ifp, ip6, mld) != 0)
1403 return (0);
1404 } else if (icmp6len >= sizeof(struct mldv2_query)) {
1405 if (mld_v2_input_query(ifp, ip6, m, off,
1406 icmp6len) != 0)
1407 return (0);
1408 }
1409 break;
1410 case MLD_LISTENER_REPORT:
1411 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1412 if (mld_v1_input_report(ifp, ip6, mld) != 0)
1413 return (0);
1414 break;
1415 case MLDV2_LISTENER_REPORT:
1416 icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
1417 break;
1418 case MLD_LISTENER_DONE:
1419 icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
1420 break;
1421 default:
1422 break;
1423 }
1424
1425 return (0);
1426 }
1427
1428 /*
1429 * MLD6 slowtimo handler.
1430 * Combiles both the slow and fast timer into one. We loose some responsivness but
1431 * allows the system to avoid having a pr_fasttimo, thus allowing for power savings.
1432 */
1433 void
1434 mld_slowtimo(void)
1435 {
1436 struct ifqueue scq; /* State-change packets */
1437 struct ifqueue qrq; /* Query response packets */
1438 struct ifnet *ifp;
1439 struct mld_ifinfo *mli;
1440 struct in6_multi *inm;
1441 int uri_fasthz = 0;
1442 SLIST_HEAD(, in6_multi) in6m_dthead;
1443
1444 SLIST_INIT(&in6m_dthead);
1445
1446 MLD_LOCK();
1447
1448 LIST_FOREACH(mli, &mli_head, mli_link) {
1449 MLI_LOCK(mli);
1450 mld_v1_process_querier_timers(mli);
1451 MLI_UNLOCK(mli);
1452 }
1453
1454 /*
1455 * Quick check to see if any work needs to be done, in order to
1456 * minimize the overhead of fasttimo processing.
1457 */
1458 if (!current_state_timers_running6 &&
1459 !interface_timers_running6 &&
1460 !state_change_timers_running6) {
1461 MLD_UNLOCK();
1462 return;
1463 }
1464
1465 /*
1466 * MLDv2 General Query response timer processing.
1467 */
1468 if (interface_timers_running6) {
1469 #if 0
1470 MLD_PRINTF(("%s: interface timers running\n", __func__));
1471 #endif
1472 interface_timers_running6 = 0;
1473 LIST_FOREACH(mli, &mli_head, mli_link) {
1474 MLI_LOCK(mli);
1475 if (mli->mli_v2_timer == 0) {
1476 /* Do nothing. */
1477 } else if (--mli->mli_v2_timer == 0) {
1478 mld_v2_dispatch_general_query(mli);
1479 } else {
1480 interface_timers_running6 = 1;
1481 }
1482 MLI_UNLOCK(mli);
1483 }
1484 }
1485
1486 if (!current_state_timers_running6 &&
1487 !state_change_timers_running6)
1488 goto out_locked;
1489
1490 current_state_timers_running6 = 0;
1491 state_change_timers_running6 = 0;
1492 #if 0
1493 MLD_PRINTF(("%s: state change timers running\n", __func__));
1494 #endif
1495
1496 memset(&qrq, 0, sizeof(struct ifqueue));
1497 qrq.ifq_maxlen = MLD_MAX_G_GS_PACKETS;
1498
1499 memset(&scq, 0, sizeof(struct ifqueue));
1500 scq.ifq_maxlen = MLD_MAX_STATE_CHANGE_PACKETS;
1501
1502 /*
1503 * MLD host report and state-change timer processing.
1504 * Note: Processing a v2 group timer may remove a node.
1505 */
1506 LIST_FOREACH(mli, &mli_head, mli_link) {
1507 struct in6_multistep step;
1508
1509 MLI_LOCK(mli);
1510 ifp = mli->mli_ifp;
1511 uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * PR_SLOWHZ);
1512 MLI_UNLOCK(mli);
1513
1514 in6_multihead_lock_shared();
1515 IN6_FIRST_MULTI(step, inm);
1516 while (inm != NULL) {
1517 IN6M_LOCK(inm);
1518 if (inm->in6m_ifp != ifp)
1519 goto next;
1520
1521 MLI_LOCK(mli);
1522 switch (mli->mli_version) {
1523 case MLD_VERSION_1:
1524 mld_v1_process_group_timer(inm,
1525 mli->mli_version);
1526 break;
1527 case MLD_VERSION_2:
1528 mld_v2_process_group_timers(mli, &qrq,
1529 &scq, inm, uri_fasthz);
1530 break;
1531 }
1532 MLI_UNLOCK(mli);
1533 next:
1534 IN6M_UNLOCK(inm);
1535 IN6_NEXT_MULTI(step, inm);
1536 }
1537 in6_multihead_lock_done();
1538
1539 MLI_LOCK(mli);
1540 if (mli->mli_version == MLD_VERSION_1) {
1541 mld_dispatch_queue(mli, &mli->mli_v1q, 0);
1542 } else if (mli->mli_version == MLD_VERSION_2) {
1543 MLI_UNLOCK(mli);
1544 mld_dispatch_queue(NULL, &qrq, 0);
1545 mld_dispatch_queue(NULL, &scq, 0);
1546 VERIFY(qrq.ifq_len == 0);
1547 VERIFY(scq.ifq_len == 0);
1548 MLI_LOCK(mli);
1549 }
1550 /*
1551 * In case there are still any pending membership reports
1552 * which didn't get drained at version change time.
1553 */
1554 IF_DRAIN(&mli->mli_v1q);
1555 /*
1556 * Release all deferred inm records, and drain any locally
1557 * enqueued packets; do it even if the current MLD version
1558 * for the link is no longer MLDv2, in order to handle the
1559 * version change case.
1560 */
1561 mld_flush_relq(mli, (struct mld_in6m_relhead *)&in6m_dthead);
1562 VERIFY(SLIST_EMPTY(&mli->mli_relinmhead));
1563 MLI_UNLOCK(mli);
1564
1565 IF_DRAIN(&qrq);
1566 IF_DRAIN(&scq);
1567 }
1568
1569 out_locked:
1570 MLD_UNLOCK();
1571
1572 /* Now that we're dropped all locks, release detached records */
1573 MLD_REMOVE_DETACHED_IN6M(&in6m_dthead);
1574 }
1575
1576 /*
1577 * Free the in6_multi reference(s) for this MLD lifecycle.
1578 *
1579 * Caller must be holding mli_lock.
1580 */
1581 static void
1582 mld_flush_relq(struct mld_ifinfo *mli, struct mld_in6m_relhead *in6m_dthead)
1583 {
1584 struct in6_multi *inm;
1585
1586 again:
1587 MLI_LOCK_ASSERT_HELD(mli);
1588 inm = SLIST_FIRST(&mli->mli_relinmhead);
1589 if (inm != NULL) {
1590 int lastref;
1591
1592 SLIST_REMOVE_HEAD(&mli->mli_relinmhead, in6m_nrele);
1593 MLI_UNLOCK(mli);
1594
1595 in6_multihead_lock_exclusive();
1596 IN6M_LOCK(inm);
1597 VERIFY(inm->in6m_nrelecnt != 0);
1598 inm->in6m_nrelecnt--;
1599 lastref = in6_multi_detach(inm);
1600 VERIFY(!lastref || (!(inm->in6m_debug & IFD_ATTACHED) &&
1601 inm->in6m_reqcnt == 0));
1602 IN6M_UNLOCK(inm);
1603 in6_multihead_lock_done();
1604 /* from mli_relinmhead */
1605 IN6M_REMREF(inm);
1606 /* from in6_multihead_list */
1607 if (lastref) {
1608 /*
1609 * Defer releasing our final reference, as we
1610 * are holding the MLD lock at this point, and
1611 * we could end up with locking issues later on
1612 * (while issuing SIOCDELMULTI) when this is the
1613 * final reference count. Let the caller do it
1614 * when it is safe.
1615 */
1616 MLD_ADD_DETACHED_IN6M(in6m_dthead, inm);
1617 }
1618 MLI_LOCK(mli);
1619 goto again;
1620 }
1621 }
1622
1623 /*
1624 * Update host report group timer.
1625 * Will update the global pending timer flags.
1626 */
1627 static void
1628 mld_v1_process_group_timer(struct in6_multi *inm, const int mld_version)
1629 {
1630 #pragma unused(mld_version)
1631 int report_timer_expired;
1632
1633 IN6M_LOCK_ASSERT_HELD(inm);
1634 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1635
1636 if (inm->in6m_timer == 0) {
1637 report_timer_expired = 0;
1638 } else if (--inm->in6m_timer == 0) {
1639 report_timer_expired = 1;
1640 } else {
1641 current_state_timers_running6 = 1;
1642 return;
1643 }
1644
1645 switch (inm->in6m_state) {
1646 case MLD_NOT_MEMBER:
1647 case MLD_SILENT_MEMBER:
1648 case MLD_IDLE_MEMBER:
1649 case MLD_LAZY_MEMBER:
1650 case MLD_SLEEPING_MEMBER:
1651 case MLD_AWAKENING_MEMBER:
1652 break;
1653 case MLD_REPORTING_MEMBER:
1654 if (report_timer_expired) {
1655 inm->in6m_state = MLD_IDLE_MEMBER;
1656 (void) mld_v1_transmit_report(inm,
1657 MLD_LISTENER_REPORT);
1658 IN6M_LOCK_ASSERT_HELD(inm);
1659 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
1660 }
1661 break;
1662 case MLD_G_QUERY_PENDING_MEMBER:
1663 case MLD_SG_QUERY_PENDING_MEMBER:
1664 case MLD_LEAVING_MEMBER:
1665 break;
1666 }
1667 }
1668
1669 /*
1670 * Update a group's timers for MLDv2.
1671 * Will update the global pending timer flags.
1672 * Note: Unlocked read from mli.
1673 */
1674 static void
1675 mld_v2_process_group_timers(struct mld_ifinfo *mli,
1676 struct ifqueue *qrq, struct ifqueue *scq,
1677 struct in6_multi *inm, const int uri_fasthz)
1678 {
1679 int query_response_timer_expired;
1680 int state_change_retransmit_timer_expired;
1681
1682 IN6M_LOCK_ASSERT_HELD(inm);
1683 MLI_LOCK_ASSERT_HELD(mli);
1684 VERIFY(mli == inm->in6m_mli);
1685
1686 query_response_timer_expired = 0;
1687 state_change_retransmit_timer_expired = 0;
1688
1689 /*
1690 * During a transition from compatibility mode back to MLDv2,
1691 * a group record in REPORTING state may still have its group
1692 * timer active. This is a no-op in this function; it is easier
1693 * to deal with it here than to complicate the slow-timeout path.
1694 */
1695 if (inm->in6m_timer == 0) {
1696 query_response_timer_expired = 0;
1697 } else if (--inm->in6m_timer == 0) {
1698 query_response_timer_expired = 1;
1699 } else {
1700 current_state_timers_running6 = 1;
1701 }
1702
1703 if (inm->in6m_sctimer == 0) {
1704 state_change_retransmit_timer_expired = 0;
1705 } else if (--inm->in6m_sctimer == 0) {
1706 state_change_retransmit_timer_expired = 1;
1707 } else {
1708 state_change_timers_running6 = 1;
1709 }
1710
1711 /* We are in fasttimo, so be quick about it. */
1712 if (!state_change_retransmit_timer_expired &&
1713 !query_response_timer_expired)
1714 return;
1715
1716 switch (inm->in6m_state) {
1717 case MLD_NOT_MEMBER:
1718 case MLD_SILENT_MEMBER:
1719 case MLD_SLEEPING_MEMBER:
1720 case MLD_LAZY_MEMBER:
1721 case MLD_AWAKENING_MEMBER:
1722 case MLD_IDLE_MEMBER:
1723 break;
1724 case MLD_G_QUERY_PENDING_MEMBER:
1725 case MLD_SG_QUERY_PENDING_MEMBER:
1726 /*
1727 * Respond to a previously pending Group-Specific
1728 * or Group-and-Source-Specific query by enqueueing
1729 * the appropriate Current-State report for
1730 * immediate transmission.
1731 */
1732 if (query_response_timer_expired) {
1733 int retval;
1734
1735 retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
1736 (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
1737 0);
1738 MLD_PRINTF(("%s: enqueue record = %d\n",
1739 __func__, retval));
1740 inm->in6m_state = MLD_REPORTING_MEMBER;
1741 in6m_clear_recorded(inm);
1742 }
1743 /* FALLTHROUGH */
1744 case MLD_REPORTING_MEMBER:
1745 case MLD_LEAVING_MEMBER:
1746 if (state_change_retransmit_timer_expired) {
1747 /*
1748 * State-change retransmission timer fired.
1749 * If there are any further pending retransmissions,
1750 * set the global pending state-change flag, and
1751 * reset the timer.
1752 */
1753 if (--inm->in6m_scrv > 0) {
1754 inm->in6m_sctimer = uri_fasthz;
1755 state_change_timers_running6 = 1;
1756 }
1757 /*
1758 * Retransmit the previously computed state-change
1759 * report. If there are no further pending
1760 * retransmissions, the mbuf queue will be consumed.
1761 * Update T0 state to T1 as we have now sent
1762 * a state-change.
1763 */
1764 (void) mld_v2_merge_state_changes(inm, scq);
1765
1766 in6m_commit(inm);
1767 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
1768 ip6_sprintf(&inm->in6m_addr),
1769 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
1770
1771 /*
1772 * If we are leaving the group for good, make sure
1773 * we release MLD's reference to it.
1774 * This release must be deferred using a SLIST,
1775 * as we are called from a loop which traverses
1776 * the in_ifmultiaddr TAILQ.
1777 */
1778 if (inm->in6m_state == MLD_LEAVING_MEMBER &&
1779 inm->in6m_scrv == 0) {
1780 inm->in6m_state = MLD_NOT_MEMBER;
1781 /*
1782 * A reference has already been held in
1783 * mld_final_leave() for this inm, so
1784 * no need to hold another one. We also
1785 * bumped up its request count then, so
1786 * that it stays in in6_multihead. Both
1787 * of them will be released when it is
1788 * dequeued later on.
1789 */
1790 VERIFY(inm->in6m_nrelecnt != 0);
1791 SLIST_INSERT_HEAD(&mli->mli_relinmhead,
1792 inm, in6m_nrele);
1793 }
1794 }
1795 break;
1796 }
1797 }
1798
1799 /*
1800 * Switch to a different version on the given interface,
1801 * as per Section 9.12.
1802 */
1803 static void
1804 mld_set_version(struct mld_ifinfo *mli, const int mld_version)
1805 {
1806 int old_version_timer;
1807
1808 MLI_LOCK_ASSERT_HELD(mli);
1809
1810 MLD_PRINTF(("%s: switching to v%d on ifp %p(%s%d)\n", __func__,
1811 mld_version, mli->mli_ifp, mli->mli_ifp->if_name,
1812 mli->mli_ifp->if_unit));
1813
1814 if (mld_version == MLD_VERSION_1) {
1815 /*
1816 * Compute the "Older Version Querier Present" timer as per
1817 * Section 9.12.
1818 */
1819 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
1820 old_version_timer *= PR_SLOWHZ;
1821 mli->mli_v1_timer = old_version_timer;
1822 }
1823
1824 if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
1825 mli->mli_version = MLD_VERSION_1;
1826 mld_v2_cancel_link_timers(mli);
1827 }
1828
1829 MLI_LOCK_ASSERT_HELD(mli);
1830 }
1831
1832 /*
1833 * Cancel pending MLDv2 timers for the given link and all groups
1834 * joined on it; state-change, general-query, and group-query timers.
1835 */
1836 static void
1837 mld_v2_cancel_link_timers(struct mld_ifinfo *mli)
1838 {
1839 struct ifnet *ifp;
1840 struct in6_multi *inm;
1841 struct in6_multistep step;
1842
1843 MLI_LOCK_ASSERT_HELD(mli);
1844
1845 MLD_PRINTF(("%s: cancel v2 timers on ifp %p(%s%d)\n", __func__,
1846 mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1847
1848 /*
1849 * Fast-track this potentially expensive operation
1850 * by checking all the global 'timer pending' flags.
1851 */
1852 if (!interface_timers_running6 &&
1853 !state_change_timers_running6 &&
1854 !current_state_timers_running6)
1855 return;
1856
1857 mli->mli_v2_timer = 0;
1858 ifp = mli->mli_ifp;
1859 MLI_UNLOCK(mli);
1860
1861 in6_multihead_lock_shared();
1862 IN6_FIRST_MULTI(step, inm);
1863 while (inm != NULL) {
1864 IN6M_LOCK(inm);
1865 if (inm->in6m_ifp != ifp)
1866 goto next;
1867
1868 switch (inm->in6m_state) {
1869 case MLD_NOT_MEMBER:
1870 case MLD_SILENT_MEMBER:
1871 case MLD_IDLE_MEMBER:
1872 case MLD_LAZY_MEMBER:
1873 case MLD_SLEEPING_MEMBER:
1874 case MLD_AWAKENING_MEMBER:
1875 break;
1876 case MLD_LEAVING_MEMBER:
1877 /*
1878 * If we are leaving the group and switching
1879 * version, we need to release the final
1880 * reference held for issuing the INCLUDE {}.
1881 * During mld_final_leave(), we bumped up both the
1882 * request and reference counts. Since we cannot
1883 * call in6_multi_detach() here, defer this task to
1884 * the timer routine.
1885 */
1886 VERIFY(inm->in6m_nrelecnt != 0);
1887 MLI_LOCK(mli);
1888 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
1889 in6m_nrele);
1890 MLI_UNLOCK(mli);
1891 /* FALLTHROUGH */
1892 case MLD_G_QUERY_PENDING_MEMBER:
1893 case MLD_SG_QUERY_PENDING_MEMBER:
1894 in6m_clear_recorded(inm);
1895 /* FALLTHROUGH */
1896 case MLD_REPORTING_MEMBER:
1897 inm->in6m_sctimer = 0;
1898 inm->in6m_timer = 0;
1899 inm->in6m_state = MLD_REPORTING_MEMBER;
1900 /*
1901 * Free any pending MLDv2 state-change records.
1902 */
1903 IF_DRAIN(&inm->in6m_scq);
1904 break;
1905 }
1906 next:
1907 IN6M_UNLOCK(inm);
1908 IN6_NEXT_MULTI(step, inm);
1909 }
1910 in6_multihead_lock_done();
1911
1912 MLI_LOCK(mli);
1913 }
1914
1915 /*
1916 * Update the Older Version Querier Present timers for a link.
1917 * See Section 9.12 of RFC 3810.
1918 */
1919 static void
1920 mld_v1_process_querier_timers(struct mld_ifinfo *mli)
1921 {
1922 MLI_LOCK_ASSERT_HELD(mli);
1923
1924 if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
1925 /*
1926 * MLDv1 Querier Present timer expired; revert to MLDv2.
1927 */
1928 MLD_PRINTF(("%s: transition from v%d -> v%d on %p(%s%d)\n",
1929 __func__, mli->mli_version, MLD_VERSION_2,
1930 mli->mli_ifp, mli->mli_ifp->if_name, mli->mli_ifp->if_unit));
1931 mli->mli_version = MLD_VERSION_2;
1932 }
1933 }
1934
1935 /*
1936 * Transmit an MLDv1 report immediately.
1937 */
1938 static int
1939 mld_v1_transmit_report(struct in6_multi *in6m, const int type)
1940 {
1941 struct ifnet *ifp;
1942 struct in6_ifaddr *ia;
1943 struct ip6_hdr *ip6;
1944 struct mbuf *mh, *md;
1945 struct mld_hdr *mld;
1946 int error = 0;
1947
1948 IN6M_LOCK_ASSERT_HELD(in6m);
1949 MLI_LOCK_ASSERT_HELD(in6m->in6m_mli);
1950
1951 ifp = in6m->in6m_ifp;
1952 /* ia may be NULL if link-local address is tentative. */
1953 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
1954
1955 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
1956 if (mh == NULL) {
1957 if (ia != NULL)
1958 IFA_REMREF(&ia->ia_ifa);
1959 return (ENOMEM);
1960 }
1961 MGET(md, M_DONTWAIT, MT_DATA);
1962 if (md == NULL) {
1963 m_free(mh);
1964 if (ia != NULL)
1965 IFA_REMREF(&ia->ia_ifa);
1966 return (ENOMEM);
1967 }
1968 mh->m_next = md;
1969
1970 /*
1971 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
1972 * that ether_output() does not need to allocate another mbuf
1973 * for the header in the most common case.
1974 */
1975 MH_ALIGN(mh, sizeof(struct ip6_hdr));
1976 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
1977 mh->m_len = sizeof(struct ip6_hdr);
1978
1979 ip6 = mtod(mh, struct ip6_hdr *);
1980 ip6->ip6_flow = 0;
1981 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
1982 ip6->ip6_vfc |= IPV6_VERSION;
1983 ip6->ip6_nxt = IPPROTO_ICMPV6;
1984 if (ia != NULL)
1985 IFA_LOCK(&ia->ia_ifa);
1986 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
1987 if (ia != NULL) {
1988 IFA_UNLOCK(&ia->ia_ifa);
1989 IFA_REMREF(&ia->ia_ifa);
1990 ia = NULL;
1991 }
1992 ip6->ip6_dst = in6m->in6m_addr;
1993
1994 md->m_len = sizeof(struct mld_hdr);
1995 mld = mtod(md, struct mld_hdr *);
1996 mld->mld_type = type;
1997 mld->mld_code = 0;
1998 mld->mld_cksum = 0;
1999 mld->mld_maxdelay = 0;
2000 mld->mld_reserved = 0;
2001 mld->mld_addr = in6m->in6m_addr;
2002 in6_clearscope(&mld->mld_addr);
2003 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
2004 sizeof(struct ip6_hdr), sizeof(struct mld_hdr));
2005
2006 mh->m_flags |= M_MLDV1;
2007
2008 /*
2009 * Due to the fact that at this point we are possibly holding
2010 * in6_multihead_lock in shared or exclusive mode, we can't call
2011 * mld_dispatch_packet() here since that will eventually call
2012 * ip6_output(), which will try to lock in6_multihead_lock and cause
2013 * a deadlock.
2014 * Instead we defer the work to the mld_slowtimo() thread, thus
2015 * avoiding unlocking in_multihead_lock here.
2016 */
2017 if (IF_QFULL(&in6m->in6m_mli->mli_v1q)) {
2018 MLD_PRINTF(("%s: v1 outbound queue full\n", __func__));
2019 error = ENOMEM;
2020 m_freem(mh);
2021 } else
2022 IF_ENQUEUE(&in6m->in6m_mli->mli_v1q, mh);
2023
2024 return (error);
2025 }
2026
2027 /*
2028 * Process a state change from the upper layer for the given IPv6 group.
2029 *
2030 * Each socket holds a reference on the in6_multi in its own ip_moptions.
2031 * The socket layer will have made the necessary updates to.the group
2032 * state, it is now up to MLD to issue a state change report if there
2033 * has been any change between T0 (when the last state-change was issued)
2034 * and T1 (now).
2035 *
2036 * We use the MLDv2 state machine at group level. The MLd module
2037 * however makes the decision as to which MLD protocol version to speak.
2038 * A state change *from* INCLUDE {} always means an initial join.
2039 * A state change *to* INCLUDE {} always means a final leave.
2040 *
2041 * If delay is non-zero, and the state change is an initial multicast
2042 * join, the state change report will be delayed by 'delay' ticks
2043 * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
2044 * the initial MLDv2 state change report will be delayed by whichever
2045 * is sooner, a pending state-change timer or delay itself.
2046 */
2047 int
2048 mld_change_state(struct in6_multi *inm, const int delay)
2049 {
2050 struct mld_ifinfo *mli;
2051 struct ifnet *ifp;
2052 int error = 0;
2053
2054 IN6M_LOCK_ASSERT_HELD(inm);
2055 VERIFY(inm->in6m_mli != NULL);
2056 MLI_LOCK_ASSERT_NOTHELD(inm->in6m_mli);
2057
2058 /*
2059 * Try to detect if the upper layer just asked us to change state
2060 * for an interface which has now gone away.
2061 */
2062 VERIFY(inm->in6m_ifma != NULL);
2063 ifp = inm->in6m_ifma->ifma_ifp;
2064 /*
2065 * Sanity check that netinet6's notion of ifp is the same as net's.
2066 */
2067 VERIFY(inm->in6m_ifp == ifp);
2068
2069 mli = MLD_IFINFO(ifp);
2070 VERIFY(mli != NULL);
2071
2072 /*
2073 * If we detect a state transition to or from MCAST_UNDEFINED
2074 * for this group, then we are starting or finishing an MLD
2075 * life cycle for this group.
2076 */
2077 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) {
2078 MLD_PRINTF(("%s: inm transition %d -> %d\n", __func__,
2079 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode));
2080 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) {
2081 MLD_PRINTF(("%s: initial join\n", __func__));
2082 error = mld_initial_join(inm, mli, delay);
2083 goto out;
2084 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) {
2085 MLD_PRINTF(("%s: final leave\n", __func__));
2086 mld_final_leave(inm, mli);
2087 goto out;
2088 }
2089 } else {
2090 MLD_PRINTF(("%s: filter set change\n", __func__));
2091 }
2092
2093 error = mld_handle_state_change(inm, mli);
2094
2095 out:
2096 return (error);
2097 }
2098
2099 /*
2100 * Perform the initial join for an MLD group.
2101 *
2102 * When joining a group:
2103 * If the group should have its MLD traffic suppressed, do nothing.
2104 * MLDv1 starts sending MLDv1 host membership reports.
2105 * MLDv2 will schedule an MLDv2 state-change report containing the
2106 * initial state of the membership.
2107 *
2108 * If the delay argument is non-zero, then we must delay sending the
2109 * initial state change for delay ticks (in units of PR_FASTHZ).
2110 */
2111 static int
2112 mld_initial_join(struct in6_multi *inm, struct mld_ifinfo *mli,
2113 const int delay)
2114 {
2115 struct ifnet *ifp;
2116 struct ifqueue *ifq;
2117 int error, retval, syncstates;
2118 int odelay;
2119
2120 IN6M_LOCK_ASSERT_HELD(inm);
2121 MLI_LOCK_ASSERT_NOTHELD(mli);
2122
2123 MLD_PRINTF(("%s: initial join %s on ifp %p(%s%d)\n",
2124 __func__, ip6_sprintf(&inm->in6m_addr),
2125 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2126
2127 error = 0;
2128 syncstates = 1;
2129
2130 ifp = inm->in6m_ifp;
2131
2132 MLI_LOCK(mli);
2133 VERIFY(mli->mli_ifp == ifp);
2134
2135 /*
2136 * Groups joined on loopback or marked as 'not reported',
2137 * enter the MLD_SILENT_MEMBER state and
2138 * are never reported in any protocol exchanges.
2139 * All other groups enter the appropriate state machine
2140 * for the version in use on this link.
2141 * A link marked as MLIF_SILENT causes MLD to be completely
2142 * disabled for the link.
2143 */
2144 if ((ifp->if_flags & IFF_LOOPBACK) ||
2145 (mli->mli_flags & MLIF_SILENT) ||
2146 !mld_is_addr_reported(&inm->in6m_addr)) {
2147 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2148 __func__));
2149 inm->in6m_state = MLD_SILENT_MEMBER;
2150 inm->in6m_timer = 0;
2151 } else {
2152 /*
2153 * Deal with overlapping in6_multi lifecycle.
2154 * If this group was LEAVING, then make sure
2155 * we drop the reference we picked up to keep the
2156 * group around for the final INCLUDE {} enqueue.
2157 * Since we cannot call in6_multi_detach() here,
2158 * defer this task to the timer routine.
2159 */
2160 if (mli->mli_version == MLD_VERSION_2 &&
2161 inm->in6m_state == MLD_LEAVING_MEMBER) {
2162 VERIFY(inm->in6m_nrelecnt != 0);
2163 SLIST_INSERT_HEAD(&mli->mli_relinmhead, inm,
2164 in6m_nrele);
2165 }
2166
2167 inm->in6m_state = MLD_REPORTING_MEMBER;
2168
2169 switch (mli->mli_version) {
2170 case MLD_VERSION_1:
2171 /*
2172 * If a delay was provided, only use it if
2173 * it is greater than the delay normally
2174 * used for an MLDv1 state change report,
2175 * and delay sending the initial MLDv1 report
2176 * by not transitioning to the IDLE state.
2177 */
2178 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_SLOWHZ);
2179 if (delay) {
2180 inm->in6m_timer = max(delay, odelay);
2181 current_state_timers_running6 = 1;
2182 } else {
2183 inm->in6m_state = MLD_IDLE_MEMBER;
2184 error = mld_v1_transmit_report(inm,
2185 MLD_LISTENER_REPORT);
2186
2187 IN6M_LOCK_ASSERT_HELD(inm);
2188 MLI_LOCK_ASSERT_HELD(mli);
2189
2190 if (error == 0) {
2191 inm->in6m_timer = odelay;
2192 current_state_timers_running6 = 1;
2193 }
2194 }
2195 break;
2196
2197 case MLD_VERSION_2:
2198 /*
2199 * Defer update of T0 to T1, until the first copy
2200 * of the state change has been transmitted.
2201 */
2202 syncstates = 0;
2203
2204 /*
2205 * Immediately enqueue a State-Change Report for
2206 * this interface, freeing any previous reports.
2207 * Don't kick the timers if there is nothing to do,
2208 * or if an error occurred.
2209 */
2210 ifq = &inm->in6m_scq;
2211 IF_DRAIN(ifq);
2212 retval = mld_v2_enqueue_group_record(ifq, inm, 1,
2213 0, 0, (mli->mli_flags & MLIF_USEALLOW));
2214 MLD_PRINTF(("%s: enqueue record = %d\n",
2215 __func__, retval));
2216 if (retval <= 0) {
2217 error = retval * -1;
2218 break;
2219 }
2220
2221 /*
2222 * Schedule transmission of pending state-change
2223 * report up to RV times for this link. The timer
2224 * will fire at the next mld_fasttimo (~200ms),
2225 * giving us an opportunity to merge the reports.
2226 *
2227 * If a delay was provided to this function, only
2228 * use this delay if sooner than the existing one.
2229 */
2230 VERIFY(mli->mli_rv > 1);
2231 inm->in6m_scrv = mli->mli_rv;
2232 if (delay) {
2233 if (inm->in6m_sctimer > 1) {
2234 inm->in6m_sctimer =
2235 min(inm->in6m_sctimer, delay);
2236 } else
2237 inm->in6m_sctimer = delay;
2238 } else
2239 inm->in6m_sctimer = 1;
2240 state_change_timers_running6 = 1;
2241
2242 error = 0;
2243 break;
2244 }
2245 }
2246 MLI_UNLOCK(mli);
2247
2248 /*
2249 * Only update the T0 state if state change is atomic,
2250 * i.e. we don't need to wait for a timer to fire before we
2251 * can consider the state change to have been communicated.
2252 */
2253 if (syncstates) {
2254 in6m_commit(inm);
2255 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2256 ip6_sprintf(&inm->in6m_addr),
2257 inm->in6m_ifp->if_name, ifp->if_unit));
2258 }
2259
2260 return (error);
2261 }
2262
2263 /*
2264 * Issue an intermediate state change during the life-cycle.
2265 */
2266 static int
2267 mld_handle_state_change(struct in6_multi *inm, struct mld_ifinfo *mli)
2268 {
2269 struct ifnet *ifp;
2270 int retval;
2271
2272 IN6M_LOCK_ASSERT_HELD(inm);
2273 MLI_LOCK_ASSERT_NOTHELD(mli);
2274
2275 MLD_PRINTF(("%s: state change for %s on ifp %p(%s%d)\n",
2276 __func__, ip6_sprintf(&inm->in6m_addr),
2277 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2278
2279 ifp = inm->in6m_ifp;
2280
2281 MLI_LOCK(mli);
2282 VERIFY(mli->mli_ifp == ifp);
2283
2284 if ((ifp->if_flags & IFF_LOOPBACK) ||
2285 (mli->mli_flags & MLIF_SILENT) ||
2286 !mld_is_addr_reported(&inm->in6m_addr) ||
2287 (mli->mli_version != MLD_VERSION_2)) {
2288 MLI_UNLOCK(mli);
2289 if (!mld_is_addr_reported(&inm->in6m_addr)) {
2290 MLD_PRINTF(("%s: not kicking state machine for silent "
2291 "group\n", __func__));
2292 }
2293 MLD_PRINTF(("%s: nothing to do\n", __func__));
2294 in6m_commit(inm);
2295 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2296 ip6_sprintf(&inm->in6m_addr),
2297 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2298 return (0);
2299 }
2300
2301 IF_DRAIN(&inm->in6m_scq);
2302
2303 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0,
2304 (mli->mli_flags & MLIF_USEALLOW));
2305 MLD_PRINTF(("%s: enqueue record = %d\n", __func__, retval));
2306 if (retval <= 0) {
2307 MLI_UNLOCK(mli);
2308 return (-retval);
2309 }
2310 /*
2311 * If record(s) were enqueued, start the state-change
2312 * report timer for this group.
2313 */
2314 inm->in6m_scrv = mli->mli_rv;
2315 inm->in6m_sctimer = 1;
2316 state_change_timers_running6 = 1;
2317 MLI_UNLOCK(mli);
2318
2319 return (0);
2320 }
2321
2322 /*
2323 * Perform the final leave for a multicast address.
2324 *
2325 * When leaving a group:
2326 * MLDv1 sends a DONE message, if and only if we are the reporter.
2327 * MLDv2 enqueues a state-change report containing a transition
2328 * to INCLUDE {} for immediate transmission.
2329 */
2330 static void
2331 mld_final_leave(struct in6_multi *inm, struct mld_ifinfo *mli)
2332 {
2333 int syncstates = 1;
2334
2335 IN6M_LOCK_ASSERT_HELD(inm);
2336 MLI_LOCK_ASSERT_NOTHELD(mli);
2337
2338 MLD_PRINTF(("%s: final leave %s on ifp %p(%s%d)\n",
2339 __func__, ip6_sprintf(&inm->in6m_addr),
2340 inm->in6m_ifp, inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2341
2342 switch (inm->in6m_state) {
2343 case MLD_NOT_MEMBER:
2344 case MLD_SILENT_MEMBER:
2345 case MLD_LEAVING_MEMBER:
2346 /* Already leaving or left; do nothing. */
2347 MLD_PRINTF(("%s: not kicking state machine for silent group\n",
2348 __func__));
2349 break;
2350 case MLD_REPORTING_MEMBER:
2351 case MLD_IDLE_MEMBER:
2352 case MLD_G_QUERY_PENDING_MEMBER:
2353 case MLD_SG_QUERY_PENDING_MEMBER:
2354 MLI_LOCK(mli);
2355 if (mli->mli_version == MLD_VERSION_1) {
2356 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
2357 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
2358 panic("%s: MLDv2 state reached, not MLDv2 "
2359 "mode\n", __func__);
2360 /* NOTREACHED */
2361 }
2362 mld_v1_transmit_report(inm, MLD_LISTENER_DONE);
2363
2364 IN6M_LOCK_ASSERT_HELD(inm);
2365 MLI_LOCK_ASSERT_HELD(mli);
2366
2367 inm->in6m_state = MLD_NOT_MEMBER;
2368 } else if (mli->mli_version == MLD_VERSION_2) {
2369 /*
2370 * Stop group timer and all pending reports.
2371 * Immediately enqueue a state-change report
2372 * TO_IN {} to be sent on the next fast timeout,
2373 * giving us an opportunity to merge reports.
2374 */
2375 IF_DRAIN(&inm->in6m_scq);
2376 inm->in6m_timer = 0;
2377 inm->in6m_scrv = mli->mli_rv;
2378 MLD_PRINTF(("%s: Leaving %s/%s%d with %d "
2379 "pending retransmissions.\n", __func__,
2380 ip6_sprintf(&inm->in6m_addr),
2381 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit,
2382 inm->in6m_scrv));
2383 if (inm->in6m_scrv == 0) {
2384 inm->in6m_state = MLD_NOT_MEMBER;
2385 inm->in6m_sctimer = 0;
2386 } else {
2387 int retval;
2388 /*
2389 * Stick around in the in6_multihead list;
2390 * the final detach will be issued by
2391 * mld_v2_process_group_timers() when
2392 * the retransmit timer expires.
2393 */
2394 IN6M_ADDREF_LOCKED(inm);
2395 VERIFY(inm->in6m_debug & IFD_ATTACHED);
2396 inm->in6m_reqcnt++;
2397 VERIFY(inm->in6m_reqcnt >= 1);
2398 inm->in6m_nrelecnt++;
2399 VERIFY(inm->in6m_nrelecnt != 0);
2400
2401 retval = mld_v2_enqueue_group_record(
2402 &inm->in6m_scq, inm, 1, 0, 0,
2403 (mli->mli_flags & MLIF_USEALLOW));
2404 KASSERT(retval != 0,
2405 ("%s: enqueue record = %d\n", __func__,
2406 retval));
2407
2408 inm->in6m_state = MLD_LEAVING_MEMBER;
2409 inm->in6m_sctimer = 1;
2410 state_change_timers_running6 = 1;
2411 syncstates = 0;
2412 }
2413 }
2414 MLI_UNLOCK(mli);
2415 break;
2416 case MLD_LAZY_MEMBER:
2417 case MLD_SLEEPING_MEMBER:
2418 case MLD_AWAKENING_MEMBER:
2419 /* Our reports are suppressed; do nothing. */
2420 break;
2421 }
2422
2423 if (syncstates) {
2424 in6m_commit(inm);
2425 MLD_PRINTF(("%s: T1 -> T0 for %s/%s%d\n", __func__,
2426 ip6_sprintf(&inm->in6m_addr),
2427 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2428 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED;
2429 MLD_PRINTF(("%s: T1 now MCAST_UNDEFINED for %p/%s%d\n",
2430 __func__, &inm->in6m_addr, inm->in6m_ifp->if_name,
2431 inm->in6m_ifp->if_unit));
2432 }
2433 }
2434
2435 /*
2436 * Enqueue an MLDv2 group record to the given output queue.
2437 *
2438 * If is_state_change is zero, a current-state record is appended.
2439 * If is_state_change is non-zero, a state-change report is appended.
2440 *
2441 * If is_group_query is non-zero, an mbuf packet chain is allocated.
2442 * If is_group_query is zero, and if there is a packet with free space
2443 * at the tail of the queue, it will be appended to providing there
2444 * is enough free space.
2445 * Otherwise a new mbuf packet chain is allocated.
2446 *
2447 * If is_source_query is non-zero, each source is checked to see if
2448 * it was recorded for a Group-Source query, and will be omitted if
2449 * it is not both in-mode and recorded.
2450 *
2451 * If use_block_allow is non-zero, state change reports for initial join
2452 * and final leave, on an inclusive mode group with a source list, will be
2453 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively.
2454 *
2455 * The function will attempt to allocate leading space in the packet
2456 * for the IPv6+ICMP headers to be prepended without fragmenting the chain.
2457 *
2458 * If successful the size of all data appended to the queue is returned,
2459 * otherwise an error code less than zero is returned, or zero if
2460 * no record(s) were appended.
2461 */
2462 static int
2463 mld_v2_enqueue_group_record(struct ifqueue *ifq, struct in6_multi *inm,
2464 const int is_state_change, const int is_group_query,
2465 const int is_source_query, const int use_block_allow)
2466 {
2467 struct mldv2_record mr;
2468 struct mldv2_record *pmr;
2469 struct ifnet *ifp;
2470 struct ip6_msource *ims, *nims;
2471 struct mbuf *m0, *m, *md;
2472 int error, is_filter_list_change;
2473 int minrec0len, m0srcs, msrcs, nbytes, off;
2474 int record_has_sources;
2475 int now;
2476 int type;
2477 uint8_t mode;
2478
2479 IN6M_LOCK_ASSERT_HELD(inm);
2480 MLI_LOCK_ASSERT_HELD(inm->in6m_mli);
2481
2482 error = 0;
2483 ifp = inm->in6m_ifp;
2484 is_filter_list_change = 0;
2485 m = NULL;
2486 m0 = NULL;
2487 m0srcs = 0;
2488 msrcs = 0;
2489 nbytes = 0;
2490 nims = NULL;
2491 record_has_sources = 1;
2492 pmr = NULL;
2493 type = MLD_DO_NOTHING;
2494 mode = inm->in6m_st[1].iss_fmode;
2495
2496 /*
2497 * If we did not transition out of ASM mode during t0->t1,
2498 * and there are no source nodes to process, we can skip
2499 * the generation of source records.
2500 */
2501 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 &&
2502 inm->in6m_nsrc == 0)
2503 record_has_sources = 0;
2504
2505 if (is_state_change) {
2506 /*
2507 * Queue a state change record.
2508 * If the mode did not change, and there are non-ASM
2509 * listeners or source filters present,
2510 * we potentially need to issue two records for the group.
2511 * If there are ASM listeners, and there was no filter
2512 * mode transition of any kind, do nothing.
2513 *
2514 * If we are transitioning to MCAST_UNDEFINED, we need
2515 * not send any sources. A transition to/from this state is
2516 * considered inclusive with some special treatment.
2517 *
2518 * If we are rewriting initial joins/leaves to use
2519 * ALLOW/BLOCK, and the group's membership is inclusive,
2520 * we need to send sources in all cases.
2521 */
2522 if (mode != inm->in6m_st[0].iss_fmode) {
2523 if (mode == MCAST_EXCLUDE) {
2524 MLD_PRINTF(("%s: change to EXCLUDE\n",
2525 __func__));
2526 type = MLD_CHANGE_TO_EXCLUDE_MODE;
2527 } else {
2528 MLD_PRINTF(("%s: change to INCLUDE\n",
2529 __func__));
2530 if (use_block_allow) {
2531 /*
2532 * XXX
2533 * Here we're interested in state
2534 * edges either direction between
2535 * MCAST_UNDEFINED and MCAST_INCLUDE.
2536 * Perhaps we should just check
2537 * the group state, rather than
2538 * the filter mode.
2539 */
2540 if (mode == MCAST_UNDEFINED) {
2541 type = MLD_BLOCK_OLD_SOURCES;
2542 } else {
2543 type = MLD_ALLOW_NEW_SOURCES;
2544 }
2545 } else {
2546 type = MLD_CHANGE_TO_INCLUDE_MODE;
2547 if (mode == MCAST_UNDEFINED)
2548 record_has_sources = 0;
2549 }
2550 }
2551 } else {
2552 if (record_has_sources) {
2553 is_filter_list_change = 1;
2554 } else {
2555 type = MLD_DO_NOTHING;
2556 }
2557 }
2558 } else {
2559 /*
2560 * Queue a current state record.
2561 */
2562 if (mode == MCAST_EXCLUDE) {
2563 type = MLD_MODE_IS_EXCLUDE;
2564 } else if (mode == MCAST_INCLUDE) {
2565 type = MLD_MODE_IS_INCLUDE;
2566 VERIFY(inm->in6m_st[1].iss_asm == 0);
2567 }
2568 }
2569
2570 /*
2571 * Generate the filter list changes using a separate function.
2572 */
2573 if (is_filter_list_change)
2574 return (mld_v2_enqueue_filter_change(ifq, inm));
2575
2576 if (type == MLD_DO_NOTHING) {
2577 MLD_PRINTF(("%s: nothing to do for %s/%s%d\n",
2578 __func__, ip6_sprintf(&inm->in6m_addr),
2579 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2580 return (0);
2581 }
2582
2583 /*
2584 * If any sources are present, we must be able to fit at least
2585 * one in the trailing space of the tail packet's mbuf,
2586 * ideally more.
2587 */
2588 minrec0len = sizeof(struct mldv2_record);
2589 if (record_has_sources)
2590 minrec0len += sizeof(struct in6_addr);
2591 MLD_PRINTF(("%s: queueing %s for %s/%s%d\n", __func__,
2592 mld_rec_type_to_str(type),
2593 ip6_sprintf(&inm->in6m_addr),
2594 inm->in6m_ifp->if_name, inm->in6m_ifp->if_unit));
2595
2596 /*
2597 * Check if we have a packet in the tail of the queue for this
2598 * group into which the first group record for this group will fit.
2599 * Otherwise allocate a new packet.
2600 * Always allocate leading space for IP6+RA+ICMPV6+REPORT.
2601 * Note: Group records for G/GSR query responses MUST be sent
2602 * in their own packet.
2603 */
2604 m0 = ifq->ifq_tail;
2605 if (!is_group_query &&
2606 m0 != NULL &&
2607 (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) &&
2608 (m0->m_pkthdr.len + minrec0len) <
2609 (ifp->if_mtu - MLD_MTUSPACE)) {
2610 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2611 sizeof(struct mldv2_record)) /
2612 sizeof(struct in6_addr);
2613 m = m0;
2614 MLD_PRINTF(("%s: use existing packet\n", __func__));
2615 } else {
2616 if (IF_QFULL(ifq)) {
2617 MLD_PRINTF(("%s: outbound queue full\n", __func__));
2618 return (-ENOMEM);
2619 }
2620 m = NULL;
2621 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2622 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2623 if (!is_state_change && !is_group_query)
2624 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2625 if (m == NULL)
2626 m = m_gethdr(M_DONTWAIT, MT_DATA);
2627 if (m == NULL)
2628 return (-ENOMEM);
2629
2630 MLD_PRINTF(("%s: allocated first packet\n", __func__));
2631 }
2632
2633 /*
2634 * Append group record.
2635 * If we have sources, we don't know how many yet.
2636 */
2637 mr.mr_type = type;
2638 mr.mr_datalen = 0;
2639 mr.mr_numsrc = 0;
2640 mr.mr_addr = inm->in6m_addr;
2641 in6_clearscope(&mr.mr_addr);
2642 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2643 if (m != m0)
2644 m_freem(m);
2645 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2646 return (-ENOMEM);
2647 }
2648 nbytes += sizeof(struct mldv2_record);
2649
2650 /*
2651 * Append as many sources as will fit in the first packet.
2652 * If we are appending to a new packet, the chain allocation
2653 * may potentially use clusters; use m_getptr() in this case.
2654 * If we are appending to an existing packet, we need to obtain
2655 * a pointer to the group record after m_append(), in case a new
2656 * mbuf was allocated.
2657 *
2658 * Only append sources which are in-mode at t1. If we are
2659 * transitioning to MCAST_UNDEFINED state on the group, and
2660 * use_block_allow is zero, do not include source entries.
2661 * Otherwise, we need to include this source in the report.
2662 *
2663 * Only report recorded sources in our filter set when responding
2664 * to a group-source query.
2665 */
2666 if (record_has_sources) {
2667 if (m == m0) {
2668 md = m_last(m);
2669 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2670 md->m_len - nbytes);
2671 } else {
2672 md = m_getptr(m, 0, &off);
2673 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) +
2674 off);
2675 }
2676 msrcs = 0;
2677 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs,
2678 nims) {
2679 MLD_PRINTF(("%s: visit node %s\n", __func__,
2680 ip6_sprintf(&ims->im6s_addr)));
2681 now = im6s_get_mode(inm, ims, 1);
2682 MLD_PRINTF(("%s: node is %d\n", __func__, now));
2683 if ((now != mode) ||
2684 (now == mode &&
2685 (!use_block_allow && mode == MCAST_UNDEFINED))) {
2686 MLD_PRINTF(("%s: skip node\n", __func__));
2687 continue;
2688 }
2689 if (is_source_query && ims->im6s_stp == 0) {
2690 MLD_PRINTF(("%s: skip unrecorded node\n",
2691 __func__));
2692 continue;
2693 }
2694 MLD_PRINTF(("%s: append node\n", __func__));
2695 if (!m_append(m, sizeof(struct in6_addr),
2696 (void *)&ims->im6s_addr)) {
2697 if (m != m0)
2698 m_freem(m);
2699 MLD_PRINTF(("%s: m_append() failed.\n",
2700 __func__));
2701 return (-ENOMEM);
2702 }
2703 nbytes += sizeof(struct in6_addr);
2704 ++msrcs;
2705 if (msrcs == m0srcs)
2706 break;
2707 }
2708 MLD_PRINTF(("%s: msrcs is %d this packet\n", __func__,
2709 msrcs));
2710 pmr->mr_numsrc = htons(msrcs);
2711 nbytes += (msrcs * sizeof(struct in6_addr));
2712 }
2713
2714 if (is_source_query && msrcs == 0) {
2715 MLD_PRINTF(("%s: no recorded sources to report\n", __func__));
2716 if (m != m0)
2717 m_freem(m);
2718 return (0);
2719 }
2720
2721 /*
2722 * We are good to go with first packet.
2723 */
2724 if (m != m0) {
2725 MLD_PRINTF(("%s: enqueueing first packet\n", __func__));
2726 m->m_pkthdr.vt_nrecs = 1;
2727 m->m_pkthdr.rcvif = ifp;
2728 IF_ENQUEUE(ifq, m);
2729 } else {
2730 m->m_pkthdr.vt_nrecs++;
2731 }
2732 /*
2733 * No further work needed if no source list in packet(s).
2734 */
2735 if (!record_has_sources)
2736 return (nbytes);
2737
2738 /*
2739 * Whilst sources remain to be announced, we need to allocate
2740 * a new packet and fill out as many sources as will fit.
2741 * Always try for a cluster first.
2742 */
2743 while (nims != NULL) {
2744 if (IF_QFULL(ifq)) {
2745 MLD_PRINTF(("%s: outbound queue full\n", __func__));
2746 return (-ENOMEM);
2747 }
2748 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2749 if (m == NULL)
2750 m = m_gethdr(M_DONTWAIT, MT_DATA);
2751 if (m == NULL)
2752 return (-ENOMEM);
2753 md = m_getptr(m, 0, &off);
2754 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off);
2755 MLD_PRINTF(("%s: allocated next packet\n", __func__));
2756
2757 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) {
2758 if (m != m0)
2759 m_freem(m);
2760 MLD_PRINTF(("%s: m_append() failed.\n", __func__));
2761 return (-ENOMEM);
2762 }
2763 m->m_pkthdr.vt_nrecs = 1;
2764 nbytes += sizeof(struct mldv2_record);
2765
2766 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2767 sizeof(struct mldv2_record)) / sizeof(struct in6_addr);
2768
2769 msrcs = 0;
2770 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2771 MLD_PRINTF(("%s: visit node %s\n",
2772 __func__, ip6_sprintf(&ims->im6s_addr)));
2773 now = im6s_get_mode(inm, ims, 1);
2774 if ((now != mode) ||
2775 (now == mode &&
2776 (!use_block_allow && mode == MCAST_UNDEFINED))) {
2777 MLD_PRINTF(("%s: skip node\n", __func__));
2778 continue;
2779 }
2780 if (is_source_query && ims->im6s_stp == 0) {
2781 MLD_PRINTF(("%s: skip unrecorded node\n",
2782 __func__));
2783 continue;
2784 }
2785 MLD_PRINTF(("%s: append node\n", __func__));
2786 if (!m_append(m, sizeof(struct in6_addr),
2787 (void *)&ims->im6s_addr)) {
2788 if (m != m0)
2789 m_freem(m);
2790 MLD_PRINTF(("%s: m_append() failed.\n",
2791 __func__));
2792 return (-ENOMEM);
2793 }
2794 ++msrcs;
2795 if (msrcs == m0srcs)
2796 break;
2797 }
2798 pmr->mr_numsrc = htons(msrcs);
2799 nbytes += (msrcs * sizeof(struct in6_addr));
2800
2801 MLD_PRINTF(("%s: enqueueing next packet\n", __func__));
2802 m->m_pkthdr.rcvif = ifp;
2803 IF_ENQUEUE(ifq, m);
2804 }
2805
2806 return (nbytes);
2807 }
2808
/*
 * Record type of a filter-list change pass, also usable as a bit mask
 * of the passes which have been completed.
 * Each value equals the per-source filter mode named beside it, so a
 * mode can be converted with a plain cast.
 */
typedef enum {
	REC_NONE	= 0,			/* MCAST_UNDEFINED */
	REC_ALLOW	= 1 << 0,		/* MCAST_INCLUDE */
	REC_BLOCK	= 1 << 1,		/* MCAST_EXCLUDE */
	REC_FULL	= REC_ALLOW | REC_BLOCK	/* both record types */
} rectype_t;
2820
2821 /*
2822 * Enqueue an MLDv2 filter list change to the given output queue.
2823 *
2824 * Source list filter state is held in an RB-tree. When the filter list
2825 * for a group is changed without changing its mode, we need to compute
2826 * the deltas between T0 and T1 for each source in the filter set,
2827 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records.
2828 *
2829 * As we may potentially queue two record types, and the entire R-B tree
2830 * needs to be walked at once, we break this out into its own function
2831 * so we can generate a tightly packed queue of packets.
2832 *
2833 * XXX This could be written to only use one tree walk, although that makes
2834 * serializing into the mbuf chains a bit harder. For now we do two walks
2835 * which makes things easier on us, and it may or may not be harder on
2836 * the L2 cache.
2837 *
2838 * If successful the size of all data appended to the queue is returned,
2839 * otherwise an error code less than zero is returned, or zero if
2840 * no record(s) were appended.
 *
 * ifq is the queue the records are appended to; inm is the group whose
 * source list changed. The group lock must be held by the caller.
 * The only error produced here is -ENOMEM, when an mbuf cannot be
 * allocated or extended.
2841 */
2842 static int
2843 mld_v2_enqueue_filter_change(struct ifqueue *ifq, struct in6_multi *inm)
2844 {
2845 static const int MINRECLEN =
2846 sizeof(struct mldv2_record) + sizeof(struct in6_addr);
2847 struct ifnet *ifp;
2848 struct mldv2_record mr;
2849 struct mldv2_record *pmr;
2850 struct ip6_msource *ims, *nims;
2851 struct mbuf *m, *m0, *md;
2852 int m0srcs, nbytes, npbytes, off, rsrcs, schanged;
2853 int nallow, nblock;
2854 uint8_t mode, now, then;
2855 rectype_t crt, drt, nrt;
2856
2857 IN6M_LOCK_ASSERT_HELD(inm);
2858
/*
 * Nothing to encode when the group has no sources, or when its
 * iss_asm count is non-zero at both t0 and t1.
 */
2859 if (inm->in6m_nsrc == 0 ||
2860 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0))
2861 return (0);
2862
2863 ifp = inm->in6m_ifp; /* interface */
2864 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */
2865 crt = REC_NONE; /* current group record type */
2866 drt = REC_NONE; /* mask of completed group record types */
2867 nrt = REC_NONE; /* record type for current node */
2868 m0srcs = 0; /* # source which will fit in current mbuf chain */
2869 npbytes = 0; /* # of bytes appended this packet */
2870 nbytes = 0; /* # of bytes appended to group's state-change queue */
2871 rsrcs = 0; /* # sources encoded in current record */
2872 schanged = 0; /* # nodes encoded in overall filter change */
2873 nallow = 0; /* # of source entries in ALLOW_NEW */
2874 nblock = 0; /* # of source entries in BLOCK_OLD */
2875 nims = NULL; /* next tree node pointer */
2876
2877 /*
2878 * For each possible filter record mode.
2879 * The first kind of source we encounter tells us which
2880 * is the first kind of record we start appending.
2881 * If a node transitioned to UNDEFINED at t1, its mode is treated
2882 * as the inverse of the group's filter mode.
2883 */
2884 while (drt != REC_FULL) {
2885 do {
/*
 * Reuse the packet at the tail of the queue when it is below
 * the per-report record limit and still has room for a record
 * header plus at least one source (MINRECLEN); otherwise
 * allocate a new packet, trying for a cluster first.
 */
2886 m0 = ifq->ifq_tail;
2887 if (m0 != NULL &&
2888 (m0->m_pkthdr.vt_nrecs + 1 <=
2889 MLD_V2_REPORT_MAXRECS) &&
2890 (m0->m_pkthdr.len + MINRECLEN) <
2891 (ifp->if_mtu - MLD_MTUSPACE)) {
2892 m = m0;
2893 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len -
2894 sizeof(struct mldv2_record)) /
2895 sizeof(struct in6_addr);
2896 MLD_PRINTF(("%s: use previous packet\n",
2897 __func__));
2898 } else {
2899 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
2900 if (m == NULL)
2901 m = m_gethdr(M_DONTWAIT, MT_DATA);
2902 if (m == NULL) {
2903 MLD_PRINTF(("%s: m_get*() failed\n",
2904 __func__));
2905 return (-ENOMEM);
2906 }
/* New packet: no records and no bytes accounted for yet. */
2907 m->m_pkthdr.vt_nrecs = 0;
2908 m0srcs = (ifp->if_mtu - MLD_MTUSPACE -
2909 sizeof(struct mldv2_record)) /
2910 sizeof(struct in6_addr);
2911 npbytes = 0;
2912 MLD_PRINTF(("%s: allocated new packet\n",
2913 __func__));
2914 }
2915 /*
2916 * Append the MLD group record header to the
2917 * current packet's data area.
2918 * Recalculate pointer to free space for next
2919 * group record, in case m_append() allocated
2920 * a new mbuf or cluster.
2921 */
/* The header starts zeroed; type and source count are set below. */
2922 memset(&mr, 0, sizeof(mr));
2923 mr.mr_addr = inm->in6m_addr;
2924 in6_clearscope(&mr.mr_addr);
2925 if (!m_append(m, sizeof(mr), (void *)&mr)) {
/* Only a packet allocated here is freed; m0 stays on the queue. */
2926 if (m != m0)
2927 m_freem(m);
2928 MLD_PRINTF(("%s: m_append() failed\n",
2929 __func__));
2930 return (-ENOMEM);
2931 }
2932 npbytes += sizeof(struct mldv2_record);
2933 if (m != m0) {
2934 /* new packet; offset in chain */
2935 md = m_getptr(m, npbytes -
2936 sizeof(struct mldv2_record), &off);
2937 pmr = (struct mldv2_record *)(mtod(md,
2938 uint8_t *) + off);
2939 } else {
2940 /* current packet; offset from last append */
2941 md = m_last(m);
2942 pmr = (struct mldv2_record *)(mtod(md,
2943 uint8_t *) + md->m_len -
2944 sizeof(struct mldv2_record));
2945 }
2946 /*
2947 * Begin walking the tree for this record type
2948 * pass, or continue from where we left off
2949 * previously if we had to allocate a new packet.
2950 * Only report deltas in-mode at t1.
2951 * We need not report included sources as allowed
2952 * if we are in inclusive mode on the group,
2953 * however the converse is not true.
2954 */
2955 rsrcs = 0;
2956 if (nims == NULL) {
2957 nims = RB_MIN(ip6_msource_tree,
2958 &inm->in6m_srcs);
2959 }
2960 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) {
2961 MLD_PRINTF(("%s: visit node %s\n", __func__,
2962 ip6_sprintf(&ims->im6s_addr)));
2963 now = im6s_get_mode(inm, ims, 1);
2964 then = im6s_get_mode(inm, ims, 0);
2965 MLD_PRINTF(("%s: mode: t0 %d, t1 %d\n",
2966 __func__, then, now));
2967 if (now == then) {
2968 MLD_PRINTF(("%s: skip unchanged\n",
2969 __func__));
2970 continue;
2971 }
2972 if (mode == MCAST_EXCLUDE &&
2973 now == MCAST_INCLUDE) {
2974 MLD_PRINTF(("%s: skip IN src on EX "
2975 "group\n", __func__));
2976 continue;
2977 }
/*
 * Classify the node by its mode at t1; a node which became
 * UNDEFINED takes the inverse of the group's filter mode.
 */
2978 nrt = (rectype_t)now;
2979 if (nrt == REC_NONE)
2980 nrt = (rectype_t)(~mode & REC_FULL);
/*
 * The first changed node seen fixes the record type of the
 * first pass; nodes of the other type are skipped here and
 * picked up by the second pass.
 */
2981 if (schanged++ == 0) {
2982 crt = nrt;
2983 } else if (crt != nrt)
2984 continue;
2985 if (!m_append(m, sizeof(struct in6_addr),
2986 (void *)&ims->im6s_addr)) {
2987 if (m != m0)
2988 m_freem(m);
2989 MLD_PRINTF(("%s: m_append() failed\n",
2990 __func__));
2991 return (-ENOMEM);
2992 }
2993 nallow += !!(crt == REC_ALLOW);
2994 nblock += !!(crt == REC_BLOCK);
/* Stop once the current packet cannot hold another source. */
2995 if (++rsrcs == m0srcs)
2996 break;
2997 }
2998 /*
2999 * If we did not append any tree nodes on this
3000 * pass, back out of allocations.
3001 */
3002 if (rsrcs == 0) {
3003 npbytes -= sizeof(struct mldv2_record);
3004 if (m != m0) {
3005 MLD_PRINTF(("%s: m_free(m)\n",
3006 __func__));
3007 m_freem(m);
3008 } else {
3009 MLD_PRINTF(("%s: m_adj(m, -mr)\n",
3010 __func__));
3011 m_adj(m, -((int)sizeof(
3012 struct mldv2_record)));
3013 }
3014 continue;
3015 }
3016 npbytes += (rsrcs * sizeof(struct in6_addr));
/* Fill in the record header now that its type and size are known. */
3017 if (crt == REC_ALLOW)
3018 pmr->mr_type = MLD_ALLOW_NEW_SOURCES;
3019 else if (crt == REC_BLOCK)
3020 pmr->mr_type = MLD_BLOCK_OLD_SOURCES;
3021 pmr->mr_numsrc = htons(rsrcs);
3022 /*
3023 * Count the new group record, and enqueue this
3024 * packet if it wasn't already queued.
3025 */
3026 m->m_pkthdr.vt_nrecs++;
3027 m->m_pkthdr.rcvif = ifp;
3028 if (m != m0)
3029 IF_ENQUEUE(ifq, m);
/*
 * NOTE(review): npbytes is reset only when a new packet is
 * allocated, so when a later record lands in the same packet
 * this appears to add the earlier records' bytes to nbytes
 * again -- confirm whether callers rely on the exact size.
 */
3030 nbytes += npbytes;
3031 } while (nims != NULL);
/* This record type is complete; switch to the other one. */
3032 drt |= crt;
3033 crt = (~crt & REC_FULL);
3034 }
3035
3036 MLD_PRINTF(("%s: queued %d ALLOW_NEW, %d BLOCK_OLD\n", __func__,
3037 nallow, nblock));
3038
3039 return (nbytes);
3040 }
3041
3042 static int
3043 mld_v2_merge_state_changes(struct in6_multi *inm, struct ifqueue *ifscq)
3044 {
3045 struct ifqueue *gq;
3046 struct mbuf *m; /* pending state-change */
3047 struct mbuf *m0; /* copy of pending state-change */
3048 struct mbuf *mt; /* last state-change in packet */
3049 struct mbuf *n;
3050 int docopy, domerge;
3051 u_int recslen;
3052
3053 IN6M_LOCK_ASSERT_HELD(inm);
3054
3055 docopy = 0;
3056 domerge = 0;
3057 recslen = 0;
3058
3059 /*
3060 * If there are further pending retransmissions, make a writable
3061 * copy of each queued state-change message before merging.
3062 */
3063 if (inm->in6m_scrv > 0)
3064 docopy = 1;
3065
3066 gq = &inm->in6m_scq;
3067 #ifdef MLD_DEBUG
3068 if (gq->ifq_head == NULL) {
3069 MLD_PRINTF(("%s: WARNING: queue for inm %p is empty\n",
3070 __func__, inm));
3071 }
3072 #endif
3073
3074 /*
3075 * Use IF_REMQUEUE() instead of IF_DEQUEUE() below, since the
3076 * packet might not always be at the head of the ifqueue.
3077 */
3078 m = gq->ifq_head;
3079 while (m != NULL) {
3080 /*
3081 * Only merge the report into the current packet if
3082 * there is sufficient space to do so; an MLDv2 report
3083 * packet may only contain 65,535 group records.
3084 * Always use a simple mbuf chain concatentation to do this,
3085 * as large state changes for single groups may have
3086 * allocated clusters.
3087 */
3088 domerge = 0;
3089 mt = ifscq->ifq_tail;
3090 if (mt != NULL) {
3091 recslen = m_length(m);
3092
3093 if ((mt->m_pkthdr.vt_nrecs +
3094 m->m_pkthdr.vt_nrecs <=
3095 MLD_V2_REPORT_MAXRECS) &&
3096 (mt->m_pkthdr.len + recslen <=
3097 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
3098 domerge = 1;
3099 }
3100
3101 if (!domerge && IF_QFULL(gq)) {
3102 MLD_PRINTF(("%s: outbound queue full, skipping whole "
3103 "packet %p\n", __func__, m));
3104 n = m->m_nextpkt;
3105 if (!docopy) {
3106 IF_REMQUEUE(gq, m);
3107 m_freem(m);
3108 }
3109 m = n;
3110 continue;
3111 }
3112
3113 if (!docopy) {
3114 MLD_PRINTF(("%s: dequeueing %p\n", __func__, m));
3115 n = m->m_nextpkt;
3116 IF_REMQUEUE(gq, m);
3117 m0 = m;
3118 m = n;
3119 } else {
3120 MLD_PRINTF(("%s: copying %p\n", __func__, m));
3121 m0 = m_dup(m, M_NOWAIT);
3122 if (m0 == NULL)
3123 return (ENOMEM);
3124 m0->m_nextpkt = NULL;
3125 m = m->m_nextpkt;
3126 }
3127
3128 if (!domerge) {
3129 MLD_PRINTF(("%s: queueing %p to ifscq %p)\n",
3130 __func__, m0, ifscq));
3131 m0->m_pkthdr.rcvif = inm->in6m_ifp;
3132 IF_ENQUEUE(ifscq, m0);
3133 } else {
3134 struct mbuf *mtl; /* last mbuf of packet mt */
3135
3136 MLD_PRINTF(("%s: merging %p with ifscq tail %p)\n",
3137 __func__, m0, mt));
3138
3139 mtl = m_last(mt);
3140 m0->m_flags &= ~M_PKTHDR;
3141 mt->m_pkthdr.len += recslen;
3142 mt->m_pkthdr.vt_nrecs +=
3143 m0->m_pkthdr.vt_nrecs;
3144
3145 mtl->m_next = m0;
3146 }
3147 }
3148
3149 return (0);
3150 }
3151
3152 /*
3153 * Respond to a pending MLDv2 General Query.
3154 */
3155 static void
3156 mld_v2_dispatch_general_query(struct mld_ifinfo *mli)
3157 {
3158 struct ifnet *ifp;
3159 struct in6_multi *inm;
3160 struct in6_multistep step;
3161 int retval;
3162
3163 MLI_LOCK_ASSERT_HELD(mli);
3164
3165 VERIFY(mli->mli_version == MLD_VERSION_2);
3166
3167 ifp = mli->mli_ifp;
3168 MLI_UNLOCK(mli);
3169
3170 in6_multihead_lock_shared();
3171 IN6_FIRST_MULTI(step, inm);
3172 while (inm != NULL) {
3173 IN6M_LOCK(inm);
3174 if (inm->in6m_ifp != ifp)
3175 goto next;
3176
3177 switch (inm->in6m_state) {
3178 case MLD_NOT_MEMBER:
3179 case MLD_SILENT_MEMBER:
3180 break;
3181 case MLD_REPORTING_MEMBER:
3182 case MLD_IDLE_MEMBER:
3183 case MLD_LAZY_MEMBER:
3184 case MLD_SLEEPING_MEMBER:
3185 case MLD_AWAKENING_MEMBER:
3186 inm->in6m_state = MLD_REPORTING_MEMBER;
3187 MLI_LOCK(mli);
3188 retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3189 inm, 0, 0, 0, 0);
3190 MLI_UNLOCK(mli);
3191 MLD_PRINTF(("%s: enqueue record = %d\n",
3192 __func__, retval));
3193 break;
3194 case MLD_G_QUERY_PENDING_MEMBER:
3195 case MLD_SG_QUERY_PENDING_MEMBER:
3196 case MLD_LEAVING_MEMBER:
3197 break;
3198 }
3199 next:
3200 IN6M_UNLOCK(inm);
3201 IN6_NEXT_MULTI(step, inm);
3202 }
3203 in6_multihead_lock_done();
3204
3205 MLI_LOCK(mli);
3206 mld_dispatch_queue(mli, &mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3207 MLI_LOCK_ASSERT_HELD(mli);
3208
3209 /*
3210 * Slew transmission of bursts over 500ms intervals.
3211 */
3212 if (mli->mli_gq.ifq_head != NULL) {
3213 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3214 MLD_RESPONSE_BURST_INTERVAL);
3215 interface_timers_running6 = 1;
3216 }
3217 }
3218
3219 /*
3220 * Transmit the next pending message in the output queue.
3221 *
3222 * Must not be called with in6m_lockm or mli_lock held.
3223 */
3224 static void
3225 mld_dispatch_packet(struct mbuf *m)
3226 {
3227 struct ip6_moptions *im6o;
3228 struct ifnet *ifp;
3229 struct ifnet *oifp = NULL;
3230 struct mbuf *m0;
3231 struct mbuf *md;
3232 struct ip6_hdr *ip6;
3233 struct mld_hdr *mld;
3234 int error;
3235 int off;
3236 int type;
3237
3238 MLD_PRINTF(("%s: transmit %p\n", __func__, m));
3239
3240 /*
3241 * Check if the ifnet is still attached.
3242 */
3243 ifp = m->m_pkthdr.rcvif;
3244 if (ifp == NULL || !ifnet_is_attached(ifp, 0)) {
3245 MLD_PRINTF(("%s: dropped %p as ifindex %u went away.\n",
3246 __func__, m, (u_int)if_index));
3247 m_freem(m);
3248 ip6stat.ip6s_noroute++;
3249 return;
3250 }
3251
3252 im6o = ip6_allocmoptions(M_WAITOK);
3253 if (im6o == NULL) {
3254 m_freem(m);
3255 return;
3256 }
3257
3258 im6o->im6o_multicast_hlim = 1;
3259 #if MROUTING
3260 im6o->im6o_multicast_loop = (ip6_mrouter != NULL);
3261 #else
3262 im6o->im6o_multicast_loop = 0;
3263 #endif
3264 im6o->im6o_multicast_ifp = ifp;
3265
3266 if (m->m_flags & M_MLDV1) {
3267 m0 = m;
3268 } else {
3269 m0 = mld_v2_encap_report(ifp, m);
3270 if (m0 == NULL) {
3271 MLD_PRINTF(("%s: dropped %p\n", __func__, m));
3272 /*
3273 * mld_v2_encap_report() has already freed our mbuf.
3274 */
3275 IM6O_REMREF(im6o);
3276 ip6stat.ip6s_odropped++;
3277 return;
3278 }
3279 }
3280
3281 m->m_flags &= ~(M_PROTOFLAGS);
3282 m0->m_pkthdr.rcvif = lo_ifp;
3283
3284 ip6 = mtod(m0, struct ip6_hdr *);
3285 #if 0
3286 (void) in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */
3287 #else
3288 /*
3289 * XXX XXX Break some KPI rules to prevent an LOR which would
3290 * occur if we called in6_setscope() at transmission.
3291 * See comments at top of file.
3292 */
3293 MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index);
3294 #endif
3295
3296 /*
3297 * Retrieve the ICMPv6 type before handoff to ip6_output(),
3298 * so we can bump the stats.
3299 */
3300 md = m_getptr(m0, sizeof(struct ip6_hdr), &off);
3301 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off);
3302 type = mld->mld_type;
3303
3304 if (ifp->if_eflags & IFEF_TXSTART) {
3305 /* Use control service class if the outgoing
3306 * interface supports transmit-start model.
3307 */
3308 (void) m_set_service_class(m0, MBUF_SC_CTL);
3309 }
3310
3311 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, im6o,
3312 &oifp, NULL);
3313
3314 IM6O_REMREF(im6o);
3315
3316 if (error) {
3317 MLD_PRINTF(("%s: ip6_output(%p) = %d\n", __func__, m0, error));
3318 if (oifp != NULL)
3319 ifnet_release(oifp);
3320 return;
3321 }
3322
3323 icmp6stat.icp6s_outhist[type]++;
3324 if (oifp != NULL) {
3325 icmp6_ifstat_inc(oifp, ifs6_out_msg);
3326 switch (type) {
3327 case MLD_LISTENER_REPORT:
3328 case MLDV2_LISTENER_REPORT:
3329 icmp6_ifstat_inc(oifp, ifs6_out_mldreport);
3330 break;
3331 case MLD_LISTENER_DONE:
3332 icmp6_ifstat_inc(oifp, ifs6_out_mlddone);
3333 break;
3334 }
3335 ifnet_release(oifp);
3336 }
3337 }
3338
3339 /*
3340 * Encapsulate an MLDv2 report.
3341 *
3342 * KAME IPv6 requires that hop-by-hop options be passed separately,
3343 * and that the IPv6 header be prepended in a separate mbuf.
3344 *
3345 * Returns a pointer to the new mbuf chain head, or NULL if the
3346 * allocation failed.
3347 */
3348 static struct mbuf *
3349 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m)
3350 {
3351 struct mbuf *mh;
3352 struct mldv2_report *mld;
3353 struct ip6_hdr *ip6;
3354 struct in6_ifaddr *ia;
3355 int mldreclen;
3356
3357 VERIFY(m->m_flags & M_PKTHDR);
3358
3359 /*
3360 * RFC3590: OK to send as :: or tentative during DAD.
3361 */
3362 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
3363 if (ia == NULL)
3364 MLD_PRINTF(("%s: warning: ia is NULL\n", __func__));
3365
3366 MGETHDR(mh, M_DONTWAIT, MT_HEADER);
3367 if (mh == NULL) {
3368 if (ia != NULL)
3369 IFA_REMREF(&ia->ia_ifa);
3370 m_freem(m);
3371 return (NULL);
3372 }
3373 MH_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report));
3374
3375 mldreclen = m_length(m);
3376 MLD_PRINTF(("%s: mldreclen is %d\n", __func__, mldreclen));
3377
3378 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report);
3379 mh->m_pkthdr.len = sizeof(struct ip6_hdr) +
3380 sizeof(struct mldv2_report) + mldreclen;
3381
3382 ip6 = mtod(mh, struct ip6_hdr *);
3383 ip6->ip6_flow = 0;
3384 ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
3385 ip6->ip6_vfc |= IPV6_VERSION;
3386 ip6->ip6_nxt = IPPROTO_ICMPV6;
3387 if (ia != NULL)
3388 IFA_LOCK(&ia->ia_ifa);
3389 ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
3390 if (ia != NULL) {
3391 IFA_UNLOCK(&ia->ia_ifa);
3392 IFA_REMREF(&ia->ia_ifa);
3393 ia = NULL;
3394 }
3395 ip6->ip6_dst = in6addr_linklocal_allv2routers;
3396 /* scope ID will be set in netisr */
3397
3398 mld = (struct mldv2_report *)(ip6 + 1);
3399 mld->mld_type = MLDV2_LISTENER_REPORT;
3400 mld->mld_code = 0;
3401 mld->mld_cksum = 0;
3402 mld->mld_v2_reserved = 0;
3403 mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs);
3404 m->m_pkthdr.vt_nrecs = 0;
3405 m->m_flags &= ~M_PKTHDR;
3406
3407 mh->m_next = m;
3408 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
3409 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen);
3410 return (mh);
3411 }
3412
#ifdef MLD_DEBUG
/*
 * Translate an MLDv2 group record type into a short label for debug
 * output.  Values that match none of the known record types yield
 * "unknown".  The returned string is a constant and must not be freed.
 */
static const char *
mld_rec_type_to_str(const int type)
{
	const char *label = "unknown";

	switch (type) {
	case MLD_CHANGE_TO_EXCLUDE_MODE:
		label = "TO_EX";
		break;
	case MLD_CHANGE_TO_INCLUDE_MODE:
		label = "TO_IN";
		break;
	case MLD_MODE_IS_EXCLUDE:
		label = "MODE_EX";
		break;
	case MLD_MODE_IS_INCLUDE:
		label = "MODE_IN";
		break;
	case MLD_ALLOW_NEW_SOURCES:
		label = "ALLOW_NEW";
		break;
	case MLD_BLOCK_OLD_SOURCES:
		label = "BLOCK_OLD";
		break;
	default:
		break;
	}
	return (label);
}
#endif
3442
3443 void
3444 mld_init(void)
3445 {
3446
3447 MLD_PRINTF(("%s: initializing\n", __func__));
3448
3449 /* Setup lock group and attribute for mld_mtx */
3450 mld_mtx_grp_attr = lck_grp_attr_alloc_init();
3451 mld_mtx_grp = lck_grp_alloc_init("mld_mtx\n", mld_mtx_grp_attr);
3452 mld_mtx_attr = lck_attr_alloc_init();
3453 lck_mtx_init(&mld_mtx, mld_mtx_grp, mld_mtx_attr);
3454
3455 ip6_initpktopts(&mld_po);
3456 mld_po.ip6po_hlim = 1;
3457 mld_po.ip6po_hbh = &mld_ra.hbh;
3458 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER;
3459 mld_po.ip6po_flags = IP6PO_DONTFRAG;
3460 LIST_INIT(&mli_head);
3461
3462 mli_size = sizeof (struct mld_ifinfo);
3463 mli_zone = zinit(mli_size, MLI_ZONE_MAX * mli_size,
3464 0, MLI_ZONE_NAME);
3465 if (mli_zone == NULL) {
3466 panic("%s: failed allocating %s", __func__, MLI_ZONE_NAME);
3467 /* NOTREACHED */
3468 }
3469 zone_change(mli_zone, Z_EXPAND, TRUE);
3470 zone_change(mli_zone, Z_CALLERACCT, FALSE);
3471 }