]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet6/nd6.c
77ab7630a7def41d18eb286ca2a6a0b13dc089e8
[apple/xnu.git] / bsd / netinet6 / nd6.c
1 /*
2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 /* $FreeBSD: src/sys/netinet6/nd6.c,v 1.20 2002/08/02 20:49:14 rwatson Exp $ */
30 /* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */
31
32 /*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61 /*
62 * XXX
63 * KAME 970409 note:
64 * BSD/OS version heavily modifies this code, related to llinfo.
65 * Since we don't have BSD/OS version of net/route.c in our hand,
66 * I left the code mostly as it was in 970310. -- itojun
67 */
68
69 #include <sys/param.h>
70 #include <sys/systm.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/sockio.h>
75 #include <sys/time.h>
76 #include <sys/kernel.h>
77 #include <sys/sysctl.h>
78 #include <sys/errno.h>
79 #include <sys/syslog.h>
80 #include <sys/protosw.h>
81 #include <sys/proc.h>
82 #include <sys/mcache.h>
83
84 #include <kern/queue.h>
85 #include <kern/zalloc.h>
86
87 #define DONT_WARN_OBSOLETE
88 #include <net/if.h>
89 #include <net/if_dl.h>
90 #include <net/if_types.h>
91 #include <net/if_llreach.h>
92 #include <net/route.h>
93 #include <net/dlil.h>
94 #include <net/ntstat.h>
95
96 #include <netinet/in.h>
97 #include <netinet/in_arp.h>
98 #include <netinet/if_ether.h>
99 #include <netinet6/in6_var.h>
100 #include <netinet/ip6.h>
101 #include <netinet6/ip6_var.h>
102 #include <netinet6/nd6.h>
103 #include <netinet6/scope6_var.h>
104 #include <netinet/icmp6.h>
105
106 #include "loop.h"
107
108 #include <net/net_osdep.h>
109
/* Period of the slow timer armed in nd6_init(), in seconds. */
#define	ND6_SLOWTIMER_INTERVAL		(60 * 60)	/* 1 hour */
/* Initial value of nd6_recalc_reachtm_interval, in seconds. */
#define	ND6_RECALC_REACHTM_INTERVAL	(60 * 120)	/* 2 hours */

/*
 * Pointer-cast helpers.  The argument is always parenthesized so that
 * an expression such as SIN6(p + 1) casts the result of the arithmetic
 * rather than casting p and then doing arithmetic on the wrong type.
 */
#define	SA(p)	((struct sockaddr *)(p))
#define	SIN6(s)	((struct sockaddr_in6 *)(s))
#define	SDL(s)	((struct sockaddr_dl *)(s))

/*
 * Byte-wise comparison of two sockaddrs over the length of the first.
 * Note that a1 is evaluated twice; do not pass an expression with
 * side effects.
 */
#define	equal(a1, a2) \
	(bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
117
118 /* timer values (all in seconds unless noted otherwise) */
119 int nd6_prune = 1; /* nd6_timer() re-arms itself every nd6_prune seconds */
120 int nd6_delay = 5; /* delay before sending the first probe: 5 seconds */
121 int nd6_umaxtries = 3; /* max # of unicast solicitations (PROBE state) */
122 int nd6_mmaxtries = 3; /* max # of multicast solicitations (INCOMPLETE state) */
123 int nd6_useloopback = 1; /* use loopback interface for local traffic */
124 int nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer for STALE entries */
125
126 /* upper bound on nd6_options() iterations, to prevent too many loops in ND option parsing */
127 int nd6_maxndopt = 10; /* max # of ND options allowed */
128
129 int nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */
130 int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */
131
132 #if ND6_DEBUG
133 int nd6_debug = 1;
134 #else
135 int nd6_debug = 0;
136 #endif
137
138 static int nd6_is_new_addr_neighbor (struct sockaddr_in6 *, struct ifnet *);
139
140 /* for debugging? */
141 static int nd6_inuse, nd6_allocated;
142
143 /*
144 * Synchronization notes:
145 *
146 * The global list of ND entries is stored in llinfo_nd6; an entry
147 * gets inserted into the list when the route is created and gets
148 * removed from the list when it is deleted; this is done as part
149 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
150 *
151 * Because rnh_lock and rt_lock for the entry are held during those
152 * operations, the same locks (and thus lock ordering) must be used
153 * elsewhere to access the relevant data structure fields:
154 *
155 * ln_next, ln_prev, ln_rt
156 *
157 * - Routing lock (rnh_lock)
158 *
159 * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags,
160 * ln_llreach, ln_lastused
161 *
162 * - Routing entry lock (rt_lock)
163 *
164 * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
165 * as the route entry itself. When a route is deleted (RTM_DELETE),
166 * it is simply removed from the global list but the memory is not
167 * freed until the route itself is freed.
168 */
169 struct llinfo_nd6 llinfo_nd6 = {
170 &llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0, NULL, 0
171 };
172
173 /* Protected by nd_if_rwlock */
174 size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
175 struct nd_ifinfo *nd_ifinfo = NULL;
176
177 static lck_grp_attr_t *nd_if_rwlock_grp_attr;
178 static lck_grp_t *nd_if_rwlock_grp;
179 static lck_attr_t *nd_if_rwlock_attr;
180 lck_rw_t *nd_if_rwlock;
181
182 /* Protected by nd6_mutex */
183 struct nd_drhead nd_defrouter;
184 struct nd_prhead nd_prefix = { 0 };
185
186 /* Serialization variables for nd6_drain() */
187 static boolean_t nd6_drain_busy;
188 static void *nd6_drain_waitchan = &nd6_drain_busy;
189 static int nd6_drain_waiters = 0;
190
191 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
192 static struct sockaddr_in6 all1_sa;
193
194 static int regen_tmpaddr(struct in6_ifaddr *);
195 extern lck_mtx_t *nd6_mutex;
196
197 static void nd6_slowtimo(void *ignored_arg);
198 static struct llinfo_nd6 *nd6_llinfo_alloc(void);
199 static void nd6_llinfo_free(void *);
200 static void nd6_llinfo_purge(struct rtentry *);
201 static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
202
203 static void nd6_siocgdrlst(void *, int);
204 static void nd6_siocgprlst(void *, int);
205
206 /*
207 * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
208 */
/*
 * LN_DEQUEUE: unlink _ln from the circular, doubly-linked llinfo_nd6 list.
 * Asserts that rnh_lock is owned and that the entry's route lock is held,
 * splices the neighbours together, clears both link pointers (which is
 * what nd6_llinfo_free() checks before freeing) and clears ND6_LNF_IN_USE.
 */
209 #define LN_DEQUEUE(_ln) do { \
210 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \
211 RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \
212 (_ln)->ln_next->ln_prev = (_ln)->ln_prev; \
213 (_ln)->ln_prev->ln_next = (_ln)->ln_next; \
214 (_ln)->ln_prev = (_ln)->ln_next = NULL; \
215 (_ln)->ln_flags &= ~ND6_LNF_IN_USE; \
216 } while (0)
217
/*
 * LN_INSERTHEAD: link _ln immediately after the list head llinfo_nd6.
 * Same locking assertions as LN_DEQUEUE.  The old first element's
 * ln_prev is fixed up last, through (_ln)->ln_next, so the statement
 * order below must be preserved.  Sets ND6_LNF_IN_USE.
 */
218 #define LN_INSERTHEAD(_ln) do { \
219 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED); \
220 RT_LOCK_ASSERT_HELD((_ln)->ln_rt); \
221 (_ln)->ln_next = llinfo_nd6.ln_next; \
222 llinfo_nd6.ln_next = (_ln); \
223 (_ln)->ln_prev = &llinfo_nd6; \
224 (_ln)->ln_next->ln_prev = (_ln); \
225 (_ln)->ln_flags |= ND6_LNF_IN_USE; \
226 } while (0)
227
228 static struct zone *llinfo_nd6_zone;
229 #define LLINFO_ND6_ZONE_MAX 256 /* maximum elements in zone */
230 #define LLINFO_ND6_ZONE_NAME "llinfo_nd6" /* name for zone */
231
232 void
233 nd6_init()
234 {
235 static int nd6_init_done = 0;
236 int i;
237
238 if (nd6_init_done) {
239 log(LOG_NOTICE, "nd6_init called more than once (ignored)\n");
240 return;
241 }
242
243 all1_sa.sin6_family = AF_INET6;
244 all1_sa.sin6_len = sizeof(struct sockaddr_in6);
245 for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
246 all1_sa.sin6_addr.s6_addr[i] = 0xff;
247
248 /* initialization of the default router list */
249 TAILQ_INIT(&nd_defrouter);
250
251 nd_if_rwlock_grp_attr = lck_grp_attr_alloc_init();
252 nd_if_rwlock_grp = lck_grp_alloc_init("nd_if_rwlock",
253 nd_if_rwlock_grp_attr);
254 nd_if_rwlock_attr = lck_attr_alloc_init();
255 nd_if_rwlock = lck_rw_alloc_init(nd_if_rwlock_grp, nd_if_rwlock_attr);
256
257 llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
258 LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
259 LLINFO_ND6_ZONE_NAME);
260 if (llinfo_nd6_zone == NULL)
261 panic("%s: failed allocating llinfo_nd6_zone", __func__);
262
263 zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
264 zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);
265
266 nd6_nbr_init();
267 nd6_rtr_init();
268
269 nd6_init_done = 1;
270
271 /* start timer */
272 timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz);
273 }
274
275 static struct llinfo_nd6 *
276 nd6_llinfo_alloc(void)
277 {
278 return (zalloc(llinfo_nd6_zone));
279 }
280
281 static void
282 nd6_llinfo_free(void *arg)
283 {
284 struct llinfo_nd6 *ln = arg;
285
286 if (ln->ln_next != NULL || ln->ln_prev != NULL) {
287 panic("%s: trying to free %p when it is in use", __func__, ln);
288 /* NOTREACHED */
289 }
290
291 /* Just in case there's anything there, free it */
292 if (ln->ln_hold != NULL) {
293 m_freem(ln->ln_hold);
294 ln->ln_hold = NULL;
295 }
296
297 /* Purge any link-layer info caching */
298 VERIFY(ln->ln_rt->rt_llinfo == ln);
299 if (ln->ln_rt->rt_llinfo_purge != NULL)
300 ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
301
302 zfree(llinfo_nd6_zone, ln);
303 }
304
305 static void
306 nd6_llinfo_purge(struct rtentry *rt)
307 {
308 struct llinfo_nd6 *ln = rt->rt_llinfo;
309
310 RT_LOCK_ASSERT_HELD(rt);
311 VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
312
313 if (ln->ln_llreach != NULL) {
314 RT_CONVERT_LOCK(rt);
315 ifnet_llreach_free(ln->ln_llreach);
316 ln->ln_llreach = NULL;
317 }
318 ln->ln_lastused = 0;
319 }
320
321 static void
322 nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
323 {
324 struct llinfo_nd6 *ln = rt->rt_llinfo;
325 struct if_llreach *lr = ln->ln_llreach;
326
327 if (lr == NULL) {
328 bzero(ri, sizeof (*ri));
329 } else {
330 IFLR_LOCK(lr);
331 /* Export to rt_reach_info structure */
332 ifnet_lr2ri(lr, ri);
333 /* Export ND6 send expiration time */
334 ri->ri_snd_expire = ifnet_llreach_up2cal(lr, ln->ln_lastused);
335 IFLR_UNLOCK(lr);
336 }
337 }
338
339 int
340 nd6_ifattach(struct ifnet *ifp)
341 {
342
343 /*
344 * We have some arrays that should be indexed by if_index.
345 * since if_index will grow dynamically, they should grow too.
346 */
347 lck_rw_lock_exclusive(nd_if_rwlock);
348 if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
349 size_t n;
350 caddr_t q;
351 size_t newlim = nd_ifinfo_indexlim;
352
353 while (if_index >= newlim)
354 newlim <<= 1;
355
356 /* grow nd_ifinfo */
357 n = newlim * sizeof(struct nd_ifinfo);
358 q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
359 if (q == NULL) {
360 lck_rw_done(nd_if_rwlock);
361 return ENOBUFS;
362 }
363 bzero(q, n);
364 nd_ifinfo_indexlim = newlim;
365 if (nd_ifinfo) {
366 bcopy((caddr_t)nd_ifinfo, q, n/2);
367 /*
368 * We might want to pattern fill the old
369 * array to catch use-after-free cases.
370 */
371 FREE((caddr_t)nd_ifinfo, M_IP6NDP);
372 }
373 nd_ifinfo = (struct nd_ifinfo *)q;
374 }
375 lck_rw_done(nd_if_rwlock);
376
377 #define ND nd_ifinfo[ifp->if_index]
378
379 /*
380 * Don't initialize if called twice.
381 * XXX: to detect this, we should choose a member that is never set
382 * before initialization of the ND structure itself. We formaly used
383 * the linkmtu member, which was not suitable because it could be
384 * initialized via "ifconfig mtu".
385 */
386 lck_rw_lock_shared(nd_if_rwlock);
387 if (ND.basereachable) {
388 lck_rw_done(nd_if_rwlock);
389 return 0;
390 }
391 ND.linkmtu = ifp->if_mtu;
392 ND.chlim = IPV6_DEFHLIM;
393 ND.basereachable = REACHABLE_TIME;
394 ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
395 ND.retrans = RETRANS_TIMER;
396 ND.flags = ND6_IFF_PERFORMNUD;
397 lck_rw_done(nd_if_rwlock);
398 nd6_setmtu(ifp);
399 #undef ND
400
401 return 0;
402 }
403
404 /*
405 * Reset ND level link MTU. This function is called when the physical MTU
406 * changes, which means we might have to adjust the ND level MTU.
407 */
408 void
409 nd6_setmtu(struct ifnet *ifp)
410 {
411 struct nd_ifinfo *ndi;
412 u_int32_t oldmaxmtu, maxmtu;
413
414 /*
415 * Make sure IPv6 is enabled for the interface first,
416 * because this can be called directly from SIOCSIFMTU for IPv4
417 */
418 lck_rw_lock_shared(nd_if_rwlock);
419 if (ifp->if_index >= nd_ifinfo_indexlim) {
420 lck_rw_done(nd_if_rwlock);
421 return; /* we're out of bound for nd_ifinfo */
422 }
423
424 ndi = &nd_ifinfo[ifp->if_index];
425 oldmaxmtu = ndi->maxmtu;
426
427 /*
428 * The ND level maxmtu is somewhat redundant to the interface MTU
429 * and is an implementation artifact of KAME. Instead of hard-
430 * limiting the maxmtu based on the interface type here, we simply
431 * take the if_mtu value since SIOCSIFMTU would have taken care of
432 * the sanity checks related to the maximum MTU allowed for the
433 * interface (a value that is known only by the interface layer),
434 * by sending the request down via ifnet_ioctl(). The use of the
435 * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
436 * does further checking against if_mtu.
437 */
438 maxmtu = ndi->maxmtu = ifp->if_mtu;
439
440 /*
441 * Decreasing the interface MTU under IPV6 minimum MTU may cause
442 * undesirable situation. We thus notify the operator of the change
443 * explicitly. The check for oldmaxmtu is necessary to restrict the
444 * log to the case of changing the MTU, not initializing it.
445 */
446 if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
447 log(LOG_NOTICE, "nd6_setmtu: "
448 "new link MTU on %s%d (%u) is too small for IPv6\n",
449 ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu);
450 }
451 ndi->linkmtu = ifp->if_mtu;
452 lck_rw_done(nd_if_rwlock);
453
454 /* also adjust in6_maxmtu if necessary. */
455 if (maxmtu > in6_maxmtu)
456 in6_setmaxmtu();
457 }
458
459 void
460 nd6_option_init(
461 void *opt,
462 int icmp6len,
463 union nd_opts *ndopts)
464 {
465 bzero(ndopts, sizeof(*ndopts));
466 ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
467 ndopts->nd_opts_last
468 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
469
470 if (icmp6len == 0) {
471 ndopts->nd_opts_done = 1;
472 ndopts->nd_opts_search = NULL;
473 }
474 }
475
476 /*
477 * Take one ND option.
478 */
479 struct nd_opt_hdr *
480 nd6_option(
481 union nd_opts *ndopts)
482 {
483 struct nd_opt_hdr *nd_opt;
484 int olen;
485
486 if (!ndopts)
487 panic("ndopts == NULL in nd6_option\n");
488 if (!ndopts->nd_opts_last)
489 panic("uninitialized ndopts in nd6_option\n");
490 if (!ndopts->nd_opts_search)
491 return NULL;
492 if (ndopts->nd_opts_done)
493 return NULL;
494
495 nd_opt = ndopts->nd_opts_search;
496
497 /* make sure nd_opt_len is inside the buffer */
498 if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
499 bzero(ndopts, sizeof(*ndopts));
500 return NULL;
501 }
502
503 olen = nd_opt->nd_opt_len << 3;
504 if (olen == 0) {
505 /*
506 * Message validation requires that all included
507 * options have a length that is greater than zero.
508 */
509 bzero(ndopts, sizeof(*ndopts));
510 return NULL;
511 }
512
513 ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
514 if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
515 /* option overruns the end of buffer, invalid */
516 bzero(ndopts, sizeof(*ndopts));
517 return NULL;
518 } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
519 /* reached the end of options chain */
520 ndopts->nd_opts_done = 1;
521 ndopts->nd_opts_search = NULL;
522 }
523 return nd_opt;
524 }
525
526 /*
527 * Parse multiple ND options.
528 * This function is much easier to use, for ND routines that do not need
529 * multiple options of the same type.
530 */
531 int
532 nd6_options(
533 union nd_opts *ndopts)
534 {
535 struct nd_opt_hdr *nd_opt;
536 int i = 0;
537
538 if (ndopts == NULL)
539 panic("ndopts == NULL in nd6_options");
540 if (ndopts->nd_opts_last == NULL)
541 panic("uninitialized ndopts in nd6_options");
542 if (ndopts->nd_opts_search == NULL)
543 return 0;
544
545 while (1) {
546 nd_opt = nd6_option(ndopts);
547 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
548 /*
549 * Message validation requires that all included
550 * options have a length that is greater than zero.
551 */
552 icmp6stat.icp6s_nd_badopt++;
553 bzero(ndopts, sizeof(*ndopts));
554 return -1;
555 }
556
557 if (nd_opt == NULL)
558 goto skip1;
559
560 switch (nd_opt->nd_opt_type) {
561 case ND_OPT_SOURCE_LINKADDR:
562 case ND_OPT_TARGET_LINKADDR:
563 case ND_OPT_MTU:
564 case ND_OPT_REDIRECTED_HEADER:
565 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
566 nd6log((LOG_INFO,
567 "duplicated ND6 option found (type=%d)\n",
568 nd_opt->nd_opt_type));
569 /* XXX bark? */
570 } else {
571 ndopts->nd_opt_array[nd_opt->nd_opt_type]
572 = nd_opt;
573 }
574 break;
575 case ND_OPT_PREFIX_INFORMATION:
576 if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
577 ndopts->nd_opt_array[nd_opt->nd_opt_type]
578 = nd_opt;
579 }
580 ndopts->nd_opts_pi_end =
581 (struct nd_opt_prefix_info *)nd_opt;
582 break;
583 case ND_OPT_RDNSS:
584 /* ignore */
585 break;
586 default:
587 /*
588 * Unknown options must be silently ignored,
589 * to accomodate future extension to the protocol.
590 */
591 nd6log((LOG_DEBUG,
592 "nd6_options: unsupported option %d - "
593 "option ignored\n", nd_opt->nd_opt_type));
594 }
595
596 skip1:
597 i++;
598 if (i > nd6_maxndopt) {
599 icmp6stat.icp6s_nd_toomanyopt++;
600 nd6log((LOG_INFO, "too many loop in nd opt\n"));
601 break;
602 }
603
604 if (ndopts->nd_opts_done)
605 break;
606 }
607
608 return 0;
609 }
610
/*
 * Periodic ND housekeeping; called from nd6_timer().  In order:
 *
 *  1. Walk the global llinfo_nd6 list under rnh_lock and act on every
 *     entry whose ln_expire has passed, according to ln_state (resend
 *     solicitations, move REACHABLE/DELAY to STALE, or free the entry).
 *     Whenever rnh_lock has to be dropped the walk restarts from
 *     "again"; ND6_LNF_TIMER_SKIP marks entries already visited.
 *  2. Delete expired default routers (nd6_mutex).
 *  3. Purge invalid interface addresses, flag deprecated ones and try
 *     to regenerate temporary addresses (in6_ifaddr_rwlock); restarts
 *     from "addrloop" after the list may have changed.
 *  4. Remove expired prefixes (nd6_mutex); this part is serialized
 *     through nd6_drain_busy/nd6_drain_waiters.
 *
 * The current time is sampled once at entry and reused across restarts.
 * NOTE(review): the local must stay named "timenow" if IFA6_IS_INVALID()
 * / IFA6_IS_DEPRECATED() refer to it by name -- confirm in in6_var.h.
 */
611 void
612 nd6_drain(__unused void *ignored_arg)
613 {
614 struct llinfo_nd6 *ln;
615 struct nd_defrouter *dr;
616 struct nd_prefix *pr;
617 struct ifnet *ifp = NULL;
618 struct in6_ifaddr *ia6, *nia6;
619 struct in6_addrlifetime *lt6;
620 struct timeval timenow;
621
622 getmicrotime(&timenow);
623 again:
624 /*
625 * The global list llinfo_nd6 is modified by nd6_rtrequest() and is
626 * therefore protected by rnh_lock. For obvious reasons, we cannot
627 * hold rnh_lock across calls that might lead to code paths which
628 * attempt to acquire rnh_lock, else we deadlock. Hence for such
629 * cases we drop rt_lock and rnh_lock, make the calls, and repeat the
630 * loop. To ensure that we don't process the same entry more than
631 * once in a single timeout, we mark the "already-seen" entries with
632 * ND6_LNF_TIMER_SKIP flag. At the end of the loop, we do a second
633 * pass thru the entries and clear the flag so they can be processed
634 * during the next timeout.
635 */
636 lck_mtx_lock(rnh_lock);
637 ln = llinfo_nd6.ln_next;
638 while (ln != NULL && ln != &llinfo_nd6) {
639 struct rtentry *rt;
640 struct sockaddr_in6 *dst;
641 struct llinfo_nd6 *next;
642
643 /* ln_next/prev/rt is protected by rnh_lock */
644 next = ln->ln_next;
645 rt = ln->ln_rt;
646 RT_LOCK(rt);
647
648 /* We've seen this already; skip it */
649 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
650 RT_UNLOCK(rt);
651 ln = next;
652 continue;
653 }
654
655 /* rt->rt_ifp should never be NULL */
656 if ((ifp = rt->rt_ifp) == NULL) {
657 panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
658 ln, rt);
659 /* NOTREACHED */
660 }
661
662 /* rt_llinfo must always be equal to ln */
663 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
664 panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
665 __func__, rt->rt_llinfo, ln);
666 /* NOTREACHED */
667 }
668
669 /* rt_key should never be NULL */
670 dst = (struct sockaddr_in6 *)rt_key(rt);
671 if (dst == NULL) {
672 panic("%s: rt(%p) key is NULL ln(%p)", __func__,
673 rt, ln);
674 /* NOTREACHED */
675 }
676
677 /* Set the flag in case we jump to "again" */
678 ln->ln_flags |= ND6_LNF_TIMER_SKIP;
679
/* Not expired yet; nothing to do for this entry */
680 if (ln->ln_expire > timenow.tv_sec) {
681 RT_UNLOCK(rt);
682 ln = next;
683 continue;
684 }
685
686 /* Skip entries whose if_index lies beyond nd_ifinfo (bounds check only; nothing is copied here) */
687 lck_rw_lock_shared(nd_if_rwlock);
688 if (ifp->if_index >= nd_ifinfo_indexlim) {
689 lck_rw_done(nd_if_rwlock);
690 RT_UNLOCK(rt);
691 ln = next;
692 continue;
693 }
694 lck_rw_done(nd_if_rwlock);
695
696 RT_LOCK_ASSERT_HELD(rt);
697
698 switch (ln->ln_state) {
699 case ND6_LLINFO_INCOMPLETE:
700 if (ln->ln_asked < nd6_mmaxtries) {
701 ln->ln_asked++;
702 lck_rw_lock_shared(nd_if_rwlock);
703 ln->ln_expire = timenow.tv_sec +
704 nd_ifinfo[ifp->if_index].retrans / 1000;
705 lck_rw_done(nd_if_rwlock);
/* Hold a reference on rt across the unlocked nd6_ns_output() call */
706 RT_ADDREF_LOCKED(rt);
707 RT_UNLOCK(rt);
708 lck_mtx_unlock(rnh_lock);
709 nd6_ns_output(ifp, NULL, &dst->sin6_addr,
710 ln, 0);
711 RT_REMREF(rt);
712 } else {
/* Retries exhausted: report the held packet (if any) as unreachable, then free the entry */
713 struct mbuf *m = ln->ln_hold;
714 ln->ln_hold = NULL;
715 if (m != NULL) {
716 /*
717 * Fake rcvif to make ICMP error
718 * more helpful in diagnosing
719 * for the receiver.
720 * XXX: should we consider
721 * older rcvif?
722 */
723 m->m_pkthdr.rcvif = ifp;
724 RT_UNLOCK(rt);
725 lck_mtx_unlock(rnh_lock);
726 icmp6_error(m, ICMP6_DST_UNREACH,
727 ICMP6_DST_UNREACH_ADDR, 0);
728 } else {
729 RT_UNLOCK(rt);
730 lck_mtx_unlock(rnh_lock);
731 }
/* NOTE(review): rt is handed to nd6_free() with no lock held and no extra reference taken in this function -- confirm that is safe */
732 nd6_free(rt);
733 }
734 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
735 goto again;
736
737 case ND6_LLINFO_REACHABLE:
738 if (ln->ln_expire) {
739 ln->ln_state = ND6_LLINFO_STALE;
740 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
741 nd6_gctimer);
742 }
743 RT_UNLOCK(rt);
744 break;
745
746 case ND6_LLINFO_STALE:
747 case ND6_LLINFO_PURGE:
748 /* Garbage Collection(RFC 2461 5.3) */
749 if (ln->ln_expire) {
750 RT_UNLOCK(rt);
751 lck_mtx_unlock(rnh_lock);
/* NOTE(review): same unreferenced nd6_free(rt) pattern as in the INCOMPLETE case */
752 nd6_free(rt);
753 lck_mtx_assert(rnh_lock,
754 LCK_MTX_ASSERT_NOTOWNED);
755 goto again;
756 } else {
757 RT_UNLOCK(rt);
758 }
759 break;
760
761 case ND6_LLINFO_DELAY:
762 lck_rw_lock_shared(nd_if_rwlock);
763 if ((nd_ifinfo[ifp->if_index].flags &
764 ND6_IFF_PERFORMNUD) != 0) {
765 /* We need NUD */
766 ln->ln_asked = 1;
767 ln->ln_state = ND6_LLINFO_PROBE;
768 ln->ln_expire = timenow.tv_sec +
769 nd_ifinfo[ifp->if_index].retrans / 1000;
770 lck_rw_done(nd_if_rwlock);
771 RT_ADDREF_LOCKED(rt);
772 RT_UNLOCK(rt);
773 lck_mtx_unlock(rnh_lock);
774 nd6_ns_output(ifp, &dst->sin6_addr,
775 &dst->sin6_addr, ln, 0);
776 lck_mtx_assert(rnh_lock,
777 LCK_MTX_ASSERT_NOTOWNED);
778 RT_REMREF(rt);
779 goto again;
780 }
781 lck_rw_done(nd_if_rwlock);
/* NUD is not performed on this interface: go straight to STALE */
782 ln->ln_state = ND6_LLINFO_STALE; /* XXX */
783 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
784 nd6_gctimer);
785 RT_UNLOCK(rt);
786 break;
787
788 case ND6_LLINFO_PROBE:
789 if (ln->ln_asked < nd6_umaxtries) {
790 ln->ln_asked++;
791 lck_rw_lock_shared(nd_if_rwlock);
792 ln->ln_expire = timenow.tv_sec +
793 nd_ifinfo[ifp->if_index].retrans / 1000;
794 lck_rw_done(nd_if_rwlock);
795 RT_ADDREF_LOCKED(rt);
796 RT_UNLOCK(rt);
797 lck_mtx_unlock(rnh_lock);
798 nd6_ns_output(ifp, &dst->sin6_addr,
799 &dst->sin6_addr, ln, 0);
800 RT_REMREF(rt);
801 } else {
802 RT_UNLOCK(rt);
803 lck_mtx_unlock(rnh_lock);
/* NOTE(review): same unreferenced nd6_free(rt) pattern as in the INCOMPLETE case */
804 nd6_free(rt);
805 }
806 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
807 goto again;
808
809 default:
810 RT_UNLOCK(rt);
811 break;
812 }
813 ln = next;
814 }
815 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
816
817 /* Now clear the flag from all entries */
818 ln = llinfo_nd6.ln_next;
819 while (ln != NULL && ln != &llinfo_nd6) {
820 struct rtentry *rt = ln->ln_rt;
821 struct llinfo_nd6 *next = ln->ln_next;
822
823 RT_LOCK_SPIN(rt);
824 if (ln->ln_flags & ND6_LNF_TIMER_SKIP)
825 ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
826 RT_UNLOCK(rt);
827 ln = next;
828 }
829 lck_mtx_unlock(rnh_lock);
830
831 /* expire default router list */
832 lck_mtx_lock(nd6_mutex);
833 dr = TAILQ_FIRST(&nd_defrouter);
834 while (dr) {
835 if (dr->expire && dr->expire < timenow.tv_sec) {
/* Fetch the successor before dr is deleted */
836 struct nd_defrouter *t;
837 t = TAILQ_NEXT(dr, dr_entry);
838 defrtrlist_del(dr);
839 dr = t;
840 } else {
841 dr = TAILQ_NEXT(dr, dr_entry);
842 }
843 }
844 lck_mtx_unlock(nd6_mutex);
845
846 /*
847 * expire interface addresses.
848 * in the past the loop was inside prefix expiry processing.
849 * However, from a stricter spec-conformance standpoint, we should
850 * rather separate address lifetimes and prefix lifetimes.
851 */
852 addrloop:
853 lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
854 for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
855 nia6 = ia6->ia_next;
856 IFA_LOCK(&ia6->ia_ifa);
857 /*
858 * Extra reference for ourselves; it's no-op if
859 * we don't have to regenerate temporary address,
860 * otherwise it protects the address from going
861 * away since we drop in6_ifaddr_rwlock below.
862 */
863 IFA_ADDREF_LOCKED(&ia6->ia_ifa);
864 /* check address lifetime */
865 lt6 = &ia6->ia6_lifetime;
866 if (IFA6_IS_INVALID(ia6)) {
867 /*
868 * If the expiring address is temporary, try
869 * regenerating a new one. This would be useful when
870 * we suspended a laptop PC, then turned it on after a
871 * period that could invalidate all temporary
872 * addresses. Although we may have to restart the
873 * loop (see below), it must be after purging the
874 * address. Otherwise, we'd see an infinite loop of
875 * regeneration.
876 */
877 if (ip6_use_tempaddr &&
878 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
879 /* NOTE: We have to drop the lock here because
880 * regen_tmpaddr() eventually calls in6_update_ifa(),
881 * which must take the lock and would otherwise cause a
882 * hang. This is safe because the goto addrloop
883 * leads to a reevaluation of the in6_ifaddrs list
884 */
885 IFA_UNLOCK(&ia6->ia_ifa);
886 lck_rw_done(&in6_ifaddr_rwlock);
887 (void) regen_tmpaddr(ia6);
888 } else {
889 IFA_UNLOCK(&ia6->ia_ifa);
890 lck_rw_done(&in6_ifaddr_rwlock);
891 }
892
893 /*
894 * Purging the address would have caused
895 * in6_ifaddr_rwlock to be dropped and reacquired;
896 * therefore search again from the beginning
897 * of in6_ifaddrs list.
898 */
899 in6_purgeaddr(&ia6->ia_ifa);
900
901 /* Release extra reference taken above */
902 IFA_REMREF(&ia6->ia_ifa);
903 goto addrloop;
904 }
905 IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
906 if (IFA6_IS_DEPRECATED(ia6)) {
907 int oldflags = ia6->ia6_flags;
908
909 ia6->ia6_flags |= IN6_IFF_DEPRECATED;
910
911 /*
912 * If a temporary address has just become deprecated,
913 * regenerate a new one if possible.
914 */
915 if (ip6_use_tempaddr &&
916 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
917 (oldflags & IN6_IFF_DEPRECATED) == 0) {
918
919 /* see NOTE above */
920 IFA_UNLOCK(&ia6->ia_ifa);
921 lck_rw_done(&in6_ifaddr_rwlock);
922 if (regen_tmpaddr(ia6) == 0) {
923 /*
924 * A new temporary address is
925 * generated.
926 * XXX: this means the address chain
927 * has changed while we are still in
928 * the loop. Although the change
929 * would not cause disaster (because
930 * it's not a deletion, but an
931 * addition,) we'd rather restart the
932 * loop just for safety. Or does this
933 * significantly reduce performance??
934 */
935 /* Release extra reference */
936 IFA_REMREF(&ia6->ia_ifa);
937 goto addrloop;
938 }
/* NOTE(review): the walk continues with nia6, which was read before in6_ifaddr_rwlock was dropped above -- confirm it cannot go stale */
939 lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
940 } else {
941 IFA_UNLOCK(&ia6->ia_ifa);
942 }
943 } else {
944 /*
945 * A new RA might have made a deprecated address
946 * preferred.
947 */
948 ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
949 IFA_UNLOCK(&ia6->ia_ifa);
950 }
951 lck_rw_assert(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
952 /* Release extra reference taken above */
953 IFA_REMREF(&ia6->ia_ifa);
954 }
955 lck_rw_done(&in6_ifaddr_rwlock);
956
957 lck_mtx_lock(nd6_mutex);
958 /*
959 * Since we drop the nd6_mutex in prelist_remove, we want to run this
960 * section single threaded.
961 */
962 while (nd6_drain_busy) {
963 nd6_drain_waiters++;
964 msleep(nd6_drain_waitchan, nd6_mutex, (PZERO-1),
965 __func__, NULL);
966 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
967 }
968 nd6_drain_busy = TRUE;
969
970 /* expire prefix list */
971 pr = nd_prefix.lh_first;
972 while (pr) {
973 /*
974 * check prefix lifetime.
975 * since pltime is just for autoconf, pltime processing for
976 * prefix is not necessary.
977 */
978 NDPR_LOCK(pr);
/* NDPRF_PROCESSED marks prefixes already visited in this pass, since the walk restarts from the head after a removal */
979 if (pr->ndpr_stateflags & NDPRF_PROCESSED) {
980 NDPR_UNLOCK(pr);
981 pr = pr->ndpr_next;
982 continue;
983 }
984 if (pr->ndpr_expire && pr->ndpr_expire < timenow.tv_sec) {
985 /*
986 * address expiration and prefix expiration are
987 * separate. NEVER perform in6_purgeaddr here.
988 */
989 pr->ndpr_stateflags |= NDPRF_PROCESSED;
990 NDPR_ADDREF_LOCKED(pr);
991 prelist_remove(pr);
992 NDPR_UNLOCK(pr);
993 NDPR_REMREF(pr);
994 pr = nd_prefix.lh_first;
995 } else {
996 pr->ndpr_stateflags |= NDPRF_PROCESSED;
997 NDPR_UNLOCK(pr);
998 pr = pr->ndpr_next;
999 }
1000 }
/* Second pass: clear NDPRF_PROCESSED so the next run looks at every prefix again */
1001 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1002 NDPR_LOCK(pr);
1003 pr->ndpr_stateflags &= ~NDPRF_PROCESSED;
1004 NDPR_UNLOCK(pr);
1005 }
1006 nd6_drain_busy = FALSE;
/* Wake any thread that went to sleep above waiting for nd6_drain_busy to clear */
1007 if (nd6_drain_waiters > 0) {
1008 nd6_drain_waiters = 0;
1009 wakeup(nd6_drain_waitchan);
1010 }
1011 lck_mtx_unlock(nd6_mutex);
1012 }
1013
1014 /*
1015 * ND6 timer routine to expire default route list and prefix list
1016 */
1017 void
1018 nd6_timer(__unused void *ignored_arg)
1019 {
1020 nd6_drain(NULL);
1021 timeout(nd6_timer, (caddr_t)0, nd6_prune * hz);
1022 }
1023
1024 static int
1025 regen_tmpaddr(
1026 struct in6_ifaddr *ia6) /* deprecated/invalidated temporary address */
1027 {
1028 struct ifaddr *ifa;
1029 struct ifnet *ifp;
1030 struct in6_ifaddr *public_ifa6 = NULL;
1031 struct timeval timenow;
1032
1033 getmicrotime(&timenow);
1034
1035 ifp = ia6->ia_ifa.ifa_ifp;
1036 ifnet_lock_shared(ifp);
1037 for (ifa = ifp->if_addrlist.tqh_first; ifa;
1038 ifa = ifa->ifa_list.tqe_next)
1039 {
1040 struct in6_ifaddr *it6;
1041
1042 IFA_LOCK(ifa);
1043 if (ifa->ifa_addr->sa_family != AF_INET6) {
1044 IFA_UNLOCK(ifa);
1045 continue;
1046 }
1047 it6 = (struct in6_ifaddr *)ifa;
1048
1049 /* ignore no autoconf addresses. */
1050 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
1051 IFA_UNLOCK(ifa);
1052 continue;
1053 }
1054 /* ignore autoconf addresses with different prefixes. */
1055 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
1056 IFA_UNLOCK(ifa);
1057 continue;
1058 }
1059 /*
1060 * Now we are looking at an autoconf address with the same
1061 * prefix as ours. If the address is temporary and is still
1062 * preferred, do not create another one. It would be rare, but
1063 * could happen, for example, when we resume a laptop PC after
1064 * a long period.
1065 */
1066 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1067 !IFA6_IS_DEPRECATED(it6)) {
1068 IFA_UNLOCK(ifa);
1069 if (public_ifa6 != NULL)
1070 IFA_REMREF(&public_ifa6->ia_ifa);
1071 public_ifa6 = NULL;
1072 break;
1073 }
1074
1075 /*
1076 * This is a public autoconf address that has the same prefix
1077 * as ours. If it is preferred, keep it. We can't break the
1078 * loop here, because there may be a still-preferred temporary
1079 * address with the prefix.
1080 */
1081 if (!IFA6_IS_DEPRECATED(it6)) {
1082 IFA_ADDREF_LOCKED(ifa); /* for public_ifa6 */
1083 IFA_UNLOCK(ifa);
1084 if (public_ifa6 != NULL)
1085 IFA_REMREF(&public_ifa6->ia_ifa);
1086 public_ifa6 = it6;
1087 } else {
1088 IFA_UNLOCK(ifa);
1089 }
1090 }
1091 ifnet_lock_done(ifp);
1092
1093 if (public_ifa6 != NULL) {
1094 int e;
1095
1096 if ((e = in6_tmpifadd(public_ifa6, 0, M_WAITOK)) != 0) {
1097 log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1098 " tmp addr,errno=%d\n", e);
1099 IFA_REMREF(&public_ifa6->ia_ifa);
1100 return(-1);
1101 }
1102 IFA_REMREF(&public_ifa6->ia_ifa);
1103 return(0);
1104 }
1105
1106 return(-1);
1107 }
1108
/*
 * nd6_purge(ifp) tears down the ND6 state that refers to ifp, in this order:
 *   1. default router entries whose dr->ifp is ifp (under nd6_mutex);
 *   2. prefix entries whose ndpr_ifp is ifp (still under nd6_mutex);
 *   3. the default outgoing interface, if nd6_defifindex is ifp's index;
 *   4. a default router reset/reselection, when the node is not forwarding
 *      and router advertisements are accepted globally or on ifp;
 *   5. neighbor cache entries whose AF_LINK gateway carries ifp's index
 *      (walked under rnh_lock, freed with all locks dropped).
 */
1109 /*
1110 * Nuke neighbor cache/prefix/default router management table, right before
1111 * ifp goes away.
1112 */
1113 void
1114 nd6_purge(
1115 struct ifnet *ifp)
1116 {
1117 struct llinfo_nd6 *ln;
1118 struct nd_defrouter *dr, *ndr;
1119 struct nd_prefix *pr, *npr;
1120
1121 /* Nuke default router list entries toward ifp */
1122 lck_mtx_lock(nd6_mutex);
1123 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1124 /*
1125 * The first entry of the list may be stored in
1126 * the routing table, so we'll delete it later.
1127 */
/* First pass: entries after the head that are not marked NDDRF_INSTALLED. */
1128 for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
1129 ndr = TAILQ_NEXT(dr, dr_entry);
1130 if (dr->stateflags & NDDRF_INSTALLED)
1131 continue;
1132 if (dr->ifp == ifp)
1133 defrtrlist_del(dr);
1134 }
/* The head is never removed by the loop above, so the list is still non-empty. */
1135 dr = TAILQ_FIRST(&nd_defrouter);
1136 if (dr->ifp == ifp)
1137 defrtrlist_del(dr);
1138 }
1139
/* Second pass: the NDDRF_INSTALLED entries that were skipped above. */
1140 for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = ndr) {
1141 ndr = TAILQ_NEXT(dr, dr_entry);
1142 if (!(dr->stateflags & NDDRF_INSTALLED))
1143 continue;
1144
1145 if (dr->ifp == ifp)
1146 defrtrlist_del(dr);
1147 }
1148
1149 /* Nuke prefix list entries toward ifp */
/*
 * NOTE(review): npr is sampled before prelist_remove() runs.  The
 * nd6_drain() loop earlier in this file restarts from the list head after
 * prelist_remove(); if prelist_remove() can drop nd6_mutex, npr may be
 * stale here -- confirm.
 */
1150 for (pr = nd_prefix.lh_first; pr; pr = npr) {
1151 npr = pr->ndpr_next;
1152 NDPR_LOCK(pr);
1153 if (pr->ndpr_ifp == ifp) {
1154 /*
1155 * Because if_detach() does *not* release prefixes
1156 * while purging addresses the reference count will
1157 * still be above zero. We therefore reset it to
1158 * make sure that the prefix really gets purged.
1159 */
1160 pr->ndpr_addrcnt = 0;
1161
1162 /*
1163 * Previously, pr->ndpr_addr is removed as well,
1164 * but I strongly believe we don't have to do it.
1165 * nd6_purge() is only called from in6_ifdetach(),
1166 * which removes all the associated interface addresses
1167 * by itself.
1168 * (jinmei@kame.net 20010129)
1169 */
/* Hold a reference across prelist_remove() so pr stays valid until unlocked. */
1170 NDPR_ADDREF_LOCKED(pr);
1171 prelist_remove(pr);
1172 NDPR_UNLOCK(pr);
1173 NDPR_REMREF(pr);
1174 } else {
1175 NDPR_UNLOCK(pr);
1176 }
1177 }
1178 lck_mtx_unlock(nd6_mutex);
1179
1180 /* cancel default outgoing interface setting */
1181 if (nd6_defifindex == ifp->if_index) {
1182 nd6_setdefaultiface(0);
1183 }
1184
/* Host mode with RA processing enabled (globally or per-interface) only. */
1185 if (!ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
1186 lck_mtx_lock(nd6_mutex);
1187 /* refresh default router list */
1188 defrouter_reset();
1189 defrouter_select(ifp);
1190 lck_mtx_unlock(nd6_mutex);
1191 }
1192
1193 /*
1194 * Nuke neighbor cache entries for the ifp.
1195 * Note that rt->rt_ifp may not be the same as ifp,
1196 * due to KAME goto ours hack. See RTM_RESOLVE case in
1197 * nd6_rtrequest(), and ip6_input().
1198 */
1199 again:
1200 lck_mtx_lock(rnh_lock);
1201 ln = llinfo_nd6.ln_next;
/* llinfo_nd6 is the list sentinel; stop when the walk wraps back to it. */
1202 while (ln != NULL && ln != &llinfo_nd6) {
1203 struct rtentry *rt;
1204 struct llinfo_nd6 *nln;
1205
1206 nln = ln->ln_next;
1207 rt = ln->ln_rt;
1208 RT_LOCK(rt);
/* Match on the interface index stored in the AF_LINK gateway, not rt_ifp. */
1209 if (rt->rt_gateway != NULL &&
1210 rt->rt_gateway->sa_family == AF_LINK &&
1211 SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
1212 RT_UNLOCK(rt);
1213 lck_mtx_unlock(rnh_lock);
1214 /*
1215 * See comments on nd6_timer() for reasons why
1216 * this loop is repeated; we bite the costs of
1217 * going thru the same llinfo_nd6 more than once
1218 * here, since this purge happens during detach,
1219 * and that unlike the timer case, it's possible
1220 * there's more than one purges happening at the
1221 * same time (thus a flag wouldn't buy anything).
1222 */
/* nd6_free() is called with neither rnh_lock nor the route lock held. */
1223 nd6_free(rt);
1224 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1225 goto again;
1226 } else {
1227 RT_UNLOCK(rt);
1228 }
1229 ln = nln;
1230 }
1231 lck_mtx_unlock(rnh_lock);
1232 }
1233
1234 /*
1235 * Upon success, the returned route will be locked and the caller is
1236 * responsible for releasing the reference and doing RT_UNLOCK(rt).
1237 * This routine does not require rnh_lock to be held by the caller,
1238 * although it needs to be indicated of such a case in order to call
1239 * the correct variant of the relevant routing routines.
1240 */
1241 struct rtentry *
1242 nd6_lookup(
1243 struct in6_addr *addr6,
1244 int create,
1245 struct ifnet *ifp,
1246 int rt_locked)
1247 {
1248 struct rtentry *rt;
1249 struct sockaddr_in6 sin6;
1250 unsigned int ifscope;
1251
1252 bzero(&sin6, sizeof(sin6));
1253 sin6.sin6_len = sizeof(struct sockaddr_in6);
1254 sin6.sin6_family = AF_INET6;
1255 sin6.sin6_addr = *addr6;
1256
1257 ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
1258 if (rt_locked) {
1259 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1260 rt = rtalloc1_scoped_locked((struct sockaddr *)&sin6,
1261 create, 0, ifscope);
1262 } else {
1263 rt = rtalloc1_scoped((struct sockaddr *)&sin6,
1264 create, 0, ifscope);
1265 }
1266
1267 if (rt != NULL) {
1268 RT_LOCK(rt);
1269 if ((rt->rt_flags & RTF_LLINFO) == 0) {
1270 /*
1271 * This is the case for the default route.
1272 * If we want to create a neighbor cache for the
1273 * address, we should free the route for the
1274 * destination and allocate an interface route.
1275 */
1276 if (create) {
1277 RT_UNLOCK(rt);
1278 if (rt_locked)
1279 rtfree_locked(rt);
1280 else
1281 rtfree(rt);
1282 rt = NULL;
1283 }
1284 }
1285 }
1286 if (rt == NULL) {
1287 if (create && ifp) {
1288 struct ifaddr *ifa;
1289 u_int32_t ifa_flags;
1290 int e;
1291
1292 /*
1293 * If no route is available and create is set,
1294 * we allocate a host route for the destination
1295 * and treat it like an interface route.
1296 * This hack is necessary for a neighbor which can't
1297 * be covered by our own prefix.
1298 */
1299 ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
1300 if (ifa == NULL)
1301 return(NULL);
1302
1303 /*
1304 * Create a new route. RTF_LLINFO is necessary
1305 * to create a Neighbor Cache entry for the
1306 * destination in nd6_rtrequest which will be
1307 * called in rtrequest via ifa->ifa_rtrequest.
1308 */
1309 if (!rt_locked)
1310 lck_mtx_lock(rnh_lock);
1311 IFA_LOCK_SPIN(ifa);
1312 ifa_flags = ifa->ifa_flags;
1313 IFA_UNLOCK(ifa);
1314 if ((e = rtrequest_scoped_locked(RTM_ADD,
1315 (struct sockaddr *)&sin6, ifa->ifa_addr,
1316 (struct sockaddr *)&all1_sa,
1317 (ifa_flags | RTF_HOST | RTF_LLINFO) &
1318 ~RTF_CLONING, &rt, ifscope)) != 0) {
1319 if (e != EEXIST)
1320 log(LOG_ERR, "%s: failed to add route "
1321 "for a neighbor(%s), errno=%d\n",
1322 __func__, ip6_sprintf(addr6), e);
1323 }
1324 if (!rt_locked)
1325 lck_mtx_unlock(rnh_lock);
1326 IFA_REMREF(ifa);
1327 if (rt == NULL)
1328 return(NULL);
1329
1330 RT_LOCK(rt);
1331 if (rt->rt_llinfo) {
1332 struct llinfo_nd6 *ln = rt->rt_llinfo;
1333 ln->ln_state = ND6_LLINFO_NOSTATE;
1334 }
1335 } else {
1336 return(NULL);
1337 }
1338 }
1339 RT_LOCK_ASSERT_HELD(rt);
1340 /*
1341 * Validation for the entry.
1342 * Note that the check for rt_llinfo is necessary because a cloned
1343 * route from a parent route that has the L flag (e.g. the default
1344 * route to a p2p interface) may have the flag, too, while the
1345 * destination is not actually a neighbor.
1346 * XXX: we can't use rt->rt_ifp to check for the interface, since
1347 * it might be the loopback interface if the entry is for our
1348 * own address on a non-loopback interface. Instead, we should
1349 * use rt->rt_ifa->ifa_ifp, which would specify the REAL
1350 * interface.
1351 * Note also that ifa_ifp and ifp may differ when we connect two
1352 * interfaces to a same link, install a link prefix to an interface,
1353 * and try to install a neighbor cache on an interface that does not
1354 * have a route to the prefix.
1355 */
1356 if (ifp == NULL ||
1357 (rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
1358 rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
1359 (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
1360 RT_REMREF_LOCKED(rt);
1361 RT_UNLOCK(rt);
1362 if (create) {
1363 log(LOG_DEBUG, "%s: failed to lookup %s "
1364 "(if = %s)\n", __func__, ip6_sprintf(addr6),
1365 ifp ? if_name(ifp) : "unspec");
1366 /* xxx more logs... kazu */
1367 }
1368 return(NULL);
1369 }
1370 /*
1371 * Caller needs to release reference and call RT_UNLOCK(rt).
1372 */
1373 return(rt);
1374 }
1375
1376 /*
1377 * Test whether a given IPv6 address is a neighbor or not, ignoring
1378 * the actual neighbor cache. The neighbor cache is ignored in order
1379 * to not reenter the routing code from within itself.
1380 */
1381 static int
1382 nd6_is_new_addr_neighbor(
1383 struct sockaddr_in6 *addr,
1384 struct ifnet *ifp)
1385 {
1386 struct nd_prefix *pr;
1387 struct ifaddr *dstaddr;
1388
1389 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1390
1391 /*
1392 * A link-local address is always a neighbor.
1393 * XXX: a link does not necessarily specify a single interface.
1394 */
1395 if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
1396 struct sockaddr_in6 sin6_copy;
1397 u_int32_t zone;
1398
1399 /*
1400 * We need sin6_copy since sa6_recoverscope() may modify the
1401 * content (XXX).
1402 */
1403 sin6_copy = *addr;
1404 if (sa6_recoverscope(&sin6_copy))
1405 return (0); /* XXX: should be impossible */
1406 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
1407 return (0);
1408 if (sin6_copy.sin6_scope_id == zone)
1409 return (1);
1410 else
1411 return (0);
1412 }
1413
1414 /*
1415 * If the address matches one of our addresses,
1416 * it should be a neighbor.
1417 * If the address matches one of our on-link prefixes, it should be a
1418 * neighbor.
1419 */
1420 for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1421 NDPR_LOCK(pr);
1422 if (pr->ndpr_ifp != ifp) {
1423 NDPR_UNLOCK(pr);
1424 continue;
1425 }
1426 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
1427 NDPR_UNLOCK(pr);
1428 continue;
1429 }
1430 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1431 &addr->sin6_addr, &pr->ndpr_mask)) {
1432 NDPR_UNLOCK(pr);
1433 return (1);
1434 }
1435 NDPR_UNLOCK(pr);
1436 }
1437
1438 /*
1439 * If the address is assigned on the node of the other side of
1440 * a p2p interface, the address should be a neighbor.
1441 */
1442 dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
1443 if (dstaddr != NULL) {
1444 if (dstaddr->ifa_ifp == ifp) {
1445 IFA_REMREF(dstaddr);
1446 return (1);
1447 }
1448 IFA_REMREF(dstaddr);
1449 dstaddr = NULL;
1450 }
1451
1452 /*
1453 * If the default router list is empty, all addresses are regarded
1454 * as on-link, and thus, as a neighbor.
1455 * XXX: we restrict the condition to hosts, because routers usually do
1456 * not have the "default router list".
1457 */
1458 if (!ip6_forwarding && TAILQ_FIRST(&nd_defrouter) == NULL &&
1459 nd6_defifindex == ifp->if_index) {
1460 return (1);
1461 }
1462
1463 return (0);
1464 }
1465
1466
1467 /*
1468 * Detect if a given IPv6 address identifies a neighbor on a given link.
1469 * XXX: should take care of the destination of a p2p link?
1470 */
1471 int
1472 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp, int rt_locked)
1473 {
1474 struct rtentry *rt;
1475
1476 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
1477 lck_mtx_lock(nd6_mutex);
1478 if (nd6_is_new_addr_neighbor(addr, ifp)) {
1479 lck_mtx_unlock(nd6_mutex);
1480 return (1);
1481 }
1482 lck_mtx_unlock(nd6_mutex);
1483
1484 /*
1485 * Even if the address matches none of our addresses, it might be
1486 * in the neighbor cache.
1487 */
1488 if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
1489 RT_LOCK_ASSERT_HELD(rt);
1490 RT_REMREF_LOCKED(rt);
1491 RT_UNLOCK(rt);
1492 return (1);
1493 }
1494
1495 return (0);
1496 }
1497
1498 /*
1499 * Free an nd6 llinfo entry.
1500 * Since the function would cause significant changes in the kernel, DO NOT
1501 * make it global, unless you have a strong reason for the change, and are sure
1502 * that the change is safe.
1503 */
1504 void
1505 nd6_free(
1506 struct rtentry *rt)
1507 {
1508 struct llinfo_nd6 *ln;
1509 struct in6_addr in6;
1510 struct nd_defrouter *dr;
1511
1512 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1513 RT_LOCK_ASSERT_NOTHELD(rt);
1514 lck_mtx_lock(nd6_mutex);
1515
1516 RT_LOCK(rt);
1517 RT_ADDREF_LOCKED(rt); /* Extra ref */
1518 ln = rt->rt_llinfo;
1519 in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
1520
1521 /*
1522 * Prevent another thread from modifying rt_key, rt_gateway
1523 * via rt_setgate() after the rt_lock is dropped by marking
1524 * the route as defunct.
1525 */
1526 rt->rt_flags |= RTF_CONDEMNED;
1527
1528 /*
1529 * we used to have pfctlinput(PRC_HOSTDEAD) here.
1530 * even though it is not harmful, it was not really necessary.
1531 */
1532
1533 if (!ip6_forwarding && (ip6_accept_rtadv ||
1534 (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
1535 dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->
1536 sin6_addr, rt->rt_ifp);
1537
1538 if ((ln && ln->ln_router) || dr) {
1539 /*
1540 * rt6_flush must be called whether or not the neighbor
1541 * is in the Default Router List.
1542 * See a corresponding comment in nd6_na_input().
1543 */
1544 RT_UNLOCK(rt);
1545 lck_mtx_unlock(nd6_mutex);
1546 rt6_flush(&in6, rt->rt_ifp);
1547 lck_mtx_lock(nd6_mutex);
1548 } else {
1549 RT_UNLOCK(rt);
1550 }
1551
1552 if (dr) {
1553 NDDR_REMREF(dr);
1554 /*
1555 * Unreachablity of a router might affect the default
1556 * router selection and on-link detection of advertised
1557 * prefixes.
1558 */
1559
1560 /*
1561 * Temporarily fake the state to choose a new default
1562 * router and to perform on-link determination of
1563 * prefixes correctly.
1564 * Below the state will be set correctly,
1565 * or the entry itself will be deleted.
1566 */
1567 RT_LOCK_SPIN(rt);
1568 ln->ln_state = ND6_LLINFO_INCOMPLETE;
1569
1570 /*
1571 * Since defrouter_select() does not affect the
1572 * on-link determination and MIP6 needs the check
1573 * before the default router selection, we perform
1574 * the check now.
1575 */
1576 RT_UNLOCK(rt);
1577 pfxlist_onlink_check();
1578
1579 /*
1580 * refresh default router list
1581 */
1582 defrouter_select(rt->rt_ifp);
1583 }
1584 RT_LOCK_ASSERT_NOTHELD(rt);
1585 } else {
1586 RT_UNLOCK(rt);
1587 }
1588
1589 lck_mtx_unlock(nd6_mutex);
1590 /*
1591 * Detach the route from the routing tree and the list of neighbor
1592 * caches, and disable the route entry not to be used in already
1593 * cached routes.
1594 */
1595 (void) rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1596 rt_mask(rt), 0, (struct rtentry **)0);
1597
1598 /* Extra ref held above; now free it */
1599 rtfree(rt);
1600 }
1601
1602 /*
1603 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1604 *
1605 * XXX cost-effective methods?
1606 */
1607 void
1608 nd6_nud_hint(
1609 struct rtentry *rt,
1610 struct in6_addr *dst6,
1611 int force)
1612 {
1613 struct llinfo_nd6 *ln;
1614 struct timeval timenow;
1615
1616 getmicrotime(&timenow);
1617
1618 /*
1619 * If the caller specified "rt", use that. Otherwise, resolve the
1620 * routing table by supplied "dst6".
1621 */
1622 if (!rt) {
1623 if (!dst6)
1624 return;
1625 /* Callee returns a locked route upon success */
1626 if ((rt = nd6_lookup(dst6, 0, NULL, 0)) == NULL)
1627 return;
1628 RT_LOCK_ASSERT_HELD(rt);
1629 } else {
1630 RT_LOCK(rt);
1631 RT_ADDREF_LOCKED(rt);
1632 }
1633
1634 if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1635 (rt->rt_flags & RTF_LLINFO) == 0 ||
1636 !rt->rt_llinfo || !rt->rt_gateway ||
1637 rt->rt_gateway->sa_family != AF_LINK) {
1638 /* This is not a host route. */
1639 goto done;
1640 }
1641
1642 ln = rt->rt_llinfo;
1643 if (ln->ln_state < ND6_LLINFO_REACHABLE)
1644 goto done;
1645
1646 /*
1647 * if we get upper-layer reachability confirmation many times,
1648 * it is possible we have false information.
1649 */
1650 if (!force) {
1651 ln->ln_byhint++;
1652 if (ln->ln_byhint > nd6_maxnudhint)
1653 goto done;
1654 }
1655
1656 ln->ln_state = ND6_LLINFO_REACHABLE;
1657 if (ln->ln_expire) {
1658 lck_rw_lock_shared(nd_if_rwlock);
1659 ln->ln_expire = timenow.tv_sec +
1660 nd_ifinfo[rt->rt_ifp->if_index].reachable;
1661 lck_rw_done(nd_if_rwlock);
1662 }
1663 done:
1664 RT_REMREF_LOCKED(rt);
1665 RT_UNLOCK(rt);
1666 }
1667
/*
 * Route request hook for neighbor cache routes.
 *
 * Called with rnh_lock owned and rt locked (both are asserted below).
 *
 * req - RTM_ADD, RTM_RESOLVE or RTM_DELETE; other values fall out of the
 *       switch without action.
 * rt  - the route being added, cloned or deleted.
 * sa  - unused.
 *
 * Nothing is done for RTF_GATEWAY routes, nor for non-host routes on an
 * interface for which nd6_need_cache() returns 0.
 *
 * RTM_RESOLVE: if the interface needs no neighbor cache, or the route key
 *   is not a neighbor according to nd6_is_addr_neighbor(), RTF_LLINFO is
 *   cleared and the function returns.
 * RTM_ADD: for RTF_CLONING routes, or RTF_LLINFO routes without llinfo,
 *   the gateway is set to an empty AF_LINK sockaddr stamped with the
 *   interface type and index; cloning routes stop there.
 * RTM_ADD / RTM_RESOLVE: a llinfo_nd6 is allocated, initialised and put at
 *   the head of the llinfo list; a bounded aging pass over the tail of the
 *   list runs when nd6_inuse reaches ip6_neighborgcthresh; entries for our
 *   own addresses and RTF_ANNOUNCE (proxy) entries become permanent
 *   REACHABLE entries.
 * RTM_DELETE: leaves the proxy solicited-node group if applicable, unchains
 *   the llinfo, purges cached link-layer info, clears RTF_LLINFO and frees
 *   any held packet.  The llinfo itself is not freed here.
 */
1668 void
1669 nd6_rtrequest(
1670 int req,
1671 struct rtentry *rt,
1672 __unused struct sockaddr *sa)
1673 {
1674 struct sockaddr *gate = rt->rt_gateway;
1675 struct llinfo_nd6 *ln = rt->rt_llinfo;
/* Template for an empty AF_LINK gateway; sdl_type/sdl_index are filled in after rt_setgate(). */
1676 static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0,
1677 {0,0,0,0,0,0,0,0,0,0,0,0,} };
1678 struct ifnet *ifp = rt->rt_ifp;
1679 struct ifaddr *ifa;
1680 struct timeval timenow;
1681
1682 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1683 RT_LOCK_ASSERT_HELD(rt);
1684
1685 if ((rt->rt_flags & RTF_GATEWAY))
1686 return;
1687
1688 if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1689 /*
1690 * This is probably an interface direct route for a link
1691 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1692 * We do not need special treatment below for such a route.
1693 * Moreover, the RTF_LLINFO flag which would be set below
1694 * would annoy the ndp(8) command.
1695 */
1696 return;
1697 }
1698
1699 if (req == RTM_RESOLVE) {
1700 int no_nd_cache;
1701
1702 if (!nd6_need_cache(ifp)) { /* stf case */
1703 no_nd_cache = 1;
1704 } else {
1705 struct sockaddr_in6 sin6;
1706
1707 rtkey_to_sa6(rt, &sin6);
1708 /*
1709 * nd6_is_addr_neighbor() may call nd6_lookup(),
1710 * therefore we drop rt_lock to avoid deadlock
1711 * during the lookup.
1712 */
/* The temporary reference keeps rt alive while its lock is dropped. */
1713 RT_ADDREF_LOCKED(rt);
1714 RT_UNLOCK(rt);
/* rt_locked == 1: rnh_lock is owned here (asserted above). */
1715 no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
1716 RT_LOCK(rt);
1717 RT_REMREF_LOCKED(rt);
1718 }
1719
1720 /*
1721 * FreeBSD and BSD/OS often make a cloned host route based
1722 * on a less-specific route (e.g. the default route).
1723 * If the less specific route does not have a "gateway"
1724 * (this is the case when the route just goes to a p2p or an
1725 * stf interface), we'll mistakenly make a neighbor cache for
1726 * the host route, and will see strange neighbor solicitation
1727 * for the corresponding destination. In order to avoid the
1728 * confusion, we check if the destination of the route is
1729 * a neighbor in terms of neighbor discovery, and stop the
1730 * process if not. Additionally, we remove the LLINFO flag
1731 * so that ndp(8) will not try to get the neighbor information
1732 * of the destination.
1733 */
1734 if (no_nd_cache) {
1735 rt->rt_flags &= ~RTF_LLINFO;
1736 return;
1737 }
1738 }
1739
1740 getmicrotime(&timenow);
1741 switch (req) {
1742 case RTM_ADD:
1743 /*
1744 * There is no backward compatibility :)
1745 *
1746 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1747 * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1748 * rt->rt_flags |= RTF_CLONING;
1749 */
1750 if ((rt->rt_flags & RTF_CLONING) ||
1751 ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
1752 /*
1753 * Case 1: This route should come from a route to
1754 * interface (RTF_CLONING case) or the route should be
1755 * treated as on-link but is currently not
1756 * (RTF_LLINFO && ln == NULL case).
1757 */
1758 if (rt_setgate(rt, rt_key(rt),
1759 (struct sockaddr *)&null_sdl) == 0) {
/* rt_setgate() succeeded; re-read the gateway pointer from the route. */
1760 gate = rt->rt_gateway;
1761 SDL(gate)->sdl_type = ifp->if_type;
1762 SDL(gate)->sdl_index = ifp->if_index;
1763 /*
1764 * In case we're called before 1.0 sec.
1765 * has elapsed.
1766 */
1767 if (ln != NULL)
1768 ln->ln_expire = MAX(timenow.tv_sec, 1);
1769 }
1770 if ((rt->rt_flags & RTF_CLONING))
1771 break;
1772 }
1773 /*
1774 * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
1775 * We don't do that here since llinfo is not ready yet.
1776 *
1777 * There are also couple of other things to be discussed:
1778 * - unsolicited NA code needs improvement beforehand
1779 * - RFC2461 says we MAY send multicast unsolicited NA
1780 * (7.2.6 paragraph 4), however, it also says that we
1781 * SHOULD provide a mechanism to prevent multicast NA storm.
1782 * we don't have anything like it right now.
1783 * note that the mechanism needs a mutual agreement
1784 * between proxies, which means that we need to implement
1785 * a new protocol, or a new kludge.
1786 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1787 * we need to check ip6forwarding before sending it.
1788 * (or should we allow proxy ND configuration only for
1789 * routers? there's no mention about proxy ND from hosts)
1790 */
1791 /* FALLTHROUGH */
1792 case RTM_RESOLVE:
1793 if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1794 /*
1795 * Address resolution isn't necessary for a point to
1796 * point link, so we can skip this test for a p2p link.
1797 */
1798 if (gate->sa_family != AF_LINK ||
1799 gate->sa_len < sizeof(null_sdl)) {
1800 /* Don't complain in case of RTM_ADD */
1801 if (req == RTM_RESOLVE) {
1802 log(LOG_DEBUG,
1803 "nd6_rtrequest: bad gateway "
1804 "value: %s\n", if_name(ifp));
1805 }
1806 break;
1807 }
1808 SDL(gate)->sdl_type = ifp->if_type;
1809 SDL(gate)->sdl_index = ifp->if_index;
1810 }
1811 if (ln != NULL)
1812 break; /* This happens on a route change */
1813 /*
1814 * Case 2: This route may come from cloning, or a manual route
1815 * add with a LL address.
1816 */
1817 rt->rt_llinfo = ln = nd6_llinfo_alloc();
1818 if (ln == NULL) {
1819 log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1820 break;
1821 }
/* Hook up the llinfo callbacks on the route. */
1822 rt->rt_llinfo_get_ri = nd6_llinfo_get_ri;
1823 rt->rt_llinfo_purge = nd6_llinfo_purge;
1824 rt->rt_llinfo_free = nd6_llinfo_free;
1825
1826 nd6_inuse++;
1827 nd6_allocated++;
1828 Bzero(ln, sizeof(*ln));
1829 ln->ln_rt = rt;
1830 /* this is required for "ndp" command. - shin */
1831 if (req == RTM_ADD) {
1832 /*
1833 * gate should have some valid AF_LINK entry,
1834 * and ln->ln_expire should have some lifetime
1835 * which is specified by ndp command.
1836 */
1837 ln->ln_state = ND6_LLINFO_REACHABLE;
1838 ln->ln_byhint = 0;
1839 } else {
1840 /*
1841 * When req == RTM_RESOLVE, rt is created and
1842 * initialized in rtrequest(), so rt_expire is 0.
1843 */
1844 ln->ln_state = ND6_LLINFO_NOSTATE;
1845 /* In case we're called before 1.0 sec. has elapsed */
1846 ln->ln_expire = MAX(timenow.tv_sec, 1);
1847 }
1848 rt->rt_flags |= RTF_LLINFO;
1849 LN_INSERTHEAD(ln);
1850
1851 /*
1852 * If we have too many cache entries, initiate immediate
1853 * purging for some "less recently used" entries. Note that
1854 * we cannot directly call nd6_free() here because it would
1855 * cause re-entering rtable related routines triggering an LOR
1856 * problem.
1857 */
1858 if (ip6_neighborgcthresh >= 0 &&
1859 nd6_inuse >= ip6_neighborgcthresh) {
1860 int i;
1861
/* At most 10 tail entries per call; stop early once the new entry reaches the tail. */
1862 for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
1863 struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
1864 struct rtentry *rt_end = ln_end->ln_rt;
1865
1866 /* Move this entry to the head */
1867 RT_LOCK(rt_end);
1868 LN_DEQUEUE(ln_end);
1869 LN_INSERTHEAD(ln_end);
1870
/* Entries with a zero ln_expire are left untouched. */
1871 if (ln_end->ln_expire == 0) {
1872 RT_UNLOCK(rt_end);
1873 continue;
1874 }
1875 if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
1876 ln_end->ln_state = ND6_LLINFO_STALE;
1877 else
1878 ln_end->ln_state = ND6_LLINFO_PURGE;
/* Expire now; the actual freeing is left to code outside this function. */
1879 ln_end->ln_expire = timenow.tv_sec;
1880 RT_UNLOCK(rt_end);
1881 }
1882 }
1883
1884 /*
1885 * check if rt_key(rt) is one of my address assigned
1886 * to the interface.
1887 */
1888 ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
1889 &SIN6(rt_key(rt))->sin6_addr);
1890 if (ifa) {
1891 caddr_t macp = nd6_ifptomac(ifp);
/* Own address: entry never expires and is always REACHABLE. */
1892 ln->ln_expire = 0;
1893 ln->ln_state = ND6_LLINFO_REACHABLE;
1894 ln->ln_byhint = 0;
1895 if (macp) {
1896 Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
1897 SDL(gate)->sdl_alen = ifp->if_addrlen;
1898 }
1899 if (nd6_useloopback) {
1900 if (rt->rt_ifp != lo_ifp) {
1901 /*
1902 * Purge any link-layer info caching.
1903 */
1904 if (rt->rt_llinfo_purge != NULL)
1905 rt->rt_llinfo_purge(rt);
1906
1907 /*
1908 * Adjust route ref count for the
1909 * interfaces.
1910 */
1911 if (rt->rt_if_ref_fn != NULL) {
1912 rt->rt_if_ref_fn(lo_ifp, 1);
1913 rt->rt_if_ref_fn(rt->rt_ifp, -1);
1914 }
1915 }
1916 rt->rt_ifp = lo_ifp; /* XXX */
1917 /*
1918 * Make sure rt_ifa be equal to the ifaddr
1919 * corresponding to the address.
1920 * We need this because when we refer
1921 * rt_ifa->ia6_flags in ip6_input, we assume
1922 * that the rt_ifa points to the address instead
1923 * of the loopback address.
1924 */
1925 if (ifa != rt->rt_ifa) {
1926 rtsetifa(rt, ifa);
1927 }
1928 }
/* Drop the reference obtained from in6ifa_ifpwithaddr(). */
1929 IFA_REMREF(ifa);
1930 } else if (rt->rt_flags & RTF_ANNOUNCE) {
1931 ln->ln_expire = 0;
1932 ln->ln_state = ND6_LLINFO_REACHABLE;
1933 ln->ln_byhint = 0;
1934
1935 /* join solicited node multicast for proxy ND */
1936 if (ifp->if_flags & IFF_MULTICAST) {
1937 struct in6_addr llsol;
1938 struct in6_multi *in6m;
1939 int error;
1940
/* Build the group address from the route key, keeping its low-order bytes. */
1941 llsol = SIN6(rt_key(rt))->sin6_addr;
1942 llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
1943 llsol.s6_addr32[1] = 0;
1944 llsol.s6_addr32[2] = htonl(1);
1945 llsol.s6_addr8[12] = 0xff;
1946 if (in6_setscope(&llsol, ifp, NULL))
1947 break;
1948 error = in6_mc_join(ifp, &llsol, NULL, &in6m, 0);
1949 if (error) {
1950 nd6log((LOG_ERR, "%s: failed to join "
1951 "%s (errno=%d)\n", if_name(ifp),
1952 ip6_sprintf(&llsol), error));
1953 } else {
1954 IN6M_REMREF(in6m);
1955 }
1956 }
1957 }
1958 break;
1959
1960 case RTM_DELETE:
1961 if (ln == NULL)
1962 break;
1963 /* leave from solicited node multicast for proxy ND */
1964 if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1965 (ifp->if_flags & IFF_MULTICAST) != 0) {
1966 struct in6_addr llsol;
1967 struct in6_multi *in6m;
1968
/* Same group address construction as in the join path above. */
1969 llsol = SIN6(rt_key(rt))->sin6_addr;
1970 llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
1971 llsol.s6_addr32[1] = 0;
1972 llsol.s6_addr32[2] = htonl(1);
1973 llsol.s6_addr8[12] = 0xff;
1974 if (in6_setscope(&llsol, ifp, NULL) == 0) {
1975 in6_multihead_lock_shared();
1976 IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
1977 in6_multihead_lock_done();
1978 if (in6m != NULL) {
1979 in6_mc_leave(in6m, NULL);
1980 IN6M_REMREF(in6m);
1981 }
1982 }
1983 }
1984 nd6_inuse--;
1985 /*
1986 * Unchain it but defer the actual freeing until the route
1987 * itself is to be freed. rt->rt_llinfo still points to
1988 * llinfo_nd6, and likewise, ln->ln_rt still points to this
1989 * route entry, except that RTF_LLINFO is now cleared.
1990 */
1991 if (ln->ln_flags & ND6_LNF_IN_USE)
1992 LN_DEQUEUE(ln);
1993
1994 /*
1995 * Purge any link-layer info caching.
1996 */
1997 if (rt->rt_llinfo_purge != NULL)
1998 rt->rt_llinfo_purge(rt);
1999
2000 rt->rt_flags &= ~RTF_LLINFO;
/* Free any packet held on this entry. */
2001 if (ln->ln_hold != NULL) {
2002 m_freem(ln->ln_hold);
2003 ln->ln_hold = NULL;
2004 }
2005 }
2006 }
2007
2008 static void
2009 nd6_siocgdrlst(void *data, int data_is_64)
2010 {
2011 struct in6_drlist_64 *drl_64 = (struct in6_drlist_64 *)data;
2012 struct in6_drlist_32 *drl_32 = (struct in6_drlist_32 *)data;
2013 struct nd_defrouter *dr;
2014 int i = 0;
2015
2016 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2017
2018 bzero(data, data_is_64 ? sizeof (*drl_64) : sizeof (*drl_32));
2019 dr = TAILQ_FIRST(&nd_defrouter);
2020 if (data_is_64) {
2021 /* For 64-bit process */
2022 while (dr && i < DRLSTSIZ) {
2023 drl_64->defrouter[i].rtaddr = dr->rtaddr;
2024 if (IN6_IS_ADDR_LINKLOCAL(&drl_64->defrouter[i].rtaddr)) {
2025 /* XXX: need to this hack for KAME stack */
2026 drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
2027 } else {
2028 log(LOG_ERR,
2029 "default router list contains a "
2030 "non-linklocal address(%s)\n",
2031 ip6_sprintf(&drl_64->defrouter[i].rtaddr));
2032 }
2033 drl_64->defrouter[i].flags = dr->flags;
2034 drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
2035 drl_64->defrouter[i].expire = dr->expire;
2036 drl_64->defrouter[i].if_index = dr->ifp->if_index;
2037 i++;
2038 dr = TAILQ_NEXT(dr, dr_entry);
2039 }
2040 return;
2041 }
2042 /* For 32-bit process */
2043 while (dr && i < DRLSTSIZ) {
2044 drl_32->defrouter[i].rtaddr = dr->rtaddr;
2045 if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
2046 /* XXX: need to this hack for KAME stack */
2047 drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
2048 } else {
2049 log(LOG_ERR,
2050 "default router list contains a "
2051 "non-linklocal address(%s)\n",
2052 ip6_sprintf(&drl_32->defrouter[i].rtaddr));
2053 }
2054 drl_32->defrouter[i].flags = dr->flags;
2055 drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
2056 drl_32->defrouter[i].expire = dr->expire;
2057 drl_32->defrouter[i].if_index = dr->ifp->if_index;
2058 i++;
2059 dr = TAILQ_NEXT(dr, dr_entry);
2060 }
2061 }
2062
2063 static void
2064 nd6_siocgprlst(void *data, int data_is_64)
2065 {
2066 struct in6_prlist_64 *prl_64 = (struct in6_prlist_64 *)data;
2067 struct in6_prlist_32 *prl_32 = (struct in6_prlist_32 *)data;
2068 struct nd_prefix *pr;
2069 int i = 0;
2070
2071 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2072 /*
2073 * XXX meaning of fields, especialy "raflags", is very
2074 * differnet between RA prefix list and RR/static prefix list.
2075 * how about separating ioctls into two?
2076 */
2077 bzero(data, data_is_64 ? sizeof (*prl_64) : sizeof (*prl_32));
2078 pr = nd_prefix.lh_first;
2079 if (data_is_64) {
2080 /* For 64-bit process */
2081 while (pr && i < PRLSTSIZ) {
2082 struct nd_pfxrouter *pfr;
2083 int j;
2084
2085 NDPR_LOCK(pr);
2086 (void) in6_embedscope(&prl_64->prefix[i].prefix,
2087 &pr->ndpr_prefix, NULL, NULL, NULL);
2088 prl_64->prefix[i].raflags = pr->ndpr_raf;
2089 prl_64->prefix[i].prefixlen = pr->ndpr_plen;
2090 prl_64->prefix[i].vltime = pr->ndpr_vltime;
2091 prl_64->prefix[i].pltime = pr->ndpr_pltime;
2092 prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
2093 prl_64->prefix[i].expire = pr->ndpr_expire;
2094
2095 pfr = pr->ndpr_advrtrs.lh_first;
2096 j = 0;
2097 while (pfr) {
2098 if (j < DRLSTSIZ) {
2099 #define RTRADDR prl_64->prefix[i].advrtr[j]
2100 RTRADDR = pfr->router->rtaddr;
2101 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2102 /* XXX: hack for KAME */
2103 RTRADDR.s6_addr16[1] = 0;
2104 } else {
2105 log(LOG_ERR,
2106 "a router(%s) advertises "
2107 "a prefix with "
2108 "non-link local address\n",
2109 ip6_sprintf(&RTRADDR));
2110 }
2111 #undef RTRADDR
2112 }
2113 j++;
2114 pfr = pfr->pfr_next;
2115 }
2116 prl_64->prefix[i].advrtrs = j;
2117 prl_64->prefix[i].origin = PR_ORIG_RA;
2118 NDPR_UNLOCK(pr);
2119
2120 i++;
2121 pr = pr->ndpr_next;
2122 }
2123
2124 return;
2125 }
2126 /* For 32-bit process */
2127 while (pr && i < PRLSTSIZ) {
2128 struct nd_pfxrouter *pfr;
2129 int j;
2130
2131 NDPR_LOCK(pr);
2132 (void) in6_embedscope(&prl_32->prefix[i].prefix,
2133 &pr->ndpr_prefix, NULL, NULL, NULL);
2134 prl_32->prefix[i].raflags = pr->ndpr_raf;
2135 prl_32->prefix[i].prefixlen = pr->ndpr_plen;
2136 prl_32->prefix[i].vltime = pr->ndpr_vltime;
2137 prl_32->prefix[i].pltime = pr->ndpr_pltime;
2138 prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
2139 prl_32->prefix[i].expire = pr->ndpr_expire;
2140
2141 pfr = pr->ndpr_advrtrs.lh_first;
2142 j = 0;
2143 while (pfr) {
2144 if (j < DRLSTSIZ) {
2145 #define RTRADDR prl_32->prefix[i].advrtr[j]
2146 RTRADDR = pfr->router->rtaddr;
2147 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2148 /* XXX: hack for KAME */
2149 RTRADDR.s6_addr16[1] = 0;
2150 } else {
2151 log(LOG_ERR,
2152 "a router(%s) advertises "
2153 "a prefix with "
2154 "non-link local address\n",
2155 ip6_sprintf(&RTRADDR));
2156 }
2157 #undef RTRADDR
2158 }
2159 j++;
2160 pfr = pfr->pfr_next;
2161 }
2162 prl_32->prefix[i].advrtrs = j;
2163 prl_32->prefix[i].origin = PR_ORIG_RA;
2164 NDPR_UNLOCK(pr);
2165
2166 i++;
2167 pr = pr->ndpr_next;
2168 }
2169 }
2170
/*
 * Dispatch the neighbor-discovery ioctls issued against an interface.
 *
 *	cmd	ioctl request code; selects one of the cases below.
 *	data	request/reply buffer; it is reinterpreted as the structure
 *		that matches "cmd" (32- and 64-bit layouts are distinguished
 *		by the command code).
 *	ifp	interface the request refers to; ifp->if_index is read
 *		unconditionally, so ifp must not be NULL.
 *
 * Returns 0 on success or an errno value.  EINVAL is returned when the
 * per-interface ND table has no slot for ifp, or when no neighbor cache
 * entry is found for SIOCGNBRINFO_IN6_*.  A command that matches none of
 * the cases falls out of the switch and returns 0.
 */
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
	struct in6_ndireq *ndi = (struct in6_ndireq *)data;
	struct in6_ondireq *ondi = (struct in6_ondireq *)data;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	struct rtentry *rt;
	int i = ifp->if_index, error = 0;

	switch (cmd) {
	case SIOCGDRLST_IN6_32:
	case SIOCGDRLST_IN6_64:
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 */
		lck_mtx_lock(nd6_mutex);
		nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
		lck_mtx_unlock(nd6_mutex);
		break;

	case SIOCGPRLST_IN6_32:
	case SIOCGPRLST_IN6_64:
		/*
		 * obsolete API, use sysctl under net.inet6.icmp6
		 */
		lck_mtx_lock(nd6_mutex);
		nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
		lck_mtx_unlock(nd6_mutex);
		break;

	case OSIOCGIFINFO_IN6:
	case SIOCGIFINFO_IN6:
		/*
		 * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
		 * instead of in6_ndireq, so we treat it as such.
		 */
		lck_rw_lock_shared(nd_if_rwlock);
		/* Reject interfaces that have no slot in the ND table */
		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
			lck_rw_done(nd_if_rwlock);
			error = EINVAL;
			break;
		}
		/* Copy the per-interface ND parameters out to the caller */
		ondi->ndi.linkmtu = IN6_LINKMTU(ifp);
		ondi->ndi.maxmtu = nd_ifinfo[i].maxmtu;
		ondi->ndi.basereachable = nd_ifinfo[i].basereachable;
		ondi->ndi.reachable = nd_ifinfo[i].reachable;
		ondi->ndi.retrans = nd_ifinfo[i].retrans;
		ondi->ndi.flags = nd_ifinfo[i].flags;
		ondi->ndi.recalctm = nd_ifinfo[i].recalctm;
		ondi->ndi.chlim = nd_ifinfo[i].chlim;
		lck_rw_done(nd_if_rwlock);
		break;

	case SIOCSIFINFO_FLAGS:
		/* XXX: almost all other fields of ndi->ndi is unused */
		/*
		 * NOTE(review): the flags are stored while nd_if_rwlock is
		 * held shared, not exclusive -- confirm this is intended.
		 */
		lck_rw_lock_shared(nd_if_rwlock);
		if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
			lck_rw_done(nd_if_rwlock);
			error = EINVAL;
			break;
		}
		nd_ifinfo[i].flags = ndi->ndi.flags;
		lck_rw_done(nd_if_rwlock);
		break;

	case SIOCSNDFLUSH_IN6:	/* XXX: the ioctl name is confusing... */
		/* flush default router list */
		/*
		 * xxx sumikawa: should not delete route if default
		 * route equals to the top of default router list
		 */
		lck_mtx_lock(nd6_mutex);
		defrouter_reset();
		defrouter_select(ifp);
		lck_mtx_unlock(nd6_mutex);
		/* xxx sumikawa: flush prefix list */
		break;

	case SIOCSPFXFLUSH_IN6: {
		/* flush all the prefix advertised by routers */
		struct nd_prefix *next;

		lck_mtx_lock(nd6_mutex);
		for (pr = nd_prefix.lh_first; pr; pr = next) {
			struct in6_ifaddr *ia;

			/* Fetch the successor first; pr is removed below */
			next = pr->ndpr_next;

			NDPR_LOCK(pr);
			/* Link-local prefixes are left alone */
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
				NDPR_UNLOCK(pr);
				continue; /* XXX */
			}
			/*
			 * A request on the loopback interface matches every
			 * prefix; otherwise only those that belong to ifp.
			 */
			if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
				NDPR_UNLOCK(pr);
				continue;
			}
			/* do we really have to remove addresses as well? */
			/*
			 * Take a reference before dropping the prefix lock;
			 * it is given back by NDPR_REMREF() at the bottom
			 * of this iteration.
			 */
			NDPR_ADDREF_LOCKED(pr);
			NDPR_UNLOCK(pr);
			lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
			ia = in6_ifaddrs;
			while (ia != NULL) {
				IFA_LOCK(&ia->ia_ifa);
				/* Only autoconfigured addresses are purged */
				if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
					IFA_UNLOCK(&ia->ia_ifa);
					ia = ia->ia_next;
					continue;
				}

				if (ia->ia6_ndpr == pr) {
					/*
					 * Both in6_ifaddr_rwlock and
					 * nd6_mutex are released around
					 * in6_purgeaddr(); the reference
					 * taken here is dropped after the
					 * locks are reacquired.
					 */
					IFA_ADDREF_LOCKED(&ia->ia_ifa);
					IFA_UNLOCK(&ia->ia_ifa);
					lck_rw_done(&in6_ifaddr_rwlock);
					lck_mtx_unlock(nd6_mutex);
					in6_purgeaddr(&ia->ia_ifa);
					lck_mtx_lock(nd6_mutex);
					lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
					IFA_REMREF(&ia->ia_ifa);
					/*
					 * Purging the address caused
					 * in6_ifaddr_rwlock to be
					 * dropped and
					 * reacquired; therefore search again
					 * from the beginning of in6_ifaddrs.
					 * The same applies for the prefix list.
					 */
					ia = in6_ifaddrs;
					next = nd_prefix.lh_first;
					continue;

				}
				IFA_UNLOCK(&ia->ia_ifa);
				ia = ia->ia_next;
			}
			lck_rw_done(&in6_ifaddr_rwlock);
			NDPR_LOCK(pr);
			prelist_remove(pr);
			NDPR_UNLOCK(pr);
			/*
			 * If we were trying to restart this loop
			 * above by changing the value of 'next', we might
			 * end up freeing the only element on the list
			 * when we call NDPR_REMREF().
			 * When this happens, we also have get out of this
			 * loop because we have nothing else to do.
			 */
			if (pr == next)
				next = NULL;
			NDPR_REMREF(pr);
		}
		lck_mtx_unlock(nd6_mutex);
		break;
	}

	case SIOCSRTRFLUSH_IN6: {
		/* flush all the default routers */
		struct nd_defrouter *next;

		lck_mtx_lock(nd6_mutex);
		if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
			/*
			 * The first entry of the list may be stored in
			 * the routing table, so we'll delete it later.
			 */
			for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
				/* Save the successor before dr is deleted */
				next = TAILQ_NEXT(dr, dr_entry);
				if (ifp == lo_ifp || dr->ifp == ifp)
					defrtrlist_del(dr);
			}
			/* Now handle the head entry that was skipped above */
			if (ifp == lo_ifp ||
			    TAILQ_FIRST(&nd_defrouter)->ifp == ifp)
				defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
		}
		lck_mtx_unlock(nd6_mutex);
		break;
	}

	case SIOCGNBRINFO_IN6_32: {
		struct llinfo_nd6 *ln;
		struct in6_nbrinfo_32 *nbi_32 = (struct in6_nbrinfo_32 *)data;
		/* make local for safety */
		struct in6_addr nb_addr = nbi_32->addr;

		/*
		 * XXX: KAME specific hack for scoped addresses
		 * XXXX: for other scopes than link-local?
		 */
		if (IN6_IS_ADDR_LINKLOCAL(&nbi_32->addr) ||
		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32->addr)) {
			/* 16-bit word at bytes 2-3 of the local copy */
			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];

			/* Fill in ifp's index only when none was supplied */
			if (*idp == 0)
				*idp = htons(ifp->if_index);
		}

		/* Callee returns a locked route upon success */
		if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
			error = EINVAL;
			break;
		}
		RT_LOCK_ASSERT_HELD(rt);
		/* Report the neighbor cache state back through "data" */
		ln = rt->rt_llinfo;
		nbi_32->state = ln->ln_state;
		nbi_32->asked = ln->ln_asked;
		nbi_32->isrouter = ln->ln_router;
		nbi_32->expire = ln->ln_expire;
		/* Drop the reference and the lock obtained by nd6_lookup() */
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		break;
	}

	case SIOCGNBRINFO_IN6_64: {
		struct llinfo_nd6 *ln;
		struct in6_nbrinfo_64 *nbi_64 = (struct in6_nbrinfo_64 *)data;
		/* make local for safety */
		struct in6_addr nb_addr = nbi_64->addr;

		/*
		 * XXX: KAME specific hack for scoped addresses
		 * XXXX: for other scopes than link-local?
		 */
		if (IN6_IS_ADDR_LINKLOCAL(&nbi_64->addr) ||
		    IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64->addr)) {
			/* 16-bit word at bytes 2-3 of the local copy */
			u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];

			/* Fill in ifp's index only when none was supplied */
			if (*idp == 0)
				*idp = htons(ifp->if_index);
		}

		/* Callee returns a locked route upon success */
		if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
			error = EINVAL;
			break;
		}
		RT_LOCK_ASSERT_HELD(rt);
		/* Report the neighbor cache state back through "data" */
		ln = rt->rt_llinfo;
		nbi_64->state = ln->ln_state;
		nbi_64->asked = ln->ln_asked;
		nbi_64->isrouter = ln->ln_router;
		nbi_64->expire = ln->ln_expire;
		/* Drop the reference and the lock obtained by nd6_lookup() */
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
		break;
	}

	case SIOCGDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
	case SIOCGDEFIFACE_IN6_64: {
		/* Same buffer viewed through both layouts; cmd picks one */
		struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
		struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;

		if (cmd == SIOCGDEFIFACE_IN6_64)
			ndif_64->ifindex = nd6_defifindex;
		else
			ndif_32->ifindex = nd6_defifindex;
		break;
	}

	case SIOCSDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
	case SIOCSDEFIFACE_IN6_64: {
		/* Same buffer viewed through both layouts; cmd picks one */
		struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
		struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;

		error = nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ?
		    ndif_64->ifindex : ndif_32->ifindex);
		return (error);
		/* NOTREACHED */
	}
	}
	return (error);
}
2443
2444 /*
2445 * Create neighbor cache entry and cache link-layer address,
2446 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
2447 */
2448 void
2449 nd6_cache_lladdr(
2450 struct ifnet *ifp,
2451 struct in6_addr *from,
2452 char *lladdr,
2453 __unused int lladdrlen,
2454 int type, /* ICMP6 type */
2455 int code) /* type dependent information */
2456 {
2457 struct rtentry *rt = NULL;
2458 struct llinfo_nd6 *ln = NULL;
2459 int is_newentry;
2460 struct sockaddr_dl *sdl = NULL;
2461 int do_update;
2462 int olladdr;
2463 int llchange;
2464 int newstate = 0;
2465 struct timeval timenow;
2466
2467 if (ifp == NULL)
2468 panic("ifp == NULL in nd6_cache_lladdr");
2469 if (from == NULL)
2470 panic("from == NULL in nd6_cache_lladdr");
2471
2472 /* nothing must be updated for unspecified address */
2473 if (IN6_IS_ADDR_UNSPECIFIED(from))
2474 return;
2475
2476 /*
2477 * Validation about ifp->if_addrlen and lladdrlen must be done in
2478 * the caller.
2479 *
2480 * XXX If the link does not have link-layer adderss, what should
2481 * we do? (ifp->if_addrlen == 0)
2482 * Spec says nothing in sections for RA, RS and NA. There's small
2483 * description on it in NS section (RFC 2461 7.2.3).
2484 */
2485 getmicrotime(&timenow);
2486
2487 rt = nd6_lookup(from, 0, ifp, 0);
2488 if (rt == NULL) {
2489 if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
2490 return;
2491 RT_LOCK_ASSERT_HELD(rt);
2492 is_newentry = 1;
2493 } else {
2494 RT_LOCK_ASSERT_HELD(rt);
2495 /* do nothing if static ndp is set */
2496 if (rt->rt_flags & RTF_STATIC) {
2497 RT_REMREF_LOCKED(rt);
2498 RT_UNLOCK(rt);
2499 return;
2500 }
2501 is_newentry = 0;
2502 }
2503
2504 if (rt == NULL)
2505 return;
2506 if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
2507 fail:
2508 RT_UNLOCK(rt);
2509 nd6_free(rt);
2510 rtfree(rt);
2511 return;
2512 }
2513 ln = (struct llinfo_nd6 *)rt->rt_llinfo;
2514 if (ln == NULL)
2515 goto fail;
2516 if (rt->rt_gateway == NULL)
2517 goto fail;
2518 if (rt->rt_gateway->sa_family != AF_LINK)
2519 goto fail;
2520 sdl = SDL(rt->rt_gateway);
2521
2522 olladdr = (sdl->sdl_alen) ? 1 : 0;
2523 if (olladdr && lladdr) {
2524 if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
2525 llchange = 1;
2526 else
2527 llchange = 0;
2528 } else
2529 llchange = 0;
2530
2531 /*
2532 * newentry olladdr lladdr llchange (*=record)
2533 * 0 n n -- (1)
2534 * 0 y n -- (2)
2535 * 0 n y -- (3) * STALE
2536 * 0 y y n (4) *
2537 * 0 y y y (5) * STALE
2538 * 1 -- n -- (6) NOSTATE(= PASSIVE)
2539 * 1 -- y -- (7) * STALE
2540 */
2541
2542 if (lladdr) { /* (3-5) and (7) */
2543 /*
2544 * Record source link-layer address
2545 * XXX is it dependent to ifp->if_type?
2546 */
2547 sdl->sdl_alen = ifp->if_addrlen;
2548 bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
2549
2550 /* cache the gateway (sender HW) address */
2551 nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
2552 }
2553
2554 if (!is_newentry) {
2555 if ((!olladdr && lladdr != NULL) || /* (3) */
2556 (olladdr && lladdr != NULL && llchange)) { /* (5) */
2557 do_update = 1;
2558 newstate = ND6_LLINFO_STALE;
2559 } else /* (1-2,4) */
2560 do_update = 0;
2561 } else {
2562 do_update = 1;
2563 if (lladdr == NULL) /* (6) */
2564 newstate = ND6_LLINFO_NOSTATE;
2565 else /* (7) */
2566 newstate = ND6_LLINFO_STALE;
2567 }
2568
2569 if (do_update) {
2570 /*
2571 * Update the state of the neighbor cache.
2572 */
2573 ln->ln_state = newstate;
2574
2575 if (ln->ln_state == ND6_LLINFO_STALE) {
2576 struct mbuf *m = ln->ln_hold;
2577 /*
2578 * XXX: since nd6_output() below will cause
2579 * state tansition to DELAY and reset the timer,
2580 * we must set the timer now, although it is actually
2581 * meaningless.
2582 */
2583 ln->ln_expire = timenow.tv_sec + nd6_gctimer;
2584 ln->ln_hold = NULL;
2585
2586 if (m != NULL) {
2587 struct sockaddr_in6 sin6;
2588
2589 rtkey_to_sa6(rt, &sin6);
2590 /*
2591 * we assume ifp is not a p2p here, so just
2592 * set the 2nd argument as the 1st one.
2593 */
2594 RT_UNLOCK(rt);
2595 nd6_output(ifp, ifp, m, &sin6, rt);
2596 RT_LOCK(rt);
2597 }
2598 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
2599 /* probe right away */
2600 ln->ln_expire = timenow.tv_sec;
2601 }
2602 }
2603
2604 /*
2605 * ICMP6 type dependent behavior.
2606 *
2607 * NS: clear IsRouter if new entry
2608 * RS: clear IsRouter
2609 * RA: set IsRouter if there's lladdr
2610 * redir: clear IsRouter if new entry
2611 *
2612 * RA case, (1):
2613 * The spec says that we must set IsRouter in the following cases:
2614 * - If lladdr exist, set IsRouter. This means (1-5).
2615 * - If it is old entry (!newentry), set IsRouter. This means (7).
2616 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
2617 * A quetion arises for (1) case. (1) case has no lladdr in the
2618 * neighbor cache, this is similar to (6).
2619 * This case is rare but we figured that we MUST NOT set IsRouter.
2620 *
2621 * newentry olladdr lladdr llchange NS RS RA redir
2622 * D R
2623 * 0 n n -- (1) c ? s
2624 * 0 y n -- (2) c s s
2625 * 0 n y -- (3) c s s
2626 * 0 y y n (4) c s s
2627 * 0 y y y (5) c s s
2628 * 1 -- n -- (6) c c c s
2629 * 1 -- y -- (7) c c s c s
2630 *
2631 * (c=clear s=set)
2632 */
2633 switch (type & 0xff) {
2634 case ND_NEIGHBOR_SOLICIT:
2635 /*
2636 * New entry must have is_router flag cleared.
2637 */
2638 if (is_newentry) /* (6-7) */
2639 ln->ln_router = 0;
2640 break;
2641 case ND_REDIRECT:
2642 /*
2643 * If the icmp is a redirect to a better router, always set the
2644 * is_router flag. Otherwise, if the entry is newly created,
2645 * clear the flag. [RFC 2461, sec 8.3]
2646 */
2647 if (code == ND_REDIRECT_ROUTER)
2648 ln->ln_router = 1;
2649 else if (is_newentry) /* (6-7) */
2650 ln->ln_router = 0;
2651 break;
2652 case ND_ROUTER_SOLICIT:
2653 /*
2654 * is_router flag must always be cleared.
2655 */
2656 ln->ln_router = 0;
2657 break;
2658 case ND_ROUTER_ADVERT:
2659 /*
2660 * Mark an entry with lladdr as a router.
2661 */
2662 if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */
2663 (is_newentry && lladdr)) { /* (7) */
2664 ln->ln_router = 1;
2665 }
2666 break;
2667 }
2668
2669 /*
2670 * When the link-layer address of a router changes, select the
2671 * best router again. In particular, when the neighbor entry is newly
2672 * created, it might affect the selection policy.
2673 * Question: can we restrict the first condition to the "is_newentry"
2674 * case?
2675 * XXX: when we hear an RA from a new router with the link-layer
2676 * address option, defrouter_select() is called twice, since
2677 * defrtrlist_update called the function as well. However, I believe
2678 * we can compromise the overhead, since it only happens the first
2679 * time.
2680 * XXX: although defrouter_select() should not have a bad effect
2681 * for those are not autoconfigured hosts, we explicitly avoid such
2682 * cases for safety.
2683 */
2684 if (do_update && ln->ln_router && !ip6_forwarding &&
2685 (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
2686 RT_REMREF_LOCKED(rt);
2687 RT_UNLOCK(rt);
2688 lck_mtx_lock(nd6_mutex);
2689 defrouter_select(ifp);
2690 lck_mtx_unlock(nd6_mutex);
2691 } else {
2692 RT_REMREF_LOCKED(rt);
2693 RT_UNLOCK(rt);
2694 }
2695 }
2696
2697 static void
2698 nd6_slowtimo(
2699 __unused void *ignored_arg)
2700 {
2701 int i;
2702 struct nd_ifinfo *nd6if;
2703
2704 lck_rw_lock_shared(nd_if_rwlock);
2705 for (i = 1; i < if_index + 1; i++) {
2706 if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
2707 break;
2708 nd6if = &nd_ifinfo[i];
2709 if (nd6if->basereachable && /* already initialized */
2710 (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
2711 /*
2712 * Since reachable time rarely changes by router
2713 * advertisements, we SHOULD insure that a new random
2714 * value gets recomputed at least once every few hours.
2715 * (RFC 2461, 6.3.4)
2716 */
2717 nd6if->recalctm = nd6_recalc_reachtm_interval;
2718 nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
2719 }
2720 }
2721 lck_rw_done(nd_if_rwlock);
2722 timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz);
2723 }
2724
/* Record an error code and jump to the packet-dropping exit path */
#define senderr(e) { error = (e); goto bad;}
/*
 * Send an IPv6 packet to "dst" on "ifp", after next-hop determination and
 * neighbor cache processing.
 *
 *	ifp	outgoing interface.
 *	origifp	interface passed to dlil_output() (and stored as the
 *		packet's rcvif) when ifp is a loopback interface.
 *	m0	the packet.  It is freed on the "bad" path, parked on the
 *		neighbor entry (ln_hold) while address resolution is
 *		pending, and otherwise handed to dlil_output().
 *	dst	destination address.
 *	hint0	optional route for the destination.  An extra reference is
 *		taken on entry and dropped again before returning; this
 *		route is never passed to rtfree() here.
 *
 * Returns 0, or an errno value: EHOSTUNREACH, EIO, ENETDOWN, ENETUNREACH,
 * or whatever dlil_output() reports.
 */
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
    struct sockaddr_in6 *dst, struct rtentry *hint0)
{
	struct mbuf *m = m0;
	struct rtentry *rt = hint0, *hint = hint0;
	struct llinfo_nd6 *ln = NULL;
	int error = 0;
	struct timeval timenow;
	/* Original route, remembered once "rt" is switched to the gateway */
	struct rtentry *rtrele = NULL;

	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		RT_ADDREF_LOCKED(rt);
	}

	/* Multicast, or an interface without neighbor cache: just send */
	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
		if (rt != NULL)
			RT_UNLOCK(rt);
		goto sendpkt;
	}

	/*
	 * Next hop determination.  Because we may involve the gateway route
	 * in addition to the original route, locking is rather complicated.
	 * The general concept is that regardless of whether the route points
	 * to the original route or to the gateway route, this routine takes
	 * an extra reference on such a route.  This extra reference will be
	 * released at the end.
	 *
	 * Care must be taken to ensure that the "hint0" route never gets freed
	 * via rtfree(), since the caller may have stored it inside a struct
	 * route with a reference held for that placeholder.
	 *
	 * This logic is similar to, though not exactly the same as the one
	 * used by arp_route_to_gateway_route().
	 */
	if (rt != NULL) {
		/*
		 * We have a reference to "rt" by now (or below via rtalloc1),
		 * which will either be released or freed at the end of this
		 * routine.
		 */
		RT_LOCK_ASSERT_HELD(rt);
		if (!(rt->rt_flags & RTF_UP)) {
			/* The hint is down: drop it and look up a new route */
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
			if ((hint = rt = rtalloc1_scoped((struct sockaddr *)dst,
			    1, 0, ifp->if_index)) != NULL) {
				RT_LOCK_SPIN(rt);
				if (rt->rt_ifp != ifp) {
					/* XXX: loop care? */
					RT_UNLOCK(rt);
					error = nd6_output(ifp, origifp, m0,
					    dst, rt);
					rtfree(rt);
					return (error);
				}
			} else {
				senderr(EHOSTUNREACH);
			}
		}

		if (rt->rt_flags & RTF_GATEWAY) {
			struct rtentry *gwrt;
			struct in6_ifaddr *ia6 = NULL;
			struct sockaddr_in6 gw6;

			rtgw_to_sa6(rt, &gw6);
			/*
			 * Must drop rt_lock since nd6_is_addr_neighbor()
			 * calls nd6_lookup() and acquires rnh_lock.
			 */
			RT_UNLOCK(rt);

			/*
			 * We skip link-layer address resolution and NUD
			 * if the gateway is not a neighbor from ND point
			 * of view, regardless of the value of nd_ifinfo.flags.
			 * The second condition is a bit tricky; we skip
			 * if the gateway is our own address, which is
			 * sometimes used to install a route to a p2p link.
			 */
			if (!nd6_is_addr_neighbor(&gw6, ifp, 0) ||
			    (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
				/*
				 * We allow this kind of tricky route only
				 * when the outgoing interface is p2p.
				 * XXX: we may need a more generic rule here.
				 */
				if (ia6 != NULL)
					IFA_REMREF(&ia6->ia_ifa);
				if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
					senderr(EHOSTUNREACH);
				goto sendpkt;
			}

			/* Re-read the gateway now that rt is locked again */
			RT_LOCK_SPIN(rt);
			gw6 = *((struct sockaddr_in6 *)rt->rt_gateway);

			/* If hint is now down, give up */
			if (!(rt->rt_flags & RTF_UP)) {
				RT_UNLOCK(rt);
				senderr(EHOSTUNREACH);
			}

			/* If there's no gateway route, look it up */
			if ((gwrt = rt->rt_gwroute) == NULL) {
				RT_UNLOCK(rt);
				goto lookup;
			}
			/* Become a regular mutex */
			RT_CONVERT_LOCK(rt);

			/*
			 * Take gwrt's lock while holding route's lock;
			 * this is okay since gwrt never points back
			 * to rt, so no lock ordering issues.
			 */
			RT_LOCK_SPIN(gwrt);
			if (!(gwrt->rt_flags & RTF_UP)) {
				struct rtentry *ogwrt;

				/* Cached gateway route is down; discard it */
				rt->rt_gwroute = NULL;
				RT_UNLOCK(gwrt);
				RT_UNLOCK(rt);
				rtfree(gwrt);
lookup:
				gwrt = rtalloc1_scoped((struct sockaddr *)&gw6,
				    1, 0, ifp->if_index);

				RT_LOCK(rt);
				/*
				 * Bail out if the route is down, no route
				 * to gateway, circular route, or if the
				 * gateway portion of "rt" has changed.
				 */
				if (!(rt->rt_flags & RTF_UP) ||
				    gwrt == NULL || gwrt == rt ||
				    !equal(SA(&gw6), rt->rt_gateway)) {
					if (gwrt == rt) {
						RT_REMREF_LOCKED(gwrt);
						gwrt = NULL;
					}
					RT_UNLOCK(rt);
					if (gwrt != NULL)
						rtfree(gwrt);
					senderr(EHOSTUNREACH);
				}

				/* Remove any existing gwrt */
				ogwrt = rt->rt_gwroute;
				if ((rt->rt_gwroute = gwrt) != NULL)
					RT_ADDREF(gwrt);

				RT_UNLOCK(rt);
				/* Now free the replaced gwrt */
				if (ogwrt != NULL)
					rtfree(ogwrt);
				/* If still no route to gateway, bail out */
				if (gwrt == NULL)
					senderr(EHOSTUNREACH);
				/* Remember to release/free "rt" at the end */
				rtrele = rt;
				rt = gwrt;
				RT_LOCK_SPIN(rt);
				/* If gwrt is now down, give up */
				if (!(rt->rt_flags & RTF_UP)) {
					RT_UNLOCK(rt);
					rtfree(rt);
					rt = NULL;
					/* "rtrele" == original "rt" */
					senderr(EHOSTUNREACH);
				}
			} else {
				/* Cached gateway route is up; reference it */
				RT_ADDREF_LOCKED(gwrt);
				RT_UNLOCK(gwrt);
				RT_UNLOCK(rt);
				RT_LOCK_SPIN(gwrt);
				/* If gwrt is now down, give up */
				if (!(gwrt->rt_flags & RTF_UP)) {
					RT_UNLOCK(gwrt);
					rtfree(gwrt);
					senderr(EHOSTUNREACH);
				}
				/* Remember to release/free "rt" at the end */
				rtrele = rt;
				rt = gwrt;
			}
		}
		/* Become a regular mutex */
		RT_CONVERT_LOCK(rt);
	}

	if (rt != NULL)
		RT_LOCK_ASSERT_HELD(rt);

	/*
	 * Address resolution or Neighbor Unreachability Detection
	 * for the next hop.
	 * At this point, the destination of the packet must be a unicast
	 * or an anycast address(i.e. not a multicast).
	 */

	/* Look up the neighbor cache for the nexthop */
	if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
		ln = rt->rt_llinfo;
	} else {
		struct sockaddr_in6 sin6;
		/*
		 * Clear out Scope ID field in case it is set.
		 */
		sin6 = *dst;
		sin6.sin6_scope_id = 0;
		/*
		 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
		 * the condition below is not very efficient.  But we believe
		 * it is tolerable, because this should be a rare case.
		 * Must drop rt_lock since nd6_is_addr_neighbor() calls
		 * nd6_lookup() and acquires rnh_lock.
		 */
		if (rt != NULL)
			RT_UNLOCK(rt);
		if (nd6_is_addr_neighbor(&sin6, ifp, 0)) {
			/* "rtrele" may have been used, so clean up "rt" now */
			if (rt != NULL) {
				/* Don't free "hint0" */
				if (rt == hint0)
					RT_REMREF(rt);
				else
					rtfree(rt);
			}
			/* Callee returns a locked route upon success */
			rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0);
			if (rt != NULL) {
				RT_LOCK_ASSERT_HELD(rt);
				ln = rt->rt_llinfo;
			}
		} else if (rt != NULL) {
			RT_LOCK(rt);
		}
	}

	/* No neighbor entry could be obtained for the next hop */
	if (!ln || !rt) {
		if (rt != NULL)
			RT_UNLOCK(rt);
		lck_rw_lock_shared(nd_if_rwlock);
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
		    !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
			lck_rw_done(nd_if_rwlock);
			log(LOG_DEBUG,
			    "nd6_output: can't allocate llinfo for %s "
			    "(ln=%p, rt=%p)\n",
			    ip6_sprintf(&dst->sin6_addr), ln, rt);
			senderr(EIO);	/* XXX: good error? */
		}
		lck_rw_done(nd_if_rwlock);

		goto sendpkt;	/* send anyway */
	}

	getmicrotime(&timenow);

	/* We don't have to do link-layer address resolution on a p2p link. */
	if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
	    ln->ln_state < ND6_LLINFO_REACHABLE) {
		ln->ln_state = ND6_LLINFO_STALE;
		ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_gctimer);
	}

	/*
	 * The first time we send a packet to a neighbor whose entry is
	 * STALE, we have to change the state to DELAY and set a timer to
	 * expire in DELAY_FIRST_PROBE_TIME seconds, so that neighbor
	 * unreachability detection is performed on expiration.
	 * (RFC 2461 7.3.3)
	 */
	if (ln->ln_state == ND6_LLINFO_STALE) {
		ln->ln_asked = 0;
		ln->ln_state = ND6_LLINFO_DELAY;
		ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_delay);
	}

	/*
	 * If the neighbor cache entry has a state other than INCOMPLETE
	 * (i.e. its link-layer address is already resolved), just
	 * send the packet.
	 */
	if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
		/* rt is unlocked first so rnh_lock is taken before rt's lock */
		RT_UNLOCK(rt);
		/*
		 * Move this entry to the head of the queue so that it is
		 * less likely for this entry to be a target of forced
		 * garbage collection (see nd6_rtrequest()).
		 */
		lck_mtx_lock(rnh_lock);
		RT_LOCK_SPIN(rt);
		if (ln->ln_flags & ND6_LNF_IN_USE) {
			LN_DEQUEUE(ln);
			LN_INSERTHEAD(ln);
		}
		RT_UNLOCK(rt);
		lck_mtx_unlock(rnh_lock);
		goto sendpkt;
	}

	/*
	 * There is a neighbor cache entry, but no ethernet address
	 * response yet.  Replace the held mbuf (if any) with this
	 * latest one.
	 *
	 * This code conforms to the rate-limiting rule described in Section
	 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
	 * an NS below.
	 */
	if (ln->ln_state == ND6_LLINFO_NOSTATE)
		ln->ln_state = ND6_LLINFO_INCOMPLETE;
	if (ln->ln_hold)
		m_freem(ln->ln_hold);
	ln->ln_hold = m;
	/* Send a solicitation only if the timer ran out and tries remain */
	if (ln->ln_expire && ln->ln_asked < nd6_mmaxtries &&
	    ln->ln_expire < timenow.tv_sec) {
		ln->ln_asked++;
		lck_rw_lock_shared(nd_if_rwlock);
		ln->ln_expire = timenow.tv_sec +
		    nd_ifinfo[ifp->if_index].retrans / 1000;
		lck_rw_done(nd_if_rwlock);
		RT_UNLOCK(rt);
		/* We still have a reference on rt (for ln) */
		nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
	} else {
		RT_UNLOCK(rt);
	}
	/*
	 * Move this entry to the head of the queue so that it is
	 * less likely for this entry to be a target of forced
	 * garbage collection (see nd6_rtrequest()).
	 */
	lck_mtx_lock(rnh_lock);
	RT_LOCK_SPIN(rt);
	if (ln->ln_flags & ND6_LNF_IN_USE) {
		LN_DEQUEUE(ln);
		LN_INSERTHEAD(ln);
	}
	/* Clean up "rt" now while we can */
	if (rt == hint0) {
		RT_REMREF_LOCKED(rt);
		RT_UNLOCK(rt);
	} else {
		RT_UNLOCK(rt);
		rtfree_locked(rt);
	}
	rt = NULL;	/* "rt" has been taken care of */
	lck_mtx_unlock(rnh_lock);

	/* The packet is now held on the neighbor entry; not an error */
	error = 0;
	goto release;

sendpkt:
	if (rt != NULL)
		RT_LOCK_ASSERT_NOTHELD(rt);

	/* discard the packet if IPv6 operation is disabled on the interface */
	lck_rw_lock_shared(nd_if_rwlock);
	if ((nd_ifinfo[ifp->if_index].flags & ND6_IFF_IFDISABLED)) {
		lck_rw_done(nd_if_rwlock);
		error = ENETDOWN;	/* better error? */
		goto bad;
	}
	lck_rw_done(nd_if_rwlock);

	if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
		/* forwarding rules require the original scope_id */
		m->m_pkthdr.rcvif = origifp;
		error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
		    (struct sockaddr *)dst, 0);
		goto release;
	} else {
		/* Do not allow loopback address to wind up on a wire */
		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);

		if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
		    IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
			ip6stat.ip6s_badscope++;
			/*
			 * Do not simply drop the packet just like a
			 * firewall -- we want the application to feel
			 * the pain.  Return ENETUNREACH like ip6_output
			 * does in some similar cases.  This can startle
			 * the otherwise clueless process that specifies
			 * loopback as the source address.
			 */
			error = ENETUNREACH;
			goto bad;
		}
	}

	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		/* Mark use timestamp */
		if (rt->rt_llinfo != NULL)
			nd6_llreach_use(rt->rt_llinfo);
		RT_UNLOCK(rt);
	}

	/* Account the transmission against the hint route, if collecting */
	if (hint && nstat_collect)
		nstat_route_tx(hint, 1, m->m_pkthdr.len, 0);

	m->m_pkthdr.rcvif = NULL;
	error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt,
	    (struct sockaddr *)dst, 0);
	goto release;

bad:
	/* Error exit: the packet is dropped here */
	if (m != NULL)
		m_freem(m);

release:
	/* Clean up "rt" unless it's already been done */
	if (rt != NULL) {
		RT_LOCK_SPIN(rt);
		if (rt == hint0) {
			RT_REMREF_LOCKED(rt);
			RT_UNLOCK(rt);
		} else {
			RT_UNLOCK(rt);
			rtfree(rt);
		}
	}
	/* And now clean up "rtrele" if there is any */
	if (rtrele != NULL) {
		RT_LOCK_SPIN(rtrele);
		if (rtrele == hint0) {
			RT_REMREF_LOCKED(rtrele);
			RT_UNLOCK(rtrele);
		} else {
			RT_UNLOCK(rtrele);
			rtfree(rtrele);
		}
	}
	return (error);
}
#undef senderr
3169
3170 int
3171 nd6_need_cache(
3172 struct ifnet *ifp)
3173 {
3174 /*
3175 * XXX: we currently do not make neighbor cache on any interface
3176 * other than ARCnet, Ethernet, FDDI and GIF.
3177 *
3178 * RFC2893 says:
3179 * - unidirectional tunnels needs no ND
3180 */
3181 switch (ifp->if_type) {
3182 case IFT_ARCNET:
3183 case IFT_ETHER:
3184 case IFT_FDDI:
3185 case IFT_IEEE1394:
3186 case IFT_L2VLAN:
3187 case IFT_IEEE8023ADLAG:
3188 #if IFT_IEEE80211
3189 case IFT_IEEE80211:
3190 #endif
3191 case IFT_GIF: /* XXX need more cases? */
3192 case IFT_PPP:
3193 #if IFT_TUNNEL
3194 case IFT_TUNNEL:
3195 #endif
3196 case IFT_BRIDGE:
3197 case IFT_CELLULAR:
3198 return(1);
3199 default:
3200 return(0);
3201 }
3202 }
3203
3204 int
3205 nd6_storelladdr(
3206 struct ifnet *ifp,
3207 struct rtentry *rt,
3208 struct mbuf *m,
3209 struct sockaddr *dst,
3210 u_char *desten)
3211 {
3212 int i;
3213 struct sockaddr_dl *sdl;
3214
3215 if (m->m_flags & M_MCAST) {
3216 switch (ifp->if_type) {
3217 case IFT_ETHER:
3218 case IFT_FDDI:
3219 case IFT_L2VLAN:
3220 case IFT_IEEE8023ADLAG:
3221 #if IFT_IEEE80211
3222 case IFT_IEEE80211:
3223 #endif
3224 case IFT_BRIDGE:
3225 ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
3226 desten);
3227 return(1);
3228 case IFT_IEEE1394:
3229 for (i = 0; i < ifp->if_addrlen; i++)
3230 desten[i] = ~0;
3231 return(1);
3232 case IFT_ARCNET:
3233 *desten = 0;
3234 return(1);
3235 default:
3236 return(0); /* caller will free mbuf */
3237 }
3238 }
3239
3240 if (rt == NULL) {
3241 /* this could happen, if we could not allocate memory */
3242 return(0); /* caller will free mbuf */
3243 }
3244 RT_LOCK(rt);
3245 if (rt->rt_gateway->sa_family != AF_LINK) {
3246 printf("nd6_storelladdr: something odd happens\n");
3247 RT_UNLOCK(rt);
3248 return(0); /* caller will free mbuf */
3249 }
3250 sdl = SDL(rt->rt_gateway);
3251 if (sdl->sdl_alen == 0) {
3252 /* this should be impossible, but we bark here for debugging */
3253 printf("nd6_storelladdr: sdl_alen == 0\n");
3254 RT_UNLOCK(rt);
3255 return(0); /* caller will free mbuf */
3256 }
3257
3258 bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
3259 RT_UNLOCK(rt);
3260 return(1);
3261 }
3262
3263 /*
3264 * This is the ND pre-output routine; care must be taken to ensure that
3265 * the "hint" route never gets freed via rtfree(), since the caller may
3266 * have stored it inside a struct route with a reference held for that
3267 * placeholder.
3268 */
3269 errno_t
3270 nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest,
3271 struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
3272 mbuf_t packet)
3273 {
3274 route_t route = hint;
3275 errno_t result = 0;
3276 struct sockaddr_dl *sdl = NULL;
3277 size_t copy_len;
3278
3279 if (ip6_dest->sin6_family != AF_INET6)
3280 return (EAFNOSUPPORT);
3281
3282 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
3283 return (ENETDOWN);
3284
3285 if (hint != NULL) {
3286 /*
3287 * Callee holds a reference on the route and returns
3288 * with the route entry locked, upon success.
3289 */
3290 result = arp_route_to_gateway_route(
3291 (const struct sockaddr*)ip6_dest, hint, &route);
3292 if (result != 0)
3293 return (result);
3294 if (route != NULL)
3295 RT_LOCK_ASSERT_HELD(route);
3296 }
3297
3298 if ((packet->m_flags & M_MCAST) != 0) {
3299 if (route != NULL)
3300 RT_UNLOCK(route);
3301 result = dlil_resolve_multi(ifp,
3302 (const struct sockaddr*)ip6_dest,
3303 (struct sockaddr *)ll_dest, ll_dest_len);
3304 if (route != NULL)
3305 RT_LOCK(route);
3306 goto release;
3307 }
3308
3309 if (route == NULL) {
3310 /*
3311 * This could happen, if we could not allocate memory or
3312 * if arp_route_to_gateway_route() didn't return a route.
3313 */
3314 result = ENOBUFS;
3315 goto release;
3316 }
3317
3318 if (route->rt_gateway->sa_family != AF_LINK) {
3319 printf("nd6_lookup_ipv6: gateway address not AF_LINK\n");
3320 result = EADDRNOTAVAIL;
3321 goto release;
3322 }
3323
3324 sdl = SDL(route->rt_gateway);
3325 if (sdl->sdl_alen == 0) {
3326 /* this should be impossible, but we bark here for debugging */
3327 printf("nd6_lookup_ipv6: sdl_alen == 0\n");
3328 result = EHOSTUNREACH;
3329 goto release;
3330 }
3331
3332 copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
3333 bcopy(sdl, ll_dest, copy_len);
3334
3335 release:
3336 if (route != NULL) {
3337 if (route == hint) {
3338 RT_REMREF_LOCKED(route);
3339 RT_UNLOCK(route);
3340 } else {
3341 RT_UNLOCK(route);
3342 rtfree(route);
3343 }
3344 }
3345 return (result);
3346 }
3347
3348 SYSCTL_DECL(_net_inet6_icmp6);
3349
3350 static int
3351 nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
3352 {
3353 #pragma unused(oidp, arg1, arg2)
3354 int error = 0;
3355 char buf[1024];
3356 struct nd_defrouter *dr;
3357 int p64 = proc_is64bit(req->p);
3358
3359 if (req->newptr)
3360 return (EPERM);
3361
3362 lck_mtx_lock(nd6_mutex);
3363 if (p64) {
3364 struct in6_defrouter_64 *d, *de;
3365
3366 for (dr = TAILQ_FIRST(&nd_defrouter);
3367 dr;
3368 dr = TAILQ_NEXT(dr, dr_entry)) {
3369 d = (struct in6_defrouter_64 *)buf;
3370 de = (struct in6_defrouter_64 *)(buf + sizeof (buf));
3371
3372 if (d + 1 <= de) {
3373 bzero(d, sizeof (*d));
3374 d->rtaddr.sin6_family = AF_INET6;
3375 d->rtaddr.sin6_len = sizeof (d->rtaddr);
3376 if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
3377 dr->ifp) != 0)
3378 log(LOG_ERR,
3379 "scope error in "
3380 "default router list (%s)\n",
3381 ip6_sprintf(&dr->rtaddr));
3382 d->flags = dr->flags;
3383 d->stateflags = dr->stateflags;
3384 d->stateflags &= ~NDDRF_PROCESSED;
3385 d->rtlifetime = dr->rtlifetime;
3386 d->expire = dr->expire;
3387 d->if_index = dr->ifp->if_index;
3388 } else {
3389 panic("buffer too short");
3390 }
3391 error = SYSCTL_OUT(req, buf, sizeof (*d));
3392 if (error)
3393 break;
3394 }
3395 } else {
3396 struct in6_defrouter_32 *d_32, *de_32;
3397
3398 for (dr = TAILQ_FIRST(&nd_defrouter);
3399 dr;
3400 dr = TAILQ_NEXT(dr, dr_entry)) {
3401 d_32 = (struct in6_defrouter_32 *)buf;
3402 de_32 = (struct in6_defrouter_32 *)(buf + sizeof (buf));
3403
3404 if (d_32 + 1 <= de_32) {
3405 bzero(d_32, sizeof (*d_32));
3406 d_32->rtaddr.sin6_family = AF_INET6;
3407 d_32->rtaddr.sin6_len = sizeof (d_32->rtaddr);
3408 if (in6_recoverscope(&d_32->rtaddr, &dr->rtaddr,
3409 dr->ifp) != 0)
3410 log(LOG_ERR,
3411 "scope error in "
3412 "default router list (%s)\n",
3413 ip6_sprintf(&dr->rtaddr));
3414 d_32->flags = dr->flags;
3415 d_32->stateflags = dr->stateflags;
3416 d_32->stateflags &= ~NDDRF_PROCESSED;
3417 d_32->rtlifetime = dr->rtlifetime;
3418 d_32->expire = dr->expire;
3419 d_32->if_index = dr->ifp->if_index;
3420 } else {
3421 panic("buffer too short");
3422 }
3423 error = SYSCTL_OUT(req, buf, sizeof (*d_32));
3424 if (error)
3425 break;
3426 }
3427 }
3428 lck_mtx_unlock(nd6_mutex);
3429 return (error);
3430 }
3431
3432 static int
3433 nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
3434 {
3435 #pragma unused(oidp, arg1, arg2)
3436 int error = 0;
3437 char buf[1024];
3438 struct nd_prefix *pr;
3439 int p64 = proc_is64bit(req->p);
3440
3441 if (req->newptr)
3442 return (EPERM);
3443
3444 lck_mtx_lock(nd6_mutex);
3445 if (p64) {
3446 struct in6_prefix_64 *p, *pe;
3447
3448 for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
3449 u_short advrtrs = 0;
3450 size_t advance;
3451 struct sockaddr_in6 *sin6, *s6;
3452 struct nd_pfxrouter *pfr;
3453
3454 p = (struct in6_prefix_64 *)buf;
3455 pe = (struct in6_prefix_64 *)(buf + sizeof (buf));
3456
3457 if (p + 1 <= pe) {
3458 bzero(p, sizeof (*p));
3459 sin6 = (struct sockaddr_in6 *)(p + 1);
3460
3461 NDPR_LOCK(pr);
3462 p->prefix = pr->ndpr_prefix;
3463 if (in6_recoverscope(&p->prefix,
3464 &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
3465 log(LOG_ERR,
3466 "scope error in prefix list (%s)\n",
3467 ip6_sprintf(&p->prefix.sin6_addr));
3468 p->raflags = pr->ndpr_raf;
3469 p->prefixlen = pr->ndpr_plen;
3470 p->vltime = pr->ndpr_vltime;
3471 p->pltime = pr->ndpr_pltime;
3472 p->if_index = pr->ndpr_ifp->if_index;
3473 p->expire = pr->ndpr_expire;
3474 p->refcnt = pr->ndpr_addrcnt;
3475 p->flags = pr->ndpr_stateflags;
3476 p->origin = PR_ORIG_RA;
3477 advrtrs = 0;
3478 for (pfr = pr->ndpr_advrtrs.lh_first;
3479 pfr;
3480 pfr = pfr->pfr_next) {
3481 if ((void *)&sin6[advrtrs + 1] >
3482 (void *)pe) {
3483 advrtrs++;
3484 continue;
3485 }
3486 s6 = &sin6[advrtrs];
3487 bzero(s6, sizeof (*s6));
3488 s6->sin6_family = AF_INET6;
3489 s6->sin6_len = sizeof (*sin6);
3490 if (in6_recoverscope(s6,
3491 &pfr->router->rtaddr,
3492 pfr->router->ifp) != 0)
3493 log(LOG_ERR, "scope error in "
3494 "prefix list (%s)\n",
3495 ip6_sprintf(&pfr->router->
3496 rtaddr));
3497 advrtrs++;
3498 }
3499 p->advrtrs = advrtrs;
3500 NDPR_UNLOCK(pr);
3501 } else {
3502 panic("buffer too short");
3503 }
3504 advance = sizeof (*p) + sizeof (*sin6) * advrtrs;
3505 error = SYSCTL_OUT(req, buf, advance);
3506 if (error)
3507 break;
3508 }
3509 } else {
3510 struct in6_prefix_32 *p_32, *pe_32;
3511
3512 for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
3513 u_short advrtrs = 0;
3514 size_t advance;
3515 struct sockaddr_in6 *sin6, *s6;
3516 struct nd_pfxrouter *pfr;
3517
3518 p_32 = (struct in6_prefix_32 *)buf;
3519 pe_32 = (struct in6_prefix_32 *)(buf + sizeof (buf));
3520
3521 if (p_32 + 1 <= pe_32) {
3522 bzero(p_32, sizeof (*p_32));
3523 sin6 = (struct sockaddr_in6 *)(p_32 + 1);
3524
3525 NDPR_LOCK(pr);
3526 p_32->prefix = pr->ndpr_prefix;
3527 if (in6_recoverscope(&p_32->prefix,
3528 &p_32->prefix.sin6_addr, pr->ndpr_ifp) != 0)
3529 log(LOG_ERR, "scope error in prefix "
3530 "list (%s)\n", ip6_sprintf(&p_32->
3531 prefix.sin6_addr));
3532 p_32->raflags = pr->ndpr_raf;
3533 p_32->prefixlen = pr->ndpr_plen;
3534 p_32->vltime = pr->ndpr_vltime;
3535 p_32->pltime = pr->ndpr_pltime;
3536 p_32->if_index = pr->ndpr_ifp->if_index;
3537 p_32->expire = pr->ndpr_expire;
3538 p_32->refcnt = pr->ndpr_addrcnt;
3539 p_32->flags = pr->ndpr_stateflags;
3540 p_32->origin = PR_ORIG_RA;
3541 advrtrs = 0;
3542 for (pfr = pr->ndpr_advrtrs.lh_first;
3543 pfr;
3544 pfr = pfr->pfr_next) {
3545 if ((void *)&sin6[advrtrs + 1] >
3546 (void *)pe_32) {
3547 advrtrs++;
3548 continue;
3549 }
3550 s6 = &sin6[advrtrs];
3551 bzero(s6, sizeof (*s6));
3552 s6->sin6_family = AF_INET6;
3553 s6->sin6_len = sizeof (*sin6);
3554 if (in6_recoverscope(s6,
3555 &pfr->router->rtaddr,
3556 pfr->router->ifp) != 0)
3557 log(LOG_ERR, "scope error in "
3558 "prefix list (%s)\n",
3559 ip6_sprintf(&pfr->router->
3560 rtaddr));
3561 advrtrs++;
3562 }
3563 p_32->advrtrs = advrtrs;
3564 NDPR_UNLOCK(pr);
3565 } else {
3566 panic("buffer too short");
3567 }
3568 advance = sizeof (*p_32) + sizeof (*sin6) * advrtrs;
3569 error = SYSCTL_OUT(req, buf, advance);
3570 if (error)
3571 break;
3572 }
3573 }
3574 lck_mtx_unlock(nd6_mutex);
3575 return (error);
3576 }
3577 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
3578 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter","");
3579 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
3580 CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter","");
3581