]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/nd6.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / netinet6 / nd6.c
CommitLineData
b0d623f7 1/*
b7266188 2 * Copyright (c) 2008-2009 Apple Inc. All rights reserved.
b0d623f7
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
55e303ae 29/* $FreeBSD: src/sys/netinet6/nd6.c,v 1.20 2002/08/02 20:49:14 rwatson Exp $ */
9bccf70c 30/* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */
1c79356b
A
31
32/*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61/*
62 * XXX
63 * KAME 970409 note:
64 * BSD/OS version heavily modifies this code, related to llinfo.
65 * Since we don't have BSD/OS version of net/route.c in our hand,
66 * I left the code mostly as it was in 970310. -- itojun
67 */
68
69#include <sys/param.h>
70#include <sys/systm.h>
71#include <sys/malloc.h>
72#include <sys/mbuf.h>
73#include <sys/socket.h>
74#include <sys/sockio.h>
75#include <sys/time.h>
76#include <sys/kernel.h>
2d21ac55 77#include <sys/sysctl.h>
1c79356b 78#include <sys/errno.h>
1c79356b
A
79#include <sys/syslog.h>
80#include <sys/protosw.h>
b0d623f7 81#include <sys/proc.h>
1c79356b 82#include <kern/queue.h>
b0d623f7 83#include <kern/zalloc.h>
1c79356b 84
9bccf70c 85#define DONT_WARN_OBSOLETE
1c79356b
A
86#include <net/if.h>
87#include <net/if_dl.h>
88#include <net/if_types.h>
1c79356b 89#include <net/if_atm.h>
1c79356b
A
90#include <net/route.h>
91#include <net/dlil.h>
92
93#include <netinet/in.h>
b0d623f7 94#include <netinet/in_arp.h>
1c79356b 95#include <netinet/if_ether.h>
1c79356b 96#include <netinet/if_fddi.h>
1c79356b
A
97#include <netinet6/in6_var.h>
98#include <netinet/ip6.h>
99#include <netinet6/ip6_var.h>
100#include <netinet6/nd6.h>
101#include <netinet6/in6_prefix.h>
102#include <netinet/icmp6.h>
103
1c79356b 104#include "loop.h"
1c79356b
A
105
106#include <net/net_osdep.h>
107
/*
 * Timer periods, in seconds; they are multiplied by hz when a timeout
 * is armed.
 */
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */

/*
 * Pointer-cast helpers.  Each argument is fully parenthesized so that an
 * expression argument such as SIN6(p + off) is cast as a whole instead of
 * having the cast bind only to its first operand.
 */
#define SA(p) ((struct sockaddr *)(p))
#define SIN6(s) ((struct sockaddr_in6 *)(s))
#define SDL(s) ((struct sockaddr_dl *)(s))

/* Byte-wise comparison of two sockaddrs over the length of the first. */
#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
1c79356b
A
115
/*
 * Timer tunables (all expressed in seconds unless noted otherwise).
 */
int nd6_prune = 1;			/* period of the list walk */
int nd6_delay = 5;			/* delay before the first probe */
int nd6_umaxtries = 3;			/* max number of unicast queries */
int nd6_mmaxtries = 3;			/* max number of multicast queries */
int nd6_useloopback = 1;		/* route local traffic via loopback */
int nd6_gctimer = (60 * 60 * 24);	/* garbage collection timer: 1 day */

/* Upper bound on ND options per message; guards the option parsing loop. */
int nd6_maxndopt = 10;

/* Max number of subsequent upper layer hints. */
int nd6_maxnudhint = 0;

/* Debug logging defaults to on only when built with ND6_DEBUG. */
#if ND6_DEBUG
int nd6_debug = 1;
#else
int nd6_debug = 0;
#endif

/* for debugging? */
static int nd6_inuse;
static int nd6_allocated;
137
b0d623f7
A
138/*
139 * Synchronization notes:
140 *
141 * The global list of ND entries are stored in llinfo_nd6; an entry
142 * gets inserted into the list when the route is created and gets
143 * removed from the list when it is deleted; this is done as part
144 * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
145 *
146 * Because rnh_lock and rt_lock for the entry are held during those
147 * operations, the same locks (and thus lock ordering) must be used
148 * elsewhere to access the relevant data structure fields:
149 *
150 * ln_next, ln_prev, ln_rt
151 *
152 * - Routing lock (rnh_lock)
153 *
154 * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_byhint, ln_flags
155 *
156 * - Routing entry lock (rt_lock)
157 *
158 * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
159 * as the route entry itself. When a route is deleted (RTM_DELETE),
160 * it is simply removed from the global list but the memory is not
161 * freed until the route itself is freed.
162 */
163struct llinfo_nd6 llinfo_nd6 = {
164 &llinfo_nd6, &llinfo_nd6, NULL, NULL, 0, 0, 0, 0, 0, 0
165};
166
167/* Protected by nd_if_rwlock */
4a3eedf9 168size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
1c79356b 169struct nd_ifinfo *nd_ifinfo = NULL;
b0d623f7
A
170
171static lck_grp_attr_t *nd_if_rwlock_grp_attr;
172static lck_grp_t *nd_if_rwlock_grp;
173static lck_attr_t *nd_if_rwlock_attr;
174lck_rw_t *nd_if_rwlock;
175
176/* Protected by nd6_mutex */
1c79356b
A
177struct nd_drhead nd_defrouter;
178struct nd_prhead nd_prefix = { 0 };
179
180int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
181static struct sockaddr_in6 all1_sa;
182
91447636 183static int regen_tmpaddr(struct in6_ifaddr *);
91447636
A
184extern lck_mtx_t *ip6_mutex;
185extern lck_mtx_t *nd6_mutex;
1c79356b 186
91447636 187static void nd6_slowtimo(void *ignored_arg);
b0d623f7
A
188static struct llinfo_nd6 *nd6_llinfo_alloc(void);
189static void nd6_llinfo_free(void *);
1c79356b 190
b0d623f7
A
191static void nd6_siocgdrlst(void *, int);
192static void nd6_siocgprlst(void *, int);
193
/*
 * List maintenance for llinfo_nd6.  Both macros assert that the caller
 * owns rnh_lock and holds the lock of the entry's route.
 */

/* Unlink _ln from the list, clear its links and drop ND6_LNF_IN_USE. */
#define LN_DEQUEUE(_ln) do {						\
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);			\
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);				\
	(_ln)->ln_next->ln_prev = (_ln)->ln_prev;			\
	(_ln)->ln_prev->ln_next = (_ln)->ln_next;			\
	(_ln)->ln_prev = (_ln)->ln_next = NULL;				\
	(_ln)->ln_flags &= ~ND6_LNF_IN_USE;				\
} while (0)

/* Link _ln right after the list head and set ND6_LNF_IN_USE. */
#define LN_INSERTHEAD(_ln) do {						\
	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);			\
	RT_LOCK_ASSERT_HELD((_ln)->ln_rt);				\
	(_ln)->ln_next = llinfo_nd6.ln_next;				\
	llinfo_nd6.ln_next = (_ln);					\
	(_ln)->ln_prev = &llinfo_nd6;					\
	(_ln)->ln_next->ln_prev = (_ln);				\
	(_ln)->ln_flags |= ND6_LNF_IN_USE;				\
} while (0)

/* Zone that llinfo_nd6 entries are allocated from; set up in nd6_init() */
static struct zone *llinfo_nd6_zone;
#define LLINFO_ND6_ZONE_MAX 256 /* maximum elements in zone */
#define LLINFO_ND6_ZONE_NAME "llinfo_nd6" /* name for zone */
e2fac8b1 219
1c79356b
A
220void
221nd6_init()
222{
223 static int nd6_init_done = 0;
224 int i;
225
226 if (nd6_init_done) {
b0d623f7 227 log(LOG_NOTICE, "nd6_init called more than once (ignored)\n");
1c79356b
A
228 return;
229 }
230
231 all1_sa.sin6_family = AF_INET6;
232 all1_sa.sin6_len = sizeof(struct sockaddr_in6);
233 for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
234 all1_sa.sin6_addr.s6_addr[i] = 0xff;
235
236 /* initialization of the default router list */
237 TAILQ_INIT(&nd_defrouter);
238
b0d623f7
A
239 nd_if_rwlock_grp_attr = lck_grp_attr_alloc_init();
240 nd_if_rwlock_grp = lck_grp_alloc_init("nd_if_rwlock",
241 nd_if_rwlock_grp_attr);
242 nd_if_rwlock_attr = lck_attr_alloc_init();
243 nd_if_rwlock = lck_rw_alloc_init(nd_if_rwlock_grp, nd_if_rwlock_attr);
244
245 llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
246 LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
247 LLINFO_ND6_ZONE_NAME);
248 if (llinfo_nd6_zone == NULL)
249 panic("%s: failed allocating llinfo_nd6_zone", __func__);
250
251 zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
252
1c79356b
A
253 nd6_init_done = 1;
254
255 /* start timer */
91447636 256 timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz);
1c79356b
A
257}
258
b0d623f7
A
259static struct llinfo_nd6 *
260nd6_llinfo_alloc(void)
261{
262 return (zalloc(llinfo_nd6_zone));
263}
264
265static void
266nd6_llinfo_free(void *arg)
267{
268 struct llinfo_nd6 *ln = arg;
269
270 if (ln->ln_next != NULL || ln->ln_prev != NULL) {
271 panic("%s: trying to free %p when it is in use", __func__, ln);
272 /* NOTREACHED */
273 }
274
275 /* Just in case there's anything there, free it */
276 if (ln->ln_hold != NULL) {
277 m_freem(ln->ln_hold);
278 ln->ln_hold = NULL;
279 }
280
281 zfree(llinfo_nd6_zone, ln);
282}
283
284int
285nd6_ifattach(struct ifnet *ifp)
1c79356b 286{
1c79356b
A
287
288 /*
289 * We have some arrays that should be indexed by if_index.
290 * since if_index will grow dynamically, they should grow too.
291 */
b0d623f7 292 lck_rw_lock_exclusive(nd_if_rwlock);
9bccf70c 293 if (nd_ifinfo == NULL || if_index >= nd_ifinfo_indexlim) {
1c79356b
A
294 size_t n;
295 caddr_t q;
b0d623f7 296 size_t newlim = nd_ifinfo_indexlim;
1c79356b 297
b0d623f7
A
298 while (if_index >= newlim)
299 newlim <<= 1;
1c79356b
A
300
301 /* grow nd_ifinfo */
b0d623f7 302 n = newlim * sizeof(struct nd_ifinfo);
1c79356b 303 q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
b0d623f7
A
304 if (q == NULL) {
305 lck_rw_done(nd_if_rwlock);
306 return ENOBUFS;
307 }
1c79356b 308 bzero(q, n);
b0d623f7 309 nd_ifinfo_indexlim = newlim;
1c79356b
A
310 if (nd_ifinfo) {
311 bcopy((caddr_t)nd_ifinfo, q, n/2);
b0d623f7
A
312 /*
313 * We might want to pattern fill the old
314 * array to catch use-after-free cases.
315 */
9bccf70c 316 FREE((caddr_t)nd_ifinfo, M_IP6NDP);
1c79356b
A
317 }
318 nd_ifinfo = (struct nd_ifinfo *)q;
319 }
b0d623f7 320 lck_rw_done(nd_if_rwlock);
1c79356b
A
321
322#define ND nd_ifinfo[ifp->if_index]
9bccf70c
A
323
324 /*
325 * Don't initialize if called twice.
326 * XXX: to detect this, we should choose a member that is never set
327 * before initialization of the ND structure itself. We formaly used
328 * the linkmtu member, which was not suitable because it could be
329 * initialized via "ifconfig mtu".
330 */
b0d623f7
A
331 lck_rw_lock_shared(nd_if_rwlock);
332 if (ND.basereachable) {
333 lck_rw_done(nd_if_rwlock);
334 return 0;
335 }
336 ND.linkmtu = ifp->if_mtu;
1c79356b
A
337 ND.chlim = IPV6_DEFHLIM;
338 ND.basereachable = REACHABLE_TIME;
339 ND.reachable = ND_COMPUTE_RTIME(ND.basereachable);
340 ND.retrans = RETRANS_TIMER;
341 ND.receivedra = 0;
342 ND.flags = ND6_IFF_PERFORMNUD;
b0d623f7 343 lck_rw_done(nd_if_rwlock);
1c79356b
A
344 nd6_setmtu(ifp);
345#undef ND
b0d623f7
A
346
347 return 0;
1c79356b
A
348}
349
350/*
351 * Reset ND level link MTU. This function is called when the physical MTU
352 * changes, which means we might have to adjust the ND level MTU.
353 */
354void
2d21ac55 355nd6_setmtu(struct ifnet *ifp)
1c79356b 356{
55e303ae 357 struct nd_ifinfo *ndi;
b0d623f7 358 u_int32_t oldmaxmtu, maxmtu;
55e303ae
A
359
360 /*
361 * Make sure IPv6 is enabled for the interface first,
362 * because this can be called directly from SIOCSIFMTU for IPv4
363 */
b0d623f7 364 lck_rw_lock_shared(nd_if_rwlock);
55e303ae 365 if (ifp->if_index >= nd_ifinfo_indexlim) {
b0d623f7 366 lck_rw_done(nd_if_rwlock);
55e303ae
A
367 return; /* we're out of bound for nd_ifinfo */
368 }
369
370 ndi = &nd_ifinfo[ifp->if_index];
371 oldmaxmtu = ndi->maxmtu;
1c79356b 372
2d21ac55
A
373 /*
374 * The ND level maxmtu is somewhat redundant to the interface MTU
375 * and is an implementation artifact of KAME. Instead of hard-
376 * limiting the maxmtu based on the interface type here, we simply
377 * take the if_mtu value since SIOCSIFMTU would have taken care of
378 * the sanity checks related to the maximum MTU allowed for the
379 * interface (a value that is known only by the interface layer),
380 * by sending the request down via ifnet_ioctl(). The use of the
381 * ND level maxmtu and linkmtu (the latter obtained via RA) are done
382 * via IN6_LINKMTU() which does further checking against if_mtu.
383 */
b0d623f7 384 maxmtu = ndi->maxmtu = ifp->if_mtu;
1c79356b 385
2d21ac55
A
386 /*
387 * Decreasing the interface MTU under IPV6 minimum MTU may cause
388 * undesirable situation. We thus notify the operator of the change
389 * explicitly. The check for oldmaxmtu is necessary to restrict the
390 * log to the case of changing the MTU, not initializing it.
391 */
392 if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
393 log(LOG_NOTICE, "nd6_setmtu: "
b0d623f7
A
394 "new link MTU on %s%d (%u) is too small for IPv6\n",
395 ifp->if_name, ifp->if_unit, (uint32_t)ndi->maxmtu);
1c79356b 396 }
b0d623f7 397 lck_rw_done(nd_if_rwlock);
2d21ac55
A
398
399 /* also adjust in6_maxmtu if necessary. */
b0d623f7 400 if (maxmtu > in6_maxmtu)
2d21ac55 401 in6_setmaxmtu();
1c79356b
A
402}
403
404void
91447636
A
405nd6_option_init(
406 void *opt,
407 int icmp6len,
408 union nd_opts *ndopts)
1c79356b
A
409{
410 bzero(ndopts, sizeof(*ndopts));
411 ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
412 ndopts->nd_opts_last
413 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
414
415 if (icmp6len == 0) {
416 ndopts->nd_opts_done = 1;
417 ndopts->nd_opts_search = NULL;
418 }
419}
420
421/*
422 * Take one ND option.
423 */
424struct nd_opt_hdr *
91447636
A
425nd6_option(
426 union nd_opts *ndopts)
1c79356b
A
427{
428 struct nd_opt_hdr *nd_opt;
429 int olen;
430
431 if (!ndopts)
432 panic("ndopts == NULL in nd6_option\n");
433 if (!ndopts->nd_opts_last)
434 panic("uninitialized ndopts in nd6_option\n");
435 if (!ndopts->nd_opts_search)
436 return NULL;
437 if (ndopts->nd_opts_done)
438 return NULL;
439
440 nd_opt = ndopts->nd_opts_search;
441
9bccf70c
A
442 /* make sure nd_opt_len is inside the buffer */
443 if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
444 bzero(ndopts, sizeof(*ndopts));
445 return NULL;
446 }
447
1c79356b
A
448 olen = nd_opt->nd_opt_len << 3;
449 if (olen == 0) {
450 /*
451 * Message validation requires that all included
452 * options have a length that is greater than zero.
453 */
454 bzero(ndopts, sizeof(*ndopts));
455 return NULL;
456 }
457
458 ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
9bccf70c
A
459 if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
460 /* option overruns the end of buffer, invalid */
461 bzero(ndopts, sizeof(*ndopts));
462 return NULL;
463 } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
464 /* reached the end of options chain */
1c79356b
A
465 ndopts->nd_opts_done = 1;
466 ndopts->nd_opts_search = NULL;
467 }
468 return nd_opt;
469}
470
471/*
472 * Parse multiple ND options.
473 * This function is much easier to use, for ND routines that do not need
474 * multiple options of the same type.
475 */
476int
91447636
A
477nd6_options(
478 union nd_opts *ndopts)
1c79356b
A
479{
480 struct nd_opt_hdr *nd_opt;
481 int i = 0;
482
483 if (!ndopts)
484 panic("ndopts == NULL in nd6_options\n");
485 if (!ndopts->nd_opts_last)
486 panic("uninitialized ndopts in nd6_options\n");
487 if (!ndopts->nd_opts_search)
488 return 0;
489
490 while (1) {
491 nd_opt = nd6_option(ndopts);
492 if (!nd_opt && !ndopts->nd_opts_last) {
493 /*
494 * Message validation requires that all included
495 * options have a length that is greater than zero.
496 */
9bccf70c 497 icmp6stat.icp6s_nd_badopt++;
1c79356b
A
498 bzero(ndopts, sizeof(*ndopts));
499 return -1;
500 }
501
502 if (!nd_opt)
503 goto skip1;
504
505 switch (nd_opt->nd_opt_type) {
506 case ND_OPT_SOURCE_LINKADDR:
507 case ND_OPT_TARGET_LINKADDR:
508 case ND_OPT_MTU:
509 case ND_OPT_REDIRECTED_HEADER:
1c79356b 510 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
9bccf70c
A
511 nd6log((LOG_INFO,
512 "duplicated ND6 option found (type=%d)\n",
513 nd_opt->nd_opt_type));
1c79356b
A
514 /* XXX bark? */
515 } else {
516 ndopts->nd_opt_array[nd_opt->nd_opt_type]
517 = nd_opt;
518 }
519 break;
520 case ND_OPT_PREFIX_INFORMATION:
521 if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
522 ndopts->nd_opt_array[nd_opt->nd_opt_type]
523 = nd_opt;
524 }
525 ndopts->nd_opts_pi_end =
526 (struct nd_opt_prefix_info *)nd_opt;
527 break;
1c79356b
A
528 default:
529 /*
530 * Unknown options must be silently ignored,
531 * to accomodate future extension to the protocol.
532 */
9bccf70c 533 nd6log((LOG_DEBUG,
1c79356b 534 "nd6_options: unsupported option %d - "
9bccf70c 535 "option ignored\n", nd_opt->nd_opt_type));
1c79356b
A
536 }
537
538skip1:
539 i++;
540 if (i > nd6_maxndopt) {
541 icmp6stat.icp6s_nd_toomanyopt++;
9bccf70c 542 nd6log((LOG_INFO, "too many loop in nd opt\n"));
1c79356b
A
543 break;
544 }
545
546 if (ndopts->nd_opts_done)
547 break;
548 }
549
550 return 0;
551}
552
1c79356b 553void
d1ecb069 554nd6_drain(__unused void *ignored_arg)
0b4e3aa0 555{
9bccf70c
A
556 struct llinfo_nd6 *ln;
557 struct nd_defrouter *dr;
558 struct nd_prefix *pr;
91447636 559 struct ifnet *ifp = NULL;
9bccf70c
A
560 struct in6_ifaddr *ia6, *nia6;
561 struct in6_addrlifetime *lt6;
91447636
A
562 struct timeval timenow;
563
564 getmicrotime(&timenow);
b0d623f7
A
565again:
566 /*
567 * The global list llinfo_nd6 is modified by nd6_request() and is
568 * therefore protected by rnh_lock. For obvious reasons, we cannot
569 * hold rnh_lock across calls that might lead to code paths which
570 * attempt to acquire rnh_lock, else we deadlock. Hence for such
571 * cases we drop rt_lock and rnh_lock, make the calls, and repeat the
572 * loop. To ensure that we don't process the same entry more than
573 * once in a single timeout, we mark the "already-seen" entries with
574 * ND6_LNF_TIMER_SKIP flag. At the end of the loop, we do a second
575 * pass thru the entries and clear the flag so they can be processed
576 * during the next timeout.
577 */
578 lck_mtx_lock(rnh_lock);
1c79356b 579 ln = llinfo_nd6.ln_next;
b0d623f7 580 while (ln != NULL && ln != &llinfo_nd6) {
1c79356b 581 struct rtentry *rt;
1c79356b 582 struct sockaddr_in6 *dst;
b0d623f7
A
583 struct llinfo_nd6 *next;
584 struct nd_ifinfo ndi;
585
586 /* ln_next/prev/rt is protected by rnh_lock */
587 next = ln->ln_next;
588 rt = ln->ln_rt;
589 RT_LOCK(rt);
1c79356b 590
b0d623f7
A
591 /* We've seen this already; skip it */
592 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
593 RT_UNLOCK(rt);
1c79356b
A
594 ln = next;
595 continue;
596 }
b0d623f7
A
597
598 /* rt->rt_ifp should never be NULL */
1c79356b 599 if ((ifp = rt->rt_ifp) == NULL) {
b0d623f7
A
600 panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
601 ln, rt);
602 /* NOTREACHED */
1c79356b 603 }
e2fac8b1 604
b0d623f7
A
605 /* rt_llinfo must always be equal to ln */
606 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
607 panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
608 __func__, rt->rt_llinfo, ln);
609 /* NOTREACHED */
610 }
e2fac8b1 611
b0d623f7
A
612 /* rt_key should never be NULL */
613 dst = (struct sockaddr_in6 *)rt_key(rt);
614 if (dst == NULL) {
615 panic("%s: rt(%p) key is NULL ln(%p)", __func__,
616 rt, ln);
617 /* NOTREACHED */
1c79356b 618 }
55e303ae 619
b0d623f7
A
620 /* Set the flag in case we jump to "again" */
621 ln->ln_flags |= ND6_LNF_TIMER_SKIP;
622
623 if (ln->ln_expire > timenow.tv_sec) {
624 RT_UNLOCK(rt);
55e303ae
A
625 ln = next;
626 continue;
627 }
b0d623f7
A
628
629 /* Make a copy (we're using it read-only anyway) */
630 lck_rw_lock_shared(nd_if_rwlock);
631 if (ifp->if_index >= nd_ifinfo_indexlim) {
632 lck_rw_done(nd_if_rwlock);
633 RT_UNLOCK(rt);
55e303ae
A
634 ln = next;
635 continue;
636 }
b0d623f7
A
637 ndi = nd_ifinfo[ifp->if_index];
638 lck_rw_done(nd_if_rwlock);
639
640 RT_LOCK_ASSERT_HELD(rt);
1c79356b
A
641
642 switch (ln->ln_state) {
643 case ND6_LLINFO_INCOMPLETE:
644 if (ln->ln_asked < nd6_mmaxtries) {
645 ln->ln_asked++;
91447636 646 ln->ln_expire = timenow.tv_sec +
b0d623f7
A
647 ndi.retrans / 1000;
648 RT_ADDREF_LOCKED(rt);
649 RT_UNLOCK(rt);
650 lck_mtx_unlock(rnh_lock);
1c79356b 651 nd6_ns_output(ifp, NULL, &dst->sin6_addr,
91447636 652 ln, 0, 0);
b0d623f7 653 RT_REMREF(rt);
1c79356b
A
654 } else {
655 struct mbuf *m = ln->ln_hold;
55e303ae 656 ln->ln_hold = NULL;
b0d623f7
A
657 if (m != NULL) {
658 /*
659 * Fake rcvif to make ICMP error
660 * more helpful in diagnosing
661 * for the receiver.
662 * XXX: should we consider
663 * older rcvif?
664 */
665 m->m_pkthdr.rcvif = ifp;
666 RT_UNLOCK(rt);
667 lck_mtx_unlock(rnh_lock);
1c79356b
A
668 icmp6_error(m, ICMP6_DST_UNREACH,
669 ICMP6_DST_UNREACH_ADDR, 0);
b0d623f7
A
670 } else {
671 RT_UNLOCK(rt);
672 lck_mtx_unlock(rnh_lock);
1c79356b 673 }
b0d623f7 674 nd6_free(rt);
1c79356b 675 }
b0d623f7
A
676 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
677 goto again;
678
1c79356b 679 case ND6_LLINFO_REACHABLE:
9bccf70c 680 if (ln->ln_expire) {
1c79356b 681 ln->ln_state = ND6_LLINFO_STALE;
d1ecb069
A
682 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
683 nd6_gctimer);
9bccf70c 684 }
b0d623f7 685 RT_UNLOCK(rt);
1c79356b 686 break;
9bccf70c
A
687
688 case ND6_LLINFO_STALE:
e2fac8b1 689 case ND6_LLINFO_PURGE:
9bccf70c 690 /* Garbage Collection(RFC 2461 5.3) */
b0d623f7
A
691 if (ln->ln_expire) {
692 RT_UNLOCK(rt);
693 lck_mtx_unlock(rnh_lock);
694 nd6_free(rt);
695 lck_mtx_assert(rnh_lock,
696 LCK_MTX_ASSERT_NOTOWNED);
697 goto again;
698 } else {
699 RT_UNLOCK(rt);
700 }
9bccf70c
A
701 break;
702
1c79356b 703 case ND6_LLINFO_DELAY:
b0d623f7 704 if ((ndi.flags & ND6_IFF_PERFORMNUD) != 0) {
1c79356b
A
705 /* We need NUD */
706 ln->ln_asked = 1;
707 ln->ln_state = ND6_LLINFO_PROBE;
91447636 708 ln->ln_expire = timenow.tv_sec +
b0d623f7
A
709 ndi.retrans / 1000;
710 RT_ADDREF_LOCKED(rt);
711 RT_UNLOCK(rt);
712 lck_mtx_unlock(rnh_lock);
1c79356b 713 nd6_ns_output(ifp, &dst->sin6_addr,
b0d623f7
A
714 &dst->sin6_addr, ln, 0, 0);
715 lck_mtx_assert(rnh_lock,
716 LCK_MTX_ASSERT_NOTOWNED);
717 RT_REMREF(rt);
718 goto again;
9bccf70c 719 }
b0d623f7 720 ln->ln_state = ND6_LLINFO_STALE; /* XXX */
d1ecb069
A
721 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
722 nd6_gctimer);
b0d623f7 723 RT_UNLOCK(rt);
1c79356b 724 break;
b0d623f7 725
1c79356b
A
726 case ND6_LLINFO_PROBE:
727 if (ln->ln_asked < nd6_umaxtries) {
728 ln->ln_asked++;
91447636 729 ln->ln_expire = timenow.tv_sec +
b0d623f7
A
730 ndi.retrans / 1000;
731 RT_ADDREF_LOCKED(rt);
732 RT_UNLOCK(rt);
733 lck_mtx_unlock(rnh_lock);
1c79356b 734 nd6_ns_output(ifp, &dst->sin6_addr,
b0d623f7
A
735 &dst->sin6_addr, ln, 0, 0);
736 RT_REMREF(rt);
1c79356b 737 } else {
b0d623f7
A
738 RT_UNLOCK(rt);
739 lck_mtx_unlock(rnh_lock);
740 nd6_free(rt);
1c79356b 741 }
b0d623f7
A
742 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
743 goto again;
744
745 default:
746 RT_UNLOCK(rt);
1c79356b 747 break;
1c79356b
A
748 }
749 ln = next;
750 }
b0d623f7
A
751 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
752
753 /* Now clear the flag from all entries */
754 ln = llinfo_nd6.ln_next;
755 while (ln != NULL && ln != &llinfo_nd6) {
756 struct rtentry *rt = ln->ln_rt;
757 struct llinfo_nd6 *next = ln->ln_next;
758
759 RT_LOCK_SPIN(rt);
760 if (ln->ln_flags & ND6_LNF_TIMER_SKIP)
761 ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
762 RT_UNLOCK(rt);
763 ln = next;
764 }
765 lck_mtx_unlock(rnh_lock);
766
9bccf70c 767 /* expire default router list */
91447636 768 lck_mtx_lock(nd6_mutex);
1c79356b
A
769 dr = TAILQ_FIRST(&nd_defrouter);
770 while (dr) {
91447636 771 if (dr->expire && dr->expire < timenow.tv_sec) {
1c79356b
A
772 struct nd_defrouter *t;
773 t = TAILQ_NEXT(dr, dr_entry);
91447636 774 defrtrlist_del(dr, 1);
1c79356b
A
775 dr = t;
776 } else {
1c79356b
A
777 dr = TAILQ_NEXT(dr, dr_entry);
778 }
779 }
1c79356b 780
9bccf70c
A
781 /*
782 * expire interface addresses.
783 * in the past the loop was inside prefix expiry processing.
784 * However, from a stricter speci-confrmance standpoint, we should
785 * rather separate address lifetimes and prefix lifetimes.
786 */
787 addrloop:
91447636 788 for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
9bccf70c
A
789 nia6 = ia6->ia_next;
790 /* check address lifetime */
791 lt6 = &ia6->ia6_lifetime;
792 if (IFA6_IS_INVALID(ia6)) {
793 int regen = 0;
794
b0d623f7
A
795 /*
796 * Extra reference for ourselves; it's no-op if
797 * we don't have to regenerate temporary address,
798 * otherwise it protects the address from going
799 * away since we drop nd6_mutex below.
800 */
801 ifaref(&ia6->ia_ifa);
802
9bccf70c
A
803 /*
804 * If the expiring address is temporary, try
805 * regenerating a new one. This would be useful when
55e303ae 806 * we suspended a laptop PC, then turned it on after a
9bccf70c
A
807 * period that could invalidate all temporary
808 * addresses. Although we may have to restart the
809 * loop (see below), it must be after purging the
810 * address. Otherwise, we'd see an infinite loop of
811 * regeneration.
812 */
813 if (ip6_use_tempaddr &&
814 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
2d21ac55
A
815 /* NOTE: We have to drop the lock here because
816 * regen_tmpaddr() eventually calls in6_update_ifa(),
817 * which must take the lock and would otherwise cause a
818 * hang. This is safe because the goto addrloop
819 * leads to a reevaluation of the in6_ifaddrs list
820 */
821 lck_mtx_unlock(nd6_mutex);
822 if (regen_tmpaddr(ia6) == 0)
9bccf70c 823 regen = 1;
2d21ac55 824 lck_mtx_lock(nd6_mutex);
9bccf70c
A
825 }
826
91447636 827 in6_purgeaddr(&ia6->ia_ifa, 1);
9bccf70c 828
b0d623f7
A
829 /* Release extra reference taken above */
830 ifafree(&ia6->ia_ifa);
831
9bccf70c
A
832 if (regen)
833 goto addrloop; /* XXX: see below */
55e303ae
A
834 }
835 if (IFA6_IS_DEPRECATED(ia6)) {
9bccf70c
A
836 int oldflags = ia6->ia6_flags;
837
838 ia6->ia6_flags |= IN6_IFF_DEPRECATED;
839
840 /*
841 * If a temporary address has just become deprecated,
842 * regenerate a new one if possible.
843 */
844 if (ip6_use_tempaddr &&
845 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
846 (oldflags & IN6_IFF_DEPRECATED) == 0) {
847
2d21ac55
A
848 /* see NOTE above */
849 lck_mtx_unlock(nd6_mutex);
9bccf70c
A
850 if (regen_tmpaddr(ia6) == 0) {
851 /*
852 * A new temporary address is
853 * generated.
854 * XXX: this means the address chain
855 * has changed while we are still in
856 * the loop. Although the change
857 * would not cause disaster (because
55e303ae
A
858 * it's not a deletion, but an
859 * addition,) we'd rather restart the
9bccf70c
A
860 * loop just for safety. Or does this
861 * significantly reduce performance??
862 */
2d21ac55 863 lck_mtx_lock(nd6_mutex);
9bccf70c
A
864 goto addrloop;
865 }
2d21ac55 866 lck_mtx_lock(nd6_mutex);
1c79356b 867 }
55e303ae 868 } else {
9bccf70c
A
869 /*
870 * A new RA might have made a deprecated address
871 * preferred.
872 */
873 ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1c79356b 874 }
9bccf70c 875 }
1c79356b 876
9bccf70c
A
877 /* expire prefix list */
878 pr = nd_prefix.lh_first;
879 while (pr) {
1c79356b
A
880 /*
881 * check prefix lifetime.
882 * since pltime is just for autoconf, pltime processing for
883 * prefix is not necessary.
1c79356b 884 */
91447636 885 if (pr->ndpr_expire && pr->ndpr_expire < timenow.tv_sec) {
1c79356b
A
886 struct nd_prefix *t;
887 t = pr->ndpr_next;
888
889 /*
890 * address expiration and prefix expiration are
9bccf70c 891 * separate. NEVER perform in6_purgeaddr here.
1c79356b
A
892 */
893
91447636 894 prelist_remove(pr, 1);
1c79356b
A
895 pr = t;
896 } else
897 pr = pr->ndpr_next;
898 }
91447636 899 lck_mtx_unlock(nd6_mutex);
d1ecb069
A
900}
901
902/*
903 * ND6 timer routine to expire default route list and prefix list
904 */
905void
906nd6_timer(__unused void *ignored_arg)
907{
908 nd6_drain(NULL);
91447636 909 timeout(nd6_timer, (caddr_t)0, nd6_prune * hz);
1c79356b
A
910}
911
9bccf70c 912static int
91447636
A
913regen_tmpaddr(
914 struct in6_ifaddr *ia6) /* deprecated/invalidated temporary address */
9bccf70c
A
915{
916 struct ifaddr *ifa;
917 struct ifnet *ifp;
918 struct in6_ifaddr *public_ifa6 = NULL;
91447636
A
919 struct timeval timenow;
920
921 getmicrotime(&timenow);
9bccf70c
A
922
923 ifp = ia6->ia_ifa.ifa_ifp;
91447636 924 ifnet_lock_exclusive(ifp);
9bccf70c
A
925 for (ifa = ifp->if_addrlist.tqh_first; ifa;
926 ifa = ifa->ifa_list.tqe_next)
927 {
928 struct in6_ifaddr *it6;
929
930 if (ifa->ifa_addr->sa_family != AF_INET6)
931 continue;
932
933 it6 = (struct in6_ifaddr *)ifa;
934
935 /* ignore no autoconf addresses. */
936 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0)
937 continue;
938
939 /* ignore autoconf addresses with different prefixes. */
940 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr)
941 continue;
942
943 /*
944 * Now we are looking at an autoconf address with the same
945 * prefix as ours. If the address is temporary and is still
946 * preferred, do not create another one. It would be rare, but
947 * could happen, for example, when we resume a laptop PC after
948 * a long period.
949 */
950 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
951 !IFA6_IS_DEPRECATED(it6)) {
952 public_ifa6 = NULL;
953 break;
954 }
955
956 /*
957 * This is a public autoconf address that has the same prefix
958 * as ours. If it is preferred, keep it. We can't break the
959 * loop here, because there may be a still-preferred temporary
960 * address with the prefix.
961 */
962 if (!IFA6_IS_DEPRECATED(it6))
963 public_ifa6 = it6;
964 }
91447636 965 ifnet_lock_done(ifp);
9bccf70c
A
966
967 if (public_ifa6 != NULL) {
968 int e;
969
b0d623f7 970 if ((e = in6_tmpifadd(public_ifa6, 0, M_WAITOK)) != 0) {
9bccf70c
A
971 log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
972 " tmp addr,errno=%d\n", e);
973 return(-1);
974 }
975 return(0);
976 }
977
978 return(-1);
979}
980
1c79356b
A
981/*
982 * Nuke neighbor cache/prefix/default router management table, right before
983 * ifp goes away.
984 */
985void
91447636
A
986nd6_purge(
987 struct ifnet *ifp)
1c79356b 988{
b0d623f7 989 struct llinfo_nd6 *ln;
1c79356b
A
990 struct nd_defrouter *dr, *ndr, drany;
991 struct nd_prefix *pr, *npr;
992
993 /* Nuke default router list entries toward ifp */
91447636 994 lck_mtx_lock(nd6_mutex);
1c79356b
A
995 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
996 /*
997 * The first entry of the list may be stored in
998 * the routing table, so we'll delete it later.
999 */
1000 for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
1001 ndr = TAILQ_NEXT(dr, dr_entry);
1002 if (dr->ifp == ifp)
91447636 1003 defrtrlist_del(dr, 1);
1c79356b
A
1004 }
1005 dr = TAILQ_FIRST(&nd_defrouter);
1006 if (dr->ifp == ifp)
91447636 1007 defrtrlist_del(dr, 1);
1c79356b
A
1008 }
1009
1010 /* Nuke prefix list entries toward ifp */
1011 for (pr = nd_prefix.lh_first; pr; pr = npr) {
1012 npr = pr->ndpr_next;
1013 if (pr->ndpr_ifp == ifp) {
9bccf70c
A
1014 /*
1015 * Previously, pr->ndpr_addr is removed as well,
1016 * but I strongly believe we don't have to do it.
1017 * nd6_purge() is only called from in6_ifdetach(),
1018 * which removes all the associated interface addresses
1019 * by itself.
1020 * (jinmei@kame.net 20010129)
1021 */
91447636 1022 prelist_remove(pr, 1);
1c79356b
A
1023 }
1024 }
1025
1026 /* cancel default outgoing interface setting */
b0d623f7
A
1027 if (nd6_defifindex == ifp->if_index) {
1028 /* Release nd6_mutex as it will be acquired
1029 * during nd6_setdefaultiface again
1030 */
1031 lck_mtx_unlock(nd6_mutex);
1c79356b 1032 nd6_setdefaultiface(0);
b0d623f7
A
1033 lck_mtx_lock(nd6_mutex);
1034 }
1c79356b 1035
55e303ae 1036 if (!ip6_forwarding && (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
9bccf70c
A
1037 /* refresh default router list */
1038 bzero(&drany, sizeof(drany));
1039 defrouter_delreq(&drany, 0);
1040 defrouter_select();
1041 }
91447636 1042 lck_mtx_unlock(nd6_mutex);
1c79356b
A
1043
1044 /*
1045 * Nuke neighbor cache entries for the ifp.
1046 * Note that rt->rt_ifp may not be the same as ifp,
1047 * due to KAME goto ours hack. See RTM_RESOLVE case in
1048 * nd6_rtrequest(), and ip6_input().
1049 */
b0d623f7
A
1050again:
1051 lck_mtx_lock(rnh_lock);
1c79356b 1052 ln = llinfo_nd6.ln_next;
b0d623f7 1053 while (ln != NULL && ln != &llinfo_nd6) {
1c79356b 1054 struct rtentry *rt;
b0d623f7 1055 struct llinfo_nd6 *nln;
1c79356b
A
1056
1057 nln = ln->ln_next;
1058 rt = ln->ln_rt;
b0d623f7
A
1059 RT_LOCK(rt);
1060 if (rt->rt_gateway != NULL &&
1061 rt->rt_gateway->sa_family == AF_LINK &&
1062 SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
1063 RT_UNLOCK(rt);
1064 lck_mtx_unlock(rnh_lock);
1065 /*
1066 * See comments on nd6_timer() for reasons why
1067 * this loop is repeated; we bite the costs of
1068 * going thru the same llinfo_nd6 more than once
1069 * here, since this purge happens during detach,
1070 * and that unlike the timer case, it's possible
1071 * there's more than one purges happening at the
1072 * same time (thus a flag wouldn't buy anything).
1073 */
1074 nd6_free(rt);
1075 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1076 goto again;
1077 } else {
1078 RT_UNLOCK(rt);
1c79356b
A
1079 }
1080 ln = nln;
1081 }
b0d623f7 1082 lck_mtx_unlock(rnh_lock);
1c79356b
A
1083}
1084
b0d623f7
A
1085/*
1086 * Upon success, the returned route will be locked and the caller is
1087 * responsible for releasing the reference and doing RT_UNLOCK(rt).
1088 * This routine does not require rnh_lock to be held by the caller,
1089 * although it needs to be indicated of such a case in order to call
1090 * the correct variant of the relevant routing routines.
1091 */
1c79356b 1092struct rtentry *
91447636
A
1093nd6_lookup(
1094 struct in6_addr *addr6,
1095 int create,
1096 struct ifnet *ifp,
1097 int rt_locked)
1c79356b
A
1098{
1099 struct rtentry *rt;
1100 struct sockaddr_in6 sin6;
1101
1102 bzero(&sin6, sizeof(sin6));
1103 sin6.sin6_len = sizeof(struct sockaddr_in6);
1104 sin6.sin6_family = AF_INET6;
1105 sin6.sin6_addr = *addr6;
9bccf70c
A
1106#if SCOPEDROUTING
1107 sin6.sin6_scope_id = in6_addr2scopeid(ifp, addr6);
1108#endif
b0d623f7
A
1109 if (rt_locked)
1110 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1111
1112 rt = rt_locked ? rtalloc1_locked((struct sockaddr *)&sin6, create, 0) :
1113 rtalloc1((struct sockaddr *)&sin6, create, 0);
1114
1115 if (rt != NULL) {
1116 RT_LOCK(rt);
1117 if ((rt->rt_flags & RTF_LLINFO) == 0) {
1118 /*
1119 * This is the case for the default route. If we
1120 * want to create a neighbor cache for the address,
1121 * we should free the route for the destination and
1122 * allocate an interface route.
1123 */
1124 if (create) {
1125 RT_UNLOCK(rt);
1126 if (rt_locked)
1127 rtfree_locked(rt);
1128 else
1129 rtfree(rt);
1130 rt = NULL;
1131 }
1c79356b
A
1132 }
1133 }
b0d623f7 1134 if (rt == NULL) {
1c79356b 1135 if (create && ifp) {
b0d623f7 1136 struct ifaddr *ifa;
1c79356b
A
1137 int e;
1138
1139 /*
1140 * If no route is available and create is set,
1141 * we allocate a host route for the destination
1142 * and treat it like an interface route.
1143 * This hack is necessary for a neighbor which can't
1144 * be covered by our own prefix.
1145 */
b0d623f7
A
1146 ifa = ifaof_ifpforaddr((struct sockaddr *)&sin6, ifp);
1147 if (ifa == NULL)
1c79356b
A
1148 return(NULL);
1149
1150 /*
55e303ae 1151 * Create a new route. RTF_LLINFO is necessary
1c79356b
A
1152 * to create a Neighbor Cache entry for the
1153 * destination in nd6_rtrequest which will be
55e303ae 1154 * called in rtrequest via ifa->ifa_rtrequest.
1c79356b 1155 */
b0d623f7
A
1156 if (!rt_locked)
1157 lck_mtx_lock(rnh_lock);
1158 if ((e = rtrequest_locked(RTM_ADD,
1159 (struct sockaddr *)&sin6, ifa->ifa_addr,
1160 (struct sockaddr *)&all1_sa,
1161 (ifa->ifa_flags | RTF_HOST | RTF_LLINFO) &
1162 ~RTF_CLONING, &rt)) != 0) {
91447636 1163 if (e != EEXIST)
b0d623f7
A
1164 log(LOG_ERR, "%s: failed to add route "
1165 "for a neighbor(%s), errno=%d\n",
1166 __func__, ip6_sprintf(addr6), e);
91447636 1167 }
b0d623f7
A
1168 if (!rt_locked)
1169 lck_mtx_unlock(rnh_lock);
91447636 1170 ifafree(ifa);
b0d623f7 1171 if (rt == NULL)
1c79356b 1172 return(NULL);
b0d623f7
A
1173
1174 RT_LOCK(rt);
1c79356b 1175 if (rt->rt_llinfo) {
b0d623f7 1176 struct llinfo_nd6 *ln = rt->rt_llinfo;
1c79356b
A
1177 ln->ln_state = ND6_LLINFO_NOSTATE;
1178 }
91447636 1179 } else {
1c79356b 1180 return(NULL);
91447636 1181 }
1c79356b 1182 }
b0d623f7 1183 RT_LOCK_ASSERT_HELD(rt);
1c79356b
A
1184 /*
1185 * Validation for the entry.
55e303ae
A
1186 * Note that the check for rt_llinfo is necessary because a cloned
1187 * route from a parent route that has the L flag (e.g. the default
1188 * route to a p2p interface) may have the flag, too, while the
1189 * destination is not actually a neighbor.
1c79356b
A
1190 * XXX: we can't use rt->rt_ifp to check for the interface, since
1191 * it might be the loopback interface if the entry is for our
1192 * own address on a non-loopback interface. Instead, we should
55e303ae
A
1193 * use rt->rt_ifa->ifa_ifp, which would specify the REAL
1194 * interface.
1c79356b 1195 */
0b4c1975
A
1196 if (ifp == NULL || (ifp->if_type == IFT_PPP) ||
1197 (ifp->if_eflags & IFEF_NOAUTOIPV6LL) ||
1198 (rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
55e303ae 1199 rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
0b4c1975 1200 (ifp && rt->rt_ifa->ifa_ifp != ifp)) {
b0d623f7
A
1201 RT_REMREF_LOCKED(rt);
1202 RT_UNLOCK(rt);
1c79356b 1203 if (create) {
b0d623f7
A
1204 log(LOG_DEBUG, "%s: failed to lookup %s "
1205 "(if = %s)\n", __func__, ip6_sprintf(addr6),
1206 ifp ? if_name(ifp) : "unspec");
1c79356b
A
1207 /* xxx more logs... kazu */
1208 }
55e303ae 1209 return(NULL);
b0d623f7
A
1210 }
1211 /*
1212 * Caller needs to release reference and call RT_UNLOCK(rt).
1213 */
1c79356b
A
1214 return(rt);
1215}
1216
1217/*
1218 * Detect if a given IPv6 address identifies a neighbor on a given link.
1219 * XXX: should take care of the destination of a p2p link?
1220 */
1221int
91447636
A
1222nd6_is_addr_neighbor(
1223 struct sockaddr_in6 *addr,
1224 struct ifnet *ifp,
1225 int rt_locked)
1c79356b 1226{
9bccf70c 1227 struct ifaddr *ifa;
b0d623f7 1228 struct rtentry *rt;
1c79356b
A
1229 int i;
1230
1231#define IFADDR6(a) ((((struct in6_ifaddr *)(a))->ia_addr).sin6_addr)
1232#define IFMASK6(a) ((((struct in6_ifaddr *)(a))->ia_prefixmask).sin6_addr)
1233
9bccf70c
A
1234 /*
1235 * A link-local address is always a neighbor.
1236 * XXX: we should use the sin6_scope_id field rather than the embedded
1237 * interface index.
1238 */
1239 if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr) &&
1240 ntohs(*(u_int16_t *)&addr->sin6_addr.s6_addr[2]) == ifp->if_index)
1c79356b
A
1241 return(1);
1242
1243 /*
1244 * If the address matches one of our addresses,
1245 * it should be a neighbor.
1246 */
91447636 1247 ifnet_lock_shared(ifp);
1c79356b
A
1248 for (ifa = ifp->if_addrlist.tqh_first;
1249 ifa;
1250 ifa = ifa->ifa_list.tqe_next)
1c79356b
A
1251 {
1252 if (ifa->ifa_addr->sa_family != AF_INET6)
91447636 1253 continue;
1c79356b
A
1254
1255 for (i = 0; i < 4; i++) {
9bccf70c
A
1256 if ((IFADDR6(ifa).s6_addr32[i] ^
1257 addr->sin6_addr.s6_addr32[i]) &
91447636
A
1258 IFMASK6(ifa).s6_addr32[i])
1259 continue;
1c79356b 1260 }
91447636 1261 ifnet_lock_done(ifp);
1c79356b
A
1262 return(1);
1263 }
91447636 1264 ifnet_lock_done(ifp);
1c79356b
A
1265
1266 /*
1267 * Even if the address matches none of our addresses, it might be
b0d623f7
A
1268 * in the neighbor cache. Callee returns a locked route upon
1269 * success.
1c79356b 1270 */
b0d623f7
A
1271 if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
1272 RT_LOCK_ASSERT_HELD(rt);
1273 RT_REMREF_LOCKED(rt);
1274 RT_UNLOCK(rt);
1c79356b 1275 return(1);
b0d623f7 1276 }
1c79356b
A
1277
1278 return(0);
1279#undef IFADDR6
1280#undef IFMASK6
1281}
1282
1283/*
1284 * Free an nd6 llinfo entry.
1285 */
b0d623f7 1286void
91447636
A
1287nd6_free(
1288 struct rtentry *rt)
1c79356b 1289{
b0d623f7
A
1290 struct llinfo_nd6 *ln;
1291 struct in6_addr in6;
1c79356b
A
1292 struct nd_defrouter *dr;
1293
b0d623f7
A
1294 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1295 RT_LOCK_ASSERT_NOTHELD(rt);
1296 lck_mtx_lock(nd6_mutex);
1297
1298 RT_LOCK(rt);
1299 RT_ADDREF_LOCKED(rt); /* Extra ref */
1300 ln = rt->rt_llinfo;
1301 in6 = ((struct sockaddr_in6 *)rt_key(rt))->sin6_addr;
1302
1303 /*
1304 * Prevent another thread from modifying rt_key, rt_gateway
1305 * via rt_setgate() after the rt_lock is dropped by marking
1306 * the route as defunct.
1307 */
1308 rt->rt_flags |= RTF_CONDEMNED;
1309
1c79356b 1310 /*
9bccf70c
A
1311 * we used to have pfctlinput(PRC_HOSTDEAD) here.
1312 * even though it is not harmful, it was not really necessary.
1c79356b 1313 */
1c79356b 1314
b0d623f7
A
1315 if (!ip6_forwarding && (ip6_accept_rtadv ||
1316 (rt->rt_ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
1317 dr = defrouter_lookup(&((struct sockaddr_in6 *)rt_key(rt))->
1318 sin6_addr, rt->rt_ifp);
9bccf70c 1319
b7266188 1320 if ((ln && ln->ln_router) || dr) {
1c79356b
A
1321 /*
1322 * rt6_flush must be called whether or not the neighbor
1323 * is in the Default Router List.
1324 * See a corresponding comment in nd6_na_input().
1325 */
b0d623f7 1326 RT_UNLOCK(rt);
1c79356b 1327 rt6_flush(&in6, rt->rt_ifp);
b0d623f7
A
1328 } else {
1329 RT_UNLOCK(rt);
1c79356b
A
1330 }
1331
1332 if (dr) {
1333 /*
1334 * Unreachablity of a router might affect the default
1335 * router selection and on-link detection of advertised
1336 * prefixes.
1337 */
1338
1339 /*
1340 * Temporarily fake the state to choose a new default
1341 * router and to perform on-link determination of
55e303ae 1342 * prefixes correctly.
1c79356b
A
1343 * Below the state will be set correctly,
1344 * or the entry itself will be deleted.
1345 */
b0d623f7 1346 RT_LOCK_SPIN(rt);
1c79356b
A
1347 ln->ln_state = ND6_LLINFO_INCOMPLETE;
1348
9bccf70c
A
1349 /*
1350 * Since defrouter_select() does not affect the
1351 * on-link determination and MIP6 needs the check
1352 * before the default router selection, we perform
1353 * the check now.
1354 */
b0d623f7 1355 RT_UNLOCK(rt);
91447636 1356 pfxlist_onlink_check(1);
9bccf70c 1357
1c79356b
A
1358 if (dr == TAILQ_FIRST(&nd_defrouter)) {
1359 /*
1360 * It is used as the current default router,
1361 * so we have to move it to the end of the
1362 * list and choose a new one.
1363 * XXX: it is not very efficient if this is
1364 * the only router.
1365 */
1366 TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1367 TAILQ_INSERT_TAIL(&nd_defrouter, dr, dr_entry);
1368
1369 defrouter_select();
1370 }
1c79356b 1371 }
b0d623f7
A
1372 RT_LOCK_ASSERT_NOTHELD(rt);
1373 } else {
1374 RT_UNLOCK(rt);
1c79356b
A
1375 }
1376
b0d623f7 1377 lck_mtx_unlock(nd6_mutex);
9bccf70c
A
1378 /*
1379 * Detach the route from the routing tree and the list of neighbor
1380 * caches, and disable the route entry not to be used in already
1381 * cached routes.
1382 */
b0d623f7 1383 (void) rtrequest(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
1c79356b 1384 rt_mask(rt), 0, (struct rtentry **)0);
9bccf70c 1385
b0d623f7
A
1386 /* Extra ref held above; now free it */
1387 rtfree(rt);
1c79356b
A
1388}
1389
1390/*
1391 * Upper-layer reachability hint for Neighbor Unreachability Detection.
1392 *
1393 * XXX cost-effective metods?
1394 */
1395void
91447636
A
1396nd6_nud_hint(
1397 struct rtentry *rt,
1398 struct in6_addr *dst6,
1399 int force)
1c79356b
A
1400{
1401 struct llinfo_nd6 *ln;
91447636
A
1402 struct timeval timenow;
1403
1404 getmicrotime(&timenow);
1c79356b
A
1405
1406 /*
1407 * If the caller specified "rt", use that. Otherwise, resolve the
1408 * routing table by supplied "dst6".
1409 */
1410 if (!rt) {
1411 if (!dst6)
1412 return;
b0d623f7
A
1413 /* Callee returns a locked route upon success */
1414 if ((rt = nd6_lookup(dst6, 0, NULL, 0)) == NULL)
1c79356b 1415 return;
b0d623f7
A
1416 RT_LOCK_ASSERT_HELD(rt);
1417 } else {
1418 RT_LOCK(rt);
1419 RT_ADDREF_LOCKED(rt);
1c79356b
A
1420 }
1421
9bccf70c
A
1422 if ((rt->rt_flags & RTF_GATEWAY) != 0 ||
1423 (rt->rt_flags & RTF_LLINFO) == 0 ||
1424 !rt->rt_llinfo || !rt->rt_gateway ||
1425 rt->rt_gateway->sa_family != AF_LINK) {
1c79356b 1426 /* This is not a host route. */
b0d623f7 1427 goto done;
1c79356b
A
1428 }
1429
b0d623f7 1430 ln = rt->rt_llinfo;
1c79356b 1431 if (ln->ln_state < ND6_LLINFO_REACHABLE)
b0d623f7 1432 goto done;
1c79356b 1433
1c79356b 1434 /*
9bccf70c
A
1435 * if we get upper-layer reachability confirmation many times,
1436 * it is possible we have false information.
1c79356b 1437 */
9bccf70c
A
1438 if (!force) {
1439 ln->ln_byhint++;
1440 if (ln->ln_byhint > nd6_maxnudhint)
b0d623f7 1441 goto done;
1c79356b 1442 }
9bccf70c
A
1443
1444 ln->ln_state = ND6_LLINFO_REACHABLE;
b0d623f7
A
1445 if (ln->ln_expire) {
1446 lck_rw_lock_shared(nd_if_rwlock);
d1ecb069
A
1447 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
1448 nd_ifinfo[rt->rt_ifp->if_index].reachable);
b0d623f7
A
1449 lck_rw_done(nd_if_rwlock);
1450 }
1451done:
1452 RT_REMREF_LOCKED(rt);
1453 RT_UNLOCK(rt);
1c79356b 1454}
1c79356b
A
1455
1456void
91447636
A
1457nd6_rtrequest(
1458 int req,
1459 struct rtentry *rt,
2d21ac55 1460 __unused struct sockaddr *sa)
1c79356b
A
1461{
1462 struct sockaddr *gate = rt->rt_gateway;
b0d623f7 1463 struct llinfo_nd6 *ln = rt->rt_llinfo;
2d21ac55
A
1464 static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK, 0, 0, 0, 0, 0,
1465 {0,0,0,0,0,0,0,0,0,0,0,0,} };
1c79356b
A
1466 struct ifnet *ifp = rt->rt_ifp;
1467 struct ifaddr *ifa;
91447636
A
1468 struct timeval timenow;
1469
b0d623f7
A
1470 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1471 RT_LOCK_ASSERT_HELD(rt);
1c79356b 1472
55e303ae 1473 if ((rt->rt_flags & RTF_GATEWAY))
1c79356b
A
1474 return;
1475
9bccf70c
A
1476 if (nd6_need_cache(ifp) == 0 && (rt->rt_flags & RTF_HOST) == 0) {
1477 /*
1478 * This is probably an interface direct route for a link
1479 * which does not need neighbor caches (e.g. fe80::%lo0/64).
1480 * We do not need special treatment below for such a route.
1481 * Moreover, the RTF_LLINFO flag which would be set below
1482 * would annoy the ndp(8) command.
1483 */
1484 return;
1485 }
1486
b0d623f7
A
1487 if (req == RTM_RESOLVE) {
1488 int no_nd_cache;
1489
1490 if (!nd6_need_cache(ifp)) { /* stf case */
1491 no_nd_cache = 1;
1492 } else {
1493 /*
1494 * nd6_is_addr_neighbor() may call nd6_lookup(),
1495 * therefore we drop rt_lock to avoid deadlock
1496 * during the lookup. Using rt_key(rt) is still
1497 * safe because it won't change while rnh_lock
1498 * is held.
1499 */
1500 RT_ADDREF_LOCKED(rt);
1501 RT_UNLOCK(rt);
1502 no_nd_cache = !nd6_is_addr_neighbor(
1503 (struct sockaddr_in6 *)rt_key(rt), ifp, 1);
1504 RT_LOCK(rt);
1505 RT_REMREF_LOCKED(rt);
1506 }
1507
55e303ae
A
1508 /*
1509 * FreeBSD and BSD/OS often make a cloned host route based
1510 * on a less-specific route (e.g. the default route).
1511 * If the less specific route does not have a "gateway"
1512 * (this is the case when the route just goes to a p2p or an
1513 * stf interface), we'll mistakenly make a neighbor cache for
1514 * the host route, and will see strange neighbor solicitation
1515 * for the corresponding destination. In order to avoid the
1516 * confusion, we check if the destination of the route is
1517 * a neighbor in terms of neighbor discovery, and stop the
1518 * process if not. Additionally, we remove the LLINFO flag
1519 * so that ndp(8) will not try to get the neighbor information
1520 * of the destination.
1521 */
b0d623f7
A
1522 if (no_nd_cache) {
1523 rt->rt_flags &= ~RTF_LLINFO;
1524 return;
1525 }
55e303ae
A
1526 }
1527
91447636 1528 getmicrotime(&timenow);
1c79356b
A
1529 switch (req) {
1530 case RTM_ADD:
1531 /*
1532 * There is no backward compatibility :)
1533 *
1534 * if ((rt->rt_flags & RTF_HOST) == 0 &&
1535 * SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
1536 * rt->rt_flags |= RTF_CLONING;
1537 */
1538 if (rt->rt_flags & (RTF_CLONING | RTF_LLINFO)) {
1539 /*
1540 * Case 1: This route should come from
55e303ae 1541 * a route to interface. RTF_LLINFO flag is set
1c79356b
A
1542 * for a host route whose destination should be
1543 * treated as on-link.
1544 */
b0d623f7
A
1545 if (rt_setgate(rt, rt_key(rt),
1546 (struct sockaddr *)&null_sdl) == 0) {
1547 gate = rt->rt_gateway;
1548 SDL(gate)->sdl_type = ifp->if_type;
1549 SDL(gate)->sdl_index = ifp->if_index;
1550 /*
1551 * In case we're called before 1.0 sec.
1552 * has elapsed.
1553 */
1554 if (ln != NULL)
1555 ln->ln_expire = MAX(timenow.tv_sec, 1);
1c79356b 1556 }
55e303ae 1557 if ((rt->rt_flags & RTF_CLONING))
1c79356b
A
1558 break;
1559 }
1560 /*
1561 * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
1562 * We don't do that here since llinfo is not ready yet.
1563 *
1564 * There are also couple of other things to be discussed:
1565 * - unsolicited NA code needs improvement beforehand
1566 * - RFC2461 says we MAY send multicast unsolicited NA
1567 * (7.2.6 paragraph 4), however, it also says that we
1568 * SHOULD provide a mechanism to prevent multicast NA storm.
1569 * we don't have anything like it right now.
9bccf70c 1570 * note that the mechanism needs a mutual agreement
1c79356b 1571 * between proxies, which means that we need to implement
9bccf70c
A
1572 * a new protocol, or a new kludge.
1573 * - from RFC2461 6.2.4, host MUST NOT send an unsolicited NA.
1c79356b
A
1574 * we need to check ip6forwarding before sending it.
1575 * (or should we allow proxy ND configuration only for
1576 * routers? there's no mention about proxy ND from hosts)
1577 */
1578#if 0
1579 /* XXX it does not work */
1580 if (rt->rt_flags & RTF_ANNOUNCE)
1581 nd6_na_output(ifp,
1582 &SIN6(rt_key(rt))->sin6_addr,
1583 &SIN6(rt_key(rt))->sin6_addr,
1584 ip6_forwarding ? ND_NA_FLAG_ROUTER : 0,
1585 1, NULL);
1586#endif
1587 /* FALLTHROUGH */
1588 case RTM_RESOLVE:
9bccf70c 1589 if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) == 0) {
1c79356b
A
1590 /*
1591 * Address resolution isn't necessary for a point to
1592 * point link, so we can skip this test for a p2p link.
1593 */
1594 if (gate->sa_family != AF_LINK ||
1595 gate->sa_len < sizeof(null_sdl)) {
1596 log(LOG_DEBUG,
9bccf70c
A
1597 "nd6_rtrequest: bad gateway value: %s\n",
1598 if_name(ifp));
1c79356b
A
1599 break;
1600 }
1601 SDL(gate)->sdl_type = ifp->if_type;
1602 SDL(gate)->sdl_index = ifp->if_index;
1603 }
1604 if (ln != NULL)
1605 break; /* This happens on a route change */
1606 /*
1607 * Case 2: This route may come from cloning, or a manual route
1608 * add with a LL address.
1609 */
b0d623f7
A
1610 rt->rt_llinfo = ln = nd6_llinfo_alloc();
1611 if (ln == NULL) {
1c79356b
A
1612 log(LOG_DEBUG, "nd6_rtrequest: malloc failed\n");
1613 break;
1614 }
b0d623f7
A
1615 rt->rt_llinfo_free = nd6_llinfo_free;
1616
1c79356b
A
1617 nd6_inuse++;
1618 nd6_allocated++;
1619 Bzero(ln, sizeof(*ln));
1620 ln->ln_rt = rt;
1621 /* this is required for "ndp" command. - shin */
1622 if (req == RTM_ADD) {
1623 /*
1624 * gate should have some valid AF_LINK entry,
1625 * and ln->ln_expire should have some lifetime
1626 * which is specified by ndp command.
1627 */
1628 ln->ln_state = ND6_LLINFO_REACHABLE;
9bccf70c 1629 ln->ln_byhint = 0;
1c79356b
A
1630 } else {
1631 /*
1632 * When req == RTM_RESOLVE, rt is created and
1633 * initialized in rtrequest(), so rt_expire is 0.
1634 */
1635 ln->ln_state = ND6_LLINFO_NOSTATE;
b0d623f7
A
1636 /* In case we're called before 1.0 sec. has elapsed */
1637 ln->ln_expire = MAX(timenow.tv_sec, 1);
1c79356b
A
1638 }
1639 rt->rt_flags |= RTF_LLINFO;
b0d623f7
A
1640 LN_INSERTHEAD(ln);
1641
1642 /*
1643 * If we have too many cache entries, initiate immediate
1644 * purging for some "less recently used" entries. Note that
1645 * we cannot directly call nd6_free() here because it would
1646 * cause re-entering rtable related routines triggering an LOR
1647 * problem.
1648 */
1649 if (ip6_neighborgcthresh >= 0 &&
1650 nd6_inuse >= ip6_neighborgcthresh) {
1651 int i;
1652
1653 for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
1654 struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
1655 struct rtentry *rt_end = ln_end->ln_rt;
1656
1657 /* Move this entry to the head */
1658 RT_LOCK(rt_end);
1659 LN_DEQUEUE(ln_end);
1660 LN_INSERTHEAD(ln_end);
1661
1662 if (ln_end->ln_expire == 0) {
1663 RT_UNLOCK(rt_end);
1664 continue;
1665 }
1666 if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
1667 ln_end->ln_state = ND6_LLINFO_STALE;
1668 else
1669 ln_end->ln_state = ND6_LLINFO_PURGE;
1670 ln_end->ln_expire = timenow.tv_sec;
1671 RT_UNLOCK(rt_end);
1672 }
1673 }
1c79356b
A
1674
1675 /*
1676 * check if rt_key(rt) is one of my address assigned
1677 * to the interface.
1678 */
1679 ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
1680 &SIN6(rt_key(rt))->sin6_addr);
1681 if (ifa) {
1682 caddr_t macp = nd6_ifptomac(ifp);
1683 ln->ln_expire = 0;
1684 ln->ln_state = ND6_LLINFO_REACHABLE;
9bccf70c 1685 ln->ln_byhint = 0;
1c79356b
A
1686 if (macp) {
1687 Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
1688 SDL(gate)->sdl_alen = ifp->if_addrlen;
1689 }
1690 if (nd6_useloopback) {
d1ecb069
A
1691#if IFNET_ROUTE_REFCNT
1692 /* Adjust route ref count for the interfaces */
1693 if (rt->rt_if_ref_fn != NULL &&
1694 rt->rt_ifp != lo_ifp) {
1695 rt->rt_if_ref_fn(lo_ifp, 1);
1696 rt->rt_if_ref_fn(rt->rt_ifp, -1);
1697 }
1698#endif /* IFNET_ROUTE_REFCNT */
2d21ac55 1699 rt->rt_ifp = lo_ifp; /* XXX */
1c79356b
A
1700 /*
1701 * Make sure rt_ifa be equal to the ifaddr
1702 * corresponding to the address.
1703 * We need this because when we refer
1704 * rt_ifa->ia6_flags in ip6_input, we assume
1705 * that the rt_ifa points to the address instead
1706 * of the loopback address.
1707 */
1708 if (ifa != rt->rt_ifa) {
9bccf70c 1709 rtsetifa(rt, ifa);
1c79356b
A
1710 }
1711 }
b0d623f7 1712 ifafree(ifa);
1c79356b
A
1713 } else if (rt->rt_flags & RTF_ANNOUNCE) {
1714 ln->ln_expire = 0;
1715 ln->ln_state = ND6_LLINFO_REACHABLE;
9bccf70c 1716 ln->ln_byhint = 0;
1c79356b
A
1717
1718 /* join solicited node multicast for proxy ND */
1719 if (ifp->if_flags & IFF_MULTICAST) {
1720 struct in6_addr llsol;
1721 int error;
1722
1723 llsol = SIN6(rt_key(rt))->sin6_addr;
1724 llsol.s6_addr16[0] = htons(0xff02);
1725 llsol.s6_addr16[1] = htons(ifp->if_index);
1726 llsol.s6_addr32[1] = 0;
1727 llsol.s6_addr32[2] = htonl(1);
1728 llsol.s6_addr8[12] = 0xff;
1729
91447636 1730 if (!in6_addmulti(&llsol, ifp, &error, 0)) {
9bccf70c
A
1731 nd6log((LOG_ERR, "%s: failed to join "
1732 "%s (errno=%d)\n", if_name(ifp),
1733 ip6_sprintf(&llsol), error));
1734 }
1c79356b
A
1735 }
1736 }
1737 break;
1738
1739 case RTM_DELETE:
1740 if (!ln)
1741 break;
1742 /* leave from solicited node multicast for proxy ND */
1743 if ((rt->rt_flags & RTF_ANNOUNCE) != 0 &&
1744 (ifp->if_flags & IFF_MULTICAST) != 0) {
1745 struct in6_addr llsol;
1746 struct in6_multi *in6m;
1747
1748 llsol = SIN6(rt_key(rt))->sin6_addr;
1749 llsol.s6_addr16[0] = htons(0xff02);
1750 llsol.s6_addr16[1] = htons(ifp->if_index);
1751 llsol.s6_addr32[1] = 0;
1752 llsol.s6_addr32[2] = htonl(1);
1753 llsol.s6_addr8[12] = 0xff;
1754
91447636 1755 ifnet_lock_shared(ifp);
1c79356b 1756 IN6_LOOKUP_MULTI(llsol, ifp, in6m);
91447636 1757 ifnet_lock_done(ifp);
1c79356b 1758 if (in6m)
91447636 1759 in6_delmulti(in6m, 0);
1c79356b
A
1760 }
1761 nd6_inuse--;
b0d623f7
A
1762 /*
1763 * Unchain it but defer the actual freeing until the route
1764 * itself is to be freed. rt->rt_llinfo still points to
1765 * llinfo_nd6, and likewise, ln->ln_rt stil points to this
1766 * route entry, except that RTF_LLINFO is now cleared.
1767 */
1768 if (ln->ln_flags & ND6_LNF_IN_USE)
1769 LN_DEQUEUE(ln);
1c79356b 1770 rt->rt_flags &= ~RTF_LLINFO;
b0d623f7 1771 if (ln->ln_hold != NULL)
1c79356b 1772 m_freem(ln->ln_hold);
55e303ae 1773 ln->ln_hold = NULL;
1c79356b
A
1774 }
1775}
1776
b0d623f7
A
1777static void
1778nd6_siocgdrlst(void *data, int data_is_64)
1c79356b 1779{
b0d623f7
A
1780 struct in6_drlist_64 *drl_64 = (struct in6_drlist_64 *)data;
1781 struct in6_drlist_32 *drl_32 = (struct in6_drlist_32 *)data;
1782 struct nd_defrouter *dr;
1783 int i = 0;
1c79356b 1784
b0d623f7
A
1785 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1786
1787 bzero(data, data_is_64 ? sizeof (*drl_64) : sizeof (*drl_32));
1788 dr = TAILQ_FIRST(&nd_defrouter);
1789 if (data_is_64) {
1790 /* For 64-bit process */
1c79356b 1791 while (dr && i < DRLSTSIZ) {
b0d623f7
A
1792 drl_64->defrouter[i].rtaddr = dr->rtaddr;
1793 if (IN6_IS_ADDR_LINKLOCAL(&drl_64->defrouter[i].rtaddr)) {
1c79356b 1794 /* XXX: need to this hack for KAME stack */
b0d623f7
A
1795 drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
1796 } else {
1c79356b
A
1797 log(LOG_ERR,
1798 "default router list contains a "
1799 "non-linklocal address(%s)\n",
b0d623f7
A
1800 ip6_sprintf(&drl_64->defrouter[i].rtaddr));
1801 }
1802 drl_64->defrouter[i].flags = dr->flags;
1803 drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
1804 drl_64->defrouter[i].expire = dr->expire;
1805 drl_64->defrouter[i].if_index = dr->ifp->if_index;
1c79356b
A
1806 i++;
1807 dr = TAILQ_NEXT(dr, dr_entry);
1808 }
b0d623f7
A
1809 return;
1810 }
1811 /* For 32-bit process */
1812 while (dr && i < DRLSTSIZ) {
1813 drl_32->defrouter[i].rtaddr = dr->rtaddr;
1814 if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
1815 /* XXX: need to this hack for KAME stack */
1816 drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
1817 } else {
1818 log(LOG_ERR,
1819 "default router list contains a "
1820 "non-linklocal address(%s)\n",
1821 ip6_sprintf(&drl_32->defrouter[i].rtaddr));
1822 }
1823 drl_32->defrouter[i].flags = dr->flags;
1824 drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
1825 drl_32->defrouter[i].expire = dr->expire;
1826 drl_32->defrouter[i].if_index = dr->ifp->if_index;
1827 i++;
1828 dr = TAILQ_NEXT(dr, dr_entry);
1829 }
1830}
1831
1832static void
1833nd6_siocgprlst(void *data, int data_is_64)
1834{
1835 struct in6_prlist_64 *prl_64 = (struct in6_prlist_64 *)data;
1836 struct in6_prlist_32 *prl_32 = (struct in6_prlist_32 *)data;
1837 struct nd_prefix *pr;
1838 struct rr_prefix *rpp;
1839 int i = 0;
1840
1841 lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1842 /*
1843 * XXX meaning of fields, especialy "raflags", is very
1844 * differnet between RA prefix list and RR/static prefix list.
1845 * how about separating ioctls into two?
1846 */
1847 bzero(data, data_is_64 ? sizeof (*prl_64) : sizeof (*prl_32));
1848 pr = nd_prefix.lh_first;
1849 if (data_is_64) {
1850 /* For 64-bit process */
1c79356b
A
1851 while (pr && i < PRLSTSIZ) {
1852 struct nd_pfxrouter *pfr;
1853 int j;
1854
b0d623f7 1855 (void) in6_embedscope(&prl_64->prefix[i].prefix,
9bccf70c 1856 &pr->ndpr_prefix, NULL, NULL);
b0d623f7
A
1857 prl_64->prefix[i].raflags = pr->ndpr_raf;
1858 prl_64->prefix[i].prefixlen = pr->ndpr_plen;
1859 prl_64->prefix[i].vltime = pr->ndpr_vltime;
1860 prl_64->prefix[i].pltime = pr->ndpr_pltime;
1861 prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
1862 prl_64->prefix[i].expire = pr->ndpr_expire;
1c79356b
A
1863
1864 pfr = pr->ndpr_advrtrs.lh_first;
1865 j = 0;
9bccf70c 1866 while (pfr) {
1c79356b 1867 if (j < DRLSTSIZ) {
b0d623f7 1868#define RTRADDR prl_64->prefix[i].advrtr[j]
1c79356b
A
1869 RTRADDR = pfr->router->rtaddr;
1870 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1871 /* XXX: hack for KAME */
1872 RTRADDR.s6_addr16[1] = 0;
b0d623f7 1873 } else {
1c79356b
A
1874 log(LOG_ERR,
1875 "a router(%s) advertises "
1876 "a prefix with "
1877 "non-link local address\n",
1878 ip6_sprintf(&RTRADDR));
b0d623f7 1879 }
1c79356b
A
1880#undef RTRADDR
1881 }
1882 j++;
1883 pfr = pfr->pfr_next;
1884 }
b0d623f7
A
1885 prl_64->prefix[i].advrtrs = j;
1886 prl_64->prefix[i].origin = PR_ORIG_RA;
1c79356b
A
1887
1888 i++;
1889 pr = pr->ndpr_next;
1890 }
1c79356b
A
1891
1892 for (rpp = LIST_FIRST(&rr_prefix); rpp;
1893 rpp = LIST_NEXT(rpp, rp_entry)) {
1894 if (i >= PRLSTSIZ)
1895 break;
b0d623f7 1896 (void) in6_embedscope(&prl_64->prefix[i].prefix,
9bccf70c 1897 &pr->ndpr_prefix, NULL, NULL);
b0d623f7
A
1898 prl_64->prefix[i].raflags = rpp->rp_raf;
1899 prl_64->prefix[i].prefixlen = rpp->rp_plen;
1900 prl_64->prefix[i].vltime = rpp->rp_vltime;
1901 prl_64->prefix[i].pltime = rpp->rp_pltime;
1902 prl_64->prefix[i].if_index = rpp->rp_ifp->if_index;
1903 prl_64->prefix[i].expire = rpp->rp_expire;
1904 prl_64->prefix[i].advrtrs = 0;
1905 prl_64->prefix[i].origin = rpp->rp_origin;
1c79356b
A
1906 i++;
1907 }
b0d623f7
A
1908 return;
1909 }
1910 /* For 32-bit process */
1911 while (pr && i < PRLSTSIZ) {
1912 struct nd_pfxrouter *pfr;
1913 int j;
1914
1915 (void) in6_embedscope(&prl_32->prefix[i].prefix,
1916 &pr->ndpr_prefix, NULL, NULL);
1917 prl_32->prefix[i].raflags = pr->ndpr_raf;
1918 prl_32->prefix[i].prefixlen = pr->ndpr_plen;
1919 prl_32->prefix[i].vltime = pr->ndpr_vltime;
1920 prl_32->prefix[i].pltime = pr->ndpr_pltime;
1921 prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
1922 prl_32->prefix[i].expire = pr->ndpr_expire;
1923
1924 pfr = pr->ndpr_advrtrs.lh_first;
1925 j = 0;
1926 while (pfr) {
1927 if (j < DRLSTSIZ) {
1928#define RTRADDR prl_32->prefix[i].advrtr[j]
1929 RTRADDR = pfr->router->rtaddr;
1930 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
1931 /* XXX: hack for KAME */
1932 RTRADDR.s6_addr16[1] = 0;
1933 } else {
1934 log(LOG_ERR,
1935 "a router(%s) advertises "
1936 "a prefix with "
1937 "non-link local address\n",
1938 ip6_sprintf(&RTRADDR));
1939 }
1940#undef RTRADDR
1941 }
1942 j++;
1943 pfr = pfr->pfr_next;
1944 }
1945 prl_32->prefix[i].advrtrs = j;
1946 prl_32->prefix[i].origin = PR_ORIG_RA;
1947
1948 i++;
1949 pr = pr->ndpr_next;
1950 }
1951
1952 for (rpp = LIST_FIRST(&rr_prefix); rpp;
1953 rpp = LIST_NEXT(rpp, rp_entry)) {
1954 if (i >= PRLSTSIZ)
1955 break;
1956 (void) in6_embedscope(&prl_32->prefix[i].prefix,
1957 &pr->ndpr_prefix, NULL, NULL);
1958 prl_32->prefix[i].raflags = rpp->rp_raf;
1959 prl_32->prefix[i].prefixlen = rpp->rp_plen;
1960 prl_32->prefix[i].vltime = rpp->rp_vltime;
1961 prl_32->prefix[i].pltime = rpp->rp_pltime;
1962 prl_32->prefix[i].if_index = rpp->rp_ifp->if_index;
1963 prl_32->prefix[i].expire = rpp->rp_expire;
1964 prl_32->prefix[i].advrtrs = 0;
1965 prl_32->prefix[i].origin = rpp->rp_origin;
1966 i++;
1967 }
1968}
1969
1970int
1971nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
1972{
1973 struct in6_ndireq *ndi = (struct in6_ndireq *)data;
1974 struct in6_ondireq *ondi = (struct in6_ondireq *)data;
1975 struct nd_defrouter *dr, any;
1976 struct nd_prefix *pr;
1977 struct rtentry *rt;
1978 int i = ifp->if_index, error = 0;
1979
1980 switch (cmd) {
1981 case SIOCGDRLST_IN6_32:
1982 case SIOCGDRLST_IN6_64:
1983 /*
1984 * obsolete API, use sysctl under net.inet6.icmp6
1985 */
1986 lck_mtx_lock(nd6_mutex);
1987 nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
91447636 1988 lck_mtx_unlock(nd6_mutex);
9bccf70c 1989 break;
b0d623f7
A
1990
1991 case SIOCGPRLST_IN6_32:
1992 case SIOCGPRLST_IN6_64:
1993 /*
1994 * obsolete API, use sysctl under net.inet6.icmp6
1995 */
1996 lck_mtx_lock(nd6_mutex);
1997 nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
1998 lck_mtx_unlock(nd6_mutex);
1c79356b 1999 break;
b0d623f7
A
2000
2001 case OSIOCGIFINFO_IN6:
1c79356b 2002 case SIOCGIFINFO_IN6:
b0d623f7
A
2003 /*
2004 * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
2005 * instead of in6_ndireq, so we treat it as such.
2006 */
2007 lck_rw_lock_shared(nd_if_rwlock);
9bccf70c 2008 if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
b0d623f7 2009 lck_rw_done(nd_if_rwlock);
9bccf70c
A
2010 error = EINVAL;
2011 break;
2012 }
b0d623f7
A
2013 ondi->ndi.linkmtu = IN6_LINKMTU(ifp);
2014 ondi->ndi.maxmtu = nd_ifinfo[i].maxmtu;
2015 ondi->ndi.basereachable = nd_ifinfo[i].basereachable;
2016 ondi->ndi.reachable = nd_ifinfo[i].reachable;
2017 ondi->ndi.retrans = nd_ifinfo[i].retrans;
2018 ondi->ndi.flags = nd_ifinfo[i].flags;
2019 ondi->ndi.recalctm = nd_ifinfo[i].recalctm;
2020 ondi->ndi.chlim = nd_ifinfo[i].chlim;
2021 ondi->ndi.receivedra = nd_ifinfo[i].receivedra;
2022 lck_rw_done(nd_if_rwlock);
1c79356b 2023 break;
b0d623f7 2024
1c79356b
A
2025 case SIOCSIFINFO_FLAGS:
2026 /* XXX: almost all other fields of ndi->ndi is unused */
b0d623f7 2027 lck_rw_lock_shared(nd_if_rwlock);
9bccf70c 2028 if (!nd_ifinfo || i >= nd_ifinfo_indexlim) {
b0d623f7 2029 lck_rw_done(nd_if_rwlock);
9bccf70c
A
2030 error = EINVAL;
2031 break;
2032 }
b0d623f7
A
2033 nd_ifinfo[i].flags = ndi->ndi.flags;
2034 lck_rw_done(nd_if_rwlock);
1c79356b 2035 break;
b0d623f7 2036
1c79356b
A
2037 case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */
2038 /* flush default router list */
2039 /*
2040 * xxx sumikawa: should not delete route if default
2041 * route equals to the top of default router list
2042 */
2043 bzero(&any, sizeof(any));
91447636
A
2044 lck_mtx_lock(nd6_mutex);
2045 defrouter_delreq(&any, 1);
1c79356b 2046 defrouter_select();
91447636 2047 lck_mtx_unlock(nd6_mutex);
1c79356b
A
2048 /* xxx sumikawa: flush prefix list */
2049 break;
b0d623f7
A
2050
2051 case SIOCSPFXFLUSH_IN6: {
1c79356b 2052 /* flush all the prefix advertised by routers */
2d21ac55 2053 struct nd_prefix *next;
91447636 2054 lck_mtx_lock(nd6_mutex);
1c79356b 2055
1c79356b 2056 for (pr = nd_prefix.lh_first; pr; pr = next) {
9bccf70c
A
2057 struct in6_ifaddr *ia, *ia_next;
2058
1c79356b 2059 next = pr->ndpr_next;
9bccf70c
A
2060
2061 if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
2062 continue; /* XXX */
2063
2064 /* do we really have to remove addresses as well? */
91447636 2065 for (ia = in6_ifaddrs; ia; ia = ia_next) {
55e303ae 2066 /* ia might be removed. keep the next ptr. */
9bccf70c
A
2067 ia_next = ia->ia_next;
2068
2069 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
2070 continue;
2071
2072 if (ia->ia6_ndpr == pr)
91447636 2073 in6_purgeaddr(&ia->ia_ifa, 1);
9bccf70c 2074 }
91447636 2075 prelist_remove(pr, 1);
1c79356b 2076 }
91447636 2077 lck_mtx_unlock(nd6_mutex);
1c79356b 2078 break;
b0d623f7
A
2079 }
2080
2081 case SIOCSRTRFLUSH_IN6: {
1c79356b 2082 /* flush all the default routers */
2d21ac55 2083 struct nd_defrouter *next;
1c79356b 2084
91447636 2085 lck_mtx_lock(nd6_mutex);
1c79356b
A
2086 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
2087 /*
2088 * The first entry of the list may be stored in
2089 * the routing table, so we'll delete it later.
2090 */
2091 for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
2092 next = TAILQ_NEXT(dr, dr_entry);
91447636 2093 defrtrlist_del(dr, 1);
1c79356b 2094 }
91447636 2095 defrtrlist_del(TAILQ_FIRST(&nd_defrouter), 1);
1c79356b 2096 }
91447636 2097 lck_mtx_unlock(nd6_mutex);
1c79356b 2098 break;
b0d623f7
A
2099 }
2100
2101 case SIOCGNBRINFO_IN6_32: {
1c79356b 2102 struct llinfo_nd6 *ln;
b0d623f7
A
2103 struct in6_nbrinfo_32 *nbi_32 = (struct in6_nbrinfo_32 *)data;
2104 /* make local for safety */
2105 struct in6_addr nb_addr = nbi_32->addr;
1c79356b
A
2106
2107 /*
2108 * XXX: KAME specific hack for scoped addresses
2109 * XXXX: for other scopes than link-local?
2110 */
b0d623f7
A
2111 if (IN6_IS_ADDR_LINKLOCAL(&nbi_32->addr) ||
2112 IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32->addr)) {
1c79356b
A
2113 u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
2114
2115 if (*idp == 0)
2116 *idp = htons(ifp->if_index);
2117 }
2118
b0d623f7 2119 /* Callee returns a locked route upon success */
91447636 2120 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
1c79356b 2121 error = EINVAL;
1c79356b
A
2122 break;
2123 }
b0d623f7
A
2124 RT_LOCK_ASSERT_HELD(rt);
2125 ln = rt->rt_llinfo;
2126 nbi_32->state = ln->ln_state;
2127 nbi_32->asked = ln->ln_asked;
2128 nbi_32->isrouter = ln->ln_router;
2129 nbi_32->expire = ln->ln_expire;
2130 RT_REMREF_LOCKED(rt);
2131 RT_UNLOCK(rt);
1c79356b 2132 break;
b0d623f7
A
2133 }
2134
2135 case SIOCGNBRINFO_IN6_64: {
2136 struct llinfo_nd6 *ln;
2137 struct in6_nbrinfo_64 *nbi_64 = (struct in6_nbrinfo_64 *)data;
2138 /* make local for safety */
2139 struct in6_addr nb_addr = nbi_64->addr;
2140
2141 /*
2142 * XXX: KAME specific hack for scoped addresses
2143 * XXXX: for other scopes than link-local?
2144 */
2145 if (IN6_IS_ADDR_LINKLOCAL(&nbi_64->addr) ||
2146 IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64->addr)) {
2147 u_int16_t *idp = (u_int16_t *)&nb_addr.s6_addr[2];
2148
2149 if (*idp == 0)
2150 *idp = htons(ifp->if_index);
2151 }
2152
2153 /* Callee returns a locked route upon success */
2154 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
2155 error = EINVAL;
2156 break;
2157 }
2158 RT_LOCK_ASSERT_HELD(rt);
2159 ln = rt->rt_llinfo;
2160 nbi_64->state = ln->ln_state;
2161 nbi_64->asked = ln->ln_asked;
2162 nbi_64->isrouter = ln->ln_router;
2163 nbi_64->expire = ln->ln_expire;
2164 RT_REMREF_LOCKED(rt);
2165 RT_UNLOCK(rt);
1c79356b 2166 break;
b0d623f7
A
2167 }
2168
2169 case SIOCGDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
2170 case SIOCGDEFIFACE_IN6_64: {
2171 struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
2172 struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;
2173
2174 if (cmd == SIOCGDEFIFACE_IN6_64)
2175 ndif_64->ifindex = nd6_defifindex;
2176 else
2177 ndif_32->ifindex = nd6_defifindex;
1c79356b
A
2178 break;
2179 }
b0d623f7
A
2180
2181 case SIOCSDEFIFACE_IN6_32: /* XXX: should be implemented as a sysctl? */
2182 case SIOCSDEFIFACE_IN6_64: {
2183 struct in6_ndifreq_64 *ndif_64 = (struct in6_ndifreq_64 *)data;
2184 struct in6_ndifreq_32 *ndif_32 = (struct in6_ndifreq_32 *)data;
2185
2186 return (nd6_setdefaultiface(cmd == SIOCSDEFIFACE_IN6_64 ?
2187 ndif_64->ifindex : ndif_32->ifindex));
2188 /* NOTREACHED */
2189 }
2190 }
2191 return (error);
1c79356b
A
2192}
2193
2194/*
2195 * Create neighbor cache entry and cache link-layer address,
2196 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
2197 */
b0d623f7 2198void
91447636
A
2199nd6_cache_lladdr(
2200 struct ifnet *ifp,
2201 struct in6_addr *from,
2202 char *lladdr,
2d21ac55 2203 __unused int lladdrlen,
91447636
A
2204 int type, /* ICMP6 type */
2205 int code) /* type dependent information */
1c79356b
A
2206{
2207 struct rtentry *rt = NULL;
2208 struct llinfo_nd6 *ln = NULL;
2209 int is_newentry;
2210 struct sockaddr_dl *sdl = NULL;
2211 int do_update;
2212 int olladdr;
2213 int llchange;
2214 int newstate = 0;
91447636 2215 struct timeval timenow;
1c79356b
A
2216
2217 if (!ifp)
2218 panic("ifp == NULL in nd6_cache_lladdr");
2219 if (!from)
2220 panic("from == NULL in nd6_cache_lladdr");
2221
2222 /* nothing must be updated for unspecified address */
2223 if (IN6_IS_ADDR_UNSPECIFIED(from))
b0d623f7 2224 return;
1c79356b
A
2225
2226 /*
2227 * Validation about ifp->if_addrlen and lladdrlen must be done in
2228 * the caller.
2229 *
2230 * XXX If the link does not have link-layer adderss, what should
2231 * we do? (ifp->if_addrlen == 0)
2232 * Spec says nothing in sections for RA, RS and NA. There's small
2233 * description on it in NS section (RFC 2461 7.2.3).
2234 */
91447636 2235 getmicrotime(&timenow);
1c79356b 2236
b0d623f7
A
2237 rt = nd6_lookup(from, 0, ifp, 0);
2238 if (rt == NULL) {
1c79356b
A
2239#if 0
2240 /* nothing must be done if there's no lladdr */
2241 if (!lladdr || !lladdrlen)
b0d623f7 2242 return;
1c79356b
A
2243#endif
2244
b0d623f7
A
2245 if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
2246 return;
2247 RT_LOCK_ASSERT_HELD(rt);
1c79356b 2248 is_newentry = 1;
9bccf70c 2249 } else {
b0d623f7 2250 RT_LOCK_ASSERT_HELD(rt);
9bccf70c 2251 /* do nothing if static ndp is set */
91447636 2252 if (rt->rt_flags & RTF_STATIC) {
b0d623f7
A
2253 RT_REMREF_LOCKED(rt);
2254 RT_UNLOCK(rt);
2255 return;
91447636 2256 }
1c79356b 2257 is_newentry = 0;
9bccf70c 2258 }
1c79356b 2259
1c79356b
A
2260 if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
2261fail:
b0d623f7
A
2262 RT_UNLOCK(rt);
2263 nd6_free(rt);
2264 rtfree(rt);
2265 return;
1c79356b 2266 }
b0d623f7 2267 ln = rt->rt_llinfo;
1c79356b
A
2268 if (!ln)
2269 goto fail;
2270 if (!rt->rt_gateway)
2271 goto fail;
2272 if (rt->rt_gateway->sa_family != AF_LINK)
2273 goto fail;
2274 sdl = SDL(rt->rt_gateway);
2275
2276 olladdr = (sdl->sdl_alen) ? 1 : 0;
2277 if (olladdr && lladdr) {
2278 if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
2279 llchange = 1;
2280 else
2281 llchange = 0;
2282 } else
2283 llchange = 0;
2284
2285 /*
2286 * newentry olladdr lladdr llchange (*=record)
2287 * 0 n n -- (1)
2288 * 0 y n -- (2)
2289 * 0 n y -- (3) * STALE
2290 * 0 y y n (4) *
2291 * 0 y y y (5) * STALE
2292 * 1 -- n -- (6) NOSTATE(= PASSIVE)
2293 * 1 -- y -- (7) * STALE
2294 */
2295
55e303ae 2296 if (lladdr) { /* (3-5) and (7) */
1c79356b
A
2297 /*
2298 * Record source link-layer address
2299 * XXX is it dependent to ifp->if_type?
2300 */
2301 sdl->sdl_alen = ifp->if_addrlen;
2302 bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
2303 }
2304
2305 if (!is_newentry) {
55e303ae
A
2306 if ((!olladdr && lladdr) /* (3) */
2307 || (olladdr && lladdr && llchange)) { /* (5) */
1c79356b
A
2308 do_update = 1;
2309 newstate = ND6_LLINFO_STALE;
55e303ae 2310 } else /* (1-2,4) */
1c79356b
A
2311 do_update = 0;
2312 } else {
2313 do_update = 1;
55e303ae 2314 if (!lladdr) /* (6) */
1c79356b 2315 newstate = ND6_LLINFO_NOSTATE;
55e303ae 2316 else /* (7) */
1c79356b
A
2317 newstate = ND6_LLINFO_STALE;
2318 }
2319
2320 if (do_update) {
2321 /*
2322 * Update the state of the neighbor cache.
2323 */
2324 ln->ln_state = newstate;
2325
2326 if (ln->ln_state == ND6_LLINFO_STALE) {
b0d623f7 2327 struct mbuf *m = ln->ln_hold;
9bccf70c
A
2328 /*
2329 * XXX: since nd6_output() below will cause
2330 * state tansition to DELAY and reset the timer,
2331 * we must set the timer now, although it is actually
2332 * meaningless.
2333 */
d1ecb069
A
2334 ln->ln_expire = rt_expiry(rt, timenow.tv_sec,
2335 nd6_gctimer);
b0d623f7 2336 ln->ln_hold = NULL;
9bccf70c 2337
b0d623f7 2338 if (m != NULL) {
9bccf70c
A
2339 /*
2340 * we assume ifp is not a p2p here, so just
2341 * set the 2nd argument as the 1st one.
2342 */
b0d623f7
A
2343 RT_UNLOCK(rt);
2344 nd6_output(ifp, ifp, m,
2345 (struct sockaddr_in6 *)rt_key(rt), rt, 0);
2346 RT_LOCK(rt);
1c79356b
A
2347 }
2348 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
2349 /* probe right away */
91447636 2350 ln->ln_expire = timenow.tv_sec;
1c79356b
A
2351 }
2352 }
2353
2354 /*
2355 * ICMP6 type dependent behavior.
2356 *
2357 * NS: clear IsRouter if new entry
2358 * RS: clear IsRouter
2359 * RA: set IsRouter if there's lladdr
2360 * redir: clear IsRouter if new entry
2361 *
2362 * RA case, (1):
2363 * The spec says that we must set IsRouter in the following cases:
2364 * - If lladdr exist, set IsRouter. This means (1-5).
2365 * - If it is old entry (!newentry), set IsRouter. This means (7).
2366 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
2367 * A quetion arises for (1) case. (1) case has no lladdr in the
2368 * neighbor cache, this is similar to (6).
2369 * This case is rare but we figured that we MUST NOT set IsRouter.
2370 *
2371 * newentry olladdr lladdr llchange NS RS RA redir
2372 * D R
2373 * 0 n n -- (1) c ? s
2374 * 0 y n -- (2) c s s
2375 * 0 n y -- (3) c s s
2376 * 0 y y n (4) c s s
2377 * 0 y y y (5) c s s
2378 * 1 -- n -- (6) c c c s
2379 * 1 -- y -- (7) c c s c s
2380 *
2381 * (c=clear s=set)
2382 */
2383 switch (type & 0xff) {
2384 case ND_NEIGHBOR_SOLICIT:
2385 /*
2386 * New entry must have is_router flag cleared.
2387 */
55e303ae 2388 if (is_newentry) /* (6-7) */
1c79356b
A
2389 ln->ln_router = 0;
2390 break;
2391 case ND_REDIRECT:
2392 /*
2393 * If the icmp is a redirect to a better router, always set the
2394 * is_router flag. Otherwise, if the entry is newly created,
2395 * clear the flag. [RFC 2461, sec 8.3]
1c79356b
A
2396 */
2397 if (code == ND_REDIRECT_ROUTER)
2398 ln->ln_router = 1;
55e303ae 2399 else if (is_newentry) /* (6-7) */
1c79356b
A
2400 ln->ln_router = 0;
2401 break;
2402 case ND_ROUTER_SOLICIT:
2403 /*
2404 * is_router flag must always be cleared.
2405 */
2406 ln->ln_router = 0;
2407 break;
2408 case ND_ROUTER_ADVERT:
2409 /*
2410 * Mark an entry with lladdr as a router.
2411 */
55e303ae
A
2412 if ((!is_newentry && (olladdr || lladdr)) /* (2-5) */
2413 || (is_newentry && lladdr)) { /* (7) */
1c79356b
A
2414 ln->ln_router = 1;
2415 }
2416 break;
2417 }
2418
9bccf70c
A
2419 /*
2420 * When the link-layer address of a router changes, select the
2421 * best router again. In particular, when the neighbor entry is newly
2422 * created, it might affect the selection policy.
2423 * Question: can we restrict the first condition to the "is_newentry"
2424 * case?
2425 * XXX: when we hear an RA from a new router with the link-layer
2426 * address option, defrouter_select() is called twice, since
2427 * defrtrlist_update called the function as well. However, I believe
2428 * we can compromise the overhead, since it only happens the first
2429 * time.
2430 * XXX: although defrouter_select() should not have a bad effect
2431 * for those are not autoconfigured hosts, we explicitly avoid such
2432 * cases for safety.
2433 */
b0d623f7
A
2434 if (do_update && ln->ln_router && !ip6_forwarding &&
2435 (ip6_accept_rtadv || (ifp->if_eflags & IFEF_ACCEPT_RTADVD))) {
2436 RT_REMREF_LOCKED(rt);
2437 RT_UNLOCK(rt);
91447636 2438 lck_mtx_lock(nd6_mutex);
9bccf70c 2439 defrouter_select();
91447636 2440 lck_mtx_unlock(nd6_mutex);
b0d623f7
A
2441 } else {
2442 RT_REMREF_LOCKED(rt);
2443 RT_UNLOCK(rt);
91447636 2444 }
1c79356b
A
2445}
2446
2447static void
91447636 2448nd6_slowtimo(
2d21ac55 2449 __unused void *ignored_arg)
1c79356b 2450{
9bccf70c
A
2451 int i;
2452 struct nd_ifinfo *nd6if;
1c79356b 2453
b0d623f7 2454 lck_rw_lock_shared(nd_if_rwlock);
1c79356b 2455 for (i = 1; i < if_index + 1; i++) {
9bccf70c 2456 if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
b0d623f7 2457 break;
1c79356b
A
2458 nd6if = &nd_ifinfo[i];
2459 if (nd6if->basereachable && /* already initialized */
2460 (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
2461 /*
2462 * Since reachable time rarely changes by router
2463 * advertisements, we SHOULD insure that a new random
2464 * value gets recomputed at least once every few hours.
2465 * (RFC 2461, 6.3.4)
2466 */
2467 nd6if->recalctm = nd6_recalc_reachtm_interval;
2468 nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable);
2469 }
2470 }
b0d623f7 2471 lck_rw_done(nd_if_rwlock);
91447636 2472 timeout(nd6_slowtimo, (caddr_t)0, ND6_SLOWTIMER_INTERVAL * hz);
9bccf70c 2473}
1c79356b 2474
1c79356b
A
2475#define senderr(e) { error = (e); goto bad;}
2476int
b0d623f7
A
2477nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
2478 struct sockaddr_in6 *dst, struct rtentry *hint0, int locked)
1c79356b 2479{
9bccf70c 2480 struct mbuf *m = m0;
b0d623f7 2481 struct rtentry *rt = hint0, *hint = hint0;
1c79356b
A
2482 struct llinfo_nd6 *ln = NULL;
2483 int error = 0;
91447636 2484 struct timeval timenow;
b0d623f7 2485 struct rtentry *rtrele = NULL;
1c79356b 2486
b0d623f7
A
2487 if (rt != NULL) {
2488 RT_LOCK_SPIN(rt);
2489 RT_ADDREF_LOCKED(rt);
2490 }
1c79356b 2491
b0d623f7
A
2492 if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
2493 if (rt != NULL)
2494 RT_UNLOCK(rt);
1c79356b 2495 goto sendpkt;
b0d623f7 2496 }
1c79356b
A
2497
2498 /*
b0d623f7
A
2499 * Next hop determination. Because we may involve the gateway route
2500 * in addition to the original route, locking is rather complicated.
2501 * The general concept is that regardless of whether the route points
2502 * to the original route or to the gateway route, this routine takes
2503 * an extra reference on such a route. This extra reference will be
2504 * released at the end.
2505 *
2506 * Care must be taken to ensure that the "hint0" route never gets freed
2507 * via rtfree(), since the caller may have stored it inside a struct
2508 * route with a reference held for that placeholder.
2509 *
2510 * This logic is similar to, though not exactly the same as the one
2511 * used by arp_route_to_gateway_route().
1c79356b 2512 */
b0d623f7
A
2513 if (rt != NULL) {
2514 /*
2515 * We have a reference to "rt" by now (or below via rtalloc1),
2516 * which will either be released or freed at the end of this
2517 * routine.
2518 */
2519 RT_LOCK_ASSERT_HELD(rt);
2520 if (!(rt->rt_flags & RTF_UP)) {
2521 RT_REMREF_LOCKED(rt);
2522 RT_UNLOCK(rt);
2523 if ((hint = rt = rtalloc1((struct sockaddr *)dst,
2524 1, 0)) != NULL) {
2525 RT_LOCK_SPIN(rt);
9bccf70c
A
2526 if (rt->rt_ifp != ifp) {
2527 /* XXX: loop care? */
b0d623f7
A
2528 RT_UNLOCK(rt);
2529 error = nd6_output(ifp, origifp, m0,
2530 dst, rt, locked);
2531 rtfree(rt);
2532 return (error);
9bccf70c 2533 }
91447636 2534 } else {
1c79356b 2535 senderr(EHOSTUNREACH);
91447636 2536 }
1c79356b 2537 }
9bccf70c 2538
1c79356b 2539 if (rt->rt_flags & RTF_GATEWAY) {
b0d623f7
A
2540 struct rtentry *gwrt;
2541 struct in6_ifaddr *ia6 = NULL;
2542 struct sockaddr_in6 gw6;
2543
2544 gw6 = *((struct sockaddr_in6 *)rt->rt_gateway);
2545 /*
2546 * Must drop rt_lock since nd6_is_addr_neighbor()
2547 * calls nd6_lookup() and acquires rnh_lock.
2548 */
2549 RT_UNLOCK(rt);
9bccf70c
A
2550
2551 /*
2552 * We skip link-layer address resolution and NUD
2553 * if the gateway is not a neighbor from ND point
55e303ae
A
2554 * of view, regardless of the value of nd_ifinfo.flags.
2555 * The second condition is a bit tricky; we skip
9bccf70c
A
2556 * if the gateway is our own address, which is
2557 * sometimes used to install a route to a p2p link.
2558 */
b0d623f7
A
2559 if (!nd6_is_addr_neighbor(&gw6, ifp, 0) ||
2560 (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
9bccf70c
A
2561 /*
2562 * We allow this kind of tricky route only
2563 * when the outgoing interface is p2p.
2564 * XXX: we may need a more generic rule here.
2565 */
b0d623f7
A
2566 if (ia6 != NULL)
2567 ifafree(&ia6->ia_ifa);
9bccf70c
A
2568 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
2569 senderr(EHOSTUNREACH);
9bccf70c
A
2570 goto sendpkt;
2571 }
2572
b0d623f7
A
2573 RT_LOCK_SPIN(rt);
2574 gw6 = *((struct sockaddr_in6 *)rt->rt_gateway);
2575
2576 /* If hint is now down, give up */
2577 if (!(rt->rt_flags & RTF_UP)) {
2578 RT_UNLOCK(rt);
2579 senderr(EHOSTUNREACH);
2580 }
2581
2582 /* If there's no gateway route, look it up */
2583 if ((gwrt = rt->rt_gwroute) == NULL) {
2584 RT_UNLOCK(rt);
1c79356b 2585 goto lookup;
b0d623f7
A
2586 }
2587 /* Become a regular mutex */
2588 RT_CONVERT_LOCK(rt);
2589
2590 /*
2591 * Take gwrt's lock while holding route's lock;
2592 * this is okay since gwrt never points back
2593 * to rt, so no lock ordering issues.
2594 */
2595 RT_LOCK_SPIN(gwrt);
2596 if (!(gwrt->rt_flags & RTF_UP)) {
2597 struct rtentry *ogwrt;
2598
2599 rt->rt_gwroute = NULL;
2600 RT_UNLOCK(gwrt);
2601 RT_UNLOCK(rt);
2602 rtfree(gwrt);
2603lookup:
2604 gwrt = rtalloc1((struct sockaddr *)&gw6, 1, 0);
2605
2606 RT_LOCK(rt);
2607 /*
2608 * Bail out if the route is down, no route
2609 * to gateway, circular route, or if the
2610 * gateway portion of "rt" has changed.
2611 */
2612 if (!(rt->rt_flags & RTF_UP) ||
2613 gwrt == NULL || gwrt == rt ||
2614 !equal(SA(&gw6), rt->rt_gateway)) {
2615 if (gwrt == rt) {
2616 RT_REMREF_LOCKED(gwrt);
2617 gwrt = NULL;
2618 }
2619 RT_UNLOCK(rt);
2620 if (gwrt != NULL)
2621 rtfree(gwrt);
2622 senderr(EHOSTUNREACH);
2623 }
2624
2625 /* Remove any existing gwrt */
2626 ogwrt = rt->rt_gwroute;
2627 if ((rt->rt_gwroute = gwrt) != NULL)
2628 RT_ADDREF(gwrt);
2629
2630 RT_UNLOCK(rt);
2631 /* Now free the replaced gwrt */
2632 if (ogwrt != NULL)
2633 rtfree(ogwrt);
2634 /* If still no route to gateway, bail out */
2635 if (gwrt == NULL)
2636 senderr(EHOSTUNREACH);
2637 /* Remember to release/free "rt" at the end */
2638 rtrele = rt;
2639 rt = gwrt;
2640 RT_LOCK_SPIN(rt);
2641 /* If gwrt is now down, give up */
2642 if (!(rt->rt_flags & RTF_UP)) {
2643 RT_UNLOCK(rt);
2644 rtfree(rt);
2645 rt = NULL;
2646 /* "rtrele" == original "rt" */
2647 senderr(EHOSTUNREACH);
2648 }
2649 } else {
2650 RT_ADDREF_LOCKED(gwrt);
2651 RT_UNLOCK(gwrt);
2652 RT_UNLOCK(rt);
2653 RT_LOCK_SPIN(gwrt);
2654 /* If gwrt is now down, give up */
2655 if (!(gwrt->rt_flags & RTF_UP)) {
2656 RT_UNLOCK(gwrt);
2657 rtfree(gwrt);
1c79356b 2658 senderr(EHOSTUNREACH);
91447636 2659 }
b0d623f7
A
2660 /* Remember to release/free "rt" at the end */
2661 rtrele = rt;
2662 rt = gwrt;
1c79356b
A
2663 }
2664 }
b0d623f7
A
2665 /* Become a regular mutex */
2666 RT_CONVERT_LOCK(rt);
1c79356b
A
2667 }
2668
b0d623f7
A
2669 if (rt != NULL)
2670 RT_LOCK_ASSERT_HELD(rt);
2671
1c79356b
A
2672 /*
2673 * Address resolution or Neighbor Unreachability Detection
2674 * for the next hop.
2675 * At this point, the destination of the packet must be a unicast
2676 * or an anycast address(i.e. not a multicast).
2677 */
2678
2679 /* Look up the neighbor cache for the nexthop */
b0d623f7
A
2680 if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
2681 ln = rt->rt_llinfo;
2682 } else {
9bccf70c
A
2683 /*
2684 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
55e303ae 2685 * the condition below is not very efficient. But we believe
9bccf70c 2686 * it is tolerable, because this should be a rare case.
b0d623f7
A
2687 * Must drop rt_lock since nd6_is_addr_neighbor() calls
2688 * nd6_lookup() and acquires rnh_lock.
9bccf70c 2689 */
b0d623f7
A
2690 if (rt != NULL)
2691 RT_UNLOCK(rt);
2692 if (nd6_is_addr_neighbor(dst, ifp, 0)) {
2693 /* "rtrele" may have been used, so clean up "rt" now */
2694 if (rt != NULL) {
2695 /* Don't free "hint0" */
2696 if (rt == hint0)
2697 RT_REMREF(rt);
2698 else
2699 rtfree(rt);
2700 }
2701 /* Callee returns a locked route upon success */
2702 rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0);
2703 if (rt != NULL) {
2704 RT_LOCK_ASSERT_HELD(rt);
2705 ln = rt->rt_llinfo;
2706 }
2707 } else if (rt != NULL) {
2708 RT_LOCK(rt);
2709 }
1c79356b 2710 }
b0d623f7 2711
1c79356b 2712 if (!ln || !rt) {
b0d623f7
A
2713 if (rt != NULL)
2714 RT_UNLOCK(rt);
2715 lck_rw_lock_shared(nd_if_rwlock);
9bccf70c
A
2716 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
2717 !(nd_ifinfo[ifp->if_index].flags & ND6_IFF_PERFORMNUD)) {
b0d623f7 2718 lck_rw_done(nd_if_rwlock);
9bccf70c
A
2719 log(LOG_DEBUG,
2720 "nd6_output: can't allocate llinfo for %s "
2721 "(ln=%p, rt=%p)\n",
2722 ip6_sprintf(&dst->sin6_addr), ln, rt);
2723 senderr(EIO); /* XXX: good error? */
2724 }
b0d623f7 2725 lck_rw_done(nd_if_rwlock);
9bccf70c
A
2726
2727 goto sendpkt; /* send anyway */
1c79356b
A
2728 }
2729
91447636
A
2730 getmicrotime(&timenow);
2731
1c79356b
A
2732 /* We don't have to do link-layer address resolution on a p2p link. */
2733 if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
9bccf70c 2734 ln->ln_state < ND6_LLINFO_REACHABLE) {
1c79356b 2735 ln->ln_state = ND6_LLINFO_STALE;
d1ecb069 2736 ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_gctimer);
9bccf70c 2737 }
1c79356b
A
2738
2739 /*
2740 * The first time we send a packet to a neighbor whose entry is
2741 * STALE, we have to change the state to DELAY and a sets a timer to
2742 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
2743 * neighbor unreachability detection on expiration.
2744 * (RFC 2461 7.3.3)
2745 */
2746 if (ln->ln_state == ND6_LLINFO_STALE) {
2747 ln->ln_asked = 0;
2748 ln->ln_state = ND6_LLINFO_DELAY;
d1ecb069 2749 ln->ln_expire = rt_expiry(rt, timenow.tv_sec, nd6_delay);
1c79356b
A
2750 }
2751
2752 /*
2753 * If the neighbor cache entry has a state other than INCOMPLETE
55e303ae 2754 * (i.e. its link-layer address is already resolved), just
1c79356b
A
2755 * send the packet.
2756 */
b0d623f7
A
2757 if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
2758 RT_UNLOCK(rt);
2759 /*
2760 * Move this entry to the head of the queue so that it is
2761 * less likely for this entry to be a target of forced
2762 * garbage collection (see nd6_rtrequest()).
2763 */
2764 lck_mtx_lock(rnh_lock);
2765 RT_LOCK_SPIN(rt);
2766 if (ln->ln_flags & ND6_LNF_IN_USE) {
2767 LN_DEQUEUE(ln);
2768 LN_INSERTHEAD(ln);
2769 }
2770 RT_UNLOCK(rt);
2771 lck_mtx_unlock(rnh_lock);
1c79356b 2772 goto sendpkt;
b0d623f7 2773 }
1c79356b
A
2774
2775 /*
2776 * There is a neighbor cache entry, but no ethernet address
55e303ae 2777 * response yet. Replace the held mbuf (if any) with this
1c79356b
A
2778 * latest one.
2779 *
55e303ae
A
2780 * This code conforms to the rate-limiting rule described in Section
2781 * 7.2.2 of RFC 2461, because the timer is set correctly after sending
2782 * an NS below.
1c79356b 2783 */
9bccf70c 2784 if (ln->ln_state == ND6_LLINFO_NOSTATE)
1c79356b
A
2785 ln->ln_state = ND6_LLINFO_INCOMPLETE;
2786 if (ln->ln_hold)
2787 m_freem(ln->ln_hold);
2788 ln->ln_hold = m;
b0d623f7
A
2789 if (ln->ln_expire && ln->ln_asked < nd6_mmaxtries &&
2790 ln->ln_expire < timenow.tv_sec) {
2791 ln->ln_asked++;
2792 lck_rw_lock_shared(nd_if_rwlock);
2793 ln->ln_expire = timenow.tv_sec +
2794 nd_ifinfo[ifp->if_index].retrans / 1000;
2795 lck_rw_done(nd_if_rwlock);
2796 RT_UNLOCK(rt);
2797 /* We still have a reference on rt (for ln) */
2798 nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0, locked);
2799 } else {
2800 RT_UNLOCK(rt);
1c79356b 2801 }
b0d623f7
A
2802 /*
2803 * Move this entry to the head of the queue so that it is
2804 * less likely for this entry to be a target of forced
2805 * garbage collection (see nd6_rtrequest()).
2806 */
2807 lck_mtx_lock(rnh_lock);
2808 RT_LOCK_SPIN(rt);
2809 if (ln->ln_flags & ND6_LNF_IN_USE) {
2810 LN_DEQUEUE(ln);
2811 LN_INSERTHEAD(ln);
2812 }
2813 /* Clean up "rt" now while we can */
2814 if (rt == hint0) {
2815 RT_REMREF_LOCKED(rt);
2816 RT_UNLOCK(rt);
2817 } else {
2818 RT_UNLOCK(rt);
2819 rtfree_locked(rt);
2820 }
2821 rt = NULL; /* "rt" has been taken care of */
2822 lck_mtx_unlock(rnh_lock);
9bccf70c 2823
b0d623f7
A
2824 error = 0;
2825 goto release;
2826
2827sendpkt:
2828 if (rt != NULL)
2829 RT_LOCK_ASSERT_NOTHELD(rt);
9bccf70c 2830
b0d623f7 2831 /* Clean up HW checksum flags before sending the packet */
9bccf70c
A
2832 m->m_pkthdr.csum_data = 0;
2833 m->m_pkthdr.csum_flags = 0;
2834
2835 if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
b0d623f7
A
2836 /* forwarding rules require the original scope_id */
2837 m->m_pkthdr.rcvif = origifp;
91447636
A
2838 if (locked)
2839 lck_mtx_unlock(ip6_mutex);
b0d623f7
A
2840 error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
2841 (struct sockaddr *)dst, 0);
91447636
A
2842 if (locked)
2843 lck_mtx_lock(ip6_mutex);
b0d623f7 2844 goto release;
e5568f75
A
2845 } else {
2846 /* Do not allow loopback address to wind up on a wire */
2847 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
b0d623f7 2848
e5568f75
A
2849 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
2850 IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
2851 ip6stat.ip6s_badscope++;
b0d623f7
A
2852 /*
2853 * Do not simply drop the packet just like a
2854 * firewall -- we want the the application to feel
2855 * the pain. Return ENETUNREACH like ip6_output
2856 * does in some similar cases. This can startle
2857 * the otherwise clueless process that specifies
e5568f75
A
2858 * loopback as the source address.
2859 */
91447636 2860 error = ENETUNREACH;
e5568f75
A
2861 goto bad;
2862 }
9bccf70c
A
2863 }
2864
b0d623f7 2865 m->m_pkthdr.rcvif = NULL;
91447636
A
2866 if (locked)
2867 lck_mtx_unlock(ip6_mutex);
b0d623f7
A
2868 error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt,
2869 (struct sockaddr *)dst, 0);
91447636
A
2870 if (locked)
2871 lck_mtx_lock(ip6_mutex);
b0d623f7
A
2872 goto release;
2873
2874bad:
2875 if (m != NULL)
1c79356b 2876 m_freem(m);
b0d623f7
A
2877
2878release:
2879 /* Clean up "rt" unless it's already been done */
2880 if (rt != NULL) {
2881 RT_LOCK_SPIN(rt);
2882 if (rt == hint0) {
2883 RT_REMREF_LOCKED(rt);
2884 RT_UNLOCK(rt);
2885 } else {
2886 RT_UNLOCK(rt);
2887 rtfree(rt);
2888 }
2889 }
2890 /* And now clean up "rtrele" if there is any */
2891 if (rtrele != NULL) {
2892 RT_LOCK_SPIN(rtrele);
2893 if (rtrele == hint0) {
2894 RT_REMREF_LOCKED(rtrele);
2895 RT_UNLOCK(rtrele);
2896 } else {
2897 RT_UNLOCK(rtrele);
2898 rtfree(rtrele);
2899 }
2900 }
1c79356b 2901 return (error);
b0d623f7 2902}
1c79356b
A
2903#undef senderr
2904
9bccf70c 2905int
91447636
A
2906nd6_need_cache(
2907 struct ifnet *ifp)
9bccf70c
A
2908{
2909 /*
2910 * XXX: we currently do not make neighbor cache on any interface
2911 * other than ARCnet, Ethernet, FDDI and GIF.
2912 *
2913 * RFC2893 says:
2914 * - unidirectional tunnels needs no ND
2915 */
2916 switch (ifp->if_type) {
2917 case IFT_ARCNET:
2918 case IFT_ETHER:
2919 case IFT_FDDI:
2920 case IFT_IEEE1394:
9bccf70c 2921 case IFT_L2VLAN:
91447636 2922 case IFT_IEEE8023ADLAG:
9bccf70c
A
2923#if IFT_IEEE80211
2924 case IFT_IEEE80211:
2925#endif
b7266188 2926 case IFT_BRIDGE:
9bccf70c
A
2927 case IFT_GIF: /* XXX need more cases? */
2928 return(1);
2929 default:
2930 return(0);
2931 }
2932}
2933
1c79356b 2934int
91447636
A
2935nd6_storelladdr(
2936 struct ifnet *ifp,
2937 struct rtentry *rt,
2938 struct mbuf *m,
2939 struct sockaddr *dst,
2940 u_char *desten)
1c79356b 2941{
9bccf70c 2942 int i;
1c79356b
A
2943 struct sockaddr_dl *sdl;
2944
2945 if (m->m_flags & M_MCAST) {
2946 switch (ifp->if_type) {
2947 case IFT_ETHER:
9bccf70c 2948 case IFT_FDDI:
91447636
A
2949 case IFT_L2VLAN:
2950 case IFT_IEEE8023ADLAG:
9bccf70c
A
2951#if IFT_IEEE80211
2952 case IFT_IEEE80211:
2953#endif
b7266188 2954 case IFT_BRIDGE:
1c79356b
A
2955 ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
2956 desten);
2957 return(1);
9bccf70c
A
2958 case IFT_IEEE1394:
2959 for (i = 0; i < ifp->if_addrlen; i++)
2960 desten[i] = ~0;
2961 return(1);
1c79356b
A
2962 case IFT_ARCNET:
2963 *desten = 0;
2964 return(1);
2965 default:
55e303ae 2966 return(0); /* caller will free mbuf */
1c79356b
A
2967 }
2968 }
2969
9bccf70c
A
2970 if (rt == NULL) {
2971 /* this could happen, if we could not allocate memory */
55e303ae 2972 return(0); /* caller will free mbuf */
9bccf70c 2973 }
b0d623f7 2974 RT_LOCK(rt);
9bccf70c 2975 if (rt->rt_gateway->sa_family != AF_LINK) {
1c79356b 2976 printf("nd6_storelladdr: something odd happens\n");
b0d623f7 2977 RT_UNLOCK(rt);
55e303ae 2978 return(0); /* caller will free mbuf */
1c79356b
A
2979 }
2980 sdl = SDL(rt->rt_gateway);
2981 if (sdl->sdl_alen == 0) {
2982 /* this should be impossible, but we bark here for debugging */
2983 printf("nd6_storelladdr: sdl_alen == 0\n");
b0d623f7 2984 RT_UNLOCK(rt);
55e303ae 2985 return(0); /* caller will free mbuf */
1c79356b
A
2986 }
2987
2988 bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
b0d623f7 2989 RT_UNLOCK(rt);
1c79356b
A
2990 return(1);
2991}
91447636 2992
b0d623f7
A
2993/*
2994 * This is the ND pre-output routine; care must be taken to ensure that
2995 * the "hint" route never gets freed via rtfree(), since the caller may
2996 * have stored it inside a struct route with a reference held for that
2997 * placeholder.
2998 */
91447636 2999errno_t
b0d623f7
A
3000nd6_lookup_ipv6(ifnet_t ifp, const struct sockaddr_in6 *ip6_dest,
3001 struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
3002 mbuf_t packet)
91447636
A
3003{
3004 route_t route = hint;
3005 errno_t result = 0;
3006 struct sockaddr_dl *sdl = NULL;
3007 size_t copy_len;
b0d623f7 3008
91447636 3009 if (ip6_dest->sin6_family != AF_INET6)
b0d623f7
A
3010 return (EAFNOSUPPORT);
3011
91447636 3012 if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
b0d623f7
A
3013 return (ENETDOWN);
3014
3015 if (hint != NULL) {
3016 /*
3017 * Callee holds a reference on the route and returns
3018 * with the route entry locked, upon success.
3019 */
3020 result = arp_route_to_gateway_route(
3021 (const struct sockaddr*)ip6_dest, hint, &route);
91447636 3022 if (result != 0)
b0d623f7
A
3023 return (result);
3024 if (route != NULL)
3025 RT_LOCK_ASSERT_HELD(route);
91447636 3026 }
b0d623f7 3027
91447636 3028 if ((packet->m_flags & M_MCAST) != 0) {
b0d623f7
A
3029 if (route != NULL)
3030 RT_UNLOCK(route);
3031 result = dlil_resolve_multi(ifp,
3032 (const struct sockaddr*)ip6_dest,
3033 (struct sockaddr *)ll_dest, ll_dest_len);
3034 if (route != NULL)
3035 RT_LOCK(route);
3036 goto release;
91447636 3037 }
b0d623f7 3038
91447636 3039 if (route == NULL) {
b0d623f7
A
3040 /*
3041 * This could happen, if we could not allocate memory or
3042 * if arp_route_to_gateway_route() didn't return a route.
3043 */
3044 result = ENOBUFS;
3045 goto release;
91447636 3046 }
b0d623f7 3047
91447636
A
3048 if (route->rt_gateway->sa_family != AF_LINK) {
3049 printf("nd6_lookup_ipv6: gateway address not AF_LINK\n");
3050 result = EADDRNOTAVAIL;
b0d623f7 3051 goto release;
91447636 3052 }
b0d623f7 3053
91447636
A
3054 sdl = SDL(route->rt_gateway);
3055 if (sdl->sdl_alen == 0) {
3056 /* this should be impossible, but we bark here for debugging */
b0d623f7 3057 printf("nd6_lookup_ipv6: sdl_alen == 0\n");
91447636 3058 result = EHOSTUNREACH;
b0d623f7 3059 goto release;
91447636 3060 }
b0d623f7 3061
91447636
A
3062 copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
3063 bcopy(sdl, ll_dest, copy_len);
3064
b0d623f7
A
3065release:
3066 if (route != NULL) {
3067 if (route == hint) {
3068 RT_REMREF_LOCKED(route);
3069 RT_UNLOCK(route);
3070 } else {
3071 RT_UNLOCK(route);
3072 rtfree(route);
3073 }
3074 }
3075 return (result);
91447636
A
3076}
3077
9bccf70c 3078SYSCTL_DECL(_net_inet6_icmp6);
9bccf70c
A
3079
3080static int
b0d623f7 3081nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
9bccf70c 3082{
2d21ac55 3083#pragma unused(oidp, arg1, arg2)
b0d623f7 3084 int error = 0;
9bccf70c 3085 char buf[1024];
9bccf70c 3086 struct nd_defrouter *dr;
b0d623f7 3087 int p64 = proc_is64bit(req->p);
9bccf70c
A
3088
3089 if (req->newptr)
b0d623f7 3090 return (EPERM);
9bccf70c 3091
91447636 3092 lck_mtx_lock(nd6_mutex);
b0d623f7
A
3093 if (p64) {
3094 struct in6_defrouter_64 *d, *de;
3095
3096 for (dr = TAILQ_FIRST(&nd_defrouter);
3097 dr;
3098 dr = TAILQ_NEXT(dr, dr_entry)) {
3099 d = (struct in6_defrouter_64 *)buf;
3100 de = (struct in6_defrouter_64 *)(buf + sizeof (buf));
3101
3102 if (d + 1 <= de) {
3103 bzero(d, sizeof (*d));
3104 d->rtaddr.sin6_family = AF_INET6;
3105 d->rtaddr.sin6_len = sizeof (d->rtaddr);
3106 if (in6_recoverscope(&d->rtaddr, &dr->rtaddr,
3107 dr->ifp) != 0)
3108 log(LOG_ERR,
3109 "scope error in "
3110 "default router list (%s)\n",
3111 ip6_sprintf(&dr->rtaddr));
3112 d->flags = dr->flags;
3113 d->rtlifetime = dr->rtlifetime;
3114 d->expire = dr->expire;
3115 d->if_index = dr->ifp->if_index;
3116 } else {
3117 panic("buffer too short");
3118 }
3119 error = SYSCTL_OUT(req, buf, sizeof (*d));
3120 if (error)
3121 break;
3122 }
3123 } else {
3124 struct in6_defrouter_32 *d_32, *de_32;
3125
3126 for (dr = TAILQ_FIRST(&nd_defrouter);
3127 dr;
3128 dr = TAILQ_NEXT(dr, dr_entry)) {
3129 d_32 = (struct in6_defrouter_32 *)buf;
3130 de_32 = (struct in6_defrouter_32 *)(buf + sizeof (buf));
3131
3132 if (d_32 + 1 <= de_32) {
3133 bzero(d_32, sizeof (*d_32));
3134 d_32->rtaddr.sin6_family = AF_INET6;
3135 d_32->rtaddr.sin6_len = sizeof (d_32->rtaddr);
3136 if (in6_recoverscope(&d_32->rtaddr, &dr->rtaddr,
3137 dr->ifp) != 0)
3138 log(LOG_ERR,
3139 "scope error in "
3140 "default router list (%s)\n",
3141 ip6_sprintf(&dr->rtaddr));
3142 d_32->flags = dr->flags;
3143 d_32->rtlifetime = dr->rtlifetime;
3144 d_32->expire = dr->expire;
3145 d_32->if_index = dr->ifp->if_index;
3146 } else {
3147 panic("buffer too short");
3148 }
3149 error = SYSCTL_OUT(req, buf, sizeof (*d_32));
3150 if (error)
3151 break;
3152 }
9bccf70c 3153 }
91447636 3154 lck_mtx_unlock(nd6_mutex);
b0d623f7 3155 return (error);
9bccf70c
A
3156}
3157
3158static int
b0d623f7 3159nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
9bccf70c 3160{
2d21ac55 3161#pragma unused(oidp, arg1, arg2)
b0d623f7 3162 int error = 0;
9bccf70c 3163 char buf[1024];
9bccf70c 3164 struct nd_prefix *pr;
b0d623f7 3165 int p64 = proc_is64bit(req->p);
9bccf70c
A
3166
3167 if (req->newptr)
b0d623f7 3168 return (EPERM);
9bccf70c 3169
91447636 3170 lck_mtx_lock(nd6_mutex);
b0d623f7
A
3171 if (p64) {
3172 struct in6_prefix_64 *p, *pe;
91447636 3173
b0d623f7
A
3174 for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
3175 u_short advrtrs = 0;
3176 size_t advance;
3177 struct sockaddr_in6 *sin6, *s6;
3178 struct nd_pfxrouter *pfr;
9bccf70c 3179
b0d623f7
A
3180 p = (struct in6_prefix_64 *)buf;
3181 pe = (struct in6_prefix_64 *)(buf + sizeof (buf));
9bccf70c 3182
b0d623f7
A
3183 if (p + 1 <= pe) {
3184 bzero(p, sizeof (*p));
3185 sin6 = (struct sockaddr_in6 *)(p + 1);
9bccf70c 3186
b0d623f7
A
3187 p->prefix = pr->ndpr_prefix;
3188 if (in6_recoverscope(&p->prefix,
3189 &p->prefix.sin6_addr, pr->ndpr_ifp) != 0)
3190 log(LOG_ERR,
3191 "scope error in prefix list (%s)\n",
3192 ip6_sprintf(&p->prefix.sin6_addr));
3193 p->raflags = pr->ndpr_raf;
3194 p->prefixlen = pr->ndpr_plen;
3195 p->vltime = pr->ndpr_vltime;
3196 p->pltime = pr->ndpr_pltime;
3197 p->if_index = pr->ndpr_ifp->if_index;
3198 p->expire = pr->ndpr_expire;
3199 p->refcnt = pr->ndpr_refcnt;
3200 p->flags = pr->ndpr_stateflags;
3201 p->origin = PR_ORIG_RA;
3202 advrtrs = 0;
3203 for (pfr = pr->ndpr_advrtrs.lh_first;
3204 pfr;
3205 pfr = pfr->pfr_next) {
3206 if ((void *)&sin6[advrtrs + 1] >
3207 (void *)pe) {
3208 advrtrs++;
3209 continue;
3210 }
3211 s6 = &sin6[advrtrs];
3212 bzero(s6, sizeof (*s6));
3213 s6->sin6_family = AF_INET6;
3214 s6->sin6_len = sizeof (*sin6);
3215 if (in6_recoverscope(s6,
3216 &pfr->router->rtaddr,
3217 pfr->router->ifp) != 0)
3218 log(LOG_ERR, "scope error in "
3219 "prefix list (%s)\n",
3220 ip6_sprintf(&pfr->router->
3221 rtaddr));
9bccf70c 3222 advrtrs++;
9bccf70c 3223 }
b0d623f7
A
3224 p->advrtrs = advrtrs;
3225 } else {
3226 panic("buffer too short");
9bccf70c 3227 }
b0d623f7
A
3228 advance = sizeof (*p) + sizeof (*sin6) * advrtrs;
3229 error = SYSCTL_OUT(req, buf, advance);
3230 if (error)
3231 break;
3232 }
3233 } else {
3234 struct in6_prefix_32 *p_32, *pe_32;
9bccf70c 3235
b0d623f7
A
3236 for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
3237 u_short advrtrs = 0;
3238 size_t advance;
3239 struct sockaddr_in6 *sin6, *s6;
3240 struct nd_pfxrouter *pfr;
3241
3242 p_32 = (struct in6_prefix_32 *)buf;
3243 pe_32 = (struct in6_prefix_32 *)(buf + sizeof (buf));
3244
3245 if (p_32 + 1 <= pe_32) {
3246 bzero(p_32, sizeof (*p_32));
3247 sin6 = (struct sockaddr_in6 *)(p_32 + 1);
3248
3249 p_32->prefix = pr->ndpr_prefix;
3250 if (in6_recoverscope(&p_32->prefix,
3251 &p_32->prefix.sin6_addr, pr->ndpr_ifp) != 0)
3252 log(LOG_ERR, "scope error in prefix "
3253 "list (%s)\n", ip6_sprintf(&p_32->
3254 prefix.sin6_addr));
3255 p_32->raflags = pr->ndpr_raf;
3256 p_32->prefixlen = pr->ndpr_plen;
3257 p_32->vltime = pr->ndpr_vltime;
3258 p_32->pltime = pr->ndpr_pltime;
3259 p_32->if_index = pr->ndpr_ifp->if_index;
3260 p_32->expire = pr->ndpr_expire;
3261 p_32->refcnt = pr->ndpr_refcnt;
3262 p_32->flags = pr->ndpr_stateflags;
3263 p_32->origin = PR_ORIG_RA;
3264 advrtrs = 0;
3265 for (pfr = pr->ndpr_advrtrs.lh_first;
3266 pfr;
3267 pfr = pfr->pfr_next) {
3268 if ((void *)&sin6[advrtrs + 1] >
3269 (void *)pe_32) {
3270 advrtrs++;
3271 continue;
3272 }
3273 s6 = &sin6[advrtrs];
3274 bzero(s6, sizeof (*s6));
3275 s6->sin6_family = AF_INET6;
3276 s6->sin6_len = sizeof (*sin6);
3277 if (in6_recoverscope(s6,
3278 &pfr->router->rtaddr,
3279 pfr->router->ifp) != 0)
3280 log(LOG_ERR, "scope error in "
3281 "prefix list (%s)\n",
3282 ip6_sprintf(&pfr->router->
3283 rtaddr));
3284 advrtrs++;
3285 }
3286 p_32->advrtrs = advrtrs;
3287 } else {
3288 panic("buffer too short");
3289 }
3290 advance = sizeof (*p_32) + sizeof (*sin6) * advrtrs;
3291 error = SYSCTL_OUT(req, buf, advance);
3292 if (error)
3293 break;
3294 }
9bccf70c 3295 }
91447636 3296 lck_mtx_unlock(nd6_mutex);
b0d623f7 3297 return (error);
9bccf70c 3298}
2d21ac55
A
3299SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
3300 CTLFLAG_RD, 0, 0, nd6_sysctl_drlist, "S,in6_defrouter","");
3301SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
3302 CTLFLAG_RD, 0, 0, nd6_sysctl_prlist, "S,in6_defrouter","");
3303