bsd/netinet6/nd6.c

   1 /*
   2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  31  * All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. Neither the name of the project nor the names of its contributors
  42  *    may be used to endorse or promote products derived from this software
  43  *    without specific prior written permission.
  44  *
  45  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55  * SUCH DAMAGE.
  56  */
  57
  58 /*
  59  * XXX
  60  * KAME 970409 note:
  61  * BSD/OS version heavily modifies this code, related to llinfo.
  62  * Since we don't have BSD/OS version of net/route.c in our hand,
  63  * I left the code mostly as it was in 970310.  -- itojun
  64  */
  65
  66 #include <sys/param.h>
  67 #include <sys/systm.h>
  68 #include <sys/malloc.h>
  69 #include <sys/mbuf.h>
  70 #include <sys/socket.h>
  71 #include <sys/sockio.h>
  72 #include <sys/time.h>
  73 #include <sys/kernel.h>
  74 #include <sys/sysctl.h>
  75 #include <sys/errno.h>
  76 #include <sys/syslog.h>
  77 #include <sys/protosw.h>
  78 #include <sys/proc.h>
  79 #include <sys/mcache.h>
  80
  81 #include <dev/random/randomdev.h>
  82
  83 #include <kern/queue.h>
  84 #include <kern/zalloc.h>
  85
  86 #include <net/if.h>
  87 #include <net/if_dl.h>
  88 #include <net/if_types.h>
  89 #include <net/if_llreach.h>
  90 #include <net/route.h>
  91 #include <net/dlil.h>
  92 #include <net/ntstat.h>
  93 #include <net/net_osdep.h>
  94
  95 #include <netinet/in.h>
  96 #include <netinet/in_arp.h>
  97 #include <netinet/if_ether.h>
  98 #include <netinet6/in6_var.h>
  99 #include <netinet/ip6.h>
 100 #include <netinet6/ip6_var.h>
 101 #include <netinet6/nd6.h>
 102 #include <netinet6/scope6_var.h>
 103 #include <netinet/icmp6.h>
 104
 105 #include "loop.h"
 106
      /*
       * Timer periods.  nd6_init() multiplies ND6_SLOWTIMER_INTERVAL by hz
       * when arming nd6_slowtimo(), so the values are expressed in seconds.
       */
  107 #define ND6_SLOWTIMER_INTERVAL          (60 * 60)       /* 1 hour */
  108 #define ND6_RECALC_REACHTM_INTERVAL     (60 * 120)      /* 2 hours */
  109
      /*
       * Byte-wise comparison of two sockaddr-like objects over (a1)->sa_len
       * bytes.  Note that a1 is evaluated twice; do not pass an expression
       * with side effects.
       */
  110 #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
 111
  112 /* timer values */
  113 int     nd6_prune       = 1;    /* walk list every 1 second */
  114 int     nd6_prune_lazy  = 5;    /* lazily walk list every 5 seconds */
  115 int     nd6_delay       = 5;    /* delay before the first probe: 5 seconds */
  116 int     nd6_umaxtries   = 3;    /* maximum number of unicast queries */
  117 int     nd6_mmaxtries   = 3;    /* maximum number of multicast queries */
  118 int     nd6_useloopback = 1;    /* use loopback interface for local traffic */
  119 int     nd6_gctimer     = (60 * 60 * 24); /* 1 day: garbage collection timer */
  120
  121 /* Bounds the parsing loop in nd6_options() against overly long option chains */
  122 int nd6_maxndopt = 10;  /* max # of ND options allowed */
  123
  124 int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */
  125
      /* Debug logging defaults to on only when the file is built with ND6_DEBUG */
  126 #if ND6_DEBUG
  127 int nd6_debug = 1;
  128 #else
  129 int nd6_debug = 0;
  130 #endif
  131
      /* Default optimistic DAD policy: the bitwise OR of the six classes below */
  132 int nd6_optimistic_dad =
  133         (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF|
  134         ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC|
  135         ND6_OPTIMISTIC_DAD_SECURED|ND6_OPTIMISTIC_DAD_MANUAL);
  136
  137 /* for debugging? */
  138 static int nd6_inuse, nd6_allocated;
 139
 140 /*
 141  * Synchronization notes:
 142  *
  143  * The global list of ND entries is stored in llinfo_nd6; an entry
 144  * gets inserted into the list when the route is created and gets
 145  * removed from the list when it is deleted; this is done as part
 146  * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
 147  *
 148  * Because rnh_lock and rt_lock for the entry are held during those
 149  * operations, the same locks (and thus lock ordering) must be used
 150  * elsewhere to access the relevant data structure fields:
 151  *
 152  * ln_next, ln_prev, ln_rt
 153  *
 154  *      - Routing lock (rnh_lock)
 155  *
 156  * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags,
 157  * ln_llreach, ln_lastused
 158  *
 159  *      - Routing entry lock (rt_lock)
 160  *
 161  * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
 162  * as the route entry itself.  When a route is deleted (RTM_DELETE),
 163  * it is simply removed from the global list but the memory is not
 164  * freed until the route itself is freed.
 165  */
      /*
       * Head of the circular, doubly-linked list of ND entries.  An empty
       * list has both links pointing back at the head itself; LN_INSERTHEAD()
       * links new entries right after it and nd6_service() walks ln_next
       * until it comes back to &llinfo_nd6.
       */
  166 struct llinfo_nd6 llinfo_nd6 = {
  167         .ln_next = &llinfo_nd6,
  168         .ln_prev = &llinfo_nd6,
  169 };
 170
      /*
       * Lock group and attributes, allocated in nd6_init() and passed to
       * lck_mtx_init() by nd6_ifattach() for each interface's nd_ifinfo lock.
       */
  171 static lck_grp_attr_t   *nd_if_lock_grp_attr = NULL;
  172 static lck_grp_t        *nd_if_lock_grp = NULL;
  173 static lck_attr_t       *nd_if_lock_attr = NULL;
  174
  175 /* Protected by nd6_mutex */
  176 struct nd_drhead nd_defrouter;  /* default router list; TAILQ_INIT'ed in nd6_init() */
  177 struct nd_prhead nd_prefix = { 0 };
 178
 179 /*
 180  * nd6_timeout() is scheduled on a demand basis.  nd6_timeout_run is used
 181  * to indicate whether or not a timeout has been scheduled.  The rnh_lock
 182  * mutex is used to protect this scheduling; it is a natural choice given
 183  * the work done in the timer callback.  Unfortunately, there are cases
 184  * when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily
 185  * held, due to lock ordering.  In those cases, we utilize a "demand" counter
 186  * nd6_sched_timeout_want which can be atomically incremented without
  187  * having to hold rnh_lock.  In places where we acquire rnh_lock, such as
  188  * nd6_rtrequest(), we check this counter and schedule the timer if it is
  189  * non-zero.  The increment happens in various places when we allocate
  190  * new ND entries, default routers, prefixes and addresses.
 191  */
  192 static int nd6_timeout_run;             /* nd6_timeout is scheduled to run */
  193 static void nd6_timeout(void *);
  194 int nd6_sched_timeout_want;             /* demand count for timer to be sched */
  195 static boolean_t nd6_fast_timer_on = FALSE;
  196
  197 /* Serialization variables for nd6_service(), protected by rnh_lock */
  198 static boolean_t nd6_service_busy;      /* set TRUE while nd6_service() is running */
  199 static void *nd6_service_wc = &nd6_service_busy; /* msleep() channel for blocked callers */
  200 static int nd6_service_waiters = 0;     /* bumped by each caller that has to sleep */
 201
  202 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
      /* AF_INET6 sockaddr whose address bytes are all 0xff; filled in by nd6_init() */
  203 static struct sockaddr_in6 all1_sa;
  204
  205 static int regen_tmpaddr(struct in6_ifaddr *);
  206 extern lck_mtx_t *nd6_mutex;
  207
      /* llinfo_nd6 allocation and per-route link-layer info helpers */
  208 static struct llinfo_nd6 *nd6_llinfo_alloc(int);
  209 static void nd6_llinfo_free(void *);
  210 static void nd6_llinfo_purge(struct rtentry *);
  211 static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
  212 static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
  213 static void nd6_llinfo_refresh(struct rtentry *);
  214 static uint64_t ln_getexpire(struct llinfo_nd6 *);
  215
      /* Timer/service routines and ioctl back ends */
  216 static void nd6_service(void *);
  217 static void nd6_slowtimo(void *);
  218 static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
  219 static int nd6_siocgdrlst(void *, int);
  220 static int nd6_siocgprlst(void *, int);
  221
      /* Handlers registered with SYSCTL_PROC() further down in this file */
  222 static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS;
  223 static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS;
 224
 225 /*
 226  * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
 227  */
      /*
       * Unlink _ln from the llinfo_nd6 list: splice its neighbours together,
       * NULL out its own links and clear ND6_LNF_IN_USE.  Asserts that
       * rnh_lock is owned and that the entry's route lock is held.
       * _ln is evaluated several times.
       */
  228 #define LN_DEQUEUE(_ln) do {                                            \
  229         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
  230         RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
  231         (_ln)->ln_next->ln_prev = (_ln)->ln_prev;                       \
  232         (_ln)->ln_prev->ln_next = (_ln)->ln_next;                       \
  233         (_ln)->ln_prev = (_ln)->ln_next = NULL;                         \
  234         (_ln)->ln_flags &= ~ND6_LNF_IN_USE;                             \
  235 } while (0)
  236
      /*
       * Link _ln immediately after the list head llinfo_nd6 and set
       * ND6_LNF_IN_USE.  Same locking assertions as LN_DEQUEUE().
       * _ln is evaluated several times.
       */
  237 #define LN_INSERTHEAD(_ln) do {                                         \
  238         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
  239         RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
  240         (_ln)->ln_next = llinfo_nd6.ln_next;                            \
  241         llinfo_nd6.ln_next = (_ln);                                     \
  242         (_ln)->ln_prev = &llinfo_nd6;                                   \
  243         (_ln)->ln_next->ln_prev = (_ln);                                \
  244         (_ln)->ln_flags |= ND6_LNF_IN_USE;                              \
  245 } while (0)
 246
      /*
       * Zone backing llinfo_nd6 allocations; created in nd6_init(), which
       * sizes it from LLINFO_ND6_ZONE_MAX and then enables Z_EXPAND on it.
       */
  247 static struct zone *llinfo_nd6_zone;
  248 #define LLINFO_ND6_ZONE_MAX     256             /* maximum elements in zone */
  249 #define LLINFO_ND6_ZONE_NAME    "llinfo_nd6"    /* name for zone */
  250
  251 extern int tvtohz(struct timeval *);
  252
      /* Set to 1 by nd6_init(); nd6_init() VERIFYs it is still 0 on entry */
  253 static int nd6_init_done;
 254
  255 SYSCTL_DECL(_net_inet6_icmp6);
  256
      /* Read-only export of the default router list via nd6_sysctl_drlist() */
  257 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
  258         CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
  259         nd6_sysctl_drlist, "S,in6_defrouter", "");
  260
      /*
       * Read-only export of the prefix list via nd6_sysctl_prlist().
       * NOTE(review): the format string below is the same "S,in6_defrouter"
       * used for the router list; confirm against the handler whether a
       * prefix structure name was intended.
       */
  261 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
  262         CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
  263         nd6_sysctl_prlist, "S,in6_defrouter", "");
  264
  265 SYSCTL_DECL(_net_inet6_ip6);
  266
      /* Read-write integer knob published as net.inet6.ip6.maxchainsent */
  267 static int ip6_maxchainsent = 0;
  268 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
  269         CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
  270         "use dlil_output_list");
 271
 272 void
 273 nd6_init(void)
 274 {
 275         int i;
 276
 277         VERIFY(!nd6_init_done);
 278
 279         all1_sa.sin6_family = AF_INET6;
 280         all1_sa.sin6_len = sizeof (struct sockaddr_in6);
 281         for (i = 0; i < sizeof (all1_sa.sin6_addr); i++)
 282                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
 283
 284         /* initialization of the default router list */
 285         TAILQ_INIT(&nd_defrouter);
 286
 287         nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
 288         nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
 289         nd_if_lock_attr = lck_attr_alloc_init();
 290
 291         llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
 292             LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
 293             LLINFO_ND6_ZONE_NAME);
 294         if (llinfo_nd6_zone == NULL)
 295                 panic("%s: failed allocating llinfo_nd6_zone", __func__);
 296
 297         zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
 298         zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);
 299
 300         nd6_nbr_init();
 301         nd6_rtr_init();
 302         nd6_prproxy_init();
 303
 304         nd6_init_done = 1;
 305
 306         /* start timer */
 307         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
 308 }
 309
 310 static struct llinfo_nd6 *
 311 nd6_llinfo_alloc(int how)
 312 {
 313         struct llinfo_nd6 *ln;
 314
 315         ln = (how == M_WAITOK) ? zalloc(llinfo_nd6_zone) :
 316             zalloc_noblock(llinfo_nd6_zone);
 317         if (ln != NULL)
 318                 bzero(ln, sizeof (*ln));
 319
 320         return (ln);
 321 }
 322
 323 static void
 324 nd6_llinfo_free(void *arg)
 325 {
 326         struct llinfo_nd6 *ln = arg;
 327
 328         if (ln->ln_next != NULL || ln->ln_prev != NULL) {
 329                 panic("%s: trying to free %p when it is in use", __func__, ln);
 330                 /* NOTREACHED */
 331         }
 332
 333         /* Just in case there's anything there, free it */
 334         if (ln->ln_hold != NULL) {
 335                 m_freem_list(ln->ln_hold);
 336                 ln->ln_hold = NULL;
 337         }
 338
 339         /* Purge any link-layer info caching */
 340         VERIFY(ln->ln_rt->rt_llinfo == ln);
 341         if (ln->ln_rt->rt_llinfo_purge != NULL)
 342                 ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
 343
 344         zfree(llinfo_nd6_zone, ln);
 345 }
 346
 347 static void
 348 nd6_llinfo_purge(struct rtentry *rt)
 349 {
 350         struct llinfo_nd6 *ln = rt->rt_llinfo;
 351
 352         RT_LOCK_ASSERT_HELD(rt);
 353         VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
 354
 355         if (ln->ln_llreach != NULL) {
 356                 RT_CONVERT_LOCK(rt);
 357                 ifnet_llreach_free(ln->ln_llreach);
 358                 ln->ln_llreach = NULL;
 359         }
 360         ln->ln_lastused = 0;
 361 }
 362
 363 static void
 364 nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 365 {
 366         struct llinfo_nd6 *ln = rt->rt_llinfo;
 367         struct if_llreach *lr = ln->ln_llreach;
 368
 369         if (lr == NULL) {
 370                 bzero(ri, sizeof (*ri));
 371                 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
 372                 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
 373                 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
 374         } else {
 375                 IFLR_LOCK(lr);
 376                 /* Export to rt_reach_info structure */
 377                 ifnet_lr2ri(lr, ri);
 378                 /* Export ND6 send expiration (calendar) time */
 379                 ri->ri_snd_expire =
 380                     ifnet_llreach_up2calexp(lr, ln->ln_lastused);
 381                 IFLR_UNLOCK(lr);
 382         }
 383 }
 384
 385 static void
 386 nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
 387 {
 388         struct llinfo_nd6 *ln = rt->rt_llinfo;
 389         struct if_llreach *lr = ln->ln_llreach;
 390
 391         if (lr == NULL) {
 392                 bzero(iflri, sizeof (*iflri));
 393                 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
 394                 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
 395                 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
 396         } else {
 397                 IFLR_LOCK(lr);
 398                 /* Export to ifnet_llreach_info structure */
 399                 ifnet_lr2iflri(lr, iflri);
 400                 /* Export ND6 send expiration (uptime) time */
 401                 iflri->iflri_snd_expire =
 402                     ifnet_llreach_up2upexp(lr, ln->ln_lastused);
 403                 IFLR_UNLOCK(lr);
 404         }
 405 }
 406
 407 static void
 408 nd6_llinfo_refresh(struct rtentry *rt)
 409 {
 410         struct llinfo_nd6 *ln = rt->rt_llinfo;
 411         uint64_t timenow = net_uptime();
 412         /*
 413          * Can't refresh permanent, static or entries that are
 414          * not direct host entries
 415          */
 416         if (!ln || ln->ln_expire == 0 ||
 417             (rt->rt_flags & RTF_STATIC) ||
 418             !(rt->rt_flags & RTF_LLINFO)) {
 419                 return;
 420         }
 421
 422         if ((ln->ln_state > ND6_LLINFO_INCOMPLETE) &&
 423             (ln->ln_state < ND6_LLINFO_PROBE)) {
 424                 if (ln->ln_expire > timenow) {
 425                         ln->ln_expire = timenow;
 426                         ln->ln_state = ND6_LLINFO_PROBE;
 427                 }
 428         }
 429         return;
 430 }
 431
      /*
       * Store a new expiry in ln->ln_expire.  Elsewhere in this file the
       * field is compared against net_uptime() and a value of 0 is treated
       * as "no expiry" (see nd6_llinfo_refresh() and ln_getexpire()).
       */
  432 void
  433 ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
  434 {
  435         ln->ln_expire = expiry;
  436 }
 437
 438 static uint64_t
 439 ln_getexpire(struct llinfo_nd6 *ln)
 440 {
 441         struct timeval caltime;
 442         uint64_t expiry;
 443
 444         if (ln->ln_expire != 0) {
 445                 struct rtentry *rt = ln->ln_rt;
 446
 447                 VERIFY(rt != NULL);
 448                 /* account for system time change */
 449                 getmicrotime(&caltime);
 450
 451                 rt->base_calendartime +=
 452                     NET_CALCULATE_CLOCKSKEW(caltime,
 453                     rt->base_calendartime, net_uptime(), rt->base_uptime);
 454
 455                 expiry = rt->base_calendartime +
 456                     ln->ln_expire - rt->base_uptime;
 457         } else {
 458                 expiry = 0;
 459         }
 460         return (expiry);
 461 }
 462
 463 void
 464 nd6_ifreset(struct ifnet *ifp)
 465 {
 466         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 467         VERIFY(NULL != ndi);
 468         VERIFY(ndi->initialized);
 469
 470         lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED);
 471         ndi->linkmtu = ifp->if_mtu;
 472         ndi->chlim = IPV6_DEFHLIM;
 473         ndi->basereachable = REACHABLE_TIME;
 474         ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 475         ndi->retrans = RETRANS_TIMER;
 476 }
 477
 478 void
 479 nd6_ifattach(struct ifnet *ifp)
 480 {
 481         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 482
 483         VERIFY(NULL != ndi);
 484         if (!ndi->initialized) {
 485                 lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr);
 486                 ndi->flags = ND6_IFF_PERFORMNUD;
 487                 ndi->initialized = TRUE;
 488         }
 489
 490         lck_mtx_lock(&ndi->lock);
 491
 492         if (!(ifp->if_flags & IFF_MULTICAST)) {
 493                 ndi->flags |= ND6_IFF_IFDISABLED;
 494         }
 495
 496         nd6_ifreset(ifp);
 497         lck_mtx_unlock(&ndi->lock);
 498         nd6_setmtu(ifp);
 499         return;
 500 }
 501
      /* Compiled out: everything between #if 0 and #endif is never built. */
  502 #if 0
  503 /*
  504  * XXX Look more into this. Especially since we recycle ifnets and do delayed
  505  * cleanup
  506  */
  507 void
  508 nd6_ifdetach(struct nd_ifinfo *nd)
  509 {
  510         /* XXX destroy nd's lock? */
  511         FREE(nd, M_IP6NDP);
  512 }
  513 #endif
 514
 515 void
 516 nd6_setmtu(struct ifnet *ifp)
 517 {
 518         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 519         u_int32_t oldmaxmtu, maxmtu;
 520
 521         if ((NULL == ndi) || (FALSE == ndi->initialized)) {
 522                 return;
 523         }
 524
 525         lck_mtx_lock(&ndi->lock);
 526         oldmaxmtu = ndi->maxmtu;
 527
 528         /*
 529          * The ND level maxmtu is somewhat redundant to the interface MTU
 530          * and is an implementation artifact of KAME.  Instead of hard-
 531          * limiting the maxmtu based on the interface type here, we simply
 532          * take the if_mtu value since SIOCSIFMTU would have taken care of
 533          * the sanity checks related to the maximum MTU allowed for the
 534          * interface (a value that is known only by the interface layer),
 535          * by sending the request down via ifnet_ioctl().  The use of the
 536          * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
 537          * does further checking against if_mtu.
 538          */
 539         maxmtu = ndi->maxmtu = ifp->if_mtu;
 540
 541         /*
 542          * Decreasing the interface MTU under IPV6 minimum MTU may cause
 543          * undesirable situation.  We thus notify the operator of the change
 544          * explicitly.  The check for oldmaxmtu is necessary to restrict the
 545          * log to the case of changing the MTU, not initializing it.
 546          */
 547         if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
 548                 log(LOG_NOTICE, "nd6_setmtu: "
 549                     "new link MTU on %s (%u) is too small for IPv6\n",
 550                     if_name(ifp), (uint32_t)ndi->maxmtu);
 551         }
 552         ndi->linkmtu = ifp->if_mtu;
 553         lck_mtx_unlock(&ndi->lock);
 554
 555         /* also adjust in6_maxmtu if necessary. */
 556         if (maxmtu > in6_maxmtu) {
 557                 in6_setmaxmtu();
 558         }
 559 }
 560
 561 void
 562 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
 563 {
 564         bzero(ndopts, sizeof (*ndopts));
 565         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
 566         ndopts->nd_opts_last =
 567             (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
 568
 569         if (icmp6len == 0) {
 570                 ndopts->nd_opts_done = 1;
 571                 ndopts->nd_opts_search = NULL;
 572         }
 573 }
 574
 575 /*
 576  * Take one ND option.
 577  */
 578 struct nd_opt_hdr *
 579 nd6_option(union nd_opts *ndopts)
 580 {
 581         struct nd_opt_hdr *nd_opt;
 582         int olen;
 583
 584         if (!ndopts)
 585                 panic("ndopts == NULL in nd6_option\n");
 586         if (!ndopts->nd_opts_last)
 587                 panic("uninitialized ndopts in nd6_option\n");
 588         if (!ndopts->nd_opts_search)
 589                 return (NULL);
 590         if (ndopts->nd_opts_done)
 591                 return (NULL);
 592
 593         nd_opt = ndopts->nd_opts_search;
 594
 595         /* make sure nd_opt_len is inside the buffer */
 596         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
 597                 bzero(ndopts, sizeof (*ndopts));
 598                 return (NULL);
 599         }
 600
 601         olen = nd_opt->nd_opt_len << 3;
 602         if (olen == 0) {
 603                 /*
 604                  * Message validation requires that all included
 605                  * options have a length that is greater than zero.
 606                  */
 607                 bzero(ndopts, sizeof (*ndopts));
 608                 return (NULL);
 609         }
 610
 611         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
 612         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
 613                 /* option overruns the end of buffer, invalid */
 614                 bzero(ndopts, sizeof (*ndopts));
 615                 return (NULL);
 616         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
 617                 /* reached the end of options chain */
 618                 ndopts->nd_opts_done = 1;
 619                 ndopts->nd_opts_search = NULL;
 620         }
 621         return (nd_opt);
 622 }
 623
 624 /*
 625  * Parse multiple ND options.
 626  * This function is much easier to use, for ND routines that do not need
 627  * multiple options of the same type.
 628  */
 629 int
 630 nd6_options(union nd_opts *ndopts)
 631 {
 632         struct nd_opt_hdr *nd_opt;
 633         int i = 0;
 634
 635         if (ndopts == NULL)
 636                 panic("ndopts == NULL in nd6_options");
 637         if (ndopts->nd_opts_last == NULL)
 638                 panic("uninitialized ndopts in nd6_options");
 639         if (ndopts->nd_opts_search == NULL)
 640                 return (0);
 641
 642         while (1) {
 643                 nd_opt = nd6_option(ndopts);
 644                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
 645                         /*
 646                          * Message validation requires that all included
 647                          * options have a length that is greater than zero.
 648                          */
 649                         icmp6stat.icp6s_nd_badopt++;
 650                         bzero(ndopts, sizeof (*ndopts));
 651                         return (-1);
 652                 }
 653
 654                 if (nd_opt == NULL)
 655                         goto skip1;
 656
 657                 switch (nd_opt->nd_opt_type) {
 658                 case ND_OPT_SOURCE_LINKADDR:
 659                 case ND_OPT_TARGET_LINKADDR:
 660                 case ND_OPT_MTU:
 661                 case ND_OPT_REDIRECTED_HEADER:
 662                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 663                                 nd6log((LOG_INFO,
 664                                     "duplicated ND6 option found (type=%d)\n",
 665                                     nd_opt->nd_opt_type));
 666                                 /* XXX bark? */
 667                         } else {
 668                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 669                                     nd_opt;
 670                         }
 671                         break;
 672                 case ND_OPT_PREFIX_INFORMATION:
 673                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
 674                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 675                                     nd_opt;
 676                         }
 677                         ndopts->nd_opts_pi_end =
 678                             (struct nd_opt_prefix_info *)nd_opt;
 679                         break;
 680                 case ND_OPT_RDNSS:
 681                         /* ignore */
 682                         break;
 683                 default:
 684                         /*
 685                          * Unknown options must be silently ignored,
 686                          * to accomodate future extension to the protocol.
 687                          */
 688                         nd6log((LOG_DEBUG,
 689                             "nd6_options: unsupported option %d - "
 690                             "option ignored\n", nd_opt->nd_opt_type));
 691                 }
 692
 693 skip1:
 694                 i++;
 695                 if (i > nd6_maxndopt) {
 696                         icmp6stat.icp6s_nd_toomanyopt++;
 697                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
 698                         break;
 699                 }
 700
 701                 if (ndopts->nd_opts_done)
 702                         break;
 703         }
 704
 705         return (0);
 706 }
 707
      /*
       * Argument block handed to nd6_service() through its void * parameter.
       * NOTE(review): only `draining` and `found` are used in the visible
       * part of nd6_service(); the meaning of the remaining counters should
       * be confirmed against the rest of that function.
       */
  708 struct nd6svc_arg {
  709         int draining;           /* non-zero: caller is the "drainer", else the "timer" (per the log message) */
  710         uint32_t killed;
  711         uint32_t aging_lazy;
  712         uint32_t aging;
  713         uint32_t sticky;
  714         uint32_t found;         /* incremented for each list entry nd6_service() examines */
  715 };
 716
 717 /*
 718  * ND6 service routine to expire default route list and prefix list
 719  */
 720 static void
 721 nd6_service(void *arg)
 722 {
 723         struct nd6svc_arg *ap = arg;
 724         struct llinfo_nd6 *ln;
 725         struct nd_defrouter *dr;
 726         struct nd_prefix *pr;
 727         struct ifnet *ifp = NULL;
 728         struct in6_ifaddr *ia6, *nia6;
 729         uint64_t timenow;
 730         bool send_nc_failure_kev = false;
 731
 732         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 733         /*
 734          * Since we may drop rnh_lock and nd6_mutex below, we want
 735          * to run this entire operation single threaded.
 736          */
 737         while (nd6_service_busy) {
 738                 nd6log2((LOG_DEBUG, "%s: %s is blocked by %d waiters\n",
 739                     __func__, ap->draining ? "drainer" : "timer",
 740                     nd6_service_waiters));
 741                 nd6_service_waiters++;
 742                 (void) msleep(nd6_service_wc, rnh_lock, (PZERO-1),
 743                     __func__, NULL);
 744                 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 745         }
 746
 747         /* We are busy now; tell everyone else to go away */
 748         nd6_service_busy = TRUE;
 749
 750         net_update_uptime();
 751         timenow = net_uptime();
 752 again:
 753         /*
 754          * send_nc_failure_kev gets set when default router's IPv6 address
 755          * can't be resolved.
 756          * That can happen either:
 757          * 1. When the entry has resolved once but can't be
 758          * resolved later and the neighbor cache entry for gateway is deleted
 759          * after max probe attempts.
 760          *
 761          * 2. When the entry is in ND6_LLINFO_INCOMPLETE but can not be resolved
 762          * after max neighbor address resolution attempts.
 763          *
 764          * Both set send_nc_failure_kev to true. ifp is also set to the previous
 765          * neighbor cache entry's route's ifp.
 766          * Once we are done sending the notification, set send_nc_failure_kev
 767          * to false to stop sending false notifications for non default router
 768          * neighbors.
 769          *
 770          * We may to send more information like Gateway's IP that could not be
 771          * resolved, however right now we do not install more than one default
 772          * route per interface in the routing table.
 773          */
 774         if (send_nc_failure_kev && ifp->if_addrlen == IF_LLREACH_MAXLEN) {
 775                 struct kev_msg ev_msg;
 776                 struct kev_nd6_ndfailure nd6_ndfailure;
 777                 bzero(&ev_msg, sizeof(ev_msg));
 778                 bzero(&nd6_ndfailure, sizeof(nd6_ndfailure));
 779                 ev_msg.vendor_code      = KEV_VENDOR_APPLE;
 780                 ev_msg.kev_class        = KEV_NETWORK_CLASS;
 781                 ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
 782                 ev_msg.event_code       = KEV_ND6_NDFAILURE;
 783
 784                 nd6_ndfailure.link_data.if_family = ifp->if_family;
 785                 nd6_ndfailure.link_data.if_unit = ifp->if_unit;
 786                 strlcpy(nd6_ndfailure.link_data.if_name,
 787                     ifp->if_name,
 788                     sizeof(nd6_ndfailure.link_data.if_name));
 789                 ev_msg.dv[0].data_ptr = &nd6_ndfailure;
 790                 ev_msg.dv[0].data_length =
 791                         sizeof(nd6_ndfailure);
 792                 kev_post_msg(&ev_msg);
 793         }
 794
 795         send_nc_failure_kev = false;
 796         ifp = NULL;
 797         /*
 798          * The global list llinfo_nd6 is modified by nd6_request() and is
 799          * therefore protected by rnh_lock.  For obvious reasons, we cannot
 800          * hold rnh_lock across calls that might lead to code paths which
 801          * attempt to acquire rnh_lock, else we deadlock.  Hence for such
 802          * cases we drop rt_lock and rnh_lock, make the calls, and repeat the
 803          * loop.  To ensure that we don't process the same entry more than
 804          * once in a single timeout, we mark the "already-seen" entries with
 805          * ND6_LNF_TIMER_SKIP flag.  At the end of the loop, we do a second
 806          * pass thru the entries and clear the flag so they can be processed
 807          * during the next timeout.
 808          */
 809         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 810
 811         ln = llinfo_nd6.ln_next;
 812         while (ln != NULL && ln != &llinfo_nd6) {
 813                 struct rtentry *rt;
 814                 struct sockaddr_in6 *dst;
 815                 struct llinfo_nd6 *next;
 816                 u_int32_t retrans, flags;
 817                 struct nd_ifinfo *ndi = NULL;
 818
 819                 /* ln_next/prev/rt is protected by rnh_lock */
 820                 next = ln->ln_next;
 821                 rt = ln->ln_rt;
 822                 RT_LOCK(rt);
 823
 824                 /* We've seen this already; skip it */
 825                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
 826                         RT_UNLOCK(rt);
 827                         ln = next;
 828                         continue;
 829                 }
 830                 ap->found++;
 831
 832                 /* rt->rt_ifp should never be NULL */
 833                 if ((ifp = rt->rt_ifp) == NULL) {
 834                         panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
 835                             ln, rt);
 836                         /* NOTREACHED */
 837                 }
 838
 839                 /* rt_llinfo must always be equal to ln */
 840                 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
 841                         panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
 842                             __func__, rt->rt_llinfo, ln);
 843                         /* NOTREACHED */
 844                 }
 845
 846                 /* rt_key should never be NULL */
 847                 dst = SIN6(rt_key(rt));
 848                 if (dst == NULL) {
 849                         panic("%s: rt(%p) key is NULL ln(%p)", __func__,
 850                             rt, ln);
 851                         /* NOTREACHED */
 852                 }
 853
 854                 /* Set the flag in case we jump to "again" */
 855                 ln->ln_flags |= ND6_LNF_TIMER_SKIP;
 856
 857                 if (ln->ln_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
 858                         ap->sticky++;
 859                 } else if (ap->draining && (rt->rt_refcnt == 0)) {
 860                         /*
 861                          * If we are draining, immediately purge non-static
 862                          * entries without outstanding route refcnt.
 863                          */
 864                         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 865                                 ln->ln_state = ND6_LLINFO_STALE;
 866                         else
 867                                 ln->ln_state = ND6_LLINFO_PURGE;
 868                         ln_setexpire(ln, timenow);
 869                 }
 870
 871                 /*
 872                  * If the entry has not expired, skip it.  Take note on the
 873                  * state, as entries that are in the STALE state are simply
 874                  * waiting to be garbage collected, in which case we can
 875                  * relax the callout scheduling (use nd6_prune_lazy).
 876                  */
 877                 if (ln->ln_expire > timenow) {
 878                         switch (ln->ln_state) {
 879                         case ND6_LLINFO_STALE:
 880                                 ap->aging_lazy++;
 881                                 break;
 882                         default:
 883                                 ap->aging++;
 884                                 break;
 885                         }
 886                         RT_UNLOCK(rt);
 887                         ln = next;
 888                         continue;
 889                 }
 890
 891                 ndi = ND_IFINFO(ifp);
 892                 VERIFY(ndi->initialized);
 893                 retrans = ndi->retrans;
 894                 flags = ndi->flags;
 895
 896                 RT_LOCK_ASSERT_HELD(rt);
 897
 898                 switch (ln->ln_state) {
 899                 case ND6_LLINFO_INCOMPLETE:
 900                         if (ln->ln_asked < nd6_mmaxtries) {
 901                                 struct ifnet *exclifp = ln->ln_exclifp;
 902                                 ln->ln_asked++;
 903                                 ln_setexpire(ln, timenow + retrans / 1000);
 904                                 RT_ADDREF_LOCKED(rt);
 905                                 RT_UNLOCK(rt);
 906                                 lck_mtx_unlock(rnh_lock);
 907                                 if (ip6_forwarding) {
 908                                         nd6_prproxy_ns_output(ifp, exclifp,
 909                                             NULL, &dst->sin6_addr, ln);
 910                                 } else {
 911                                         nd6_ns_output(ifp, NULL,
 912                                             &dst->sin6_addr, ln, 0);
 913                                 }
 914                                 RT_REMREF(rt);
 915                                 ap->aging++;
 916                                 lck_mtx_lock(rnh_lock);
 917                         } else {
 918                                 struct mbuf *m = ln->ln_hold;
 919                                 ln->ln_hold = NULL;
 920                                 send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false;
 921                                 if (m != NULL) {
 922                                         RT_ADDREF_LOCKED(rt);
 923                                         RT_UNLOCK(rt);
 924                                         lck_mtx_unlock(rnh_lock);
 925
 926                                         struct mbuf *mnext;
 927                                         while (m) {
 928                                                 mnext = m->m_nextpkt;
 929                                                 m->m_nextpkt = NULL;
 930                                                 m->m_pkthdr.rcvif = ifp;
 931                                                 icmp6_error_flag(m, ICMP6_DST_UNREACH,
 932                                                     ICMP6_DST_UNREACH_ADDR, 0, 0);
 933                                                 m = mnext;
 934                                         }
 935                                 } else {
 936                                         RT_ADDREF_LOCKED(rt);
 937                                         RT_UNLOCK(rt);
 938                                         lck_mtx_unlock(rnh_lock);
 939                                 }
 940                                 nd6_free(rt);
 941                                 ap->killed++;
 942                                 lck_mtx_lock(rnh_lock);
 943                                 rtfree_locked(rt);
 944                         }
 945                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 946                         goto again;
 947
 948                 case ND6_LLINFO_REACHABLE:
 949                         if (ln->ln_expire != 0) {
 950                                 ln->ln_state = ND6_LLINFO_STALE;
 951                                 ln_setexpire(ln, timenow + nd6_gctimer);
 952                                 ap->aging_lazy++;
 953                         }
 954                         RT_UNLOCK(rt);
 955                         break;
 956
 957                 case ND6_LLINFO_STALE:
 958                 case ND6_LLINFO_PURGE:
 959                         /* Garbage Collection(RFC 4861 5.3) */
 960                         if (ln->ln_expire != 0) {
 961                                 RT_ADDREF_LOCKED(rt);
 962                                 RT_UNLOCK(rt);
 963                                 lck_mtx_unlock(rnh_lock);
 964                                 nd6_free(rt);
 965                                 ap->killed++;
 966                                 lck_mtx_lock(rnh_lock);
 967                                 rtfree_locked(rt);
 968                                 goto again;
 969                         } else {
 970                                 RT_UNLOCK(rt);
 971                         }
 972                         break;
 973
 974                 case ND6_LLINFO_DELAY:
 975                         if ((flags & ND6_IFF_PERFORMNUD) != 0) {
 976                                 /* We need NUD */
 977                                 ln->ln_asked = 1;
 978                                 ln->ln_state = ND6_LLINFO_PROBE;
 979                                 ln_setexpire(ln, timenow + retrans / 1000);
 980                                 RT_ADDREF_LOCKED(rt);
 981                                 RT_UNLOCK(rt);
 982                                 lck_mtx_unlock(rnh_lock);
 983                                 nd6_ns_output(ifp, &dst->sin6_addr,
 984                                     &dst->sin6_addr, ln, 0);
 985                                 RT_REMREF(rt);
 986                                 ap->aging++;
 987                                 lck_mtx_lock(rnh_lock);
 988                                 goto again;
 989                         }
 990                         ln->ln_state = ND6_LLINFO_STALE; /* XXX */
 991                         ln_setexpire(ln, timenow + nd6_gctimer);
 992                         RT_UNLOCK(rt);
 993                         ap->aging_lazy++;
 994                         break;
 995
 996                 case ND6_LLINFO_PROBE:
 997                         if (ln->ln_asked < nd6_umaxtries) {
 998                                 ln->ln_asked++;
 999                                 ln_setexpire(ln, timenow + retrans / 1000);
1000                                 RT_ADDREF_LOCKED(rt);
1001                                 RT_UNLOCK(rt);
1002                                 lck_mtx_unlock(rnh_lock);
1003                                 nd6_ns_output(ifp, &dst->sin6_addr,
1004                                     &dst->sin6_addr, ln, 0);
1005                                 RT_REMREF(rt);
1006                                 ap->aging++;
1007                                 lck_mtx_lock(rnh_lock);
1008                         } else {
1009                                 send_nc_failure_kev = (rt->rt_flags & RTF_ROUTER) ? true : false;
1010                                 RT_ADDREF_LOCKED(rt);
1011                                 RT_UNLOCK(rt);
1012                                 lck_mtx_unlock(rnh_lock);
1013                                 nd6_free(rt);
1014                                 ap->killed++;
1015                                 lck_mtx_lock(rnh_lock);
1016                                 rtfree_locked(rt);
1017                         }
1018                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1019                         goto again;
1020
1021                 default:
1022                         RT_UNLOCK(rt);
1023                         break;
1024                 }
1025                 ln = next;
1026         }
1027         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1028
1029         /* Now clear the flag from all entries */
1030         ln = llinfo_nd6.ln_next;
1031         while (ln != NULL && ln != &llinfo_nd6) {
1032                 struct rtentry *rt = ln->ln_rt;
1033                 struct llinfo_nd6 *next = ln->ln_next;
1034
1035                 RT_LOCK_SPIN(rt);
1036                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP)
1037                         ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
1038                 RT_UNLOCK(rt);
1039                 ln = next;
1040         }
1041         lck_mtx_unlock(rnh_lock);
1042
1043         /* expire default router list */
1044         lck_mtx_lock(nd6_mutex);
1045         dr = TAILQ_FIRST(&nd_defrouter);
1046         while (dr) {
1047                 ap->found++;
1048                 if (dr->expire != 0 && dr->expire < timenow) {
1049                         struct nd_defrouter *t;
1050                         t = TAILQ_NEXT(dr, dr_entry);
1051                         defrtrlist_del(dr);
1052                         dr = t;
1053                         ap->killed++;
1054                 } else {
1055                         if (dr->expire == 0 || (dr->stateflags & NDDRF_STATIC))
1056                                 ap->sticky++;
1057                         else
1058                                 ap->aging_lazy++;
1059                         dr = TAILQ_NEXT(dr, dr_entry);
1060                 }
1061         }
1062         lck_mtx_unlock(nd6_mutex);
1063
1064         /*
1065          * expire interface addresses.
1066          * in the past the loop was inside prefix expiry processing.
1067          * However, from a stricter spec-conformance standpoint, we should
1068          * rather separate address lifetimes and prefix lifetimes.
1069          */
1070 addrloop:
1071         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1072         for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
1073                 ap->found++;
1074                 nia6 = ia6->ia_next;
1075                 IFA_LOCK(&ia6->ia_ifa);
1076                 /*
1077                  * Extra reference for ourselves; it's no-op if
1078                  * we don't have to regenerate temporary address,
1079                  * otherwise it protects the address from going
1080                  * away since we drop in6_ifaddr_rwlock below.
1081                  */
1082                 IFA_ADDREF_LOCKED(&ia6->ia_ifa);
1083                 /* check address lifetime */
1084                 if (IFA6_IS_INVALID(ia6, timenow)) {
1085                         /*
1086                          * If the expiring address is temporary, try
1087                          * regenerating a new one.  This would be useful when
1088                          * we suspended a laptop PC, then turned it on after a
1089                          * period that could invalidate all temporary
1090                          * addresses.  Although we may have to restart the
1091                          * loop (see below), it must be after purging the
1092                          * address.  Otherwise, we'd see an infinite loop of
1093                          * regeneration.
1094                          */
1095                         if (ip6_use_tempaddr &&
1096                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1097                                 /*
1098                                  * NOTE: We have to drop the lock here
1099                                  * because regen_tmpaddr() eventually calls
1100                                  * in6_update_ifa(), which must take the lock
1101                                  * and would otherwise cause a hang.  This is
1102                                  * safe because the goto addrloop leads to a
1103                                  * re-evaluation of the in6_ifaddrs list
1104                                  */
1105                                 IFA_UNLOCK(&ia6->ia_ifa);
1106                                 lck_rw_done(&in6_ifaddr_rwlock);
1107                                 (void) regen_tmpaddr(ia6);
1108                         } else {
1109                                 IFA_UNLOCK(&ia6->ia_ifa);
1110                                 lck_rw_done(&in6_ifaddr_rwlock);
1111                         }
1112
1113                         /*
1114                          * Purging the address would have caused
1115                          * in6_ifaddr_rwlock to be dropped and reacquired;
1116                          * therefore search again from the beginning
1117                          * of in6_ifaddrs list.
1118                          */
1119                         in6_purgeaddr(&ia6->ia_ifa);
1120                         ap->killed++;
1121
1122                         /* Release extra reference taken above */
1123                         IFA_REMREF(&ia6->ia_ifa);
1124                         goto addrloop;
1125                 }
1126                 /*
1127                  * The lazy timer runs every nd6_prune_lazy seconds with at
1128                  * most "2 * nd6_prune_lazy - 1" leeway. We consider the worst
1129                  * case here and make sure we schedule the regular timer if an
1130                  * interface address is about to expire.
1131                  */
1132                 if (IFA6_IS_INVALID(ia6, timenow + 3 * nd6_prune_lazy))
1133                         ap->aging++;
1134                 else
1135                         ap->aging_lazy++;
1136                 IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
1137                 if (IFA6_IS_DEPRECATED(ia6, timenow)) {
1138                         int oldflags = ia6->ia6_flags;
1139
1140                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
1141
1142                         /*
1143                          * If a temporary address has just become deprecated,
1144                          * regenerate a new one if possible.
1145                          */
1146                         if (ip6_use_tempaddr &&
1147                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1148                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
1149
1150                                 /* see NOTE above */
1151                                 IFA_UNLOCK(&ia6->ia_ifa);
1152                                 lck_rw_done(&in6_ifaddr_rwlock);
1153                                 if (regen_tmpaddr(ia6) == 0) {
1154                                         /*
1155                                          * A new temporary address is
1156                                          * generated.
1157                                          * XXX: this means the address chain
1158                                          * has changed while we are still in
1159                                          * the loop.  Although the change
1160                                          * would not cause disaster (because
1161                                          * it's not a deletion, but an
1162                                          * addition,) we'd rather restart the
1163                                          * loop just for safety.  Or does this
1164                                          * significantly reduce performance??
1165                                          */
1166                                         /* Release extra reference */
1167                                         IFA_REMREF(&ia6->ia_ifa);
1168                                         goto addrloop;
1169                                 }
1170                                 lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1171                         } else {
1172                                 IFA_UNLOCK(&ia6->ia_ifa);
1173                         }
1174                 } else {
1175                         /*
1176                          * A new RA might have made a deprecated address
1177                          * preferred.
1178                          */
1179                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1180                         IFA_UNLOCK(&ia6->ia_ifa);
1181                 }
1182                 lck_rw_assert(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1183                 /* Release extra reference taken above */
1184                 IFA_REMREF(&ia6->ia_ifa);
1185         }
1186         lck_rw_done(&in6_ifaddr_rwlock);
1187
1188         lck_mtx_lock(nd6_mutex);
1189         /* expire prefix list */
1190         pr = nd_prefix.lh_first;
1191         while (pr != NULL) {
1192                 ap->found++;
1193                 /*
1194                  * check prefix lifetime.
1195                  * since pltime is just for autoconf, pltime processing for
1196                  * prefix is not necessary.
1197                  */
1198                 NDPR_LOCK(pr);
1199                 if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE ||
1200                     pr->ndpr_stateflags & NDPRF_DEFUNCT) {
1201                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1202                         NDPR_UNLOCK(pr);
1203                         pr = pr->ndpr_next;
1204                         continue;
1205                 }
1206                 if (pr->ndpr_expire != 0 && pr->ndpr_expire < timenow) {
1207                         /*
1208                          * address expiration and prefix expiration are
1209                          * separate.  NEVER perform in6_purgeaddr here.
1210                          */
1211                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1212                         NDPR_ADDREF_LOCKED(pr);
1213                         prelist_remove(pr);
1214                         NDPR_UNLOCK(pr);
1215                         NDPR_REMREF(pr);
1216                         pfxlist_onlink_check();
1217                         pr = nd_prefix.lh_first;
1218                         ap->killed++;
1219                 } else {
1220                         if (pr->ndpr_expire == 0 ||
1221                             (pr->ndpr_stateflags & NDPRF_STATIC))
1222                                 ap->sticky++;
1223                         else
1224                                 ap->aging_lazy++;
1225                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1226                         NDPR_UNLOCK(pr);
1227                         pr = pr->ndpr_next;
1228                 }
1229         }
1230         LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1231                 NDPR_LOCK(pr);
1232                 pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE;
1233                 NDPR_UNLOCK(pr);
1234         }
1235         lck_mtx_unlock(nd6_mutex);
1236
1237         lck_mtx_lock(rnh_lock);
1238         /* We're done; let others enter */
1239         nd6_service_busy = FALSE;
1240         if (nd6_service_waiters > 0) {
1241                 nd6_service_waiters = 0;
1242                 wakeup(nd6_service_wc);
1243         }
1244 }
1245
1246 void
1247 nd6_drain(void *arg)
1248 {
             /*
              * Run one nd6_service() pass with the "draining" flag set.  In
              * that mode nd6_service() makes non-static neighbor cache entries
              * that have no outstanding route refcnt expire immediately rather
              * than waiting for their timers.  The arg parameter is unused.
              */
1249 #pragma unused(arg)
1250         struct nd6svc_arg sarg;
1251
1252         nd6log2((LOG_DEBUG, "%s: draining ND6 entries\n", __func__));
1253
             /*
              * nd6_service() is called with rnh_lock held; it re-acquires the
              * lock before returning, so the unlock below pairs with this lock.
              */
1254         lck_mtx_lock(rnh_lock);
             /* Start from zeroed counters; draining is the only input set. */
1255         bzero(&sarg, sizeof (sarg));
1256         sarg.draining = 1;
1257         nd6_service(&sarg);
             /* Report the counters that nd6_service() filled in. */
1258         nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
1259             "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1260             sarg.aging, sarg.sticky, sarg.killed));
1261         lck_mtx_unlock(rnh_lock);
1262 }
1263
1264 /*
1265  * We use the ``arg'' variable to decide whether or not the timer we're
1266  * running is the fast timer. We do this to reset the nd6_fast_timer_on
1267  * variable so that later we don't end up ignoring a ``fast timer''
1268  * request if the 5 second timer is running (see nd6_sched_timeout).
      *
      * Each invocation runs one nd6_service() pass under rnh_lock, removes
      * itself from the nd6_timeout_run count, and re-arms the timer through
      * nd6_sched_timeout() when the pass counted any aging or lazily aging
      * entries, or when nd6_sched_timeout_want is set.  If any entry was
      * counted as "aging", or nd6_prune_lazy is less than 1, the timer is
      * re-armed after nd6_prune seconds without leeway; otherwise it is
      * re-armed after MAX(nd6_prune, nd6_prune_lazy) seconds with a leeway
      * derived from read_frandom().
1269  */
1270 static void
1271 nd6_timeout(void *arg)
1272 {
1273         struct nd6svc_arg sarg;
1274         uint32_t buf;
1275
1276         lck_mtx_lock(rnh_lock);
             /* Zeroed argument: counters start at 0 and draining is off. */
1277         bzero(&sarg, sizeof (sarg));
1278         nd6_service(&sarg);
1279         nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
1280             "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1281             sarg.aging, sarg.sticky, sarg.killed));
1282         /* re-arm the timer if there's work to do */
             /*
              * This callout has fired, so it no longer counts as outstanding.
              * After the decrement the count must be 0 or 1.
              */
1283         nd6_timeout_run--;
1284         VERIFY(nd6_timeout_run >= 0 && nd6_timeout_run < 2);
             /*
              * nd6_sched_timeout() passes &nd6_fast_timer_on as the argument
              * only when it arms the fast timer.
              */
1285         if (arg == &nd6_fast_timer_on)
1286                 nd6_fast_timer_on = FALSE;
1287         if (sarg.aging_lazy > 0 || sarg.aging > 0 || nd6_sched_timeout_want) {
1288                 struct timeval atv, ltv, *leeway;
1289                 int lazy = nd6_prune_lazy;
1290
1291                 if (sarg.aging > 0 || lazy < 1) {
                             /* Regular timer: nd6_prune seconds, no leeway. */
1292                         atv.tv_usec = 0;
1293                         atv.tv_sec = nd6_prune;
1294                         leeway = NULL;
1295                 } else {
                             /*
                              * Lazy timer: only lazily aging entries remain.
                              * The leeway is 2 * MAX(buf % lazy, 1) seconds,
                              * i.e. an even value of at least 2; buf is
                              * presumably filled with random bytes by
                              * read_frandom() -- confirm.
                              */
1296                         VERIFY(lazy >= 1);
1297                         atv.tv_usec = 0;
1298                         atv.tv_sec = MAX(nd6_prune, lazy);
1299                         ltv.tv_usec = 0;
1300                         read_frandom(&buf, sizeof(buf));
1301                         ltv.tv_sec = MAX(buf % lazy, 1) * 2;
1302                         leeway = &ltv;
1303                 }
1304                 nd6_sched_timeout(&atv, leeway);
1305         } else if (nd6_debug) {
                     /* Nothing left to age and no pending request. */
1306                 nd6log2((LOG_DEBUG, "%s: not rescheduling timer\n", __func__));
1307         }
1308         lck_mtx_unlock(rnh_lock);
1309 }
1310
1311 void
1312 nd6_sched_timeout(struct timeval *atv, struct timeval *ltv)
1313 {
             /*
              * Arm an nd6_timeout() callout.  The caller must hold rnh_lock
              * (asserted below).
              *
              * atv: delay before the callout fires; NULL selects a delay of
              *      MAX(nd6_prune, 1) seconds and causes ltv to be ignored.
              * ltv: leeway for the callout; NULL requests the "fast" timer,
              *      armed with timeout(), otherwise the timer is armed with
              *      timeout_with_leeway().
              *
              * nd6_timeout_run counts the callouts currently outstanding.  A
              * request is honored when none is outstanding, or when exactly
              * one is outstanding, it is not the fast timer, and the fast
              * timer is being requested.  Any other request is only logged.
              */
1314         struct timeval tv;
1315
1316         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1317         if (atv == NULL) {
1318                 tv.tv_usec = 0;
1319                 tv.tv_sec = MAX(nd6_prune, 1);
1320                 atv = &tv;
1321                 ltv = NULL;     /* ignore leeway */
1322         }
1323         /* see comments on top of this file */
1324         if (nd6_timeout_run == 0) {
                     /* No callout outstanding: arm the kind that was requested. */
1325                 if (ltv == NULL) {
1326                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1327                             "T+%llus.%lluu (demand %d)\n", __func__,
1328                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1329                             nd6_sched_timeout_want));
                             /*
                              * Fast timer: &nd6_fast_timer_on is the callout
                              * argument, which nd6_timeout() compares against
                              * to clear the flag when this timer fires.
                              */
1330                         nd6_fast_timer_on = TRUE;
1331                         timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1332                 } else {
1333                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1334                             "T+%llus.%lluu with %llus.%lluu leeway "
1335                             "(demand %d)\n", __func__, (uint64_t)atv->tv_sec,
1336                             (uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec,
1337                             (uint64_t)ltv->tv_usec, nd6_sched_timeout_want));
                             /* Timer with leeway: the callout argument is NULL. */
1338                         nd6_fast_timer_on = FALSE;
1339                         timeout_with_leeway(nd6_timeout, NULL,
1340                             tvtohz(atv), tvtohz(ltv));
1341                 }
1342                 nd6_timeout_run++;
1343                 nd6_sched_timeout_want = 0;
1344         } else if (nd6_timeout_run == 1 && ltv == NULL &&
1345             nd6_fast_timer_on == FALSE) {
                     /*
                      * One callout is outstanding and it is not the fast timer,
                      * while the fast timer is being requested: arm a second,
                      * fast callout alongside it.
                      */
1346                 nd6log2((LOG_DEBUG, "%s: fast timer scheduled in "
1347                     "T+%llus.%lluu (demand %d)\n", __func__,
1348                     (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1349                     nd6_sched_timeout_want));
1350                 nd6_fast_timer_on = TRUE;
1351                 nd6_sched_timeout_want = 0;
1352                 nd6_timeout_run++;
1353                 timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1354         } else {
                     /* Request not honored: nothing is armed, it is only logged. */
1355                 if (ltv == NULL) {
1356                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1357                             "timers %d, fast_timer %d, T+%llus.%lluu\n",
1358                             __func__, nd6_timeout_run, nd6_fast_timer_on,
1359                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec));
1360                 } else {
1361                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1362                             "timers %d, fast_timer %d, T+%llus.%lluu "
1363                             "with %llus.%lluu leeway\n", __func__,
1364                             nd6_timeout_run, nd6_fast_timer_on,
1365                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1366                             (uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec));
1367                 }
1368         }
1369 }
1370
1371 /*
1372  * ND6 router advertisement kernel notification
      *
      * Posts one kernel event per entry of prefix_list, for at most
      * list_length entries.  Every event uses vendor KEV_VENDOR_APPLE, class
      * KEV_NETWORK_CLASS, subclass KEV_ND6_SUBCLASS and the caller-supplied
      * event code, and carries a struct kev_nd6_ra_data holding:
      *   - the link-layer address copied from dl_addr, truncated to
      *     ND6_ROUTER_LL_SIZE bytes;
      *   - the MTU, flagged KEV_ND6_DATA_VALID_MTU, when mtu >= IPV6_MMTU;
      *   - list_length, flagged KEV_ND6_DATA_VALID_PREFIX, when the list is
      *     non-empty;
      *   - the data of one prefix and its position (list_index) in the list.
      *
      * Events are posted only from inside the per-prefix loop, so nothing is
      * posted when prefix_list is NULL or list_length is 0.
1373  */
1374 void
1375 nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
1376     u_int32_t list_length, u_int32_t mtu, char *dl_addr, u_int32_t dl_addr_len)
1377 {
1378         struct kev_msg ev_msg;
1379         struct kev_nd6_ra_data nd6_ra_msg_data;
1380         struct nd_prefix_list *itr = prefix_list;
1381
1382         bzero(&ev_msg, sizeof (struct kev_msg));
1383         ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1384         ev_msg.kev_class        = KEV_NETWORK_CLASS;
1385         ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
1386         ev_msg.event_code       = code;
1387
             /* Zeroing also resets flags and list_index to 0. */
1388         bzero(&nd6_ra_msg_data, sizeof (nd6_ra_msg_data));
             /* Clamp the copied length to the size of the lladdr field. */
1389         nd6_ra_msg_data.lladdrlen = (dl_addr_len <= ND6_ROUTER_LL_SIZE) ?
1390             dl_addr_len : ND6_ROUTER_LL_SIZE;
             /*
              * NOTE(review): dl_addr is not checked for NULL here; callers
              * presumably pass a valid pointer whenever dl_addr_len is
              * non-zero -- confirm.
              */
1391         bcopy(dl_addr, &nd6_ra_msg_data.lladdr, nd6_ra_msg_data.lladdrlen);
1392
             /* Only report an MTU of at least IPV6_MMTU. */
1393         if (mtu > 0 && mtu >= IPV6_MMTU) {
1394                 nd6_ra_msg_data.mtu = mtu;
1395                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_MTU;
1396         }
1397
1398         if (list_length > 0 && prefix_list != NULL) {
1399                 nd6_ra_msg_data.list_length = list_length;
1400                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_PREFIX;
1401         }
1402
             /*
              * One event per prefix.  The link-layer address, MTU, flags and
              * list_length set above are reused unchanged in every event;
              * only the prefix member and list_index differ.
              */
1403         while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
1404                 bcopy(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
1405                     sizeof (nd6_ra_msg_data.prefix.prefix));
1406                 nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
1407                 nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
                     /* Every prefix is reported with origin PR_ORIG_RA. */
1408                 nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
1409                 nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
1410                 nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
1411                 nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr);
1412                 nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
1413                 nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
1414                 nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;
1415
1416                 /* send the message up */
1417                 ev_msg.dv[0].data_ptr           = &nd6_ra_msg_data;
1418                 ev_msg.dv[0].data_length        = sizeof (nd6_ra_msg_data);
                     /* Presumably a zero length ends the data vector -- confirm. */
1419                 ev_msg.dv[1].data_length        = 0;
1420                 kev_post_msg(&ev_msg);
1421
1422                 /* clean up for the next prefix */
1423                 bzero(&nd6_ra_msg_data.prefix, sizeof (nd6_ra_msg_data.prefix));
1424                 itr = itr->next;
1425                 nd6_ra_msg_data.list_index++;
1426         }
1427 }
1428
1429 /*
1430  * Regenerate deprecated/invalidated temporary address
1431  */
1432 static int
1433 regen_tmpaddr(struct in6_ifaddr *ia6)
1434 {
1435         struct ifaddr *ifa;
1436         struct ifnet *ifp;
1437         struct in6_ifaddr *public_ifa6 = NULL;
1438         uint64_t timenow = net_uptime();
1439
1440         ifp = ia6->ia_ifa.ifa_ifp;
1441         ifnet_lock_shared(ifp);
1442         TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1443                 struct in6_ifaddr *it6;
1444
1445                 IFA_LOCK(ifa);
1446                 if (ifa->ifa_addr->sa_family != AF_INET6) {
1447                         IFA_UNLOCK(ifa);
1448                         continue;
1449                 }
1450                 it6 = (struct in6_ifaddr *)ifa;
1451
1452                 /* ignore no autoconf addresses. */
1453                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
1454                         IFA_UNLOCK(ifa);
1455                         continue;
1456                 }
1457                 /* ignore autoconf addresses with different prefixes. */
1458                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
1459                         IFA_UNLOCK(ifa);
1460                         continue;
1461                 }
1462                 /*
1463                  * Now we are looking at an autoconf address with the same
1464                  * prefix as ours.  If the address is temporary and is still
1465                  * preferred, do not create another one.  It would be rare, but
1466                  * could happen, for example, when we resume a laptop PC after
1467                  * a long period.
1468                  */
1469                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1470                     !IFA6_IS_DEPRECATED(it6, timenow)) {
1471                         IFA_UNLOCK(ifa);
1472                         if (public_ifa6 != NULL)
1473                                 IFA_REMREF(&public_ifa6->ia_ifa);
1474                         public_ifa6 = NULL;
1475                         break;
1476                 }
1477
1478                 /*
1479                  * This is a public autoconf address that has the same prefix
1480                  * as ours.  If it is preferred, keep it.  We can't break the
1481                  * loop here, because there may be a still-preferred temporary
1482                  * address with the prefix.
1483                  */
1484                 if (!IFA6_IS_DEPRECATED(it6, timenow)) {
1485                         IFA_ADDREF_LOCKED(ifa); /* for public_ifa6 */
1486                         IFA_UNLOCK(ifa);
1487                         if (public_ifa6 != NULL)
1488                                 IFA_REMREF(&public_ifa6->ia_ifa);
1489                         public_ifa6 = it6;
1490                 } else {
1491                         IFA_UNLOCK(ifa);
1492                 }
1493         }
1494         ifnet_lock_done(ifp);
1495
1496         if (public_ifa6 != NULL) {
1497                 int e;
1498
1499                 if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
1500                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1501                             " tmp addr,errno=%d\n", e);
1502                         IFA_REMREF(&public_ifa6->ia_ifa);
1503                         return (-1);
1504                 }
1505                 IFA_REMREF(&public_ifa6->ia_ifa);
1506                 return (0);
1507         }
1508
1509         return (-1);
1510 }
1511
1512 /*
1513  * Nuke neighbor cache/prefix/default router management table, right before
1514  * ifp goes away.
1515  */
1516 void
1517 nd6_purge(struct ifnet *ifp)
1518 {
1519         struct llinfo_nd6 *ln;
1520         struct nd_defrouter *dr, *ndr;
1521         struct nd_prefix *pr, *npr;
1522         boolean_t removed;
1523
1524         /* Nuke default router list entries toward ifp */
1525         lck_mtx_lock(nd6_mutex);
1526         if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1527                 /*
1528                  * The first entry of the list may be stored in
1529                  * the routing table, so we'll delete it later.
1530                  */
1531                 for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
1532                         ndr = TAILQ_NEXT(dr, dr_entry);
1533                         if (dr->stateflags & NDDRF_INSTALLED)
1534                                 continue;
1535                         if (dr->ifp == ifp)
1536                                 defrtrlist_del(dr);
1537                 }
1538                 dr = TAILQ_FIRST(&nd_defrouter);
1539                 if (dr->ifp == ifp)
1540                         defrtrlist_del(dr);
1541         }
1542
1543         for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = ndr) {
1544                 ndr = TAILQ_NEXT(dr, dr_entry);
1545                 if (!(dr->stateflags & NDDRF_INSTALLED))
1546                         continue;
1547
1548                 if (dr->ifp == ifp)
1549                         defrtrlist_del(dr);
1550         }
1551
1552         /* Nuke prefix list entries toward ifp */
1553         removed = FALSE;
1554         for (pr = nd_prefix.lh_first; pr; pr = npr) {
1555                 NDPR_LOCK(pr);
1556                 npr = pr->ndpr_next;
1557                 if (pr->ndpr_ifp == ifp &&
1558                     !(pr->ndpr_stateflags & NDPRF_DEFUNCT)) {
1559                         /*
1560                          * Because if_detach() does *not* release prefixes
1561                          * while purging addresses the reference count will
1562                          * still be above zero. We therefore reset it to
1563                          * make sure that the prefix really gets purged.
1564                          */
1565                         pr->ndpr_addrcnt = 0;
1566
1567                         /*
1568                          * Previously, pr->ndpr_addr is removed as well,
1569                          * but I strongly believe we don't have to do it.
1570                          * nd6_purge() is only called from in6_ifdetach(),
1571                          * which removes all the associated interface addresses
1572                          * by itself.
1573                          * (jinmei@kame.net 20010129)
1574                          */
1575                         NDPR_ADDREF_LOCKED(pr);
1576                         prelist_remove(pr);
1577                         NDPR_UNLOCK(pr);
1578                         NDPR_REMREF(pr);
1579                         removed = TRUE;
1580                         npr = nd_prefix.lh_first;
1581                 } else {
1582                         NDPR_UNLOCK(pr);
1583                 }
1584         }
1585         if (removed)
1586                 pfxlist_onlink_check();
1587         lck_mtx_unlock(nd6_mutex);
1588
1589         /* cancel default outgoing interface setting */
1590         if (nd6_defifindex == ifp->if_index) {
1591                 nd6_setdefaultiface(0);
1592         }
1593
1594         /*
1595          * Perform default router selection even when we are a router,
1596          * if Scoped Routing is enabled.
1597          */
1598         if (ip6_doscopedroute || !ip6_forwarding) {
1599                 lck_mtx_lock(nd6_mutex);
1600                 /* refresh default router list */
1601                 defrouter_select(ifp);
1602                 lck_mtx_unlock(nd6_mutex);
1603         }
1604
1605         /*
1606          * Nuke neighbor cache entries for the ifp.
1607          * Note that rt->rt_ifp may not be the same as ifp,
1608          * due to KAME goto ours hack.  See RTM_RESOLVE case in
1609          * nd6_rtrequest(), and ip6_input().
1610          */
1611 again:
1612         lck_mtx_lock(rnh_lock);
1613         ln = llinfo_nd6.ln_next;
1614         while (ln != NULL && ln != &llinfo_nd6) {
1615                 struct rtentry *rt;
1616                 struct llinfo_nd6 *nln;
1617
1618                 nln = ln->ln_next;
1619                 rt = ln->ln_rt;
1620                 RT_LOCK(rt);
1621                 if (rt->rt_gateway != NULL &&
1622                     rt->rt_gateway->sa_family == AF_LINK &&
1623                     SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
1624                         RT_ADDREF_LOCKED(rt);
1625                         RT_UNLOCK(rt);
1626                         lck_mtx_unlock(rnh_lock);
1627                         /*
1628                          * See comments on nd6_service() for reasons why
1629                          * this loop is repeated; we bite the costs of
1630                          * going thru the same llinfo_nd6 more than once
1631                          * here, since this purge happens during detach,
1632                          * and that unlike the timer case, it's possible
1633                          * there's more than one purges happening at the
1634                          * same time (thus a flag wouldn't buy anything).
1635                          */
1636                         nd6_free(rt);
1637                         RT_REMREF(rt);
1638                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1639                         goto again;
1640                 } else {
1641                         RT_UNLOCK(rt);
1642                 }
1643                 ln = nln;
1644         }
1645         lck_mtx_unlock(rnh_lock);
1646 }
1647
1648 /*
1649  * Upon success, the returned route will be locked and the caller is
1650  * responsible for releasing the reference and doing RT_UNLOCK(rt).
1651  * This routine does not require rnh_lock to be held by the caller,
1652  * although it needs to be indicated of such a case in order to call
1653  * the correct variant of the relevant routing routines.
1654  */
1655 struct rtentry *
1656 nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp, int rt_locked)
1657 {
1658         struct rtentry *rt;
1659         struct sockaddr_in6 sin6;
1660         unsigned int ifscope;
1661
1662         bzero(&sin6, sizeof (sin6));
1663         sin6.sin6_len = sizeof (struct sockaddr_in6);
1664         sin6.sin6_family = AF_INET6;
1665         sin6.sin6_addr = *addr6;
1666
1667         ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
1668         if (rt_locked) {
1669                 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1670                 rt = rtalloc1_scoped_locked(SA(&sin6), create, 0, ifscope);
1671         } else {
1672                 rt = rtalloc1_scoped(SA(&sin6), create, 0, ifscope);
1673         }
1674
1675         if (rt != NULL) {
1676                 RT_LOCK(rt);
1677                 if ((rt->rt_flags & RTF_LLINFO) == 0) {
1678                         /*
1679                          * This is the case for the default route.
1680                          * If we want to create a neighbor cache for the
1681                          * address, we should free the route for the
1682                          * destination and allocate an interface route.
1683                          */
1684                         if (create) {
1685                                 RT_UNLOCK(rt);
1686                                 if (rt_locked)
1687                                         rtfree_locked(rt);
1688                                 else
1689                                         rtfree(rt);
1690                                 rt = NULL;
1691                         }
1692                 }
1693         }
1694         if (rt == NULL) {
1695                 if (create && ifp) {
1696                         struct ifaddr *ifa;
1697                         u_int32_t ifa_flags;
1698                         int e;
1699
1700                         /*
1701                          * If no route is available and create is set,
1702                          * we allocate a host route for the destination
1703                          * and treat it like an interface route.
1704                          * This hack is necessary for a neighbor which can't
1705                          * be covered by our own prefix.
1706                          */
1707                         ifa = ifaof_ifpforaddr(SA(&sin6), ifp);
1708                         if (ifa == NULL)
1709                                 return (NULL);
1710
1711                         /*
1712                          * Create a new route.  RTF_LLINFO is necessary
1713                          * to create a Neighbor Cache entry for the
1714                          * destination in nd6_rtrequest which will be
1715                          * called in rtrequest via ifa->ifa_rtrequest.
1716                          */
1717                         if (!rt_locked)
1718                                 lck_mtx_lock(rnh_lock);
1719                         IFA_LOCK_SPIN(ifa);
1720                         ifa_flags = ifa->ifa_flags;
1721                         IFA_UNLOCK(ifa);
1722                         if ((e = rtrequest_scoped_locked(RTM_ADD,
1723                             SA(&sin6), ifa->ifa_addr, SA(&all1_sa),
1724                             (ifa_flags | RTF_HOST | RTF_LLINFO) &
1725                             ~RTF_CLONING, &rt, ifscope)) != 0) {
1726                                 if (e != EEXIST)
1727                                         log(LOG_ERR, "%s: failed to add route "
1728                                             "for a neighbor(%s), errno=%d\n",
1729                                             __func__, ip6_sprintf(addr6), e);
1730                         }
1731                         if (!rt_locked)
1732                                 lck_mtx_unlock(rnh_lock);
1733                         IFA_REMREF(ifa);
1734                         if (rt == NULL)
1735                                 return (NULL);
1736
1737                         RT_LOCK(rt);
1738                         if (rt->rt_llinfo) {
1739                                 struct llinfo_nd6 *ln = rt->rt_llinfo;
1740                                 ln->ln_state = ND6_LLINFO_NOSTATE;
1741                         }
1742                 } else {
1743                         return (NULL);
1744                 }
1745         }
1746         RT_LOCK_ASSERT_HELD(rt);
1747         /*
1748          * Validation for the entry.
1749          * Note that the check for rt_llinfo is necessary because a cloned
1750          * route from a parent route that has the L flag (e.g. the default
1751          * route to a p2p interface) may have the flag, too, while the
1752          * destination is not actually a neighbor.
1753          * XXX: we can't use rt->rt_ifp to check for the interface, since
1754          *      it might be the loopback interface if the entry is for our
1755          *      own address on a non-loopback interface. Instead, we should
1756          *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
1757          *      interface.
1758          * Note also that ifa_ifp and ifp may differ when we connect two
1759          * interfaces to a same link, install a link prefix to an interface,
1760          * and try to install a neighbor cache on an interface that does not
1761          * have a route to the prefix.
1762          *
1763          * If the address is from a proxied prefix, the ifa_ifp and ifp might
1764          * not match, because nd6_na_input() could have modified the ifp
1765          * of the route to point to the interface where the NA arrived on,
1766          * hence the test for RTF_PROXY.
1767          */
1768         if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
1769             rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
1770             (ifp && rt->rt_ifa->ifa_ifp != ifp &&
1771             !(rt->rt_flags & RTF_PROXY))) {
1772                 RT_REMREF_LOCKED(rt);
1773                 RT_UNLOCK(rt);
1774                 if (create) {
1775                         log(LOG_DEBUG, "%s: failed to lookup %s "
1776                             "(if = %s)\n", __func__, ip6_sprintf(addr6),
1777                             ifp ? if_name(ifp) : "unspec");
1778                         /* xxx more logs... kazu */
1779                 }
1780                 return (NULL);
1781         }
1782         /*
1783          * Caller needs to release reference and call RT_UNLOCK(rt).
1784          */
1785         return (rt);
1786 }
1787
1788 /*
1789  * Test whether a given IPv6 address is a neighbor or not, ignoring
1790  * the actual neighbor cache.  The neighbor cache is ignored in order
1791  * to not reenter the routing code from within itself.
1792  */
1793 static int
1794 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
1795 {
1796         struct nd_prefix *pr;
1797         struct ifaddr *dstaddr;
1798
1799         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1800
1801         /*
1802          * A link-local address is always a neighbor.
1803          * XXX: a link does not necessarily specify a single interface.
1804          */
1805         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
1806                 struct sockaddr_in6 sin6_copy;
1807                 u_int32_t zone;
1808
1809                 /*
1810                  * We need sin6_copy since sa6_recoverscope() may modify the
1811                  * content (XXX).
1812                  */
1813                 sin6_copy = *addr;
1814                 if (sa6_recoverscope(&sin6_copy, FALSE))
1815                         return (0); /* XXX: should be impossible */
1816                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
1817                         return (0);
1818                 if (sin6_copy.sin6_scope_id == zone)
1819                         return (1);
1820                 else
1821                         return (0);
1822         }
1823
1824         /*
1825          * If the address matches one of our addresses,
1826          * it should be a neighbor.
1827          * If the address matches one of our on-link prefixes, it should be a
1828          * neighbor.
1829          */
1830         for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1831                 NDPR_LOCK(pr);
1832                 if (pr->ndpr_ifp != ifp) {
1833                         NDPR_UNLOCK(pr);
1834                         continue;
1835                 }
1836                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
1837                         NDPR_UNLOCK(pr);
1838                         continue;
1839                 }
1840                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1841                     &addr->sin6_addr, &pr->ndpr_mask)) {
1842                         NDPR_UNLOCK(pr);
1843                         return (1);
1844                 }
1845                 NDPR_UNLOCK(pr);
1846         }
1847
1848         /*
1849          * If the address is assigned on the node of the other side of
1850          * a p2p interface, the address should be a neighbor.
1851          */
1852         dstaddr = ifa_ifwithdstaddr(SA(addr));
1853         if (dstaddr != NULL) {
1854                 if (dstaddr->ifa_ifp == ifp) {
1855                         IFA_REMREF(dstaddr);
1856                         return (1);
1857                 }
1858                 IFA_REMREF(dstaddr);
1859                 dstaddr = NULL;
1860         }
1861
1862         /*
1863          * If the default router list is empty, all addresses are regarded
1864          * as on-link, and thus, as a neighbor.
1865          * XXX: we restrict the condition to hosts, because routers usually do
1866          * not have the "default router list".
1867          * XXX: this block should eventually be removed (it is disabled when
1868          * Scoped Routing is in effect); treating all destinations as on-link
1869          * in the absence of a router is rather harmful.
1870          */
1871         if (!ip6_doscopedroute && !ip6_forwarding &&
1872             TAILQ_FIRST(&nd_defrouter) == NULL &&
1873             nd6_defifindex == ifp->if_index) {
1874                 return (1);
1875         }
1876
1877         return (0);
1878 }
1879
1880
1881 /*
1882  * Detect if a given IPv6 address identifies a neighbor on a given link.
1883  * XXX: should take care of the destination of a p2p link?
1884  */
1885 int
1886 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp,
1887     int rt_locked)
1888 {
1889         struct rtentry *rt;
1890
1891         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
1892         lck_mtx_lock(nd6_mutex);
1893         if (nd6_is_new_addr_neighbor(addr, ifp)) {
1894                 lck_mtx_unlock(nd6_mutex);
1895                 return (1);
1896         }
1897         lck_mtx_unlock(nd6_mutex);
1898
1899         /*
1900          * Even if the address matches none of our addresses, it might be
1901          * in the neighbor cache.
1902          */
1903         if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
1904                 RT_LOCK_ASSERT_HELD(rt);
1905                 RT_REMREF_LOCKED(rt);
1906                 RT_UNLOCK(rt);
1907                 return (1);
1908         }
1909
1910         return (0);
1911 }
1912
1913 /*
1914  * Free an nd6 llinfo entry.
1915  * Since the function would cause significant changes in the kernel, DO NOT
1916  * make it global, unless you have a strong reason for the change, and are sure
1917  * that the change is safe.
1918  */
1919 void
1920 nd6_free(struct rtentry *rt)
1921 {
1922         struct llinfo_nd6 *ln;
1923         struct in6_addr in6;
1924         struct nd_defrouter *dr;
1925
1926         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1927         RT_LOCK_ASSERT_NOTHELD(rt);
1928         lck_mtx_lock(nd6_mutex);
1929
1930         RT_LOCK(rt);
1931         RT_ADDREF_LOCKED(rt);   /* Extra ref */
1932         ln = rt->rt_llinfo;
1933         in6 = SIN6(rt_key(rt))->sin6_addr;
1934
1935         /*
1936          * Prevent another thread from modifying rt_key, rt_gateway
1937          * via rt_setgate() after the rt_lock is dropped by marking
1938          * the route as defunct.
1939          */
1940         rt->rt_flags |= RTF_CONDEMNED;
1941
1942         /*
1943          * We used to have pfctlinput(PRC_HOSTDEAD) here.  Even though it is
1944          * not harmful, it was not really necessary.  Perform default router
1945          * selection even when we are a router, if Scoped Routing is enabled.
1946          */
1947         if (ip6_doscopedroute || !ip6_forwarding) {
1948                 dr = defrouter_lookup(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp);
1949
1950                 if ((ln && ln->ln_router) || dr) {
1951                         /*
1952                          * rt6_flush must be called whether or not the neighbor
1953                          * is in the Default Router List.
1954                          * See a corresponding comment in nd6_na_input().
1955                          */
1956                         RT_UNLOCK(rt);
1957                         lck_mtx_unlock(nd6_mutex);
1958                         rt6_flush(&in6, rt->rt_ifp);
1959                         lck_mtx_lock(nd6_mutex);
1960                 } else {
1961                         RT_UNLOCK(rt);
1962                 }
1963
1964                 if (dr) {
1965                         NDDR_REMREF(dr);
1966                         /*
1967                          * Unreachablity of a router might affect the default
1968                          * router selection and on-link detection of advertised
1969                          * prefixes.
1970                          */
1971
1972                         /*
1973                          * Temporarily fake the state to choose a new default
1974                          * router and to perform on-link determination of
1975                          * prefixes correctly.
1976                          * Below the state will be set correctly,
1977                          * or the entry itself will be deleted.
1978                          */
1979                         RT_LOCK_SPIN(rt);
1980                         ln->ln_state = ND6_LLINFO_INCOMPLETE;
1981
1982                         /*
1983                          * Since defrouter_select() does not affect the
1984                          * on-link determination and MIP6 needs the check
1985                          * before the default router selection, we perform
1986                          * the check now.
1987                          */
1988                         RT_UNLOCK(rt);
1989                         pfxlist_onlink_check();
1990
1991                         /*
1992                          * refresh default router list
1993                          */
1994                         defrouter_select(rt->rt_ifp);
1995                 }
1996                 RT_LOCK_ASSERT_NOTHELD(rt);
1997         } else {
1998                 RT_UNLOCK(rt);
1999         }
2000
2001         lck_mtx_unlock(nd6_mutex);
2002         /*
2003          * Detach the route from the routing tree and the list of neighbor
2004          * caches, and disable the route entry not to be used in already
2005          * cached routes.
2006          */
2007         (void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
2008
2009         /* Extra ref held above; now free it */
2010         rtfree(rt);
2011 }
2012
2013 void
2014 nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
2015 {
2016 #pragma unused(sa)
2017         struct sockaddr *gate = rt->rt_gateway;
2018         struct llinfo_nd6 *ln = rt->rt_llinfo;
2019         static struct sockaddr_dl null_sdl =
2020             { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
2021         struct ifnet *ifp = rt->rt_ifp;
2022         struct ifaddr *ifa;
2023         uint64_t timenow;
2024         char buf[MAX_IPv6_STR_LEN];
2025
2026         VERIFY(nd6_init_done);
2027         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
2028         RT_LOCK_ASSERT_HELD(rt);
2029
2030         /*
2031          * We have rnh_lock held, see if we need to schedule the timer;
2032          * we might do this again below during RTM_RESOLVE, but doing it
2033          * now handles all other cases.
2034          */
2035         if (nd6_sched_timeout_want)
2036                 nd6_sched_timeout(NULL, NULL);
2037
2038         if (rt->rt_flags & RTF_GATEWAY)
2039                 return;
2040
2041         if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) {
2042                 /*
2043                  * This is probably an interface direct route for a link
2044                  * which does not need neighbor caches (e.g. fe80::%lo0/64).
2045                  * We do not need special treatment below for such a route.
2046                  * Moreover, the RTF_LLINFO flag which would be set below
2047                  * would annoy the ndp(8) command.
2048                  */
2049                 return;
2050         }
2051
2052         if (req == RTM_RESOLVE) {
2053                 int no_nd_cache;
2054
2055                 if (!nd6_need_cache(ifp)) {     /* stf case */
2056                         no_nd_cache = 1;
2057                 } else {
2058                         struct sockaddr_in6 sin6;
2059
2060                         rtkey_to_sa6(rt, &sin6);
2061                         /*
2062                          * nd6_is_addr_neighbor() may call nd6_lookup(),
2063                          * therefore we drop rt_lock to avoid deadlock
2064                          * during the lookup.
2065                          */
2066                         RT_ADDREF_LOCKED(rt);
2067                         RT_UNLOCK(rt);
2068                         no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
2069                         RT_LOCK(rt);
2070                         RT_REMREF_LOCKED(rt);
2071                 }
2072
2073                 /*
2074                  * FreeBSD and BSD/OS often make a cloned host route based
2075                  * on a less-specific route (e.g. the default route).
2076                  * If the less specific route does not have a "gateway"
2077                  * (this is the case when the route just goes to a p2p or an
2078                  * stf interface), we'll mistakenly make a neighbor cache for
2079                  * the host route, and will see strange neighbor solicitation
2080                  * for the corresponding destination.  In order to avoid the
2081                  * confusion, we check if the destination of the route is
2082                  * a neighbor in terms of neighbor discovery, and stop the
2083                  * process if not.  Additionally, we remove the LLINFO flag
2084                  * so that ndp(8) will not try to get the neighbor information
2085                  * of the destination.
2086                  */
2087                 if (no_nd_cache) {
2088                         rt->rt_flags &= ~RTF_LLINFO;
2089                         return;
2090                 }
2091         }
2092
2093         timenow = net_uptime();
2094
2095         switch (req) {
2096         case RTM_ADD:
2097                 /*
2098                  * There is no backward compatibility :)
2099                  *
2100                  * if ((rt->rt_flags & RTF_HOST) == 0 &&
2101                  *      SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
2102                  *              rt->rt_flags |= RTF_CLONING;
2103                  */
2104                 if ((rt->rt_flags & RTF_CLONING) ||
2105                     ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
2106                         /*
2107                          * Case 1: This route should come from a route to
2108                          * interface (RTF_CLONING case) or the route should be
2109                          * treated as on-link but is currently not
2110                          * (RTF_LLINFO && ln == NULL case).
2111                          */
2112                         if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
2113                                 gate = rt->rt_gateway;
2114                                 SDL(gate)->sdl_type = ifp->if_type;
2115                                 SDL(gate)->sdl_index = ifp->if_index;
2116                                 /*
2117                                  * In case we're called before 1.0 sec.
2118                                  * has elapsed.
2119                                  */
2120                                 if (ln != NULL) {
2121                                         ln_setexpire(ln,
2122                                             (ifp->if_eflags & IFEF_IPV6_ND6ALT)
2123                                             ? 0 : MAX(timenow, 1));
2124                                 }
2125                         }
2126                         if (rt->rt_flags & RTF_CLONING)
2127                                 break;
2128                 }
2129                 /*
2130                  * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
2131                  * We don't do that here since llinfo is not ready yet.
2132                  *
2133                  * There are also couple of other things to be discussed:
2134                  * - unsolicited NA code needs improvement beforehand
2135                  * - RFC4861 says we MAY send multicast unsolicited NA
2136                  *   (7.2.6 paragraph 4), however, it also says that we
2137                  *   SHOULD provide a mechanism to prevent multicast NA storm.
2138                  *   we don't have anything like it right now.
2139                  *   note that the mechanism needs a mutual agreement
2140                  *   between proxies, which means that we need to implement
2141                  *   a new protocol, or a new kludge.
2142                  * - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA.
2143                  *   we need to check ip6forwarding before sending it.
2144                  *   (or should we allow proxy ND configuration only for
2145                  *   routers?  there's no mention about proxy ND from hosts)
2146                  */
2147                 /* FALLTHROUGH */
2148         case RTM_RESOLVE:
2149                 if (!(ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK))) {
2150                         /*
2151                          * Address resolution isn't necessary for a point to
2152                          * point link, so we can skip this test for a p2p link.
2153                          */
2154                         if (gate->sa_family != AF_LINK ||
2155                             gate->sa_len < sizeof (null_sdl)) {
2156                                 /* Don't complain in case of RTM_ADD */
2157                                 if (req == RTM_RESOLVE) {
2158                                         log(LOG_ERR, "%s: route to %s has bad "
2159                                             "gateway address (sa_family %u "
2160                                             "sa_len %u) on %s\n", __func__,
2161                                             inet_ntop(AF_INET6,
2162                                             &SIN6(rt_key(rt))->sin6_addr, buf,
2163                                             sizeof (buf)), gate->sa_family,
2164                                             gate->sa_len, if_name(ifp));
2165                                 }
2166                                 break;
2167                         }
2168                         SDL(gate)->sdl_type = ifp->if_type;
2169                         SDL(gate)->sdl_index = ifp->if_index;
2170                 }
2171                 if (ln != NULL)
2172                         break;  /* This happens on a route change */
2173                 /*
2174                  * Case 2: This route may come from cloning, or a manual route
2175                  * add with a LL address.
2176                  */
2177                 rt->rt_llinfo = ln = nd6_llinfo_alloc(M_WAITOK);
2178                 if (ln == NULL)
2179                         break;
2180
2181                 nd6_allocated++;
2182                 rt->rt_llinfo_get_ri    = nd6_llinfo_get_ri;
2183                 rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
2184                 rt->rt_llinfo_purge     = nd6_llinfo_purge;
2185                 rt->rt_llinfo_free      = nd6_llinfo_free;
2186                 rt->rt_llinfo_refresh   = nd6_llinfo_refresh;
2187                 rt->rt_flags |= RTF_LLINFO;
2188                 ln->ln_rt = rt;
2189                 /* this is required for "ndp" command. - shin */
2190                 if (req == RTM_ADD) {
2191                         /*
2192                          * gate should have some valid AF_LINK entry,
2193                          * and ln->ln_expire should have some lifetime
2194                          * which is specified by ndp command.
2195                          */
2196                         ln->ln_state = ND6_LLINFO_REACHABLE;
2197                 } else {
2198                         /*
2199                          * When req == RTM_RESOLVE, rt is created and
2200                          * initialized in rtrequest(), so rt_expire is 0.
2201                          */
2202                         ln->ln_state = ND6_LLINFO_NOSTATE;
2203
2204                         /* In case we're called before 1.0 sec. has elapsed */
2205                         ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) ?
2206                             0 : MAX(timenow, 1));
2207                 }
2208                 LN_INSERTHEAD(ln);
2209                 nd6_inuse++;
2210
2211                 /* We have at least one entry; arm the timer if not already */
2212                 nd6_sched_timeout(NULL, NULL);
2213
2214                 /*
2215                  * If we have too many cache entries, initiate immediate
2216                  * purging for some "less recently used" entries.  Note that
2217                  * we cannot directly call nd6_free() here because it would
2218                  * cause re-entering rtable related routines triggering an LOR
2219                  * problem.
2220                  */
2221                 if (ip6_neighborgcthresh > 0 &&
2222                     nd6_inuse >= ip6_neighborgcthresh) {
2223                         int i;
2224
2225                         for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
2226                                 struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
2227                                 struct rtentry *rt_end = ln_end->ln_rt;
2228
2229                                 /* Move this entry to the head */
2230                                 RT_LOCK(rt_end);
2231                                 LN_DEQUEUE(ln_end);
2232                                 LN_INSERTHEAD(ln_end);
2233
2234                                 if (ln_end->ln_expire == 0) {
2235                                         RT_UNLOCK(rt_end);
2236                                         continue;
2237                                 }
2238                                 if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
2239                                         ln_end->ln_state = ND6_LLINFO_STALE;
2240                                 else
2241                                         ln_end->ln_state = ND6_LLINFO_PURGE;
2242                                 ln_setexpire(ln_end, timenow);
2243                                 RT_UNLOCK(rt_end);
2244                         }
2245                 }
2246
2247                 /*
2248                  * check if rt_key(rt) is one of my address assigned
2249                  * to the interface.
2250                  */
2251                 ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
2252                     &SIN6(rt_key(rt))->sin6_addr);
2253                 if (ifa != NULL) {
2254                         caddr_t macp = nd6_ifptomac(ifp);
2255                         ln_setexpire(ln, 0);
2256                         ln->ln_state = ND6_LLINFO_REACHABLE;
2257                         if (macp != NULL) {
2258                                 Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
2259                                 SDL(gate)->sdl_alen = ifp->if_addrlen;
2260                         }
2261                         if (nd6_useloopback) {
2262                                 if (rt->rt_ifp != lo_ifp) {
2263                                         /*
2264                                          * Purge any link-layer info caching.
2265                                          */
2266                                         if (rt->rt_llinfo_purge != NULL)
2267                                                 rt->rt_llinfo_purge(rt);
2268
2269                                         /*
2270                                          * Adjust route ref count for the
2271                                          * interfaces.
2272                                          */
2273                                         if (rt->rt_if_ref_fn != NULL) {
2274                                                 rt->rt_if_ref_fn(lo_ifp, 1);
2275                                                 rt->rt_if_ref_fn(rt->rt_ifp,
2276                                                     -1);
2277                                         }
2278                                 }
2279                                 rt->rt_ifp = lo_ifp;
2280                                 /*
2281                                  * If rmx_mtu is not locked, update it
2282                                  * to the MTU used by the new interface.
2283                                  */
2284                                 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2285                                         rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
2286                                 /*
2287                                  * Make sure rt_ifa be equal to the ifaddr
2288                                  * corresponding to the address.
2289                                  * We need this because when we refer
2290                                  * rt_ifa->ia6_flags in ip6_input, we assume
2291                                  * that the rt_ifa points to the address instead
2292                                  * of the loopback address.
2293                                  */
2294                                 if (ifa != rt->rt_ifa) {
2295                                         rtsetifa(rt, ifa);
2296                                 }
2297                         }
2298                         IFA_REMREF(ifa);
2299                 } else if (rt->rt_flags & RTF_ANNOUNCE) {
2300                         ln_setexpire(ln, 0);
2301                         ln->ln_state = ND6_LLINFO_REACHABLE;
2302
2303                         /* join solicited node multicast for proxy ND */
2304                         if (ifp->if_flags & IFF_MULTICAST) {
2305                                 struct in6_addr llsol;
2306                                 struct in6_multi *in6m;
2307                                 int error;
2308
2309                                 llsol = SIN6(rt_key(rt))->sin6_addr;
2310                                 llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2311                                 llsol.s6_addr32[1] = 0;
2312                                 llsol.s6_addr32[2] = htonl(1);
2313                                 llsol.s6_addr8[12] = 0xff;
2314                                 if (in6_setscope(&llsol, ifp, NULL))
2315                                         break;
2316                                 error = in6_mc_join(ifp, &llsol,
2317                                     NULL, &in6m, 0);
2318                                 if (error) {
2319                                         nd6log((LOG_ERR, "%s: failed to join "
2320                                             "%s (errno=%d)\n", if_name(ifp),
2321                                             ip6_sprintf(&llsol), error));
2322                                 } else {
2323                                         IN6M_REMREF(in6m);
2324                                 }
2325                         }
2326                 }
2327                 break;
2328
2329         case RTM_DELETE:
2330                 if (ln == NULL)
2331                         break;
2332                 /* leave from solicited node multicast for proxy ND */
2333                 if ((rt->rt_flags & RTF_ANNOUNCE) &&
2334                     (ifp->if_flags & IFF_MULTICAST)) {
2335                         struct in6_addr llsol;
2336                         struct in6_multi *in6m;
2337
2338                         llsol = SIN6(rt_key(rt))->sin6_addr;
2339                         llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2340                         llsol.s6_addr32[1] = 0;
2341                         llsol.s6_addr32[2] = htonl(1);
2342                         llsol.s6_addr8[12] = 0xff;
2343                         if (in6_setscope(&llsol, ifp, NULL) == 0) {
2344                                 in6_multihead_lock_shared();
2345                                 IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
2346                                 in6_multihead_lock_done();
2347                                 if (in6m != NULL) {
2348                                         in6_mc_leave(in6m, NULL);
2349                                         IN6M_REMREF(in6m);
2350                                 }
2351                         }
2352                 }
2353                 nd6_inuse--;
2354                 /*
2355                  * Unchain it but defer the actual freeing until the route
2356                  * itself is to be freed.  rt->rt_llinfo still points to
2357                  * llinfo_nd6, and likewise, ln->ln_rt still points to this
2358                  * route entry, except that RTF_LLINFO is now cleared.
2359                  */
2360                 if (ln->ln_flags & ND6_LNF_IN_USE)
2361                         LN_DEQUEUE(ln);
2362
2363                 /*
2364                  * Purge any link-layer info caching.
2365                  */
2366                 if (rt->rt_llinfo_purge != NULL)
2367                         rt->rt_llinfo_purge(rt);
2368
2369                 rt->rt_flags &= ~RTF_LLINFO;
2370                 if (ln->ln_hold != NULL) {
2371                         m_freem_list(ln->ln_hold);
2372                         ln->ln_hold = NULL;
2373                 }
2374         }
2375 }
2376
2377 static int
2378 nd6_siocgdrlst(void *data, int data_is_64)
2379 {
2380         struct in6_drlist_32 *drl_32;
2381         struct nd_defrouter *dr;
2382         int i = 0;
2383
2384         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2385
2386         dr = TAILQ_FIRST(&nd_defrouter);
2387
2388         /* For 64-bit process */
2389         if (data_is_64) {
2390                 struct in6_drlist_64 *drl_64;
2391
2392                 drl_64 = _MALLOC(sizeof (*drl_64), M_TEMP, M_WAITOK|M_ZERO);
2393                 if (drl_64 == NULL)
2394                         return (ENOMEM);
2395
2396                 /* preserve the interface name */
2397                 bcopy(data, drl_64, sizeof (drl_64->ifname));
2398
2399                 while (dr && i < DRLSTSIZ) {
2400                         drl_64->defrouter[i].rtaddr = dr->rtaddr;
2401                         if (IN6_IS_ADDR_LINKLOCAL(
2402                             &drl_64->defrouter[i].rtaddr)) {
2403                                 /* XXX: need to this hack for KAME stack */
2404                                 drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
2405                         } else {
2406                                 log(LOG_ERR,
2407                                     "default router list contains a "
2408                                     "non-linklocal address(%s)\n",
2409                                     ip6_sprintf(&drl_64->defrouter[i].rtaddr));
2410                         }
2411                         drl_64->defrouter[i].flags = dr->flags;
2412                         drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
2413                         drl_64->defrouter[i].expire = nddr_getexpire(dr);
2414                         drl_64->defrouter[i].if_index = dr->ifp->if_index;
2415                         i++;
2416                         dr = TAILQ_NEXT(dr, dr_entry);
2417                 }
2418                 bcopy(drl_64, data, sizeof (*drl_64));
2419                 _FREE(drl_64, M_TEMP);
2420                 return (0);
2421         }
2422
2423         /* For 32-bit process */
2424         drl_32 = _MALLOC(sizeof (*drl_32), M_TEMP, M_WAITOK|M_ZERO);
2425         if (drl_32 == NULL)
2426                 return (ENOMEM);
2427
2428         /* preserve the interface name */
2429         bcopy(data, drl_32, sizeof (drl_32->ifname));
2430
2431         while (dr != NULL && i < DRLSTSIZ) {
2432                 drl_32->defrouter[i].rtaddr = dr->rtaddr;
2433                 if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
2434                         /* XXX: need to this hack for KAME stack */
2435                         drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
2436                 } else {
2437                         log(LOG_ERR,
2438                             "default router list contains a "
2439                             "non-linklocal address(%s)\n",
2440                             ip6_sprintf(&drl_32->defrouter[i].rtaddr));
2441                 }
2442                 drl_32->defrouter[i].flags = dr->flags;
2443                 drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
2444                 drl_32->defrouter[i].expire = nddr_getexpire(dr);
2445                 drl_32->defrouter[i].if_index = dr->ifp->if_index;
2446                 i++;
2447                 dr = TAILQ_NEXT(dr, dr_entry);
2448         }
2449         bcopy(drl_32, data, sizeof (*drl_32));
2450         _FREE(drl_32, M_TEMP);
2451         return (0);
2452 }
2453
2454 /*
2455  * XXX meaning of fields, especialy "raflags", is very
2456  * differnet between RA prefix list and RR/static prefix list.
2457  * how about separating ioctls into two?
2458  */
2459 static int
2460 nd6_siocgprlst(void *data, int data_is_64)
2461 {
2462         struct in6_prlist_32 *prl_32;
2463         struct nd_prefix *pr;
2464         int i = 0;
2465
2466         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2467
2468         pr = nd_prefix.lh_first;
2469
2470         /* For 64-bit process */
2471         if (data_is_64) {
2472                 struct in6_prlist_64 *prl_64;
2473
2474                 prl_64 = _MALLOC(sizeof (*prl_64), M_TEMP, M_WAITOK|M_ZERO);
2475                 if (prl_64 == NULL)
2476                         return (ENOMEM);
2477
2478                 /* preserve the interface name */
2479                 bcopy(data, prl_64, sizeof (prl_64->ifname));
2480
2481                 while (pr && i < PRLSTSIZ) {
2482                         struct nd_pfxrouter *pfr;
2483                         int j;
2484
2485                         NDPR_LOCK(pr);
2486                         (void) in6_embedscope(&prl_64->prefix[i].prefix,
2487                             &pr->ndpr_prefix, NULL, NULL, NULL);
2488                         prl_64->prefix[i].raflags = pr->ndpr_raf;
2489                         prl_64->prefix[i].prefixlen = pr->ndpr_plen;
2490                         prl_64->prefix[i].vltime = pr->ndpr_vltime;
2491                         prl_64->prefix[i].pltime = pr->ndpr_pltime;
2492                         prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
2493                         prl_64->prefix[i].expire = ndpr_getexpire(pr);
2494
2495                         pfr = pr->ndpr_advrtrs.lh_first;
2496                         j = 0;
2497                         while (pfr) {
2498                                 if (j < DRLSTSIZ) {
2499 #define RTRADDR prl_64->prefix[i].advrtr[j]
2500                                         RTRADDR = pfr->router->rtaddr;
2501                                         if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2502                                                 /* XXX: hack for KAME */
2503                                                 RTRADDR.s6_addr16[1] = 0;
2504                                         } else {
2505                                                 log(LOG_ERR,
2506                                                     "a router(%s) advertises "
2507                                                     "a prefix with "
2508                                                     "non-link local address\n",
2509                                                     ip6_sprintf(&RTRADDR));
2510                                         }
2511 #undef RTRADDR
2512                                 }
2513                                 j++;
2514                                 pfr = pfr->pfr_next;
2515                         }
2516                         prl_64->prefix[i].advrtrs = j;
2517                         prl_64->prefix[i].origin = PR_ORIG_RA;
2518                         NDPR_UNLOCK(pr);
2519
2520                         i++;
2521                         pr = pr->ndpr_next;
2522                 }
2523                 bcopy(prl_64, data, sizeof (*prl_64));
2524                 _FREE(prl_64, M_TEMP);
2525                 return (0);
2526         }
2527
2528         /* For 32-bit process */
2529         prl_32 = _MALLOC(sizeof (*prl_32), M_TEMP, M_WAITOK|M_ZERO);
2530         if (prl_32 == NULL)
2531                 return (ENOMEM);
2532
2533         /* preserve the interface name */
2534         bcopy(data, prl_32, sizeof (prl_32->ifname));
2535
2536         while (pr && i < PRLSTSIZ) {
2537                 struct nd_pfxrouter *pfr;
2538                 int j;
2539
2540                 NDPR_LOCK(pr);
2541                 (void) in6_embedscope(&prl_32->prefix[i].prefix,
2542                     &pr->ndpr_prefix, NULL, NULL, NULL);
2543                 prl_32->prefix[i].raflags = pr->ndpr_raf;
2544                 prl_32->prefix[i].prefixlen = pr->ndpr_plen;
2545                 prl_32->prefix[i].vltime = pr->ndpr_vltime;
2546                 prl_32->prefix[i].pltime = pr->ndpr_pltime;
2547                 prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
2548                 prl_32->prefix[i].expire = ndpr_getexpire(pr);
2549
2550                 pfr = pr->ndpr_advrtrs.lh_first;
2551                 j = 0;
2552                 while (pfr) {
2553                         if (j < DRLSTSIZ) {
2554 #define RTRADDR prl_32->prefix[i].advrtr[j]
2555                                 RTRADDR = pfr->router->rtaddr;
2556                                 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2557                                         /* XXX: hack for KAME */
2558                                         RTRADDR.s6_addr16[1] = 0;
2559                                 } else {
2560                                         log(LOG_ERR,
2561                                             "a router(%s) advertises "
2562                                             "a prefix with "
2563                                             "non-link local address\n",
2564                                             ip6_sprintf(&RTRADDR));
2565                                 }
2566 #undef RTRADDR
2567                         }
2568                         j++;
2569                         pfr = pfr->pfr_next;
2570                 }
2571                 prl_32->prefix[i].advrtrs = j;
2572                 prl_32->prefix[i].origin = PR_ORIG_RA;
2573                 NDPR_UNLOCK(pr);
2574
2575                 i++;
2576                 pr = pr->ndpr_next;
2577         }
2578         bcopy(prl_32, data, sizeof (*prl_32));
2579         _FREE(prl_32, M_TEMP);
2580         return (0);
2581 }
2582
2583 int
2584 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
2585 {
2586         struct nd_defrouter *dr;
2587         struct nd_prefix *pr;
2588         struct rtentry *rt;
2589         int error = 0;
2590
2591         VERIFY(ifp != NULL);
2592
2593         switch (cmd) {
2594         case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
2595         case SIOCGDRLST_IN6_64:         /* struct in6_drlist_64 */
2596                 /*
2597                  * obsolete API, use sysctl under net.inet6.icmp6
2598                  */
2599                 lck_mtx_lock(nd6_mutex);
2600                 error = nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
2601                 lck_mtx_unlock(nd6_mutex);
2602                 break;
2603
2604         case SIOCGPRLST_IN6_32:         /* struct in6_prlist_32 */
2605         case SIOCGPRLST_IN6_64:         /* struct in6_prlist_64 */
2606                 /*
2607                  * obsolete API, use sysctl under net.inet6.icmp6
2608                  */
2609                 lck_mtx_lock(nd6_mutex);
2610                 error = nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
2611                 lck_mtx_unlock(nd6_mutex);
2612                 break;
2613
2614         case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
2615         case SIOCGIFINFO_IN6: {         /* struct in6_ondireq */
2616                 u_int32_t linkmtu;
2617                 struct in6_ondireq *ondi = (struct in6_ondireq *)(void *)data;
2618                 struct nd_ifinfo *ndi;
2619                 /*
2620                  * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
2621                  * instead of in6_ndireq, so we treat it as such.
2622                  */
2623                 ndi = ND_IFINFO(ifp);
2624                 if ((NULL == ndi) || (FALSE == ndi->initialized)){
2625                         error = EINVAL;
2626                         break;
2627                 }
2628                 lck_mtx_lock(&ndi->lock);
2629                 linkmtu = IN6_LINKMTU(ifp);
2630                 bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
2631                 bcopy(&ndi->maxmtu, &ondi->ndi.maxmtu,
2632                     sizeof (u_int32_t));
2633                 bcopy(&ndi->basereachable, &ondi->ndi.basereachable,
2634                     sizeof (u_int32_t));
2635                 bcopy(&ndi->reachable, &ondi->ndi.reachable,
2636                     sizeof (u_int32_t));
2637                 bcopy(&ndi->retrans, &ondi->ndi.retrans,
2638                     sizeof (u_int32_t));
2639                 bcopy(&ndi->flags, &ondi->ndi.flags,
2640                     sizeof (u_int32_t));
2641                 bcopy(&ndi->recalctm, &ondi->ndi.recalctm,
2642                     sizeof (int));
2643                 ondi->ndi.chlim = ndi->chlim;
2644                 ondi->ndi.receivedra = 0;
2645                 lck_mtx_unlock(&ndi->lock);
2646                 break;
2647         }
2648
2649         case SIOCSIFINFO_FLAGS: {       /* struct in6_ndireq */
2650                 /*
2651                  * XXX BSD has a bunch of checks here to ensure
2652                  * that interface disabled flag is not reset if
2653                  * link local address has failed DAD.
2654                  * Investigate that part.
2655                  */
2656                 struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
2657                 u_int32_t oflags, flags;
2658                 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
2659
2660                 /* XXX: almost all other fields of cndi->ndi is unused */
2661                 if ((NULL == ndi) || !ndi->initialized) {
2662                         error = EINVAL;
2663                         break;
2664                 }
2665
2666                 lck_mtx_lock(&ndi->lock);
2667                 oflags = ndi->flags;
2668                 bcopy(&cndi->ndi.flags, &(ndi->flags), sizeof (flags));
2669                 flags = ndi->flags;
2670                 lck_mtx_unlock(&ndi->lock);
2671
2672                 if (oflags == flags) {
2673                         break;
2674                 }
2675
2676                 error = nd6_setifinfo(ifp, oflags, flags);
2677                 break;
2678         }
2679
2680         case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
2681                 /* flush default router list */
2682                 /*
2683                  * xxx sumikawa: should not delete route if default
2684                  * route equals to the top of default router list
2685                  */
2686                 lck_mtx_lock(nd6_mutex);
2687                 defrouter_reset();
2688                 defrouter_select(ifp);
2689                 lck_mtx_unlock(nd6_mutex);
2690                 /* xxx sumikawa: flush prefix list */
2691                 break;
2692
2693         case SIOCSPFXFLUSH_IN6: {       /* struct in6_ifreq */
2694                 /* flush all the prefix advertised by routers */
2695                 struct nd_prefix *next;
2696
2697                 lck_mtx_lock(nd6_mutex);
2698                 for (pr = nd_prefix.lh_first; pr; pr = next) {
2699                         struct in6_ifaddr *ia;
2700
2701                         next = pr->ndpr_next;
2702
2703                         NDPR_LOCK(pr);
2704                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
2705                                 NDPR_UNLOCK(pr);
2706                                 continue; /* XXX */
2707                         }
2708                         if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
2709                                 NDPR_UNLOCK(pr);
2710                                 continue;
2711                         }
2712                         /* do we really have to remove addresses as well? */
2713                         NDPR_ADDREF_LOCKED(pr);
2714                         NDPR_UNLOCK(pr);
2715                         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
2716                         ia = in6_ifaddrs;
2717                         while (ia != NULL) {
2718                                 IFA_LOCK(&ia->ia_ifa);
2719                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
2720                                         IFA_UNLOCK(&ia->ia_ifa);
2721                                         ia = ia->ia_next;
2722                                         continue;
2723                                 }
2724
2725                                 if (ia->ia6_ndpr == pr) {
2726                                         IFA_ADDREF_LOCKED(&ia->ia_ifa);
2727                                         IFA_UNLOCK(&ia->ia_ifa);
2728                                         lck_rw_done(&in6_ifaddr_rwlock);
2729                                         lck_mtx_unlock(nd6_mutex);
2730                                         in6_purgeaddr(&ia->ia_ifa);
2731                                         IFA_REMREF(&ia->ia_ifa);
2732                                         lck_mtx_lock(nd6_mutex);
2733                                         lck_rw_lock_exclusive(
2734                                             &in6_ifaddr_rwlock);
2735                                         /*
2736                                          * Purging the address caused
2737                                          * in6_ifaddr_rwlock to be
2738                                          * dropped and
2739                                          * reacquired; therefore search again
2740                                          * from the beginning of in6_ifaddrs.
2741                                          * The same applies for the prefix list.
2742                                          */
2743                                         ia = in6_ifaddrs;
2744                                         next = nd_prefix.lh_first;
2745                                         continue;
2746
2747                                 }
2748                                 IFA_UNLOCK(&ia->ia_ifa);
2749                                 ia = ia->ia_next;
2750                         }
2751                         lck_rw_done(&in6_ifaddr_rwlock);
2752                         NDPR_LOCK(pr);
2753                         prelist_remove(pr);
2754                         NDPR_UNLOCK(pr);
2755                         pfxlist_onlink_check();
2756                         /*
2757                          * If we were trying to restart this loop
2758                          * above by changing the value of 'next', we might
2759                          * end up freeing the only element on the list
2760                          * when we call NDPR_REMREF().
2761                          * When this happens, we also have get out of this
2762                          * loop because we have nothing else to do.
2763                          */
2764                         if (pr == next)
2765                                 next = NULL;
2766                         NDPR_REMREF(pr);
2767                 }
2768                 lck_mtx_unlock(nd6_mutex);
2769                 break;
2770         }
2771
2772         case SIOCSRTRFLUSH_IN6: {       /* struct in6_ifreq */
2773                 /* flush all the default routers */
2774                 struct nd_defrouter *next;
2775
2776                 lck_mtx_lock(nd6_mutex);
2777                 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
2778                         /*
2779                          * The first entry of the list may be stored in
2780                          * the routing table, so we'll delete it later.
2781                          */
2782                         for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
2783                                 next = TAILQ_NEXT(dr, dr_entry);
2784                                 if (ifp == lo_ifp || dr->ifp == ifp)
2785                                         defrtrlist_del(dr);
2786                         }
2787                         if (ifp == lo_ifp ||
2788                             TAILQ_FIRST(&nd_defrouter)->ifp == ifp)
2789                                 defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
2790                 }
2791                 lck_mtx_unlock(nd6_mutex);
2792                 break;
2793         }
2794
2795         case SIOCGNBRINFO_IN6_32: {     /* struct in6_nbrinfo_32 */
2796                 struct llinfo_nd6 *ln;
2797                 struct in6_nbrinfo_32 nbi_32;
2798                 struct in6_addr nb_addr; /* make local for safety */
2799
2800                 bcopy(data, &nbi_32, sizeof (nbi_32));
2801                 nb_addr = nbi_32.addr;
2802                 /*
2803                  * XXX: KAME specific hack for scoped addresses
2804                  *      XXXX: for other scopes than link-local?
2805                  */
2806                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) ||
2807                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) {
2808                         u_int16_t *idp =
2809                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
2810
2811                         if (*idp == 0)
2812                                 *idp = htons(ifp->if_index);
2813                 }
2814
2815                 /* Callee returns a locked route upon success */
2816                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
2817                         error = EINVAL;
2818                         break;
2819                 }
2820                 RT_LOCK_ASSERT_HELD(rt);
2821                 ln = rt->rt_llinfo;
2822                 nbi_32.state = ln->ln_state;
2823                 nbi_32.asked = ln->ln_asked;
2824                 nbi_32.isrouter = ln->ln_router;
2825                 nbi_32.expire = ln_getexpire(ln);
2826                 RT_REMREF_LOCKED(rt);
2827                 RT_UNLOCK(rt);
2828                 bcopy(&nbi_32, data, sizeof (nbi_32));
2829                 break;
2830         }
2831
2832         case SIOCGNBRINFO_IN6_64: {     /* struct in6_nbrinfo_64 */
2833                 struct llinfo_nd6 *ln;
2834                 struct in6_nbrinfo_64 nbi_64;
2835                 struct in6_addr nb_addr; /* make local for safety */
2836
2837                 bcopy(data, &nbi_64, sizeof (nbi_64));
2838                 nb_addr = nbi_64.addr;
2839                 /*
2840                  * XXX: KAME specific hack for scoped addresses
2841                  *      XXXX: for other scopes than link-local?
2842                  */
2843                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) ||
2844                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) {
2845                         u_int16_t *idp =
2846                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
2847
2848                         if (*idp == 0)
2849                                 *idp = htons(ifp->if_index);
2850                 }
2851
2852                 /* Callee returns a locked route upon success */
2853                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
2854                         error = EINVAL;
2855                         break;
2856                 }
2857                 RT_LOCK_ASSERT_HELD(rt);
2858                 ln = rt->rt_llinfo;
2859                 nbi_64.state = ln->ln_state;
2860                 nbi_64.asked = ln->ln_asked;
2861                 nbi_64.isrouter = ln->ln_router;
2862                 nbi_64.expire = ln_getexpire(ln);
2863                 RT_REMREF_LOCKED(rt);
2864                 RT_UNLOCK(rt);
2865                 bcopy(&nbi_64, data, sizeof (nbi_64));
2866                 break;
2867         }
2868
2869         case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
2870         case SIOCGDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
2871                 struct in6_ndifreq_64 *ndif_64 =
2872                     (struct in6_ndifreq_64 *)(void *)data;
2873                 struct in6_ndifreq_32 *ndif_32 =
2874                     (struct in6_ndifreq_32 *)(void *)data;
2875
2876                 if (cmd == SIOCGDEFIFACE_IN6_64) {
2877                         u_int64_t j = nd6_defifindex;
2878                         bcopy(&j, &ndif_64->ifindex, sizeof (j));
2879                 } else {
2880                         bcopy(&nd6_defifindex, &ndif_32->ifindex,
2881                             sizeof (u_int32_t));
2882                 }
2883                 break;
2884         }
2885
2886         case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
2887         case SIOCSDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
2888                 struct in6_ndifreq_64 *ndif_64 =
2889                     (struct in6_ndifreq_64 *)(void *)data;
2890                 struct in6_ndifreq_32 *ndif_32 =
2891                     (struct in6_ndifreq_32 *)(void *)data;
2892                 u_int32_t idx;
2893
2894                 if (cmd == SIOCSDEFIFACE_IN6_64) {
2895                         u_int64_t j;
2896                         bcopy(&ndif_64->ifindex, &j, sizeof (j));
2897                         idx = (u_int32_t)j;
2898                 } else {
2899                         bcopy(&ndif_32->ifindex, &idx, sizeof (idx));
2900                 }
2901
2902                 error = nd6_setdefaultiface(idx);
2903                 return (error);
2904                 /* NOTREACHED */
2905         }
2906         }
2907         return (error);
2908 }
2909
2910 /*
2911  * Create or refresh the neighbor cache entry for "from" and cache the
2912  * link-layer address carried by an inbound ND6 packet (RS/RA/NS/redirect).
2913  *
2914  * ifp:       receiving interface; panics if NULL.
2915  * from:      neighbor address; panics if NULL, no-op if unspecified.
2916  * lladdr:    link-layer address from the packet, or NULL if none was
2917  *            carried; ifp->if_addrlen bytes are read from it.
2918  * lladdrlen: unused; the caller must validate it against if_addrlen.
2919  * type/code: ICMPv6 type and code; they drive the IsRouter flag update.
2920  *
2921  * Existing entries flagged RTF_STATIC are left untouched.
2922  */
2923 void
2924 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
2925     int lladdrlen, int type, int code)
2926 {
2927 #pragma unused(lladdrlen)
2928         struct rtentry *rt = NULL;
2929         struct llinfo_nd6 *ln = NULL;
2930         struct sockaddr_dl *sdl = NULL;
2931         int is_newentry, do_update, olladdr, llchange, reselect;
2932         int newstate = 0;
2933         uint64_t timenow;
2934         boolean_t sched_timeout = FALSE;
2935
2936         if (ifp == NULL)
2937                 panic("ifp == NULL in nd6_cache_lladdr");
2938         if (from == NULL)
2939                 panic("from == NULL in nd6_cache_lladdr");
2940
2941         /* nothing must be updated for unspecified address */
2942         if (IN6_IS_ADDR_UNSPECIFIED(from))
2943                 return;
2944
2945         /*
2946          * Validation about ifp->if_addrlen and lladdrlen must be done in
2947          * the caller.
2948          */
2949         timenow = net_uptime();
2950
2951         /* Look for an existing entry first; ask for a new one if none. */
2952         rt = nd6_lookup(from, 0, ifp, 0);
2953         is_newentry = (rt == NULL);
2954         if (is_newentry) {
2955                 if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
2956                         return;
2957                 RT_LOCK_ASSERT_HELD(rt);
2958         } else {
2959                 RT_LOCK_ASSERT_HELD(rt);
2960                 /* do nothing if static ndp is set */
2961                 if (rt->rt_flags & RTF_STATIC) {
2962                         RT_REMREF_LOCKED(rt);
2963                         RT_UNLOCK(rt);
2964                         return;
2965                 }
2966         }
2967
2968         /*
2969          * The entry must be a neighbor (RTF_LLINFO set, RTF_GATEWAY clear)
2970          * with llinfo and an AF_LINK gateway attached; otherwise free it.
2971          */
2972         if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) == RTF_LLINFO)
2973                 ln = (struct llinfo_nd6 *)rt->rt_llinfo;
2974         if (ln == NULL || rt->rt_gateway == NULL ||
2975             rt->rt_gateway->sa_family != AF_LINK) {
2976                 RT_UNLOCK(rt);
2977                 nd6_free(rt);
2978                 rtfree(rt);
2979                 return;
2980         }
2981         sdl = SDL(rt->rt_gateway);
2982
2983         /* llchange: an address was cached and the packet's one differs */
2984         olladdr = (sdl->sdl_alen) ? 1 : 0;
2985         llchange = (olladdr && lladdr != NULL &&
2986             bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen) != 0) ? 1 : 0;
2987
2988         /*
2989          * newentry olladdr  lladdr  llchange   (*=record)
2990          *      0       n       n       --      (1)
2991          *      0       y       n       --      (2)
2992          *      0       n       y       --      (3) * STALE
2993          *      0       y       y       n       (4) *
2994          *      0       y       y       y       (5) * STALE
2995          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
2996          *      1       --      y       --      (7) * STALE
2997          */
2998
2999         if (lladdr) {           /* (3-5) and (7) */
3000                 /*
3001                  * Record source link-layer address
3002                  * XXX is it dependent to ifp->if_type?
3003                  */
3004                 sdl->sdl_alen = ifp->if_addrlen;
3005                 bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
3006
3007                 /* cache the gateway (sender HW) address */
3008                 nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
3009         }
3010
3011         if (!is_newentry) {
3012                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
3013                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
3014                         do_update = 1;
3015                         newstate = ND6_LLINFO_STALE;
3016                 } else                                  /* (1-2,4) */
3017                         do_update = 0;
3018         } else {
3019                 do_update = 1;
3020                 if (lladdr == NULL)                     /* (6) */
3021                         newstate = ND6_LLINFO_NOSTATE;
3022                 else                                    /* (7) */
3023                         newstate = ND6_LLINFO_STALE;
3024         }
3025
3026         if (do_update) {
3027                 /* Update the state of the neighbor cache. */
3028                 ln->ln_state = newstate;
3029
3030                 if (ln->ln_state == ND6_LLINFO_STALE) {
3031                         struct mbuf *m = ln->ln_hold;
3032                         /*
3033                          * XXX: since nd6_output() below will cause
3034                          * state transition to DELAY and reset the timer,
3035                          * we must set the timer now, although it is actually
3036                          * meaningless.
3037                          */
3038                         ln_setexpire(ln, timenow + nd6_gctimer);
3039                         ln->ln_hold = NULL;
3040
3041                         if (m != NULL) {
3042                                 struct sockaddr_in6 sin6;
3043
3044                                 rtkey_to_sa6(rt, &sin6);
3045                                 /*
3046                                  * we assume ifp is not a p2p here, so just
3047                                  * set the 2nd argument as the 1st one.
3048                                  * rt_lock is dropped around the call.
3049                                  */
3050                                 RT_UNLOCK(rt);
3051                                 nd6_output_list(ifp, ifp, m, &sin6, rt, NULL);
3052                                 RT_LOCK(rt);
3053                         }
3054                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
3055                         /* probe right away */
3056                         ln_setexpire(ln, timenow);
3057                         sched_timeout = TRUE;
3058                 }
3059         }
3060
3061         /*
3062          * ICMP6 type dependent behavior.
3063          *
3064          * NS: clear IsRouter if new entry
3065          * RS: clear IsRouter
3066          * RA: set IsRouter if there's lladdr
3067          * redir: clear IsRouter if new entry
3068          *
3069          * RA case, (1):
3070          * The spec says that we must set IsRouter in the following cases:
3071          * - If lladdr exist, set IsRouter.  This means (3-5) and (7).
3072          * - If it is old entry (!newentry), set IsRouter.  This means (1-5).
3073          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
3074          * A question arises for (1) case.  (1) case has no lladdr in the
3075          * neighbor cache, this is similar to (6).
3076          * This case is rare but we figured that we MUST NOT set IsRouter.
3077          *
3078          * newentry olladdr  lladdr  llchange       NS  RS      RA      redir
3079          *                                                              D R
3080          *      0       n       n       --      (1)     c       ?       s
3081          *      0       y       n       --      (2)     c       s       s
3082          *      0       n       y       --      (3)     c       s       s
3083          *      0       y       y       n       (4)     c       s       s
3084          *      0       y       y       y       (5)     c       s       s
3085          *      1       --      n       --      (6) c   c               c s
3086          *      1       --      y       --      (7) c   c       s       c s
3087          *
3088          *                                      (c=clear s=set)
3089          */
3090         switch (type & 0xff) {
3091         case ND_NEIGHBOR_SOLICIT:
3092                 /*
3093                  * New entry must have is_router flag cleared.
3094                  */
3095                 if (is_newentry)        /* (6-7) */
3096                         ln->ln_router = 0;
3097                 break;
3098         case ND_REDIRECT:
3099                 /*
3100                  * If the ICMP message is a Redirect to a better router, always
3101                  * set the is_router flag.  Otherwise, if the entry is newly
3102                  * created, then clear the flag.  [RFC 4861, sec 8.3]
3103                  */
3104                 if (code == ND_REDIRECT_ROUTER)
3105                         ln->ln_router = 1;
3106                 else if (is_newentry) /* (6-7) */
3107                         ln->ln_router = 0;
3108                 break;
3109         case ND_ROUTER_SOLICIT:
3110                 /*
3111                  * is_router flag must always be cleared.
3112                  */
3113                 ln->ln_router = 0;
3114                 break;
3115         case ND_ROUTER_ADVERT:
3116                 /*
3117                  * Mark an entry with lladdr as a router.
3118                  */
3119                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
3120                     (is_newentry && lladdr)) {                  /* (7) */
3121                         ln->ln_router = 1;
3122                 }
3123                 break;
3124         default:
3125                 /* any other type leaves the is_router flag untouched */
3126                 break;
3127         }
3128
3129         /*
3130          * When the link-layer address of a router changes, select the
3131          * best router again.  In particular, when the neighbor entry is newly
3132          * created, it might affect the selection policy.
3133          * Question: can we restrict the first condition to the "is_newentry"
3134          * case?
3135          *
3136          * Note: Perform default router selection even when we are a router,
3137          * if Scoped Routing is enabled.
3138          */
3139         /* Evaluate the condition before rt (and with it ln) is released. */
3140         reselect = (do_update && ln->ln_router &&
3141             (ip6_doscopedroute || !ip6_forwarding));
3142         RT_REMREF_LOCKED(rt);
3143         RT_UNLOCK(rt);
3144         if (reselect) {
3145                 lck_mtx_lock(nd6_mutex);
3146                 defrouter_select(ifp);
3147                 lck_mtx_unlock(nd6_mutex);
3148         }
3149         if (sched_timeout) {
3150                 lck_mtx_lock(rnh_lock);
3151                 nd6_sched_timeout(NULL, NULL);
3152                 lck_mtx_unlock(rnh_lock);
3153         }
3154 }
3155
3156 static void
3157 nd6_slowtimo(void *arg)
3158 {
3159 #pragma unused(arg)
3160         struct ifnet *ifp;
3161
3162         /*
3163          * Since reachable time rarely changes by router advertisements,
3164          * we SHOULD ensure that a new random value gets recomputed at
3165          * least once every few hours (RFC 4861, 6.3.4).  Walk all interfaces
3166          * with initialized ND info, refresh as needed, then re-arm the timer.
3167          */
3168         ifnet_head_lock_shared();
3169         for (ifp = ifnet_head.tqh_first; ifp != NULL;
3170             ifp = ifp->if_link.tqe_next) {
3171                 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
3172
3173                 if (ndi == NULL || ndi->initialized == FALSE)
3174                         continue;
3175                 lck_mtx_lock(&ndi->lock);
3176                 if (ndi->basereachable != 0) {  /* already initialized */
3177                         ndi->recalctm -= ND6_SLOWTIMER_INTERVAL;
3178                         if (ndi->recalctm <= 0) {
3179                                 ndi->recalctm = nd6_recalc_reachtm_interval;
3180                                 ndi->reachable =
3181                                     ND_COMPUTE_RTIME(ndi->basereachable);
3182                         }
3183                 }
3184                 lck_mtx_unlock(&ndi->lock);
3185         }
3186         ifnet_head_done();
3187         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
3188 }
3189
3190 int
3191 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
3192     struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
3193 {       /* thin wrapper: arguments and result pass through unchanged */
3194         return (nd6_output_list(ifp, origifp, m0, dst, hint0, adv));
3195 }
3196
3197 /*
3198  * nd6_output_list()
3199  *
3200  * Assumption: route determination for first packet can be correctly applied to
3201  * all packets in the chain.
3202  */
3203 #define senderr(e) { error = (e); goto bad; }
3204 int
3205 nd6_output_list(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
3206     struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
3207 {
3208         struct rtentry *rt = hint0, *hint = hint0;
3209         struct llinfo_nd6 *ln = NULL;
3210         int error = 0;
3211         uint64_t timenow;
3212         struct rtentry *rtrele = NULL;
3213         struct nd_ifinfo *ndi = NULL;
3214
3215         if (rt != NULL) {
3216                 RT_LOCK_SPIN(rt);
3217                 RT_ADDREF_LOCKED(rt);
3218         }
3219
3220         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
3221                 if (rt != NULL)
3222                         RT_UNLOCK(rt);
3223                 goto sendpkt;
3224         }
3225
3226         /*
3227          * Next hop determination.  Because we may involve the gateway route
3228          * in addition to the original route, locking is rather complicated.
3229          * The general concept is that regardless of whether the route points
3230          * to the original route or to the gateway route, this routine takes
3231          * an extra reference on such a route.  This extra reference will be
3232          * released at the end.
3233          *
3234          * Care must be taken to ensure that the "hint0" route never gets freed
3235          * via rtfree(), since the caller may have stored it inside a struct
3236          * route with a reference held for that placeholder.
3237          *
3238          * This logic is similar to, though not exactly the same as the one
3239          * used by route_to_gwroute().
3240          */
3241         if (rt != NULL) {
3242                 /*
3243                  * We have a reference to "rt" by now (or below via rtalloc1),
3244                  * which will either be released or freed at the end of this
3245                  * routine.
3246                  */
3247                 RT_LOCK_ASSERT_HELD(rt);
3248                 if (!(rt->rt_flags & RTF_UP)) {
3249                         RT_REMREF_LOCKED(rt);
3250                         RT_UNLOCK(rt);
3251                         if ((hint = rt = rtalloc1_scoped(SA(dst), 1, 0,
3252                             ifp->if_index)) != NULL) {
3253                                 RT_LOCK_SPIN(rt);
3254                                 if (rt->rt_ifp != ifp) {
3255                                         /* XXX: loop care? */
3256                                         RT_UNLOCK(rt);
3257                                         error = nd6_output_list(ifp, origifp, m0,
3258                                             dst, rt, adv);
3259                                         rtfree(rt);
3260                                         return (error);
3261                                 }
3262                         } else {
3263                                 senderr(EHOSTUNREACH);
3264                         }
3265                 }
3266
3267                 if (rt->rt_flags & RTF_GATEWAY) {
3268                         struct rtentry *gwrt;
3269                         struct in6_ifaddr *ia6 = NULL;
3270                         struct sockaddr_in6 gw6;
3271
3272                         rtgw_to_sa6(rt, &gw6);
3273                         /*
3274                          * Must drop rt_lock since nd6_is_addr_neighbor()
3275                          * calls nd6_lookup() and acquires rnh_lock.
3276                          */
3277                         RT_UNLOCK(rt);
3278
3279                         /*
3280                          * We skip link-layer address resolution and NUD
3281                          * if the gateway is not a neighbor from ND point
3282                          * of view, regardless of the value of nd_ifinfo.flags.
3283                          * The second condition is a bit tricky; we skip
3284                          * if the gateway is our own address, which is
3285                          * sometimes used to install a route to a p2p link.
3286                          */
3287                         if (!nd6_is_addr_neighbor(&gw6, ifp, 0) ||
3288                             (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
3289                                 /*
3290                                  * We allow this kind of tricky route only
3291                                  * when the outgoing interface is p2p.
3292                                  * XXX: we may need a more generic rule here.
3293                                  */
3294                                 if (ia6 != NULL)
3295                                         IFA_REMREF(&ia6->ia_ifa);
3296                                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
3297                                         senderr(EHOSTUNREACH);
3298                                 goto sendpkt;
3299                         }
3300
3301                         RT_LOCK_SPIN(rt);
3302                         gw6 = *(SIN6(rt->rt_gateway));
3303
3304                         /* If hint is now down, give up */
3305                         if (!(rt->rt_flags & RTF_UP)) {
3306                                 RT_UNLOCK(rt);
3307                                 senderr(EHOSTUNREACH);
3308                         }
3309
3310                         /* If there's no gateway route, look it up */
3311                         if ((gwrt = rt->rt_gwroute) == NULL) {
3312                                 RT_UNLOCK(rt);
3313                                 goto lookup;
3314                         }
3315                         /* Become a regular mutex */
3316                         RT_CONVERT_LOCK(rt);
3317
3318                         /*
3319                          * Take gwrt's lock while holding route's lock;
3320                          * this is okay since gwrt never points back
3321                          * to rt, so no lock ordering issues.
3322                          */
3323                         RT_LOCK_SPIN(gwrt);
3324                         if (!(gwrt->rt_flags & RTF_UP)) {
3325                                 rt->rt_gwroute = NULL;
3326                                 RT_UNLOCK(gwrt);
3327                                 RT_UNLOCK(rt);
3328                                 rtfree(gwrt);
3329 lookup:
3330                                 lck_mtx_lock(rnh_lock);
3331                                 gwrt = rtalloc1_scoped_locked(SA(&gw6), 1, 0,
3332                                     ifp->if_index);
3333
3334                                 RT_LOCK(rt);
3335                                 /*
3336                                  * Bail out if the route is down, no route
3337                                  * to gateway, circular route, or if the
3338                                  * gateway portion of "rt" has changed.
3339                                  */
3340                                 if (!(rt->rt_flags & RTF_UP) ||
3341                                     gwrt == NULL || gwrt == rt ||
3342                                     !equal(SA(&gw6), rt->rt_gateway)) {
3343                                         if (gwrt == rt) {
3344                                                 RT_REMREF_LOCKED(gwrt);
3345                                                 gwrt = NULL;
3346                                         }
3347                                         RT_UNLOCK(rt);
3348                                         if (gwrt != NULL)
3349                                                 rtfree_locked(gwrt);
3350                                         lck_mtx_unlock(rnh_lock);
3351                                         senderr(EHOSTUNREACH);
3352                                 }
3353                                 VERIFY(gwrt != NULL);
3354                                 /*
3355                                  * Set gateway route; callee adds ref to gwrt;
3356                                  * gwrt has an extra ref from rtalloc1() for
3357                                  * this routine.
3358                                  */
3359                                 rt_set_gwroute(rt, rt_key(rt), gwrt);
3360                                 RT_UNLOCK(rt);
3361                                 lck_mtx_unlock(rnh_lock);
3362                                 /* Remember to release/free "rt" at the end */
3363                                 rtrele = rt;
3364                                 rt = gwrt;
3365                         } else {
3366                                 RT_ADDREF_LOCKED(gwrt);
3367                                 RT_UNLOCK(gwrt);
3368                                 RT_UNLOCK(rt);
3369                                 /* Remember to release/free "rt" at the end */
3370                                 rtrele = rt;
3371                                 rt = gwrt;
3372                         }
3373                         VERIFY(rt == gwrt);
3374
3375                         /*
3376                          * This is an opportunity to revalidate the parent
3377                          * route's gwroute, in case it now points to a dead
3378                          * route entry.  Parent route won't go away since the
3379                          * clone (hint) holds a reference to it.  rt == gwrt.
3380                          */
3381                         RT_LOCK_SPIN(hint);
3382                         if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
3383                             (RTF_WASCLONED | RTF_UP)) {
3384                                 struct rtentry *prt = hint->rt_parent;
3385                                 VERIFY(prt != NULL);
3386
3387                                 RT_CONVERT_LOCK(hint);
3388                                 RT_ADDREF(prt);
3389                                 RT_UNLOCK(hint);
3390                                 rt_revalidate_gwroute(prt, rt);
3391                                 RT_REMREF(prt);
3392                         } else {
3393                                 RT_UNLOCK(hint);
3394                         }
3395
3396                         RT_LOCK_SPIN(rt);
3397                         /* rt == gwrt; if it is now down, give up */
3398                         if (!(rt->rt_flags & RTF_UP)) {
3399                                 RT_UNLOCK(rt);
3400                                 rtfree(rt);
3401                                 rt = NULL;
3402                                 /* "rtrele" == original "rt" */
3403                                 senderr(EHOSTUNREACH);
3404                         }
3405                 }
3406
3407                 /* Become a regular mutex */
3408                 RT_CONVERT_LOCK(rt);
3409         }
3410
3411         /*
3412          * Address resolution or Neighbor Unreachability Detection
3413          * for the next hop.
3414          * At this point, the destination of the packet must be a unicast
3415          * or an anycast address(i.e. not a multicast).
3416          */
3417
3418         /* Look up the neighbor cache for the nexthop */
3419         if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
3420                 ln = rt->rt_llinfo;
3421         } else {
3422                 struct sockaddr_in6 sin6;
3423                 /*
3424                  * Clear out Scope ID field in case it is set.
3425                  */
3426                 sin6 = *dst;
3427                 sin6.sin6_scope_id = 0;
3428                 /*
3429                  * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
3430                  * the condition below is not very efficient.  But we believe
3431                  * it is tolerable, because this should be a rare case.
3432                  * Must drop rt_lock since nd6_is_addr_neighbor() calls
3433                  * nd6_lookup() and acquires rnh_lock.
3434                  */
3435                 if (rt != NULL)
3436                         RT_UNLOCK(rt);
3437                 if (nd6_is_addr_neighbor(&sin6, ifp, 0)) {
3438                         /* "rtrele" may have been used, so clean up "rt" now */
3439                         if (rt != NULL) {
3440                                 /* Don't free "hint0" */
3441                                 if (rt == hint0)
3442                                         RT_REMREF(rt);
3443                                 else
3444                                         rtfree(rt);
3445                         }
3446                         /* Callee returns a locked route upon success */
3447                         rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0);
3448                         if (rt != NULL) {
3449                                 RT_LOCK_ASSERT_HELD(rt);
3450                                 ln = rt->rt_llinfo;
3451                         }
3452                 } else if (rt != NULL) {
3453                         RT_LOCK(rt);
3454                 }
3455         }
3456
3457         if (!ln || !rt) {
3458                 if (rt != NULL) {
3459                         RT_UNLOCK(rt);
3460                 }
3461                 ndi = ND_IFINFO(ifp);
3462                 VERIFY(ndi != NULL && ndi->initialized);
3463                 lck_mtx_lock(&ndi->lock);
3464                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
3465                     !(ndi->flags & ND6_IFF_PERFORMNUD)) {
3466                         lck_mtx_unlock(&ndi->lock);
3467                         log(LOG_DEBUG,
3468                             "nd6_output: can't allocate llinfo for %s "
3469                             "(ln=0x%llx, rt=0x%llx)\n",
3470                             ip6_sprintf(&dst->sin6_addr),
3471                             (uint64_t)VM_KERNEL_ADDRPERM(ln),
3472                             (uint64_t)VM_KERNEL_ADDRPERM(rt));
3473                         senderr(EIO);   /* XXX: good error? */
3474                 }
3475                 lck_mtx_unlock(&ndi->lock);
3476
3477                 goto sendpkt;   /* send anyway */
3478         }
3479
3480         net_update_uptime();
3481         timenow = net_uptime();
3482
3483         /* We don't have to do link-layer address resolution on a p2p link. */
3484         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
3485             ln->ln_state < ND6_LLINFO_REACHABLE) {
3486                 ln->ln_state = ND6_LLINFO_STALE;
3487                 ln_setexpire(ln, timenow + nd6_gctimer);
3488         }
3489
3490         /*
3491          * The first time we send a packet to a neighbor whose entry is
3492          * STALE, we have to change the state to DELAY and set a timer to
3493          * expire in DELAY_FIRST_PROBE_TIME seconds, so that neighbor
3494          * unreachability detection is performed on expiration.
3495          * (RFC 4861 7.3.3)
3496          */
3497         if (ln->ln_state == ND6_LLINFO_STALE) {
3498                 ln->ln_asked = 0;
3499                 ln->ln_state = ND6_LLINFO_DELAY;
3500                 ln_setexpire(ln, timenow + nd6_delay);
3501                 /* N.B.: we will re-arm the timer below. */
3502                 _CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE);
3503         }
3504
3505         /*
3506          * If the neighbor cache entry has a state other than INCOMPLETE
3507          * (i.e. its link-layer address is already resolved), just
3508          * send the packet.
3509          */
3510         if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
3511                 RT_UNLOCK(rt);
3512                 /*
3513                  * Move this entry to the head of the queue so that it is
3514                  * less likely for this entry to be a target of forced
3515                  * garbage collection (see nd6_rtrequest()).  Do this only
3516                  * if the entry is non-permanent (as permanent ones will
3517                  * never be purged), and if the number of active entries
3518                  * is at least half of the threshold.
3519                  */
3520                 if (ln->ln_state == ND6_LLINFO_DELAY ||
3521                     (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3522                     nd6_inuse >= (ip6_neighborgcthresh >> 1))) {
3523                         lck_mtx_lock(rnh_lock);
3524                         if (ln->ln_state == ND6_LLINFO_DELAY)
3525                                 nd6_sched_timeout(NULL, NULL);
3526                         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3527                             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3528                                 RT_LOCK_SPIN(rt);
3529                                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3530                                         LN_DEQUEUE(ln);
3531                                         LN_INSERTHEAD(ln);
3532                                 }
3533                                 RT_UNLOCK(rt);
3534                         }
3535                         lck_mtx_unlock(rnh_lock);
3536                 }
3537                 goto sendpkt;
3538         }
3539
3540         /*
3541          * If this is a prefix proxy route, record the inbound interface
3542          * so that it can be excluded from the list of interfaces eligible
3543          * for forwarding the proxied NS in nd6_prproxy_ns_output().
3544          */
3545         if (rt->rt_flags & RTF_PROXY)
3546                 ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp);
3547
3548         /*
3549          * There is a neighbor cache entry, but no ethernet address
3550          * response yet.  Replace the held mbuf (if any) with this
3551          * latest one.
3552          *
3553          * This code conforms to the rate-limiting rule described in Section
3554          * 7.2.2 of RFC 4861, because the timer is set correctly after sending
3555          * an NS below.
3556          */
3557         if (ln->ln_state == ND6_LLINFO_NOSTATE)
3558                 ln->ln_state = ND6_LLINFO_INCOMPLETE;
3559         if (ln->ln_hold)
3560                 m_freem_list(ln->ln_hold);
3561         ln->ln_hold = m0;
3562         if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries &&
3563             ln->ln_expire <= timenow) {
3564                 ln->ln_asked++;
3565                 ndi = ND_IFINFO(ifp);
3566                 VERIFY(ndi != NULL && ndi->initialized);
3567                 lck_mtx_lock(&ndi->lock);
3568                 ln_setexpire(ln, timenow + ndi->retrans / 1000);
3569                 lck_mtx_unlock(&ndi->lock);
3570                 RT_UNLOCK(rt);
3571                 /* We still have a reference on rt (for ln) */
3572                 if (ip6_forwarding)
3573                         nd6_prproxy_ns_output(ifp, origifp, NULL,
3574                             &dst->sin6_addr, ln);
3575                 else
3576                         nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
3577                 lck_mtx_lock(rnh_lock);
3578                 nd6_sched_timeout(NULL, NULL);
3579                 lck_mtx_unlock(rnh_lock);
3580         } else {
3581                 if(ln->ln_state == ND6_LLINFO_INCOMPLETE) {
3582                         ln->ln_expire = timenow;
3583                 }
3584                 RT_UNLOCK(rt);
3585         }
3586         /*
3587          * Move this entry to the head of the queue so that it is
3588          * less likely for this entry to be a target of forced
3589          * garbage collection (see nd6_rtrequest()).  Do this only
3590          * if the entry is non-permanent (as permanent ones will
3591          * never be purged), and if the number of active entries
3592          * is at least half of the threshold.
3593          */
3594         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3595             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3596                 lck_mtx_lock(rnh_lock);
3597                 RT_LOCK_SPIN(rt);
3598                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3599                         LN_DEQUEUE(ln);
3600                         LN_INSERTHEAD(ln);
3601                 }
3602                 /* Clean up "rt" now while we can */
3603                 if (rt == hint0) {
3604                         RT_REMREF_LOCKED(rt);
3605                         RT_UNLOCK(rt);
3606                 } else {
3607                         RT_UNLOCK(rt);
3608                         rtfree_locked(rt);
3609                 }
3610                 rt = NULL;      /* "rt" has been taken care of */
3611                 lck_mtx_unlock(rnh_lock);
3612         }
3613         error = 0;
3614         goto release;
3615
3616 sendpkt:
3617         if (rt != NULL)
3618                 RT_LOCK_ASSERT_NOTHELD(rt);
3619
3620         /* discard the packet if IPv6 operation is disabled on the interface */
3621         if (ifp->if_eflags & IFEF_IPV6_DISABLED) {
3622                 error = ENETDOWN; /* better error? */
3623                 goto bad;
3624         }
3625
3626         if (ifp->if_flags & IFF_LOOPBACK) {
3627                 /* forwarding rules require the original scope_id */
3628                 m0->m_pkthdr.rcvif = origifp;
3629                 error = dlil_output(origifp, PF_INET6, m0, (caddr_t)rt,
3630                     SA(dst), 0, adv);
3631                 goto release;
3632         } else {
3633                 /* Do not allow loopback address to wind up on a wire */
3634                 struct ip6_hdr *ip6 = mtod(m0, struct ip6_hdr *);
3635
3636                 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
3637                     IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
3638                         ip6stat.ip6s_badscope++;
3639                         error = EADDRNOTAVAIL;
3640                         goto bad;
3641                 }
3642         }
3643
3644         if (rt != NULL) {
3645                 RT_LOCK_SPIN(rt);
3646                 /* Mark use timestamp */
3647                 if (rt->rt_llinfo != NULL)
3648                         nd6_llreach_use(rt->rt_llinfo);
3649                 RT_UNLOCK(rt);
3650         }
3651
3652         struct mbuf *mcur = m0;
3653         uint32_t pktcnt = 0;
3654
3655         while (mcur) {
3656                 if (hint != NULL && nstat_collect) {
3657                         int scnt;
3658
3659                         if ((mcur->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
3660                                         (mcur->m_pkthdr.tso_segsz > 0))
3661                                 scnt = mcur->m_pkthdr.len / mcur->m_pkthdr.tso_segsz;
3662                         else
3663                                 scnt = 1;
3664
3665                         nstat_route_tx(hint, scnt, mcur->m_pkthdr.len, 0);
3666                 }
3667                 pktcnt++;
3668
3669                 mcur->m_pkthdr.rcvif = NULL;
3670                 mcur = mcur->m_nextpkt;
3671         }
3672         if (pktcnt > ip6_maxchainsent)
3673                 ip6_maxchainsent = pktcnt;
3674         error = dlil_output(ifp, PF_INET6, m0, (caddr_t)rt, SA(dst), 0, adv);
3675         goto release;
3676
3677 bad:
3678         if (m0 != NULL)
3679                 m_freem_list(m0);
3680
3681 release:
3682         /* Clean up "rt" unless it's already been done */
3683         if (rt != NULL) {
3684                 RT_LOCK_SPIN(rt);
3685                 if (rt == hint0) {
3686                         RT_REMREF_LOCKED(rt);
3687                         RT_UNLOCK(rt);
3688                 } else {
3689                         RT_UNLOCK(rt);
3690                         rtfree(rt);
3691                 }
3692         }
3693         /* And now clean up "rtrele" if there is any */
3694         if (rtrele != NULL) {
3695                 RT_LOCK_SPIN(rtrele);
3696                 if (rtrele == hint0) {
3697                         RT_REMREF_LOCKED(rtrele);
3698                         RT_UNLOCK(rtrele);
3699                 } else {
3700                         RT_UNLOCK(rtrele);
3701                         rtfree(rtrele);
3702                 }
3703         }
3704         return (error);
3705 }
3706 #undef senderr
3707
3708 int
3709 nd6_need_cache(struct ifnet *ifp)
3710 {
3711         /*
3712          * XXX: we currently do not make neighbor cache on any interface
3713          * other than ARCnet, Ethernet, FDDI and GIF.
3714          *
3715          * RFC2893 says:
3716          * - unidirectional tunnels needs no ND
3717          */
3718         switch (ifp->if_type) {
3719         case IFT_ARCNET:
3720         case IFT_ETHER:
3721         case IFT_FDDI:
3722         case IFT_IEEE1394:
3723         case IFT_L2VLAN:
3724         case IFT_IEEE8023ADLAG:
3725 #if IFT_IEEE80211
3726         case IFT_IEEE80211:
3727 #endif
3728         case IFT_GIF:           /* XXX need more cases? */
3729         case IFT_PPP:
3730 #if IFT_TUNNEL
3731         case IFT_TUNNEL:
3732 #endif
3733         case IFT_BRIDGE:
3734         case IFT_CELLULAR:
3735                 return (1);
3736         default:
3737                 return (0);
3738         }
3739 }
3740
3741 int
3742 nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
3743     struct sockaddr *dst, u_char *desten)
3744 {
3745         int i;
3746         struct sockaddr_dl *sdl;
3747
3748         if (m->m_flags & M_MCAST) {
3749                 switch (ifp->if_type) {
3750                 case IFT_ETHER:
3751                 case IFT_FDDI:
3752                 case IFT_L2VLAN:
3753                 case IFT_IEEE8023ADLAG:
3754 #if IFT_IEEE80211
3755                 case IFT_IEEE80211:
3756 #endif
3757                 case IFT_BRIDGE:
3758                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten);
3759                         return (1);
3760                 case IFT_IEEE1394:
3761                         for (i = 0; i < ifp->if_addrlen; i++)
3762                                 desten[i] = ~0;
3763                         return (1);
3764                 case IFT_ARCNET:
3765                         *desten = 0;
3766                         return (1);
3767                 default:
3768                         return (0); /* caller will free mbuf */
3769                 }
3770         }
3771
3772         if (rt == NULL) {
3773                 /* this could happen, if we could not allocate memory */
3774                 return (0); /* caller will free mbuf */
3775         }
3776         RT_LOCK(rt);
3777         if (rt->rt_gateway->sa_family != AF_LINK) {
3778                 printf("nd6_storelladdr: something odd happens\n");
3779                 RT_UNLOCK(rt);
3780                 return (0); /* caller will free mbuf */
3781         }
3782         sdl = SDL(rt->rt_gateway);
3783         if (sdl->sdl_alen == 0) {
3784                 /* this should be impossible, but we bark here for debugging */
3785                 printf("nd6_storelladdr: sdl_alen == 0\n");
3786                 RT_UNLOCK(rt);
3787                 return (0); /* caller will free mbuf */
3788         }
3789
3790         bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
3791         RT_UNLOCK(rt);
3792         return (1);
3793 }
3794
3795 /*
3796  * This is the ND pre-output routine; care must be taken to ensure that
3797  * the "hint" route never gets freed via rtfree(), since the caller may
3798  * have stored it inside a struct route with a reference held for that
3799  * placeholder.
3800  */
3801 errno_t
3802 nd6_lookup_ipv6(ifnet_t  ifp, const struct sockaddr_in6 *ip6_dest,
3803     struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
3804     mbuf_t packet)
3805 {
3806         route_t route = hint;
3807         errno_t result = 0;
3808         struct sockaddr_dl *sdl = NULL;
3809         size_t  copy_len;
3810
3811         if (ip6_dest->sin6_family != AF_INET6)
3812                 return (EAFNOSUPPORT);
3813
3814         if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
3815                 return (ENETDOWN);
3816
3817         if (hint != NULL) {
3818                 /*
3819                  * Callee holds a reference on the route and returns
3820                  * with the route entry locked, upon success.
3821                  */
3822                 result = route_to_gwroute((const struct sockaddr *)ip6_dest,
3823                     hint, &route);
3824                 if (result != 0)
3825                         return (result);
3826                 if (route != NULL)
3827                         RT_LOCK_ASSERT_HELD(route);
3828         }
3829
3830         if ((packet->m_flags & M_MCAST) != 0) {
3831                 if (route != NULL)
3832                         RT_UNLOCK(route);
3833                 result = dlil_resolve_multi(ifp,
3834                     (const struct sockaddr *)ip6_dest,
3835                     SA(ll_dest), ll_dest_len);
3836                 if (route != NULL)
3837                         RT_LOCK(route);
3838                 goto release;
3839         }
3840
3841         if (route == NULL) {
3842                 /*
3843                  * This could happen, if we could not allocate memory or
3844                  * if route_to_gwroute() didn't return a route.
3845                  */
3846                 result = ENOBUFS;
3847                 goto release;
3848         }
3849
3850         if (route->rt_gateway->sa_family != AF_LINK) {
3851                 printf("%s: route %s on %s%d gateway address not AF_LINK\n",
3852                     __func__, ip6_sprintf(&ip6_dest->sin6_addr),
3853                     route->rt_ifp->if_name, route->rt_ifp->if_unit);
3854                 result = EADDRNOTAVAIL;
3855                 goto release;
3856         }
3857
3858         sdl = SDL(route->rt_gateway);
3859         if (sdl->sdl_alen == 0) {
3860                 /* this should be impossible, but we bark here for debugging */
3861                 printf("%s: route %s on %s%d sdl_alen == 0\n", __func__,
3862                     ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name,
3863                     route->rt_ifp->if_unit);
3864                 result = EHOSTUNREACH;
3865                 goto release;
3866         }
3867
3868         copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
3869         bcopy(sdl, ll_dest, copy_len);
3870
3871 release:
3872         if (route != NULL) {
3873                 if (route == hint) {
3874                         RT_REMREF_LOCKED(route);
3875                         RT_UNLOCK(route);
3876                 } else {
3877                         RT_UNLOCK(route);
3878                         rtfree(route);
3879                 }
3880         }
3881         return (result);
3882 }
3883
3884 int
3885 nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
3886 {
3887         uint32_t b, a;
3888         int err = 0;
3889
3890         /*
3891          * Handle ND6_IFF_IFDISABLED
3892          */
3893         if ((before & ND6_IFF_IFDISABLED) ||
3894             (after & ND6_IFF_IFDISABLED)) {
3895                 b = (before & ND6_IFF_IFDISABLED);
3896                 a = (after & ND6_IFF_IFDISABLED);
3897
3898                 if (b != a && (err = nd6_if_disable(ifp,
3899                      ((int32_t)(a - b) > 0))) != 0)
3900                         goto done;
3901         }
3902
3903         /*
3904          * Handle ND6_IFF_PROXY_PREFIXES
3905          */
3906         if ((before & ND6_IFF_PROXY_PREFIXES) ||
3907             (after & ND6_IFF_PROXY_PREFIXES)) {
3908                 b = (before & ND6_IFF_PROXY_PREFIXES);
3909                 a = (after & ND6_IFF_PROXY_PREFIXES);
3910
3911                 if (b != a && (err = nd6_if_prproxy(ifp,
3912                      ((int32_t)(a - b) > 0))) != 0)
3913                         goto done;
3914         }
3915 done:
3916         return (err);
3917 }
3918
3919 /*
3920  * Enable/disable IPv6 on an interface, called as part of
3921  * setting/clearing ND6_IFF_IFDISABLED, or during DAD failure.
3922  */
3923 int
3924 nd6_if_disable(struct ifnet *ifp, boolean_t enable)
3925 {
3926         ifnet_lock_shared(ifp);
3927         if (enable)
3928                 ifp->if_eflags |= IFEF_IPV6_DISABLED;
3929         else
3930                 ifp->if_eflags &= ~IFEF_IPV6_DISABLED;
3931         ifnet_lock_done(ifp);
3932
3933         return (0);
3934 }
3935
3936 static int
3937 nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
3938 {
3939 #pragma unused(oidp, arg1, arg2)
3940         char pbuf[MAX_IPv6_STR_LEN];
3941         struct nd_defrouter *dr;
3942         int error = 0;
3943
3944         if (req->newptr != USER_ADDR_NULL)
3945                 return (EPERM);
3946
3947         lck_mtx_lock(nd6_mutex);
3948         if (proc_is64bit(req->p)) {
3949                 struct in6_defrouter_64 d;
3950
3951                 bzero(&d, sizeof (d));
3952                 d.rtaddr.sin6_family = AF_INET6;
3953                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
3954
3955                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
3956                         d.rtaddr.sin6_addr = dr->rtaddr;
3957                         if (in6_recoverscope(&d.rtaddr,
3958                             &dr->rtaddr, dr->ifp) != 0)
3959                                 log(LOG_ERR, "scope error in default router "
3960                                     "list (%s)\n", inet_ntop(AF_INET6,
3961                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
3962                         d.flags = dr->flags;
3963                         d.stateflags = dr->stateflags;
3964                         d.stateflags &= ~NDDRF_PROCESSED;
3965                         d.rtlifetime = dr->rtlifetime;
3966                         d.expire = nddr_getexpire(dr);
3967                         d.if_index = dr->ifp->if_index;
3968                         error = SYSCTL_OUT(req, &d, sizeof (d));
3969                         if (error != 0)
3970                                 break;
3971                 }
3972         } else {
3973                 struct in6_defrouter_32 d;
3974
3975                 bzero(&d, sizeof (d));
3976                 d.rtaddr.sin6_family = AF_INET6;
3977                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
3978
3979                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
3980                         d.rtaddr.sin6_addr = dr->rtaddr;
3981                         if (in6_recoverscope(&d.rtaddr,
3982                             &dr->rtaddr, dr->ifp) != 0)
3983                                 log(LOG_ERR, "scope error in default router "
3984                                     "list (%s)\n", inet_ntop(AF_INET6,
3985                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
3986                         d.flags = dr->flags;
3987                         d.stateflags = dr->stateflags;
3988                         d.stateflags &= ~NDDRF_PROCESSED;
3989                         d.rtlifetime = dr->rtlifetime;
3990                         d.expire = nddr_getexpire(dr);
3991                         d.if_index = dr->ifp->if_index;
3992                         error = SYSCTL_OUT(req, &d, sizeof (d));
3993                         if (error != 0)
3994                                 break;
3995                 }
3996         }
3997         lck_mtx_unlock(nd6_mutex);
3998         return (error);
3999 }
4000
4001 static int
4002 nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
4003 {
4004 #pragma unused(oidp, arg1, arg2)
4005         char pbuf[MAX_IPv6_STR_LEN];
4006         struct nd_pfxrouter *pfr;
4007         struct sockaddr_in6 s6;
4008         struct nd_prefix *pr;
4009         int error = 0;
4010
4011         if (req->newptr != USER_ADDR_NULL)
4012                 return (EPERM);
4013
4014         bzero(&s6, sizeof (s6));
4015         s6.sin6_family = AF_INET6;
4016         s6.sin6_len = sizeof (s6);
4017
4018         lck_mtx_lock(nd6_mutex);
4019         if (proc_is64bit(req->p)) {
4020                 struct in6_prefix_64 p;
4021
4022                 bzero(&p, sizeof (p));
4023                 p.origin = PR_ORIG_RA;
4024
4025                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
4026                         NDPR_LOCK(pr);
4027                         p.prefix = pr->ndpr_prefix;
4028                         if (in6_recoverscope(&p.prefix,
4029                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4030                                 log(LOG_ERR, "scope error in "
4031                                     "prefix list (%s)\n", inet_ntop(AF_INET6,
4032                                     &p.prefix.sin6_addr, pbuf, sizeof (pbuf)));
4033                         p.raflags = pr->ndpr_raf;
4034                         p.prefixlen = pr->ndpr_plen;
4035                         p.vltime = pr->ndpr_vltime;
4036                         p.pltime = pr->ndpr_pltime;
4037                         p.if_index = pr->ndpr_ifp->if_index;
4038                         p.expire = ndpr_getexpire(pr);
4039                         p.refcnt = pr->ndpr_addrcnt;
4040                         p.flags = pr->ndpr_stateflags;
4041                         p.advrtrs = 0;
4042                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4043                                 p.advrtrs++;
4044                         error = SYSCTL_OUT(req, &p, sizeof (p));
4045                         if (error != 0) {
4046                                 NDPR_UNLOCK(pr);
4047                                 break;
4048                         }
4049                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4050                                 s6.sin6_addr = pfr->router->rtaddr;
4051                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4052                                     pfr->router->ifp) != 0)
4053                                         log(LOG_ERR,
4054                                             "scope error in prefix list (%s)\n",
4055                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4056                                             pbuf, sizeof (pbuf)));
4057                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4058                                 if (error != 0)
4059                                         break;
4060                         }
4061                         NDPR_UNLOCK(pr);
4062                         if (error != 0)
4063                                 break;
4064                 }
4065         } else {
4066                 struct in6_prefix_32 p;
4067
4068                 bzero(&p, sizeof (p));
4069                 p.origin = PR_ORIG_RA;
4070
4071                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
4072                         NDPR_LOCK(pr);
4073                         p.prefix = pr->ndpr_prefix;
4074                         if (in6_recoverscope(&p.prefix,
4075                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4076                                 log(LOG_ERR,
4077                                     "scope error in prefix list (%s)\n",
4078                                     inet_ntop(AF_INET6, &p.prefix.sin6_addr,
4079                                     pbuf, sizeof (pbuf)));
4080                         p.raflags = pr->ndpr_raf;
4081                         p.prefixlen = pr->ndpr_plen;
4082                         p.vltime = pr->ndpr_vltime;
4083                         p.pltime = pr->ndpr_pltime;
4084                         p.if_index = pr->ndpr_ifp->if_index;
4085                         p.expire = ndpr_getexpire(pr);
4086                         p.refcnt = pr->ndpr_addrcnt;
4087                         p.flags = pr->ndpr_stateflags;
4088                         p.advrtrs = 0;
4089                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4090                                 p.advrtrs++;
4091                         error = SYSCTL_OUT(req, &p, sizeof (p));
4092                         if (error != 0) {
4093                                 NDPR_UNLOCK(pr);
4094                                 break;
4095                         }
4096                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4097                                 s6.sin6_addr = pfr->router->rtaddr;
4098                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4099                                     pfr->router->ifp) != 0)
4100                                         log(LOG_ERR,
4101                                             "scope error in prefix list (%s)\n",
4102                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4103                                             pbuf, sizeof (pbuf)));
4104                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4105                                 if (error != 0)
4106                                         break;
4107                         }
4108                         NDPR_UNLOCK(pr);
4109                         if (error != 0)
4110                                 break;
4111                 }
4112         }
4113         lck_mtx_unlock(nd6_mutex);
4114
4115         return (error);
4116 }