bsd/netinet6/nd6.c

   1 /*
   2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  31  * All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. Neither the name of the project nor the names of its contributors
  42  *    may be used to endorse or promote products derived from this software
  43  *    without specific prior written permission.
  44  *
  45  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55  * SUCH DAMAGE.
  56  */
  57
  58 /*
  59  * XXX
  60  * KAME 970409 note:
  61  * BSD/OS version heavily modifies this code, related to llinfo.
  62  * Since we don't have BSD/OS version of net/route.c in our hand,
  63  * I left the code mostly as it was in 970310.  -- itojun
  64  */
  65
  66 #include <sys/param.h>
  67 #include <sys/systm.h>
  68 #include <sys/malloc.h>
  69 #include <sys/mbuf.h>
  70 #include <sys/socket.h>
  71 #include <sys/sockio.h>
  72 #include <sys/time.h>
  73 #include <sys/kernel.h>
  74 #include <sys/sysctl.h>
  75 #include <sys/errno.h>
  76 #include <sys/syslog.h>
  77 #include <sys/protosw.h>
  78 #include <sys/proc.h>
  79 #include <sys/mcache.h>
  80
  81 #include <dev/random/randomdev.h>
  82
  83 #include <kern/queue.h>
  84 #include <kern/zalloc.h>
  85
  86 #include <net/if.h>
  87 #include <net/if_dl.h>
  88 #include <net/if_types.h>
  89 #include <net/if_llreach.h>
  90 #include <net/route.h>
  91 #include <net/dlil.h>
  92 #include <net/ntstat.h>
  93 #include <net/net_osdep.h>
  94
  95 #include <netinet/in.h>
  96 #include <netinet/in_arp.h>
  97 #include <netinet/if_ether.h>
  98 #include <netinet6/in6_var.h>
  99 #include <netinet/ip6.h>
 100 #include <netinet6/ip6_var.h>
 101 #include <netinet6/nd6.h>
 102 #include <netinet6/scope6_var.h>
 103 #include <netinet/icmp6.h>
 104
 105 #include "loop.h"
 106
  /*
   * Interval constants.  ND6_SLOWTIMER_INTERVAL is multiplied by hz when
   * nd6_init() arms nd6_slowtimo(); ND6_RECALC_REACHTM_INTERVAL is the
   * initial value of nd6_recalc_reachtm_interval.
   */
  #define ND6_SLOWTIMER_INTERVAL          (60 * 60)       /* 1 hour */
  #define ND6_RECALC_REACHTM_INTERVAL     (60 * 120)      /* 2 hours */

  /*
   * Byte-wise comparison of two objects over the first (a1)->sa_len bytes;
   * evaluates to non-zero when they match.  Note that a1 is evaluated twice.
   */
  #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
 111
  /* timer values */
  int     nd6_prune       = 1;    /* walk list every 1 second */
  int     nd6_prune_lazy  = 5;    /* lazily walk list every 5 seconds */
  int     nd6_delay       = 5;    /* delay before the first probe: 5 seconds */
  int     nd6_umaxtries   = 3;    /* maximum unicast query */
  int     nd6_mmaxtries   = 3;    /* maximum multicast query */
  int     nd6_useloopback = 1;    /* use loopback interface for local traffic */
  int     nd6_gctimer     = (60 * 60 * 24); /* 1 day: garbage collection timer */

  /*
   * Guards against too many loops in ND option parsing; nd6_options()
   * stops walking once its iteration count exceeds this value.
   */
  int nd6_maxndopt = 10;  /* max # of ND options allowed */

  int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */

  /* Debug logging defaults to on only when built with ND6_DEBUG. */
  #if ND6_DEBUG
  int nd6_debug = 1;
  #else
  int nd6_debug = 0;
  #endif

  /* Bitmask of ND6_OPTIMISTIC_DAD_* flags; all five listed below are set. */
  int nd6_optimistic_dad =
          (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF|
          ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC|
          ND6_OPTIMISTIC_DAD_SECURED);

  /* for debugging? */
  static int nd6_inuse, nd6_allocated;
 139
  /*
   * Synchronization notes:
   *
   * The global list of ND entries is anchored at llinfo_nd6; an entry
   * gets inserted into the list when the route is created and gets
   * removed from the list when it is deleted; this is done as part
   * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
   *
   * Because rnh_lock and rt_lock for the entry are held during those
   * operations, the same locks (and thus lock ordering) must be used
   * elsewhere to access the relevant data structure fields:
   *
   * ln_next, ln_prev, ln_rt
   *
   *      - Routing lock (rnh_lock)
   *
   * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags,
   * ln_llreach, ln_lastused
   *
   *      - Routing entry lock (rt_lock)
   *
   * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
   * as the route entry itself.  When a route is deleted (RTM_DELETE),
   * it is simply removed from the global list but the memory is not
   * freed until the route itself is freed.
   *
   * The list head below is a sentinel whose links point back at itself,
   * so an empty list is one where ln_next == ln_prev == &llinfo_nd6.
   */
  struct llinfo_nd6 llinfo_nd6 = {
          .ln_next = &llinfo_nd6,
          .ln_prev = &llinfo_nd6,
  };
 170
  /*
   * Protected by nd_if_rwlock.
   *
   * nd_ifinfo is an array indexed by interface index and nd_ifinfo_indexlim
   * is its current number of slots; nd6_ifattach() allocates the array on
   * first use and grows it when if_index reaches the limit.
   */
  size_t nd_ifinfo_indexlim = 32; /* increased for 5589193 */
  struct nd_ifinfo *nd_ifinfo = NULL;

  /*
   * Lock group/attributes set up in nd6_init(); they are used for
   * nd_if_rwlock and for the per-entry nd_ifinfo mutexes initialized in
   * nd6_ifattach().
   */
  static lck_grp_attr_t   *nd_if_lock_grp_attr;
  static lck_grp_t        *nd_if_lock_grp;
  static lck_attr_t       *nd_if_lock_attr;
  decl_lck_rw_data(, nd_if_rwlock_data);
  lck_rw_t                *nd_if_rwlock = &nd_if_rwlock_data;

  /* Protected by nd6_mutex */
  struct nd_drhead nd_defrouter;  /* TAILQ_INIT'ed in nd6_init() */
  struct nd_prhead nd_prefix = { 0 };
 184
  /*
   * nd6_timeout() is scheduled on a demand basis.  nd6_timeout_run is used
   * to indicate whether or not a timeout has been scheduled.  The rnh_lock
   * mutex is used to protect this scheduling; it is a natural choice given
   * the work done in the timer callback.  Unfortunately, there are cases
   * when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily
   * held, due to lock ordering.  In those cases, we utilize a "demand" counter
   * nd6_sched_timeout_want which can be atomically incremented without
   * having to hold rnh_lock.  In places where we acquire rnh_lock, such as
   * nd6_rtrequest(), we check this counter and schedule the timer if it is
   * non-zero.  The increment happens in various places when we allocate
   * new ND entries, default routers, prefixes and addresses.
   */
  static int nd6_timeout_run;             /* nd6_timeout is scheduled to run */
  static void nd6_timeout(void *);
  int nd6_sched_timeout_want;             /* demand count for timer to be sched */
  static boolean_t nd6_fast_timer_on = FALSE;

  /*
   * Serialization variables for nd6_service(), protected by rnh_lock.
   * nd6_service() sleeps on nd6_service_wc while nd6_service_busy is set
   * and counts itself in nd6_service_waiters while doing so.
   */
  static boolean_t nd6_service_busy;
  static void *nd6_service_wc = &nd6_service_busy;
  static int nd6_service_waiters = 0;

  int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
  /* AF_INET6 sockaddr whose address bytes are all 0xff; filled by nd6_init() */
  static struct sockaddr_in6 all1_sa;

  static int regen_tmpaddr(struct in6_ifaddr *);
  extern lck_mtx_t *nd6_mutex;

  /* llinfo_nd6 allocation and per-route link-layer info callbacks */
  static struct llinfo_nd6 *nd6_llinfo_alloc(int);
  static void nd6_llinfo_free(void *);
  static void nd6_llinfo_purge(struct rtentry *);
  static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
  static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
  static uint64_t ln_getexpire(struct llinfo_nd6 *);

  static void nd6_service(void *);
  static void nd6_slowtimo(void *);
  static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
  static int nd6_siocgdrlst(void *, int);
  static int nd6_siocgprlst(void *, int);

  /* sysctl handlers registered further below for the router/prefix lists */
  static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS;
  static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS;
 229
  /*
   * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
   * Both macros also assert that the lock of the entry's route is held, and
   * both evaluate their argument several times, so pass a side-effect-free
   * expression.
   */

  /*
   * Unlink _ln from the circular doubly-linked list: splice its neighbors
   * together, NULL out its own links and clear ND6_LNF_IN_USE.
   */
  #define LN_DEQUEUE(_ln) do {                                            \
          lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
          RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
          (_ln)->ln_next->ln_prev = (_ln)->ln_prev;                       \
          (_ln)->ln_prev->ln_next = (_ln)->ln_next;                       \
          (_ln)->ln_prev = (_ln)->ln_next = NULL;                         \
          (_ln)->ln_flags &= ~ND6_LNF_IN_USE;                             \
  } while (0)

  /*
   * Link _ln in directly after the llinfo_nd6 sentinel and set
   * ND6_LNF_IN_USE.  The order matters: the old first element is captured
   * in (_ln)->ln_next before llinfo_nd6.ln_next is overwritten, and its
   * back pointer is fixed up last through (_ln)->ln_next.
   */
  #define LN_INSERTHEAD(_ln) do {                                         \
          lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
          RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
          (_ln)->ln_next = llinfo_nd6.ln_next;                            \
          llinfo_nd6.ln_next = (_ln);                                     \
          (_ln)->ln_prev = &llinfo_nd6;                                   \
          (_ln)->ln_next->ln_prev = (_ln);                                \
          (_ln)->ln_flags |= ND6_LNF_IN_USE;                              \
  } while (0)
 251
  /* Zone backing struct llinfo_nd6 allocations; created in nd6_init(). */
  static struct zone *llinfo_nd6_zone;
  #define LLINFO_ND6_ZONE_MAX     256             /* maximum elements in zone */
  #define LLINFO_ND6_ZONE_NAME    "llinfo_nd6"    /* name for zone */

  extern int tvtohz(struct timeval *);

  /* Set to 1 at the end of nd6_init(), which VERIFYs it is still 0 on entry. */
  static int nd6_init_done;

  SYSCTL_DECL(_net_inet6_icmp6);

  /* Read-only sysctl node served by nd6_sysctl_drlist(). */
  SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
          CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
          nd6_sysctl_drlist, "S,in6_defrouter", "");

  /*
   * Read-only sysctl node served by nd6_sysctl_prlist().
   * NOTE(review): the type string below is the same "S,in6_defrouter" used
   * for the router list above; it looks copied -- confirm whether a
   * prefix-specific structure name was intended before changing it.
   */
  SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
          CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
          nd6_sysctl_prlist, "S,in6_defrouter", "");
 269
 270 void
 271 nd6_init(void)
 272 {
 273         int i;
 274
 275         VERIFY(!nd6_init_done);
 276
 277         all1_sa.sin6_family = AF_INET6;
 278         all1_sa.sin6_len = sizeof (struct sockaddr_in6);
 279         for (i = 0; i < sizeof (all1_sa.sin6_addr); i++)
 280                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
 281
 282         /* initialization of the default router list */
 283         TAILQ_INIT(&nd_defrouter);
 284
 285         nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
 286         nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
 287         nd_if_lock_attr = lck_attr_alloc_init();
 288         lck_rw_init(nd_if_rwlock, nd_if_lock_grp, nd_if_lock_attr);
 289
 290         llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
 291             LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
 292             LLINFO_ND6_ZONE_NAME);
 293         if (llinfo_nd6_zone == NULL)
 294                 panic("%s: failed allocating llinfo_nd6_zone", __func__);
 295
 296         zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
 297         zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);
 298
 299         nd6_nbr_init();
 300         nd6_rtr_init();
 301         nd6_prproxy_init();
 302
 303         nd6_init_done = 1;
 304
 305         /* start timer */
 306         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
 307 }
 308
 309 static struct llinfo_nd6 *
 310 nd6_llinfo_alloc(int how)
 311 {
 312         struct llinfo_nd6 *ln;
 313
 314         ln = (how == M_WAITOK) ? zalloc(llinfo_nd6_zone) :
 315             zalloc_noblock(llinfo_nd6_zone);
 316         if (ln != NULL)
 317                 bzero(ln, sizeof (*ln));
 318
 319         return (ln);
 320 }
 321
 322 static void
 323 nd6_llinfo_free(void *arg)
 324 {
 325         struct llinfo_nd6 *ln = arg;
 326
 327         if (ln->ln_next != NULL || ln->ln_prev != NULL) {
 328                 panic("%s: trying to free %p when it is in use", __func__, ln);
 329                 /* NOTREACHED */
 330         }
 331
 332         /* Just in case there's anything there, free it */
 333         if (ln->ln_hold != NULL) {
 334                 m_freem(ln->ln_hold);
 335                 ln->ln_hold = NULL;
 336         }
 337
 338         /* Purge any link-layer info caching */
 339         VERIFY(ln->ln_rt->rt_llinfo == ln);
 340         if (ln->ln_rt->rt_llinfo_purge != NULL)
 341                 ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
 342
 343         zfree(llinfo_nd6_zone, ln);
 344 }
 345
 346 static void
 347 nd6_llinfo_purge(struct rtentry *rt)
 348 {
 349         struct llinfo_nd6 *ln = rt->rt_llinfo;
 350
 351         RT_LOCK_ASSERT_HELD(rt);
 352         VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
 353
 354         if (ln->ln_llreach != NULL) {
 355                 RT_CONVERT_LOCK(rt);
 356                 ifnet_llreach_free(ln->ln_llreach);
 357                 ln->ln_llreach = NULL;
 358         }
 359         ln->ln_lastused = 0;
 360 }
 361
 362 static void
 363 nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 364 {
 365         struct llinfo_nd6 *ln = rt->rt_llinfo;
 366         struct if_llreach *lr = ln->ln_llreach;
 367
 368         if (lr == NULL) {
 369                 bzero(ri, sizeof (*ri));
 370                 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
 371                 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
 372                 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
 373         } else {
 374                 IFLR_LOCK(lr);
 375                 /* Export to rt_reach_info structure */
 376                 ifnet_lr2ri(lr, ri);
 377                 /* Export ND6 send expiration (calendar) time */
 378                 ri->ri_snd_expire =
 379                     ifnet_llreach_up2calexp(lr, ln->ln_lastused);
 380                 IFLR_UNLOCK(lr);
 381         }
 382 }
 383
 384 static void
 385 nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
 386 {
 387         struct llinfo_nd6 *ln = rt->rt_llinfo;
 388         struct if_llreach *lr = ln->ln_llreach;
 389
 390         if (lr == NULL) {
 391                 bzero(iflri, sizeof (*iflri));
 392                 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
 393                 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
 394                 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
 395         } else {
 396                 IFLR_LOCK(lr);
 397                 /* Export to ifnet_llreach_info structure */
 398                 ifnet_lr2iflri(lr, iflri);
 399                 /* Export ND6 send expiration (uptime) time */
 400                 iflri->iflri_snd_expire =
 401                     ifnet_llreach_up2upexp(lr, ln->ln_lastused);
 402                 IFLR_UNLOCK(lr);
 403         }
 404 }
 405
  /*
   * Store expiry in ln->ln_expire.  ln_getexpire() treats a stored value
   * of 0 as "no expiration" and otherwise interprets it relative to the
   * route's base_uptime, so callers presumably pass a net_uptime()-based
   * value -- TODO confirm against callers.
   */
  void
  ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
  {
          ln->ln_expire = expiry;
  }
 411
 412 static uint64_t
 413 ln_getexpire(struct llinfo_nd6 *ln)
 414 {
 415         struct timeval caltime;
 416         uint64_t expiry;
 417
 418         if (ln->ln_expire != 0) {
 419                 struct rtentry *rt = ln->ln_rt;
 420
 421                 VERIFY(rt != NULL);
 422                 /* account for system time change */
 423                 getmicrotime(&caltime);
 424
 425                 rt->base_calendartime +=
 426                     NET_CALCULATE_CLOCKSKEW(caltime,
 427                     rt->base_calendartime, net_uptime(), rt->base_uptime);
 428
 429                 expiry = rt->base_calendartime +
 430                     ln->ln_expire - rt->base_uptime;
 431         } else {
 432                 expiry = 0;
 433         }
 434         return (expiry);
 435 }
 436
 437 void
 438 nd6_ifreset(struct ifnet *ifp)
 439 {
 440         struct nd_ifinfo *ndi;
 441
 442         lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_HELD);
 443         VERIFY(ifp != NULL && ifp->if_index < nd_ifinfo_indexlim);
 444         ndi = &nd_ifinfo[ifp->if_index];
 445
 446         VERIFY(ndi->initialized);
 447         lck_mtx_assert(&ndi->lock, LCK_MTX_ASSERT_OWNED);
 448         ndi->linkmtu = ifp->if_mtu;
 449         ndi->chlim = IPV6_DEFHLIM;
 450         ndi->basereachable = REACHABLE_TIME;
 451         ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
 452         ndi->retrans = RETRANS_TIMER;
 453 }
 454
 455 int
 456 nd6_ifattach(struct ifnet *ifp)
 457 {
 458         size_t newlim;
 459         struct nd_ifinfo *ndi;
 460
 461         /*
 462          * We have some arrays that should be indexed by if_index.
 463          * since if_index will grow dynamically, they should grow too.
 464          */
 465         lck_rw_lock_shared(nd_if_rwlock);
 466         newlim = nd_ifinfo_indexlim;
 467         if (nd_ifinfo == NULL || if_index >= newlim) {
 468                 if (!lck_rw_lock_shared_to_exclusive(nd_if_rwlock))
 469                         lck_rw_lock_exclusive(nd_if_rwlock);
 470                 lck_rw_assert(nd_if_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
 471
 472                 newlim = nd_ifinfo_indexlim;
 473                 if (nd_ifinfo == NULL || if_index >= newlim) {
 474                         size_t n;
 475                         caddr_t q;
 476
 477                         while (if_index >= newlim)
 478                                 newlim <<= 1;
 479
 480                         /* grow nd_ifinfo */
 481                         n = newlim * sizeof (struct nd_ifinfo);
 482                         q = (caddr_t)_MALLOC(n, M_IP6NDP, M_WAITOK);
 483                         if (q == NULL) {
 484                                 lck_rw_done(nd_if_rwlock);
 485                                 return (ENOBUFS);
 486                         }
 487                         bzero(q, n);
 488                         if (nd_ifinfo != NULL) {
 489                                 bcopy((caddr_t)nd_ifinfo, q, n/2);
 490                                 /*
 491                                  * We might want to pattern fill the old
 492                                  * array to catch use-after-free cases.
 493                                  */
 494                                 FREE((caddr_t)nd_ifinfo, M_IP6NDP);
 495                         }
 496                         nd_ifinfo = (struct nd_ifinfo *)(void *)q;
 497                         nd_ifinfo_indexlim = newlim;
 498                 }
 499         }
 500
 501         VERIFY(ifp != NULL);
 502         ndi = &nd_ifinfo[ifp->if_index];
 503         if (!ndi->initialized) {
 504                 lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr);
 505                 ndi->initialized = TRUE;
 506         }
 507
 508         lck_mtx_lock(&ndi->lock);
 509
 510         ndi->flags = ND6_IFF_PERFORMNUD;
 511         if (!(ifp->if_flags & IFF_MULTICAST))
 512                 ndi->flags |= ND6_IFF_IFDISABLED;
 513
 514         nd6_ifreset(ifp);
 515         lck_mtx_unlock(&ndi->lock);
 516
 517         lck_rw_done(nd_if_rwlock);
 518
 519         nd6_setmtu(ifp);
 520
 521         return (0);
 522 }
 523
 524 /*
 525  * Reset ND level link MTU. This function is called when the physical MTU
 526  * changes, which means we might have to adjust the ND level MTU.
 527  */
 528 void
 529 nd6_setmtu(struct ifnet *ifp)
 530 {
 531         struct nd_ifinfo *ndi;
 532         u_int32_t oldmaxmtu, maxmtu;
 533
 534         /*
 535          * Make sure IPv6 is enabled for the interface first,
 536          * because this can be called directly from SIOCSIFMTU for IPv4
 537          */
 538         lck_rw_lock_shared(nd_if_rwlock);
 539         if (ifp->if_index >= nd_ifinfo_indexlim ||
 540             !nd_ifinfo[ifp->if_index].initialized) {
 541                 lck_rw_done(nd_if_rwlock);
 542                 return; /* nd_ifinfo out of bound, or not yet initialized */
 543         }
 544
 545         ndi = &nd_ifinfo[ifp->if_index];
 546         VERIFY(ndi->initialized);
 547         lck_mtx_lock(&ndi->lock);
 548         oldmaxmtu = ndi->maxmtu;
 549
 550         /*
 551          * The ND level maxmtu is somewhat redundant to the interface MTU
 552          * and is an implementation artifact of KAME.  Instead of hard-
 553          * limiting the maxmtu based on the interface type here, we simply
 554          * take the if_mtu value since SIOCSIFMTU would have taken care of
 555          * the sanity checks related to the maximum MTU allowed for the
 556          * interface (a value that is known only by the interface layer),
 557          * by sending the request down via ifnet_ioctl().  The use of the
 558          * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
 559          * does further checking against if_mtu.
 560          */
 561         maxmtu = ndi->maxmtu = ifp->if_mtu;
 562
 563         /*
 564          * Decreasing the interface MTU under IPV6 minimum MTU may cause
 565          * undesirable situation.  We thus notify the operator of the change
 566          * explicitly.  The check for oldmaxmtu is necessary to restrict the
 567          * log to the case of changing the MTU, not initializing it.
 568          */
 569         if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
 570                 log(LOG_NOTICE, "nd6_setmtu: "
 571                     "new link MTU on %s (%u) is too small for IPv6\n",
 572                     if_name(ifp), (uint32_t)ndi->maxmtu);
 573         }
 574         ndi->linkmtu = ifp->if_mtu;
 575         lck_mtx_unlock(&ndi->lock);
 576         lck_rw_done(nd_if_rwlock);
 577
 578         /* also adjust in6_maxmtu if necessary. */
 579         if (maxmtu > in6_maxmtu)
 580                 in6_setmaxmtu();
 581 }
 582
 583 void
 584 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
 585 {
 586         bzero(ndopts, sizeof (*ndopts));
 587         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
 588         ndopts->nd_opts_last =
 589             (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
 590
 591         if (icmp6len == 0) {
 592                 ndopts->nd_opts_done = 1;
 593                 ndopts->nd_opts_search = NULL;
 594         }
 595 }
 596
 597 /*
 598  * Take one ND option.
 599  */
 600 struct nd_opt_hdr *
 601 nd6_option(union nd_opts *ndopts)
 602 {
 603         struct nd_opt_hdr *nd_opt;
 604         int olen;
 605
 606         if (!ndopts)
 607                 panic("ndopts == NULL in nd6_option\n");
 608         if (!ndopts->nd_opts_last)
 609                 panic("uninitialized ndopts in nd6_option\n");
 610         if (!ndopts->nd_opts_search)
 611                 return (NULL);
 612         if (ndopts->nd_opts_done)
 613                 return (NULL);
 614
 615         nd_opt = ndopts->nd_opts_search;
 616
 617         /* make sure nd_opt_len is inside the buffer */
 618         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
 619                 bzero(ndopts, sizeof (*ndopts));
 620                 return (NULL);
 621         }
 622
 623         olen = nd_opt->nd_opt_len << 3;
 624         if (olen == 0) {
 625                 /*
 626                  * Message validation requires that all included
 627                  * options have a length that is greater than zero.
 628                  */
 629                 bzero(ndopts, sizeof (*ndopts));
 630                 return (NULL);
 631         }
 632
 633         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
 634         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
 635                 /* option overruns the end of buffer, invalid */
 636                 bzero(ndopts, sizeof (*ndopts));
 637                 return (NULL);
 638         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
 639                 /* reached the end of options chain */
 640                 ndopts->nd_opts_done = 1;
 641                 ndopts->nd_opts_search = NULL;
 642         }
 643         return (nd_opt);
 644 }
 645
 646 /*
 647  * Parse multiple ND options.
 648  * This function is much easier to use, for ND routines that do not need
 649  * multiple options of the same type.
 650  */
 651 int
 652 nd6_options(union nd_opts *ndopts)
 653 {
 654         struct nd_opt_hdr *nd_opt;
 655         int i = 0;
 656
 657         if (ndopts == NULL)
 658                 panic("ndopts == NULL in nd6_options");
 659         if (ndopts->nd_opts_last == NULL)
 660                 panic("uninitialized ndopts in nd6_options");
 661         if (ndopts->nd_opts_search == NULL)
 662                 return (0);
 663
 664         while (1) {
 665                 nd_opt = nd6_option(ndopts);
 666                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
 667                         /*
 668                          * Message validation requires that all included
 669                          * options have a length that is greater than zero.
 670                          */
 671                         icmp6stat.icp6s_nd_badopt++;
 672                         bzero(ndopts, sizeof (*ndopts));
 673                         return (-1);
 674                 }
 675
 676                 if (nd_opt == NULL)
 677                         goto skip1;
 678
 679                 switch (nd_opt->nd_opt_type) {
 680                 case ND_OPT_SOURCE_LINKADDR:
 681                 case ND_OPT_TARGET_LINKADDR:
 682                 case ND_OPT_MTU:
 683                 case ND_OPT_REDIRECTED_HEADER:
 684                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 685                                 nd6log((LOG_INFO,
 686                                     "duplicated ND6 option found (type=%d)\n",
 687                                     nd_opt->nd_opt_type));
 688                                 /* XXX bark? */
 689                         } else {
 690                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 691                                     nd_opt;
 692                         }
 693                         break;
 694                 case ND_OPT_PREFIX_INFORMATION:
 695                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
 696                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 697                                     nd_opt;
 698                         }
 699                         ndopts->nd_opts_pi_end =
 700                             (struct nd_opt_prefix_info *)nd_opt;
 701                         break;
 702                 case ND_OPT_RDNSS:
 703                         /* ignore */
 704                         break;
 705                 default:
 706                         /*
 707                          * Unknown options must be silently ignored,
 708                          * to accomodate future extension to the protocol.
 709                          */
 710                         nd6log((LOG_DEBUG,
 711                             "nd6_options: unsupported option %d - "
 712                             "option ignored\n", nd_opt->nd_opt_type));
 713                 }
 714
 715 skip1:
 716                 i++;
 717                 if (i > nd6_maxndopt) {
 718                         icmp6stat.icp6s_nd_toomanyopt++;
 719                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
 720                         break;
 721                 }
 722
 723                 if (ndopts->nd_opts_done)
 724                         break;
 725         }
 726
 727         return (0);
 728 }
 729
  /*
   * Argument block passed to nd6_service(): one input flag plus counters
   * that the service routine updates while walking the ND entries.
   */
  struct nd6svc_arg {
          int draining;           /* non-zero: run as "drainer", else "timer" */
          uint32_t killed;        /* presumably entries purged -- TODO confirm */
          uint32_t aging_lazy;    /* presumably lazily-aged entries -- TODO confirm */
          uint32_t aging;         /* presumably entries being aged -- TODO confirm */
          uint32_t sticky;        /* entries with no expiry or RTF_STATIC route */
          uint32_t found;         /* entries examined (not already skipped) */
  };
 738
 739 /*
 740  * ND6 service routine to expire default route list and prefix list
 741  */
 742 static void
 743 nd6_service(void *arg)
 744 {
 745         struct nd6svc_arg *ap = arg;
 746         struct llinfo_nd6 *ln;
 747         struct nd_defrouter *dr;
 748         struct nd_prefix *pr;
 749         struct ifnet *ifp = NULL;
 750         struct in6_ifaddr *ia6, *nia6;
 751         uint64_t timenow;
 752
 753         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 754         /*
 755          * Since we may drop rnh_lock and nd6_mutex below, we want
 756          * to run this entire operation single threaded.
 757          */
 758         while (nd6_service_busy) {
 759                 nd6log2((LOG_DEBUG, "%s: %s is blocked by %d waiters\n",
 760                     __func__, ap->draining ? "drainer" : "timer",
 761                     nd6_service_waiters));
 762                 nd6_service_waiters++;
 763                 (void) msleep(nd6_service_wc, rnh_lock, (PZERO-1),
 764                     __func__, NULL);
 765                 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 766         }
 767
 768         /* We are busy now; tell everyone else to go away */
 769         nd6_service_busy = TRUE;
 770
 771         net_update_uptime();
 772         timenow = net_uptime();
 773 again:
 774         /*
 775          * The global list llinfo_nd6 is modified by nd6_request() and is
 776          * therefore protected by rnh_lock.  For obvious reasons, we cannot
 777          * hold rnh_lock across calls that might lead to code paths which
 778          * attempt to acquire rnh_lock, else we deadlock.  Hence for such
 779          * cases we drop rt_lock and rnh_lock, make the calls, and repeat the
 780          * loop.  To ensure that we don't process the same entry more than
 781          * once in a single timeout, we mark the "already-seen" entries with
 782          * ND6_LNF_TIMER_SKIP flag.  At the end of the loop, we do a second
 783          * pass thru the entries and clear the flag so they can be processed
 784          * during the next timeout.
 785          */
 786         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 787
 788         ln = llinfo_nd6.ln_next;
 789         while (ln != NULL && ln != &llinfo_nd6) {
 790                 struct rtentry *rt;
 791                 struct sockaddr_in6 *dst;
 792                 struct llinfo_nd6 *next;
 793                 u_int32_t retrans, flags;
 794
 795                 /* ln_next/prev/rt is protected by rnh_lock */
 796                 next = ln->ln_next;
 797                 rt = ln->ln_rt;
 798                 RT_LOCK(rt);
 799
 800                 /* We've seen this already; skip it */
 801                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
 802                         RT_UNLOCK(rt);
 803                         ln = next;
 804                         continue;
 805                 }
 806                 ap->found++;
 807
 808                 /* rt->rt_ifp should never be NULL */
 809                 if ((ifp = rt->rt_ifp) == NULL) {
 810                         panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
 811                             ln, rt);
 812                         /* NOTREACHED */
 813                 }
 814
 815                 /* rt_llinfo must always be equal to ln */
 816                 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
 817                         panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
 818                             __func__, rt->rt_llinfo, ln);
 819                         /* NOTREACHED */
 820                 }
 821
 822                 /* rt_key should never be NULL */
 823                 dst = SIN6(rt_key(rt));
 824                 if (dst == NULL) {
 825                         panic("%s: rt(%p) key is NULL ln(%p)", __func__,
 826                             rt, ln);
 827                         /* NOTREACHED */
 828                 }
 829
 830                 /* Set the flag in case we jump to "again" */
 831                 ln->ln_flags |= ND6_LNF_TIMER_SKIP;
 832
 833                 if (ln->ln_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
 834                         ap->sticky++;
 835                 } else if (ap->draining && (rt->rt_refcnt == 0)) {
 836                         /*
 837                          * If we are draining, immediately purge non-static
 838                          * entries without outstanding route refcnt.
 839                          */
 840                         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 841                                 ln->ln_state = ND6_LLINFO_STALE;
 842                         else
 843                                 ln->ln_state = ND6_LLINFO_PURGE;
 844                         ln_setexpire(ln, timenow);
 845                 }
 846
 847                 /*
 848                  * If the entry has not expired, skip it.  Take note on the
 849                  * state, as entries that are in the STALE state are simply
 850                  * waiting to be garbage collected, in which case we can
 851                  * relax the callout scheduling (use nd6_prune_lazy).
 852                  */
 853                 if (ln->ln_expire > timenow) {
 854                         switch (ln->ln_state) {
 855                         case ND6_LLINFO_STALE:
 856                                 ap->aging_lazy++;
 857                                 break;
 858                         default:
 859                                 ap->aging++;
 860                                 break;
 861                         }
 862                         RT_UNLOCK(rt);
 863                         ln = next;
 864                         continue;
 865                 }
 866
 867                 lck_rw_lock_shared(nd_if_rwlock);
 868                 if (ifp->if_index >= nd_ifinfo_indexlim) {
 869                         /*
 870                          * In the event the nd_ifinfo[] array is not in synch
 871                          * by now, we don't want to hold on to the llinfo entry
 872                          * forever; just purge it rather than have it consume
 873                          * resources.  That's better than transmitting out of
 874                          * the interface as the rest of the layers may not be
 875                          * ready as well.
 876                          *
 877                          * We can retire this logic once we get rid of the
 878                          * separate array and utilize a per-ifnet structure.
 879                          */
 880                         retrans = RETRANS_TIMER;
 881                         flags = ND6_IFF_PERFORMNUD;
 882                         if (ln->ln_expire != 0) {
 883                                 ln->ln_state = ND6_LLINFO_PURGE;
 884                                 log (LOG_ERR, "%s: purging rt(0x%llx) "
 885                                     "ln(0x%llx) dst %s, if_index %d >= %d\n",
 886                                     __func__, (uint64_t)VM_KERNEL_ADDRPERM(rt),
 887                                     (uint64_t)VM_KERNEL_ADDRPERM(ln),
 888                                     ip6_sprintf(&dst->sin6_addr), ifp->if_index,
 889                                     nd_ifinfo_indexlim);
 890                         }
 891                 } else {
 892                         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 893                         VERIFY(ndi->initialized);
 894                         retrans = ndi->retrans;
 895                         flags = ndi->flags;
 896                 }
 897                 lck_rw_done(nd_if_rwlock);
 898
 899                 RT_LOCK_ASSERT_HELD(rt);
 900
 901                 switch (ln->ln_state) {
 902                 case ND6_LLINFO_INCOMPLETE:
 903                         if (ln->ln_asked < nd6_mmaxtries) {
 904                                 struct ifnet *exclifp = ln->ln_exclifp;
 905                                 ln->ln_asked++;
 906                                 ln_setexpire(ln, timenow + retrans / 1000);
 907                                 RT_ADDREF_LOCKED(rt);
 908                                 RT_UNLOCK(rt);
 909                                 lck_mtx_unlock(rnh_lock);
 910                                 if (ip6_forwarding) {
 911                                         nd6_prproxy_ns_output(ifp, exclifp,
 912                                             NULL, &dst->sin6_addr, ln);
 913                                 } else {
 914                                         nd6_ns_output(ifp, NULL,
 915                                             &dst->sin6_addr, ln, 0);
 916                                 }
 917                                 RT_REMREF(rt);
 918                                 ap->aging++;
 919                                 lck_mtx_lock(rnh_lock);
 920                         } else {
 921                                 struct mbuf *m = ln->ln_hold;
 922                                 ln->ln_hold = NULL;
 923                                 if (m != NULL) {
 924                                         /*
 925                                          * Fake rcvif to make ICMP error
 926                                          * more helpful in diagnosing
 927                                          * for the receiver.
 928                                          * XXX: should we consider
 929                                          * older rcvif?
 930                                          */
 931                                         m->m_pkthdr.rcvif = ifp;
 932                                         RT_ADDREF_LOCKED(rt);
 933                                         RT_UNLOCK(rt);
 934                                         lck_mtx_unlock(rnh_lock);
 935                                         icmp6_error(m, ICMP6_DST_UNREACH,
 936                                             ICMP6_DST_UNREACH_ADDR, 0);
 937                                 } else {
 938                                         RT_ADDREF_LOCKED(rt);
 939                                         RT_UNLOCK(rt);
 940                                         lck_mtx_unlock(rnh_lock);
 941                                 }
 942                                 nd6_free(rt);
 943                                 ap->killed++;
 944                                 lck_mtx_lock(rnh_lock);
 945                                 rtfree_locked(rt);
 946                         }
 947                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 948                         goto again;
 949
 950                 case ND6_LLINFO_REACHABLE:
 951                         if (ln->ln_expire != 0) {
 952                                 ln->ln_state = ND6_LLINFO_STALE;
 953                                 ln_setexpire(ln, timenow + nd6_gctimer);
 954                                 ap->aging_lazy++;
 955                         }
 956                         RT_UNLOCK(rt);
 957                         break;
 958
 959                 case ND6_LLINFO_STALE:
 960                 case ND6_LLINFO_PURGE:
 961                         /* Garbage Collection(RFC 4861 5.3) */
 962                         if (ln->ln_expire != 0) {
 963                                 RT_ADDREF_LOCKED(rt);
 964                                 RT_UNLOCK(rt);
 965                                 lck_mtx_unlock(rnh_lock);
 966                                 nd6_free(rt);
 967                                 ap->killed++;
 968                                 lck_mtx_lock(rnh_lock);
 969                                 rtfree_locked(rt);
 970                                 goto again;
 971                         } else {
 972                                 RT_UNLOCK(rt);
 973                         }
 974                         break;
 975
 976                 case ND6_LLINFO_DELAY:
 977                         if ((flags & ND6_IFF_PERFORMNUD) != 0) {
 978                                 /* We need NUD */
 979                                 ln->ln_asked = 1;
 980                                 ln->ln_state = ND6_LLINFO_PROBE;
 981                                 ln_setexpire(ln, timenow + retrans / 1000);
 982                                 RT_ADDREF_LOCKED(rt);
 983                                 RT_UNLOCK(rt);
 984                                 lck_mtx_unlock(rnh_lock);
 985                                 nd6_ns_output(ifp, &dst->sin6_addr,
 986                                     &dst->sin6_addr, ln, 0);
 987                                 RT_REMREF(rt);
 988                                 ap->aging++;
 989                                 lck_mtx_lock(rnh_lock);
 990                                 goto again;
 991                         }
 992                         ln->ln_state = ND6_LLINFO_STALE; /* XXX */
 993                         ln_setexpire(ln, timenow + nd6_gctimer);
 994                         RT_UNLOCK(rt);
 995                         ap->aging_lazy++;
 996                         break;
 997
 998                 case ND6_LLINFO_PROBE:
 999                         if (ln->ln_asked < nd6_umaxtries) {
1000                                 ln->ln_asked++;
1001                                 ln_setexpire(ln, timenow + retrans / 1000);
1002                                 RT_ADDREF_LOCKED(rt);
1003                                 RT_UNLOCK(rt);
1004                                 lck_mtx_unlock(rnh_lock);
1005                                 nd6_ns_output(ifp, &dst->sin6_addr,
1006                                     &dst->sin6_addr, ln, 0);
1007                                 RT_REMREF(rt);
1008                                 ap->aging++;
1009                                 lck_mtx_lock(rnh_lock);
1010                         } else {
1011                                 RT_ADDREF_LOCKED(rt);
1012                                 RT_UNLOCK(rt);
1013                                 lck_mtx_unlock(rnh_lock);
1014                                 nd6_free(rt);
1015                                 ap->killed++;
1016                                 lck_mtx_lock(rnh_lock);
1017                                 rtfree_locked(rt);
1018                         }
1019                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1020                         goto again;
1021
1022                 default:
1023                         RT_UNLOCK(rt);
1024                         break;
1025                 }
1026                 ln = next;
1027         }
1028         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1029
1030         /* Now clear the flag from all entries */
1031         ln = llinfo_nd6.ln_next;
1032         while (ln != NULL && ln != &llinfo_nd6) {
1033                 struct rtentry *rt = ln->ln_rt;
1034                 struct llinfo_nd6 *next = ln->ln_next;
1035
1036                 RT_LOCK_SPIN(rt);
1037                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP)
1038                         ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
1039                 RT_UNLOCK(rt);
1040                 ln = next;
1041         }
1042         lck_mtx_unlock(rnh_lock);
1043
1044         /* expire default router list */
1045         lck_mtx_lock(nd6_mutex);
1046         dr = TAILQ_FIRST(&nd_defrouter);
1047         while (dr) {
1048                 ap->found++;
1049                 if (dr->expire != 0 && dr->expire < timenow) {
1050                         struct nd_defrouter *t;
1051                         t = TAILQ_NEXT(dr, dr_entry);
1052                         defrtrlist_del(dr);
1053                         dr = t;
1054                         ap->killed++;
1055                 } else {
1056                         if (dr->expire == 0 || (dr->stateflags & NDDRF_STATIC))
1057                                 ap->sticky++;
1058                         else
1059                                 ap->aging_lazy++;
1060                         dr = TAILQ_NEXT(dr, dr_entry);
1061                 }
1062         }
1063         lck_mtx_unlock(nd6_mutex);
1064
1065         /*
1066          * expire interface addresses.
1067          * in the past the loop was inside prefix expiry processing.
1068          * However, from a stricter spec-conformance standpoint, we should
1069          * rather separate address lifetimes and prefix lifetimes.
1070          */
1071 addrloop:
1072         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1073         for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
1074                 ap->found++;
1075                 nia6 = ia6->ia_next;
1076                 IFA_LOCK(&ia6->ia_ifa);
1077                 /*
1078                  * Extra reference for ourselves; it's no-op if
1079                  * we don't have to regenerate temporary address,
1080                  * otherwise it protects the address from going
1081                  * away since we drop in6_ifaddr_rwlock below.
1082                  */
1083                 IFA_ADDREF_LOCKED(&ia6->ia_ifa);
1084                 /* check address lifetime */
1085                 if (IFA6_IS_INVALID(ia6, timenow)) {
1086                         /*
1087                          * If the expiring address is temporary, try
1088                          * regenerating a new one.  This would be useful when
1089                          * we suspended a laptop PC, then turned it on after a
1090                          * period that could invalidate all temporary
1091                          * addresses.  Although we may have to restart the
1092                          * loop (see below), it must be after purging the
1093                          * address.  Otherwise, we'd see an infinite loop of
1094                          * regeneration.
1095                          */
1096                         if (ip6_use_tempaddr &&
1097                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1098                                 /*
1099                                  * NOTE: We have to drop the lock here
1100                                  * because regen_tmpaddr() eventually calls
1101                                  * in6_update_ifa(), which must take the lock
1102                                  * and would otherwise cause a hang.  This is
1103                                  * safe because the goto addrloop leads to a
1104                                  * re-evaluation of the in6_ifaddrs list
1105                                  */
1106                                 IFA_UNLOCK(&ia6->ia_ifa);
1107                                 lck_rw_done(&in6_ifaddr_rwlock);
1108                                 (void) regen_tmpaddr(ia6);
1109                         } else {
1110                                 IFA_UNLOCK(&ia6->ia_ifa);
1111                                 lck_rw_done(&in6_ifaddr_rwlock);
1112                         }
1113
1114                         /*
1115                          * Purging the address would have caused
1116                          * in6_ifaddr_rwlock to be dropped and reacquired;
1117                          * therefore search again from the beginning
1118                          * of in6_ifaddrs list.
1119                          */
1120                         in6_purgeaddr(&ia6->ia_ifa);
1121                         ap->killed++;
1122
1123                         /* Release extra reference taken above */
1124                         IFA_REMREF(&ia6->ia_ifa);
1125                         goto addrloop;
1126                 }
1127                 /*
1128                  * The lazy timer runs every nd6_prune_lazy seconds with at
1129                  * most "2 * nd6_prune_lazy - 1" leeway. We consider the worst
1130                  * case here and make sure we schedule the regular timer if an
1131                  * interface address is about to expire.
1132                  */
1133                 if (IFA6_IS_INVALID(ia6, timenow + 3 * nd6_prune_lazy))
1134                         ap->aging++;
1135                 else
1136                         ap->aging_lazy++;
1137                 IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
1138                 if (IFA6_IS_DEPRECATED(ia6, timenow)) {
1139                         int oldflags = ia6->ia6_flags;
1140
1141                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
1142
1143                         /*
1144                          * If a temporary address has just become deprecated,
1145                          * regenerate a new one if possible.
1146                          */
1147                         if (ip6_use_tempaddr &&
1148                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1149                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
1150
1151                                 /* see NOTE above */
1152                                 IFA_UNLOCK(&ia6->ia_ifa);
1153                                 lck_rw_done(&in6_ifaddr_rwlock);
1154                                 if (regen_tmpaddr(ia6) == 0) {
1155                                         /*
1156                                          * A new temporary address is
1157                                          * generated.
1158                                          * XXX: this means the address chain
1159                                          * has changed while we are still in
1160                                          * the loop.  Although the change
1161                                          * would not cause disaster (because
1162                                          * it's not a deletion, but an
1163                                          * addition,) we'd rather restart the
1164                                          * loop just for safety.  Or does this
1165                                          * significantly reduce performance??
1166                                          */
1167                                         /* Release extra reference */
1168                                         IFA_REMREF(&ia6->ia_ifa);
1169                                         goto addrloop;
1170                                 }
1171                                 lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1172                         } else {
1173                                 IFA_UNLOCK(&ia6->ia_ifa);
1174                         }
1175                 } else {
1176                         /*
1177                          * A new RA might have made a deprecated address
1178                          * preferred.
1179                          */
1180                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1181                         IFA_UNLOCK(&ia6->ia_ifa);
1182                 }
1183                 lck_rw_assert(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1184                 /* Release extra reference taken above */
1185                 IFA_REMREF(&ia6->ia_ifa);
1186         }
1187         lck_rw_done(&in6_ifaddr_rwlock);
1188
1189         lck_mtx_lock(nd6_mutex);
1190         /* expire prefix list */
1191         pr = nd_prefix.lh_first;
1192         while (pr != NULL) {
1193                 ap->found++;
1194                 /*
1195                  * check prefix lifetime.
1196                  * since pltime is just for autoconf, pltime processing for
1197                  * prefix is not necessary.
1198                  */
1199                 NDPR_LOCK(pr);
1200                 if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE) {
1201                         NDPR_UNLOCK(pr);
1202                         pr = pr->ndpr_next;
1203                         continue;
1204                 }
1205                 if (pr->ndpr_expire != 0 && pr->ndpr_expire < timenow) {
1206                         /*
1207                          * address expiration and prefix expiration are
1208                          * separate.  NEVER perform in6_purgeaddr here.
1209                          */
1210                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1211                         NDPR_ADDREF_LOCKED(pr);
1212                         prelist_remove(pr);
1213                         NDPR_UNLOCK(pr);
1214                         pfxlist_onlink_check();
1215                         NDPR_REMREF(pr);
1216                         pr = nd_prefix.lh_first;
1217                         ap->killed++;
1218                 } else {
1219                         if (pr->ndpr_expire == 0 ||
1220                             (pr->ndpr_stateflags & NDPRF_STATIC))
1221                                 ap->sticky++;
1222                         else
1223                                 ap->aging_lazy++;
1224                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1225                         NDPR_UNLOCK(pr);
1226                         pr = pr->ndpr_next;
1227                 }
1228         }
1229         LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1230                 NDPR_LOCK(pr);
1231                 pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE;
1232                 NDPR_UNLOCK(pr);
1233         }
1234         lck_mtx_unlock(nd6_mutex);
1235
1236         lck_mtx_lock(rnh_lock);
1237         /* We're done; let others enter */
1238         nd6_service_busy = FALSE;
1239         if (nd6_service_waiters > 0) {
1240                 nd6_service_waiters = 0;
1241                 wakeup(nd6_service_wc);
1242         }
1243 }
1244
1245 void
1246 nd6_drain(void *arg)
1247 {
1248 #pragma unused(arg)
1249         struct nd6svc_arg sarg;
1250
1251         nd6log2((LOG_DEBUG, "%s: draining ND6 entries\n", __func__));
1252
1253         lck_mtx_lock(rnh_lock);
1254         bzero(&sarg, sizeof (sarg));
1255         sarg.draining = 1;
1256         nd6_service(&sarg);
1257         nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
1258             "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1259             sarg.aging, sarg.sticky, sarg.killed));
1260         lck_mtx_unlock(rnh_lock);
1261 }
1262
1263 /*
1264  * We use the ``arg'' variable to decide whether or not the timer we're
1265  * running is the fast timer. We do this to reset the nd6_fast_timer_on
1266  * variable so that later we don't end up ignoring a ``fast timer''
1267  * request if the 5 second timer is running (see nd6_sched_timeout).
1268  */
1269 static void
1270 nd6_timeout(void *arg)
1271 {
1272         struct nd6svc_arg sarg;
1273
1274         lck_mtx_lock(rnh_lock);
1275         bzero(&sarg, sizeof (sarg));
1276         nd6_service(&sarg);
1277         nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
1278             "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1279             sarg.aging, sarg.sticky, sarg.killed));
1280         /* re-arm the timer if there's work to do */
1281         nd6_timeout_run--;
1282         VERIFY(nd6_timeout_run >= 0 && nd6_timeout_run < 2);
1283         if (arg == &nd6_fast_timer_on)
1284                 nd6_fast_timer_on = FALSE;
1285         if (sarg.aging_lazy > 0 || sarg.aging > 0 || nd6_sched_timeout_want) {
1286                 struct timeval atv, ltv, *leeway;
1287                 int lazy = nd6_prune_lazy;
1288
1289                 if (sarg.aging > 0 || lazy < 1) {
1290                         atv.tv_usec = 0;
1291                         atv.tv_sec = nd6_prune;
1292                         leeway = NULL;
1293                 } else {
1294                         VERIFY(lazy >= 1);
1295                         atv.tv_usec = 0;
1296                         atv.tv_sec = MAX(nd6_prune, lazy);
1297                         ltv.tv_usec = 0;
1298                         ltv.tv_sec = MAX(random() % lazy, 1) * 2;
1299                         leeway = &ltv;
1300                 }
1301                 nd6_sched_timeout(&atv, leeway);
1302         } else if (nd6_debug) {
1303                 nd6log2((LOG_DEBUG, "%s: not rescheduling timer\n", __func__));
1304         }
1305         lck_mtx_unlock(rnh_lock);
1306 }
1307
1308 void
1309 nd6_sched_timeout(struct timeval *atv, struct timeval *ltv)
1310 {
1311         struct timeval tv;
1312
1313         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1314         if (atv == NULL) {
1315                 tv.tv_usec = 0;
1316                 tv.tv_sec = MAX(nd6_prune, 1);
1317                 atv = &tv;
1318                 ltv = NULL;     /* ignore leeway */
1319         }
1320         /* see comments on top of this file */
1321         if (nd6_timeout_run == 0) {
1322                 if (ltv == NULL) {
1323                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1324                             "T+%llus.%lluu (demand %d)\n", __func__,
1325                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1326                             nd6_sched_timeout_want));
1327                         nd6_fast_timer_on = TRUE;
1328                         timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1329                 } else {
1330                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1331                             "T+%llus.%lluu with %llus.%lluu leeway "
1332                             "(demand %d)\n", __func__, (uint64_t)atv->tv_sec,
1333                             (uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec,
1334                             (uint64_t)ltv->tv_usec, nd6_sched_timeout_want));
1335                         nd6_fast_timer_on = FALSE;
1336                         timeout_with_leeway(nd6_timeout, NULL,
1337                             tvtohz(atv), tvtohz(ltv));
1338                 }
1339                 nd6_timeout_run++;
1340                 nd6_sched_timeout_want = 0;
1341         } else if (nd6_timeout_run == 1 && ltv == NULL &&
1342             nd6_fast_timer_on == FALSE) {
1343                 nd6log2((LOG_DEBUG, "%s: fast timer scheduled in "
1344                     "T+%llus.%lluu (demand %d)\n", __func__,
1345                     (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1346                     nd6_sched_timeout_want));
1347                 nd6_fast_timer_on = TRUE;
1348                 nd6_sched_timeout_want = 0;
1349                 nd6_timeout_run++;
1350                 timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1351         } else {
1352                 if (ltv == NULL) {
1353                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1354                             "timers %d, fast_timer %d, T+%llus.%lluu\n",
1355                             __func__, nd6_timeout_run, nd6_fast_timer_on,
1356                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec));
1357                 } else {
1358                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1359                             "timers %d, fast_timer %d, T+%llus.%lluu "
1360                             "with %llus.%lluu leeway\n", __func__,
1361                             nd6_timeout_run, nd6_fast_timer_on,
1362                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1363                             (uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec));
1364                 }
1365         }
1366 }
1367
1368 /*
1369  * ND6 router advertisement kernel notification
1370  */
1371 void
1372 nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
1373     u_int32_t list_length, u_int32_t mtu, char *dl_addr, u_int32_t dl_addr_len)
1374 {
1375         struct kev_msg ev_msg;
1376         struct kev_nd6_ra_data nd6_ra_msg_data;
1377         struct nd_prefix_list *itr = prefix_list;
1378
1379         bzero(&ev_msg, sizeof (struct kev_msg));
1380         ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1381         ev_msg.kev_class        = KEV_NETWORK_CLASS;
1382         ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
1383         ev_msg.event_code       = code;
1384
1385         bzero(&nd6_ra_msg_data, sizeof (nd6_ra_msg_data));
1386         nd6_ra_msg_data.lladdrlen = (dl_addr_len <= ND6_ROUTER_LL_SIZE) ?
1387             dl_addr_len : ND6_ROUTER_LL_SIZE;
1388         bcopy(dl_addr, &nd6_ra_msg_data.lladdr, nd6_ra_msg_data.lladdrlen);
1389
1390         if (mtu > 0 && mtu >= IPV6_MMTU) {
1391                 nd6_ra_msg_data.mtu = mtu;
1392                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_MTU;
1393         }
1394
1395         if (list_length > 0 && prefix_list != NULL) {
1396                 nd6_ra_msg_data.list_length = list_length;
1397                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_PREFIX;
1398         }
1399
1400         while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
1401                 bcopy(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
1402                     sizeof (nd6_ra_msg_data.prefix.prefix));
1403                 nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
1404                 nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
1405                 nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
1406                 nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
1407                 nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
1408                 nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr);
1409                 nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
1410                 nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
1411                 nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;
1412
1413                 /* send the message up */
1414                 ev_msg.dv[0].data_ptr           = &nd6_ra_msg_data;
1415                 ev_msg.dv[0].data_length        = sizeof (nd6_ra_msg_data);
1416                 ev_msg.dv[1].data_length        = 0;
1417                 kev_post_msg(&ev_msg);
1418
1419                 /* clean up for the next prefix */
1420                 bzero(&nd6_ra_msg_data.prefix, sizeof (nd6_ra_msg_data.prefix));
1421                 itr = itr->next;
1422                 nd6_ra_msg_data.list_index++;
1423         }
1424 }
1425
1426 /*
1427  * Regenerate deprecated/invalidated temporary address
1428  */
1429 static int
1430 regen_tmpaddr(struct in6_ifaddr *ia6)
1431 {
1432         struct ifaddr *ifa;
1433         struct ifnet *ifp;
1434         struct in6_ifaddr *public_ifa6 = NULL;
1435         uint64_t timenow = net_uptime();
1436
1437         ifp = ia6->ia_ifa.ifa_ifp;
1438         ifnet_lock_shared(ifp);
1439         TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1440                 struct in6_ifaddr *it6;
1441
1442                 IFA_LOCK(ifa);
1443                 if (ifa->ifa_addr->sa_family != AF_INET6) {
1444                         IFA_UNLOCK(ifa);
1445                         continue;
1446                 }
1447                 it6 = (struct in6_ifaddr *)ifa;
1448
1449                 /* ignore no autoconf addresses. */
1450                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
1451                         IFA_UNLOCK(ifa);
1452                         continue;
1453                 }
1454                 /* ignore autoconf addresses with different prefixes. */
1455                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
1456                         IFA_UNLOCK(ifa);
1457                         continue;
1458                 }
1459                 /*
1460                  * Now we are looking at an autoconf address with the same
1461                  * prefix as ours.  If the address is temporary and is still
1462                  * preferred, do not create another one.  It would be rare, but
1463                  * could happen, for example, when we resume a laptop PC after
1464                  * a long period.
1465                  */
1466                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1467                     !IFA6_IS_DEPRECATED(it6, timenow)) {
1468                         IFA_UNLOCK(ifa);
1469                         if (public_ifa6 != NULL)
1470                                 IFA_REMREF(&public_ifa6->ia_ifa);
1471                         public_ifa6 = NULL;
1472                         break;
1473                 }
1474
1475                 /*
1476                  * This is a public autoconf address that has the same prefix
1477                  * as ours.  If it is preferred, keep it.  We can't break the
1478                  * loop here, because there may be a still-preferred temporary
1479                  * address with the prefix.
1480                  */
1481                 if (!IFA6_IS_DEPRECATED(it6, timenow)) {
1482                         IFA_ADDREF_LOCKED(ifa); /* for public_ifa6 */
1483                         IFA_UNLOCK(ifa);
1484                         if (public_ifa6 != NULL)
1485                                 IFA_REMREF(&public_ifa6->ia_ifa);
1486                         public_ifa6 = it6;
1487                 } else {
1488                         IFA_UNLOCK(ifa);
1489                 }
1490         }
1491         ifnet_lock_done(ifp);
1492
1493         if (public_ifa6 != NULL) {
1494                 int e;
1495
1496                 if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
1497                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1498                             " tmp addr,errno=%d\n", e);
1499                         IFA_REMREF(&public_ifa6->ia_ifa);
1500                         return (-1);
1501                 }
1502                 IFA_REMREF(&public_ifa6->ia_ifa);
1503                 return (0);
1504         }
1505
1506         return (-1);
1507 }
1508
1509 /*
1510  * Nuke neighbor cache/prefix/default router management table, right before
1511  * ifp goes away.
1512  */
1513 void
1514 nd6_purge(struct ifnet *ifp)
1515 {
1516         struct llinfo_nd6 *ln;
1517         struct nd_defrouter *dr, *ndr;
1518         struct nd_prefix *pr, *npr;
1519
1520         /* Nuke default router list entries toward ifp */
1521         lck_mtx_lock(nd6_mutex);
1522         if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
1523                 /*
1524                  * The first entry of the list may be stored in
1525                  * the routing table, so we'll delete it later.
1526                  */
1527                 for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = ndr) {
1528                         ndr = TAILQ_NEXT(dr, dr_entry);
1529                         if (dr->stateflags & NDDRF_INSTALLED)
1530                                 continue;
1531                         if (dr->ifp == ifp)
1532                                 defrtrlist_del(dr);
1533                 }
1534                 dr = TAILQ_FIRST(&nd_defrouter);
1535                 if (dr->ifp == ifp)
1536                         defrtrlist_del(dr);
1537         }
1538
1539         for (dr = TAILQ_FIRST(&nd_defrouter); dr; dr = ndr) {
1540                 ndr = TAILQ_NEXT(dr, dr_entry);
1541                 if (!(dr->stateflags & NDDRF_INSTALLED))
1542                         continue;
1543
1544                 if (dr->ifp == ifp)
1545                         defrtrlist_del(dr);
1546         }
1547
1548         /* Nuke prefix list entries toward ifp */
1549         for (pr = nd_prefix.lh_first; pr; pr = npr) {
1550                 npr = pr->ndpr_next;
1551                 NDPR_LOCK(pr);
1552                 if (pr->ndpr_ifp == ifp) {
1553                         /*
1554                          * Because if_detach() does *not* release prefixes
1555                          * while purging addresses the reference count will
1556                          * still be above zero. We therefore reset it to
1557                          * make sure that the prefix really gets purged.
1558                          */
1559                         pr->ndpr_addrcnt = 0;
1560
1561                         /*
1562                          * Previously, pr->ndpr_addr is removed as well,
1563                          * but I strongly believe we don't have to do it.
1564                          * nd6_purge() is only called from in6_ifdetach(),
1565                          * which removes all the associated interface addresses
1566                          * by itself.
1567                          * (jinmei@kame.net 20010129)
1568                          */
1569                         NDPR_ADDREF_LOCKED(pr);
1570                         prelist_remove(pr);
1571                         NDPR_UNLOCK(pr);
1572                         pfxlist_onlink_check();
1573                         NDPR_REMREF(pr);
1574                 } else {
1575                         NDPR_UNLOCK(pr);
1576                 }
1577         }
1578         lck_mtx_unlock(nd6_mutex);
1579
1580         /* cancel default outgoing interface setting */
1581         if (nd6_defifindex == ifp->if_index) {
1582                 nd6_setdefaultiface(0);
1583         }
1584
1585         /*
1586          * Perform default router selection even when we are a router,
1587          * if Scoped Routing is enabled.
1588          */
1589         if (ip6_doscopedroute || !ip6_forwarding) {
1590                 lck_mtx_lock(nd6_mutex);
1591                 /* refresh default router list */
1592                 defrouter_select(ifp);
1593                 lck_mtx_unlock(nd6_mutex);
1594         }
1595
1596         /*
1597          * Nuke neighbor cache entries for the ifp.
1598          * Note that rt->rt_ifp may not be the same as ifp,
1599          * due to KAME goto ours hack.  See RTM_RESOLVE case in
1600          * nd6_rtrequest(), and ip6_input().
1601          */
1602 again:
1603         lck_mtx_lock(rnh_lock);
1604         ln = llinfo_nd6.ln_next;
1605         while (ln != NULL && ln != &llinfo_nd6) {
1606                 struct rtentry *rt;
1607                 struct llinfo_nd6 *nln;
1608
1609                 nln = ln->ln_next;
1610                 rt = ln->ln_rt;
1611                 RT_LOCK(rt);
1612                 if (rt->rt_gateway != NULL &&
1613                     rt->rt_gateway->sa_family == AF_LINK &&
1614                     SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
1615                         RT_ADDREF_LOCKED(rt);
1616                         RT_UNLOCK(rt);
1617                         lck_mtx_unlock(rnh_lock);
1618                         /*
1619                          * See comments on nd6_service() for reasons why
1620                          * this loop is repeated; we bite the costs of
1621                          * going thru the same llinfo_nd6 more than once
1622                          * here, since this purge happens during detach,
1623                          * and that unlike the timer case, it's possible
1624                          * there's more than one purges happening at the
1625                          * same time (thus a flag wouldn't buy anything).
1626                          */
1627                         nd6_free(rt);
1628                         RT_REMREF(rt);
1629                         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1630                         goto again;
1631                 } else {
1632                         RT_UNLOCK(rt);
1633                 }
1634                 ln = nln;
1635         }
1636         lck_mtx_unlock(rnh_lock);
1637 }
1638
1639 /*
1640  * Upon success, the returned route will be locked and the caller is
1641  * responsible for releasing the reference and doing RT_UNLOCK(rt).
1642  * This routine does not require rnh_lock to be held by the caller,
1643  * although it needs to be indicated of such a case in order to call
1644  * the correct variant of the relevant routing routines.
1645  */
1646 struct rtentry *
1647 nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp, int rt_locked)
1648 {
1649         struct rtentry *rt;
1650         struct sockaddr_in6 sin6;
1651         unsigned int ifscope;
1652
1653         bzero(&sin6, sizeof (sin6));
1654         sin6.sin6_len = sizeof (struct sockaddr_in6);
1655         sin6.sin6_family = AF_INET6;
1656         sin6.sin6_addr = *addr6;
1657
1658         ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
1659         if (rt_locked) {
1660                 lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
1661                 rt = rtalloc1_scoped_locked(SA(&sin6), create, 0, ifscope);
1662         } else {
1663                 rt = rtalloc1_scoped(SA(&sin6), create, 0, ifscope);
1664         }
1665
1666         if (rt != NULL) {
1667                 RT_LOCK(rt);
1668                 if ((rt->rt_flags & RTF_LLINFO) == 0) {
1669                         /*
1670                          * This is the case for the default route.
1671                          * If we want to create a neighbor cache for the
1672                          * address, we should free the route for the
1673                          * destination and allocate an interface route.
1674                          */
1675                         if (create) {
1676                                 RT_UNLOCK(rt);
1677                                 if (rt_locked)
1678                                         rtfree_locked(rt);
1679                                 else
1680                                         rtfree(rt);
1681                                 rt = NULL;
1682                         }
1683                 }
1684         }
1685         if (rt == NULL) {
1686                 if (create && ifp) {
1687                         struct ifaddr *ifa;
1688                         u_int32_t ifa_flags;
1689                         int e;
1690
1691                         /*
1692                          * If no route is available and create is set,
1693                          * we allocate a host route for the destination
1694                          * and treat it like an interface route.
1695                          * This hack is necessary for a neighbor which can't
1696                          * be covered by our own prefix.
1697                          */
1698                         ifa = ifaof_ifpforaddr(SA(&sin6), ifp);
1699                         if (ifa == NULL)
1700                                 return (NULL);
1701
1702                         /*
1703                          * Create a new route.  RTF_LLINFO is necessary
1704                          * to create a Neighbor Cache entry for the
1705                          * destination in nd6_rtrequest which will be
1706                          * called in rtrequest via ifa->ifa_rtrequest.
1707                          */
1708                         if (!rt_locked)
1709                                 lck_mtx_lock(rnh_lock);
1710                         IFA_LOCK_SPIN(ifa);
1711                         ifa_flags = ifa->ifa_flags;
1712                         IFA_UNLOCK(ifa);
1713                         if ((e = rtrequest_scoped_locked(RTM_ADD,
1714                             SA(&sin6), ifa->ifa_addr, SA(&all1_sa),
1715                             (ifa_flags | RTF_HOST | RTF_LLINFO) &
1716                             ~RTF_CLONING, &rt, ifscope)) != 0) {
1717                                 if (e != EEXIST)
1718                                         log(LOG_ERR, "%s: failed to add route "
1719                                             "for a neighbor(%s), errno=%d\n",
1720                                             __func__, ip6_sprintf(addr6), e);
1721                         }
1722                         if (!rt_locked)
1723                                 lck_mtx_unlock(rnh_lock);
1724                         IFA_REMREF(ifa);
1725                         if (rt == NULL)
1726                                 return (NULL);
1727
1728                         RT_LOCK(rt);
1729                         if (rt->rt_llinfo) {
1730                                 struct llinfo_nd6 *ln = rt->rt_llinfo;
1731                                 ln->ln_state = ND6_LLINFO_NOSTATE;
1732                         }
1733                 } else {
1734                         return (NULL);
1735                 }
1736         }
1737         RT_LOCK_ASSERT_HELD(rt);
1738         /*
1739          * Validation for the entry.
1740          * Note that the check for rt_llinfo is necessary because a cloned
1741          * route from a parent route that has the L flag (e.g. the default
1742          * route to a p2p interface) may have the flag, too, while the
1743          * destination is not actually a neighbor.
1744          * XXX: we can't use rt->rt_ifp to check for the interface, since
1745          *      it might be the loopback interface if the entry is for our
1746          *      own address on a non-loopback interface. Instead, we should
1747          *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
1748          *      interface.
1749          * Note also that ifa_ifp and ifp may differ when we connect two
1750          * interfaces to a same link, install a link prefix to an interface,
1751          * and try to install a neighbor cache on an interface that does not
1752          * have a route to the prefix.
1753          *
1754          * If the address is from a proxied prefix, the ifa_ifp and ifp might
1755          * not match, because nd6_na_input() could have modified the ifp
1756          * of the route to point to the interface where the NA arrived on,
1757          * hence the test for RTF_PROXY.
1758          */
1759         if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
1760             rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
1761             (ifp && rt->rt_ifa->ifa_ifp != ifp &&
1762             !(rt->rt_flags & RTF_PROXY))) {
1763                 RT_REMREF_LOCKED(rt);
1764                 RT_UNLOCK(rt);
1765                 if (create) {
1766                         log(LOG_DEBUG, "%s: failed to lookup %s "
1767                             "(if = %s)\n", __func__, ip6_sprintf(addr6),
1768                             ifp ? if_name(ifp) : "unspec");
1769                         /* xxx more logs... kazu */
1770                 }
1771                 return (NULL);
1772         }
1773         /*
1774          * Caller needs to release reference and call RT_UNLOCK(rt).
1775          */
1776         return (rt);
1777 }
1778
1779 /*
1780  * Test whether a given IPv6 address is a neighbor or not, ignoring
1781  * the actual neighbor cache.  The neighbor cache is ignored in order
1782  * to not reenter the routing code from within itself.
1783  */
1784 static int
1785 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
1786 {
1787         struct nd_prefix *pr;
1788         struct ifaddr *dstaddr;
1789
1790         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
1791
1792         /*
1793          * A link-local address is always a neighbor.
1794          * XXX: a link does not necessarily specify a single interface.
1795          */
1796         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
1797                 struct sockaddr_in6 sin6_copy;
1798                 u_int32_t zone;
1799
1800                 /*
1801                  * We need sin6_copy since sa6_recoverscope() may modify the
1802                  * content (XXX).
1803                  */
1804                 sin6_copy = *addr;
1805                 if (sa6_recoverscope(&sin6_copy, FALSE))
1806                         return (0); /* XXX: should be impossible */
1807                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
1808                         return (0);
1809                 if (sin6_copy.sin6_scope_id == zone)
1810                         return (1);
1811                 else
1812                         return (0);
1813         }
1814
1815         /*
1816          * If the address matches one of our addresses,
1817          * it should be a neighbor.
1818          * If the address matches one of our on-link prefixes, it should be a
1819          * neighbor.
1820          */
1821         for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
1822                 NDPR_LOCK(pr);
1823                 if (pr->ndpr_ifp != ifp) {
1824                         NDPR_UNLOCK(pr);
1825                         continue;
1826                 }
1827                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
1828                         NDPR_UNLOCK(pr);
1829                         continue;
1830                 }
1831                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
1832                     &addr->sin6_addr, &pr->ndpr_mask)) {
1833                         NDPR_UNLOCK(pr);
1834                         return (1);
1835                 }
1836                 NDPR_UNLOCK(pr);
1837         }
1838
1839         /*
1840          * If the address is assigned on the node of the other side of
1841          * a p2p interface, the address should be a neighbor.
1842          */
1843         dstaddr = ifa_ifwithdstaddr(SA(addr));
1844         if (dstaddr != NULL) {
1845                 if (dstaddr->ifa_ifp == ifp) {
1846                         IFA_REMREF(dstaddr);
1847                         return (1);
1848                 }
1849                 IFA_REMREF(dstaddr);
1850                 dstaddr = NULL;
1851         }
1852
1853         /*
1854          * If the default router list is empty, all addresses are regarded
1855          * as on-link, and thus, as a neighbor.
1856          * XXX: we restrict the condition to hosts, because routers usually do
1857          * not have the "default router list".
1858          * XXX: this block should eventually be removed (it is disabled when
1859          * Scoped Routing is in effect); treating all destinations as on-link
1860          * in the absence of a router is rather harmful.
1861          */
1862         if (!ip6_doscopedroute && !ip6_forwarding &&
1863             TAILQ_FIRST(&nd_defrouter) == NULL &&
1864             nd6_defifindex == ifp->if_index) {
1865                 return (1);
1866         }
1867
1868         return (0);
1869 }
1870
1871
1872 /*
1873  * Detect if a given IPv6 address identifies a neighbor on a given link.
1874  * XXX: should take care of the destination of a p2p link?
1875  */
1876 int
1877 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp,
1878     int rt_locked)
1879 {
1880         struct rtentry *rt;
1881
1882         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
1883         lck_mtx_lock(nd6_mutex);
1884         if (nd6_is_new_addr_neighbor(addr, ifp)) {
1885                 lck_mtx_unlock(nd6_mutex);
1886                 return (1);
1887         }
1888         lck_mtx_unlock(nd6_mutex);
1889
1890         /*
1891          * Even if the address matches none of our addresses, it might be
1892          * in the neighbor cache.
1893          */
1894         if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
1895                 RT_LOCK_ASSERT_HELD(rt);
1896                 RT_REMREF_LOCKED(rt);
1897                 RT_UNLOCK(rt);
1898                 return (1);
1899         }
1900
1901         return (0);
1902 }
1903
1904 /*
1905  * Free an nd6 llinfo entry.
1906  * Since the function would cause significant changes in the kernel, DO NOT
1907  * make it global, unless you have a strong reason for the change, and are sure
1908  * that the change is safe.
1909  */
1910 void
1911 nd6_free(struct rtentry *rt)
1912 {
1913         struct llinfo_nd6 *ln;
1914         struct in6_addr in6;
1915         struct nd_defrouter *dr;
1916
1917         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1918         RT_LOCK_ASSERT_NOTHELD(rt);
1919         lck_mtx_lock(nd6_mutex);
1920
1921         RT_LOCK(rt);
1922         RT_ADDREF_LOCKED(rt);   /* Extra ref */
1923         ln = rt->rt_llinfo;
1924         in6 = SIN6(rt_key(rt))->sin6_addr;
1925
1926         /*
1927          * Prevent another thread from modifying rt_key, rt_gateway
1928          * via rt_setgate() after the rt_lock is dropped by marking
1929          * the route as defunct.
1930          */
1931         rt->rt_flags |= RTF_CONDEMNED;
1932
1933         /*
1934          * We used to have pfctlinput(PRC_HOSTDEAD) here.  Even though it is
1935          * not harmful, it was not really necessary.  Perform default router
1936          * selection even when we are a router, if Scoped Routing is enabled.
1937          */
1938         if (ip6_doscopedroute || !ip6_forwarding) {
1939                 dr = defrouter_lookup(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp);
1940
1941                 if ((ln && ln->ln_router) || dr) {
1942                         /*
1943                          * rt6_flush must be called whether or not the neighbor
1944                          * is in the Default Router List.
1945                          * See a corresponding comment in nd6_na_input().
1946                          */
1947                         RT_UNLOCK(rt);
1948                         lck_mtx_unlock(nd6_mutex);
1949                         rt6_flush(&in6, rt->rt_ifp);
1950                         lck_mtx_lock(nd6_mutex);
1951                 } else {
1952                         RT_UNLOCK(rt);
1953                 }
1954
1955                 if (dr) {
1956                         NDDR_REMREF(dr);
1957                         /*
1958                          * Unreachablity of a router might affect the default
1959                          * router selection and on-link detection of advertised
1960                          * prefixes.
1961                          */
1962
1963                         /*
1964                          * Temporarily fake the state to choose a new default
1965                          * router and to perform on-link determination of
1966                          * prefixes correctly.
1967                          * Below the state will be set correctly,
1968                          * or the entry itself will be deleted.
1969                          */
1970                         RT_LOCK_SPIN(rt);
1971                         ln->ln_state = ND6_LLINFO_INCOMPLETE;
1972
1973                         /*
1974                          * Since defrouter_select() does not affect the
1975                          * on-link determination and MIP6 needs the check
1976                          * before the default router selection, we perform
1977                          * the check now.
1978                          */
1979                         RT_UNLOCK(rt);
1980                         pfxlist_onlink_check();
1981
1982                         /*
1983                          * refresh default router list
1984                          */
1985                         defrouter_select(rt->rt_ifp);
1986                 }
1987                 RT_LOCK_ASSERT_NOTHELD(rt);
1988         } else {
1989                 RT_UNLOCK(rt);
1990         }
1991
1992         lck_mtx_unlock(nd6_mutex);
1993         /*
1994          * Detach the route from the routing tree and the list of neighbor
1995          * caches, and disable the route entry not to be used in already
1996          * cached routes.
1997          */
1998         (void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
1999
2000         /* Extra ref held above; now free it */
2001         rtfree(rt);
2002 }
2003
2004 void
2005 nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
2006 {
2007 #pragma unused(sa)
2008         struct sockaddr *gate = rt->rt_gateway;
2009         struct llinfo_nd6 *ln = rt->rt_llinfo;
2010         static struct sockaddr_dl null_sdl =
2011             { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
2012         struct ifnet *ifp = rt->rt_ifp;
2013         struct ifaddr *ifa;
2014         uint64_t timenow;
2015         char buf[MAX_IPv6_STR_LEN];
2016
2017         VERIFY(nd6_init_done);
2018         lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
2019         RT_LOCK_ASSERT_HELD(rt);
2020
2021         /*
2022          * We have rnh_lock held, see if we need to schedule the timer;
2023          * we might do this again below during RTM_RESOLVE, but doing it
2024          * now handles all other cases.
2025          */
2026         if (nd6_sched_timeout_want)
2027                 nd6_sched_timeout(NULL, NULL);
2028
2029         if (rt->rt_flags & RTF_GATEWAY)
2030                 return;
2031
2032         if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) {
2033                 /*
2034                  * This is probably an interface direct route for a link
2035                  * which does not need neighbor caches (e.g. fe80::%lo0/64).
2036                  * We do not need special treatment below for such a route.
2037                  * Moreover, the RTF_LLINFO flag which would be set below
2038                  * would annoy the ndp(8) command.
2039                  */
2040                 return;
2041         }
2042
2043         if (req == RTM_RESOLVE) {
2044                 int no_nd_cache;
2045
2046                 if (!nd6_need_cache(ifp)) {     /* stf case */
2047                         no_nd_cache = 1;
2048                 } else {
2049                         struct sockaddr_in6 sin6;
2050
2051                         rtkey_to_sa6(rt, &sin6);
2052                         /*
2053                          * nd6_is_addr_neighbor() may call nd6_lookup(),
2054                          * therefore we drop rt_lock to avoid deadlock
2055                          * during the lookup.
2056                          */
2057                         RT_ADDREF_LOCKED(rt);
2058                         RT_UNLOCK(rt);
2059                         no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
2060                         RT_LOCK(rt);
2061                         RT_REMREF_LOCKED(rt);
2062                 }
2063
2064                 /*
2065                  * FreeBSD and BSD/OS often make a cloned host route based
2066                  * on a less-specific route (e.g. the default route).
2067                  * If the less specific route does not have a "gateway"
2068                  * (this is the case when the route just goes to a p2p or an
2069                  * stf interface), we'll mistakenly make a neighbor cache for
2070                  * the host route, and will see strange neighbor solicitation
2071                  * for the corresponding destination.  In order to avoid the
2072                  * confusion, we check if the destination of the route is
2073                  * a neighbor in terms of neighbor discovery, and stop the
2074                  * process if not.  Additionally, we remove the LLINFO flag
2075                  * so that ndp(8) will not try to get the neighbor information
2076                  * of the destination.
2077                  */
2078                 if (no_nd_cache) {
2079                         rt->rt_flags &= ~RTF_LLINFO;
2080                         return;
2081                 }
2082         }
2083
2084         timenow = net_uptime();
2085
2086         switch (req) {
2087         case RTM_ADD:
2088                 /*
2089                  * There is no backward compatibility :)
2090                  *
2091                  * if ((rt->rt_flags & RTF_HOST) == 0 &&
2092                  *      SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
2093                  *              rt->rt_flags |= RTF_CLONING;
2094                  */
2095                 if ((rt->rt_flags & RTF_CLONING) ||
2096                     ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
2097                         /*
2098                          * Case 1: This route should come from a route to
2099                          * interface (RTF_CLONING case) or the route should be
2100                          * treated as on-link but is currently not
2101                          * (RTF_LLINFO && ln == NULL case).
2102                          */
2103                         if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
2104                                 gate = rt->rt_gateway;
2105                                 SDL(gate)->sdl_type = ifp->if_type;
2106                                 SDL(gate)->sdl_index = ifp->if_index;
2107                                 /*
2108                                  * In case we're called before 1.0 sec.
2109                                  * has elapsed.
2110                                  */
2111                                 if (ln != NULL) {
2112                                         ln_setexpire(ln,
2113                                             (ifp->if_eflags & IFEF_IPV6_ND6ALT)
2114                                             ? 0 : MAX(timenow, 1));
2115                                 }
2116                         }
2117                         if (rt->rt_flags & RTF_CLONING)
2118                                 break;
2119                 }
2120                 /*
2121                  * In IPv4 code, we try to announce new RTF_ANNOUNCE entry here.
2122                  * We don't do that here since llinfo is not ready yet.
2123                  *
2124                  * There are also couple of other things to be discussed:
2125                  * - unsolicited NA code needs improvement beforehand
2126                  * - RFC4861 says we MAY send multicast unsolicited NA
2127                  *   (7.2.6 paragraph 4), however, it also says that we
2128                  *   SHOULD provide a mechanism to prevent multicast NA storm.
2129                  *   we don't have anything like it right now.
2130                  *   note that the mechanism needs a mutual agreement
2131                  *   between proxies, which means that we need to implement
2132                  *   a new protocol, or a new kludge.
2133                  * - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA.
2134                  *   we need to check ip6forwarding before sending it.
2135                  *   (or should we allow proxy ND configuration only for
2136                  *   routers?  there's no mention about proxy ND from hosts)
2137                  */
2138                 /* FALLTHROUGH */
2139         case RTM_RESOLVE:
2140                 if (!(ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK))) {
2141                         /*
2142                          * Address resolution isn't necessary for a point to
2143                          * point link, so we can skip this test for a p2p link.
2144                          */
2145                         if (gate->sa_family != AF_LINK ||
2146                             gate->sa_len < sizeof (null_sdl)) {
2147                                 /* Don't complain in case of RTM_ADD */
2148                                 if (req == RTM_RESOLVE) {
2149                                         log(LOG_ERR, "%s: route to %s has bad "
2150                                             "gateway address (sa_family %u "
2151                                             "sa_len %u) on %s\n", __func__,
2152                                             inet_ntop(AF_INET6,
2153                                             &SIN6(rt_key(rt))->sin6_addr, buf,
2154                                             sizeof (buf)), gate->sa_family,
2155                                             gate->sa_len, if_name(ifp));
2156                                 }
2157                                 break;
2158                         }
2159                         SDL(gate)->sdl_type = ifp->if_type;
2160                         SDL(gate)->sdl_index = ifp->if_index;
2161                 }
2162                 if (ln != NULL)
2163                         break;  /* This happens on a route change */
2164                 /*
2165                  * Case 2: This route may come from cloning, or a manual route
2166                  * add with a LL address.
2167                  */
2168                 rt->rt_llinfo = ln = nd6_llinfo_alloc(M_WAITOK);
2169                 if (ln == NULL)
2170                         break;
2171
2172                 nd6_allocated++;
2173                 rt->rt_llinfo_get_ri    = nd6_llinfo_get_ri;
2174                 rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
2175                 rt->rt_llinfo_purge     = nd6_llinfo_purge;
2176                 rt->rt_llinfo_free      = nd6_llinfo_free;
2177                 rt->rt_flags |= RTF_LLINFO;
2178                 ln->ln_rt = rt;
2179                 /* this is required for "ndp" command. - shin */
2180                 if (req == RTM_ADD) {
2181                         /*
2182                          * gate should have some valid AF_LINK entry,
2183                          * and ln->ln_expire should have some lifetime
2184                          * which is specified by ndp command.
2185                          */
2186                         ln->ln_state = ND6_LLINFO_REACHABLE;
2187                 } else {
2188                         /*
2189                          * When req == RTM_RESOLVE, rt is created and
2190                          * initialized in rtrequest(), so rt_expire is 0.
2191                          */
2192                         ln->ln_state = ND6_LLINFO_NOSTATE;
2193
2194                         /* In case we're called before 1.0 sec. has elapsed */
2195                         ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) ?
2196                             0 : MAX(timenow, 1));
2197                 }
2198                 LN_INSERTHEAD(ln);
2199                 nd6_inuse++;
2200
2201                 /* We have at least one entry; arm the timer if not already */
2202                 nd6_sched_timeout(NULL, NULL);
2203
2204                 /*
2205                  * If we have too many cache entries, initiate immediate
2206                  * purging for some "less recently used" entries.  Note that
2207                  * we cannot directly call nd6_free() here because it would
2208                  * cause re-entering rtable related routines triggering an LOR
2209                  * problem.
2210                  */
2211                 if (ip6_neighborgcthresh > 0 &&
2212                     nd6_inuse >= ip6_neighborgcthresh) {
2213                         int i;
2214
2215                         for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
2216                                 struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
2217                                 struct rtentry *rt_end = ln_end->ln_rt;
2218
2219                                 /* Move this entry to the head */
2220                                 RT_LOCK(rt_end);
2221                                 LN_DEQUEUE(ln_end);
2222                                 LN_INSERTHEAD(ln_end);
2223
2224                                 if (ln_end->ln_expire == 0) {
2225                                         RT_UNLOCK(rt_end);
2226                                         continue;
2227                                 }
2228                                 if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
2229                                         ln_end->ln_state = ND6_LLINFO_STALE;
2230                                 else
2231                                         ln_end->ln_state = ND6_LLINFO_PURGE;
2232                                 ln_setexpire(ln_end, timenow);
2233                                 RT_UNLOCK(rt_end);
2234                         }
2235                 }
2236
2237                 /*
2238                  * check if rt_key(rt) is one of my address assigned
2239                  * to the interface.
2240                  */
2241                 ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
2242                     &SIN6(rt_key(rt))->sin6_addr);
2243                 if (ifa != NULL) {
2244                         caddr_t macp = nd6_ifptomac(ifp);
2245                         ln_setexpire(ln, 0);
2246                         ln->ln_state = ND6_LLINFO_REACHABLE;
2247                         if (macp != NULL) {
2248                                 Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
2249                                 SDL(gate)->sdl_alen = ifp->if_addrlen;
2250                         }
2251                         if (nd6_useloopback) {
2252                                 if (rt->rt_ifp != lo_ifp) {
2253                                         /*
2254                                          * Purge any link-layer info caching.
2255                                          */
2256                                         if (rt->rt_llinfo_purge != NULL)
2257                                                 rt->rt_llinfo_purge(rt);
2258
2259                                         /*
2260                                          * Adjust route ref count for the
2261                                          * interfaces.
2262                                          */
2263                                         if (rt->rt_if_ref_fn != NULL) {
2264                                                 rt->rt_if_ref_fn(lo_ifp, 1);
2265                                                 rt->rt_if_ref_fn(rt->rt_ifp,
2266                                                     -1);
2267                                         }
2268                                 }
2269                                 rt->rt_ifp = lo_ifp;
2270                                 /*
2271                                  * If rmx_mtu is not locked, update it
2272                                  * to the MTU used by the new interface.
2273                                  */
2274                                 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2275                                         rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
2276                                 /*
2277                                  * Make sure rt_ifa be equal to the ifaddr
2278                                  * corresponding to the address.
2279                                  * We need this because when we refer
2280                                  * rt_ifa->ia6_flags in ip6_input, we assume
2281                                  * that the rt_ifa points to the address instead
2282                                  * of the loopback address.
2283                                  */
2284                                 if (ifa != rt->rt_ifa) {
2285                                         rtsetifa(rt, ifa);
2286                                 }
2287                         }
2288                         IFA_REMREF(ifa);
2289                 } else if (rt->rt_flags & RTF_ANNOUNCE) {
2290                         ln_setexpire(ln, 0);
2291                         ln->ln_state = ND6_LLINFO_REACHABLE;
2292
2293                         /* join solicited node multicast for proxy ND */
2294                         if (ifp->if_flags & IFF_MULTICAST) {
2295                                 struct in6_addr llsol;
2296                                 struct in6_multi *in6m;
2297                                 int error;
2298
2299                                 llsol = SIN6(rt_key(rt))->sin6_addr;
2300                                 llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2301                                 llsol.s6_addr32[1] = 0;
2302                                 llsol.s6_addr32[2] = htonl(1);
2303                                 llsol.s6_addr8[12] = 0xff;
2304                                 if (in6_setscope(&llsol, ifp, NULL))
2305                                         break;
2306                                 error = in6_mc_join(ifp, &llsol,
2307                                     NULL, &in6m, 0);
2308                                 if (error) {
2309                                         nd6log((LOG_ERR, "%s: failed to join "
2310                                             "%s (errno=%d)\n", if_name(ifp),
2311                                             ip6_sprintf(&llsol), error));
2312                                 } else {
2313                                         IN6M_REMREF(in6m);
2314                                 }
2315                         }
2316                 }
2317                 break;
2318
2319         case RTM_DELETE:
2320                 if (ln == NULL)
2321                         break;
2322                 /* leave from solicited node multicast for proxy ND */
2323                 if ((rt->rt_flags & RTF_ANNOUNCE) &&
2324                     (ifp->if_flags & IFF_MULTICAST)) {
2325                         struct in6_addr llsol;
2326                         struct in6_multi *in6m;
2327
2328                         llsol = SIN6(rt_key(rt))->sin6_addr;
2329                         llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2330                         llsol.s6_addr32[1] = 0;
2331                         llsol.s6_addr32[2] = htonl(1);
2332                         llsol.s6_addr8[12] = 0xff;
2333                         if (in6_setscope(&llsol, ifp, NULL) == 0) {
2334                                 in6_multihead_lock_shared();
2335                                 IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
2336                                 in6_multihead_lock_done();
2337                                 if (in6m != NULL) {
2338                                         in6_mc_leave(in6m, NULL);
2339                                         IN6M_REMREF(in6m);
2340                                 }
2341                         }
2342                 }
2343                 nd6_inuse--;
2344                 /*
2345                  * Unchain it but defer the actual freeing until the route
2346                  * itself is to be freed.  rt->rt_llinfo still points to
2347                  * llinfo_nd6, and likewise, ln->ln_rt still points to this
2348                  * route entry, except that RTF_LLINFO is now cleared.
2349                  */
2350                 if (ln->ln_flags & ND6_LNF_IN_USE)
2351                         LN_DEQUEUE(ln);
2352
2353                 /*
2354                  * Purge any link-layer info caching.
2355                  */
2356                 if (rt->rt_llinfo_purge != NULL)
2357                         rt->rt_llinfo_purge(rt);
2358
2359                 rt->rt_flags &= ~RTF_LLINFO;
2360                 if (ln->ln_hold != NULL) {
2361                         m_freem(ln->ln_hold);
2362                         ln->ln_hold = NULL;
2363                 }
2364         }
2365 }
2366
2367 static int
2368 nd6_siocgdrlst(void *data, int data_is_64)
2369 {
2370         struct in6_drlist_32 *drl_32;
2371         struct nd_defrouter *dr;
2372         int i = 0;
2373
2374         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2375
2376         dr = TAILQ_FIRST(&nd_defrouter);
2377
2378         /* For 64-bit process */
2379         if (data_is_64) {
2380                 struct in6_drlist_64 *drl_64;
2381
2382                 drl_64 = _MALLOC(sizeof (*drl_64), M_TEMP, M_WAITOK|M_ZERO);
2383                 if (drl_64 == NULL)
2384                         return (ENOMEM);
2385
2386                 /* preserve the interface name */
2387                 bcopy(data, drl_64, sizeof (drl_64->ifname));
2388
2389                 while (dr && i < DRLSTSIZ) {
2390                         drl_64->defrouter[i].rtaddr = dr->rtaddr;
2391                         if (IN6_IS_ADDR_LINKLOCAL(
2392                             &drl_64->defrouter[i].rtaddr)) {
2393                                 /* XXX: need to this hack for KAME stack */
2394                                 drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
2395                         } else {
2396                                 log(LOG_ERR,
2397                                     "default router list contains a "
2398                                     "non-linklocal address(%s)\n",
2399                                     ip6_sprintf(&drl_64->defrouter[i].rtaddr));
2400                         }
2401                         drl_64->defrouter[i].flags = dr->flags;
2402                         drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
2403                         drl_64->defrouter[i].expire = nddr_getexpire(dr);
2404                         drl_64->defrouter[i].if_index = dr->ifp->if_index;
2405                         i++;
2406                         dr = TAILQ_NEXT(dr, dr_entry);
2407                 }
2408                 bcopy(drl_64, data, sizeof (*drl_64));
2409                 _FREE(drl_64, M_TEMP);
2410                 return (0);
2411         }
2412
2413         /* For 32-bit process */
2414         drl_32 = _MALLOC(sizeof (*drl_32), M_TEMP, M_WAITOK|M_ZERO);
2415         if (drl_32 == NULL)
2416                 return (ENOMEM);
2417
2418         /* preserve the interface name */
2419         bcopy(data, drl_32, sizeof (drl_32->ifname));
2420
2421         while (dr != NULL && i < DRLSTSIZ) {
2422                 drl_32->defrouter[i].rtaddr = dr->rtaddr;
2423                 if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
2424                         /* XXX: need to this hack for KAME stack */
2425                         drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
2426                 } else {
2427                         log(LOG_ERR,
2428                             "default router list contains a "
2429                             "non-linklocal address(%s)\n",
2430                             ip6_sprintf(&drl_32->defrouter[i].rtaddr));
2431                 }
2432                 drl_32->defrouter[i].flags = dr->flags;
2433                 drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
2434                 drl_32->defrouter[i].expire = nddr_getexpire(dr);
2435                 drl_32->defrouter[i].if_index = dr->ifp->if_index;
2436                 i++;
2437                 dr = TAILQ_NEXT(dr, dr_entry);
2438         }
2439         bcopy(drl_32, data, sizeof (*drl_32));
2440         _FREE(drl_32, M_TEMP);
2441         return (0);
2442 }
2443
2444 /*
2445  * XXX meaning of fields, especialy "raflags", is very
2446  * differnet between RA prefix list and RR/static prefix list.
2447  * how about separating ioctls into two?
2448  */
2449 static int
2450 nd6_siocgprlst(void *data, int data_is_64)
2451 {
2452         struct in6_prlist_32 *prl_32;
2453         struct nd_prefix *pr;
2454         int i = 0;
2455
2456         lck_mtx_assert(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2457
2458         pr = nd_prefix.lh_first;
2459
2460         /* For 64-bit process */
2461         if (data_is_64) {
2462                 struct in6_prlist_64 *prl_64;
2463
2464                 prl_64 = _MALLOC(sizeof (*prl_64), M_TEMP, M_WAITOK|M_ZERO);
2465                 if (prl_64 == NULL)
2466                         return (ENOMEM);
2467
2468                 /* preserve the interface name */
2469                 bcopy(data, prl_64, sizeof (prl_64->ifname));
2470
2471                 while (pr && i < PRLSTSIZ) {
2472                         struct nd_pfxrouter *pfr;
2473                         int j;
2474
2475                         NDPR_LOCK(pr);
2476                         (void) in6_embedscope(&prl_64->prefix[i].prefix,
2477                             &pr->ndpr_prefix, NULL, NULL, NULL);
2478                         prl_64->prefix[i].raflags = pr->ndpr_raf;
2479                         prl_64->prefix[i].prefixlen = pr->ndpr_plen;
2480                         prl_64->prefix[i].vltime = pr->ndpr_vltime;
2481                         prl_64->prefix[i].pltime = pr->ndpr_pltime;
2482                         prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
2483                         prl_64->prefix[i].expire = ndpr_getexpire(pr);
2484
2485                         pfr = pr->ndpr_advrtrs.lh_first;
2486                         j = 0;
2487                         while (pfr) {
2488                                 if (j < DRLSTSIZ) {
2489 #define RTRADDR prl_64->prefix[i].advrtr[j]
2490                                         RTRADDR = pfr->router->rtaddr;
2491                                         if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2492                                                 /* XXX: hack for KAME */
2493                                                 RTRADDR.s6_addr16[1] = 0;
2494                                         } else {
2495                                                 log(LOG_ERR,
2496                                                     "a router(%s) advertises "
2497                                                     "a prefix with "
2498                                                     "non-link local address\n",
2499                                                     ip6_sprintf(&RTRADDR));
2500                                         }
2501 #undef RTRADDR
2502                                 }
2503                                 j++;
2504                                 pfr = pfr->pfr_next;
2505                         }
2506                         prl_64->prefix[i].advrtrs = j;
2507                         prl_64->prefix[i].origin = PR_ORIG_RA;
2508                         NDPR_UNLOCK(pr);
2509
2510                         i++;
2511                         pr = pr->ndpr_next;
2512                 }
2513                 bcopy(prl_64, data, sizeof (*prl_64));
2514                 _FREE(prl_64, M_TEMP);
2515                 return (0);
2516         }
2517
2518         /* For 32-bit process */
2519         prl_32 = _MALLOC(sizeof (*prl_32), M_TEMP, M_WAITOK|M_ZERO);
2520         if (prl_32 == NULL)
2521                 return (ENOMEM);
2522
2523         /* preserve the interface name */
2524         bcopy(data, prl_32, sizeof (prl_32->ifname));
2525
2526         while (pr && i < PRLSTSIZ) {
2527                 struct nd_pfxrouter *pfr;
2528                 int j;
2529
2530                 NDPR_LOCK(pr);
2531                 (void) in6_embedscope(&prl_32->prefix[i].prefix,
2532                     &pr->ndpr_prefix, NULL, NULL, NULL);
2533                 prl_32->prefix[i].raflags = pr->ndpr_raf;
2534                 prl_32->prefix[i].prefixlen = pr->ndpr_plen;
2535                 prl_32->prefix[i].vltime = pr->ndpr_vltime;
2536                 prl_32->prefix[i].pltime = pr->ndpr_pltime;
2537                 prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
2538                 prl_32->prefix[i].expire = ndpr_getexpire(pr);
2539
2540                 pfr = pr->ndpr_advrtrs.lh_first;
2541                 j = 0;
2542                 while (pfr) {
2543                         if (j < DRLSTSIZ) {
2544 #define RTRADDR prl_32->prefix[i].advrtr[j]
2545                                 RTRADDR = pfr->router->rtaddr;
2546                                 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2547                                         /* XXX: hack for KAME */
2548                                         RTRADDR.s6_addr16[1] = 0;
2549                                 } else {
2550                                         log(LOG_ERR,
2551                                             "a router(%s) advertises "
2552                                             "a prefix with "
2553                                             "non-link local address\n",
2554                                             ip6_sprintf(&RTRADDR));
2555                                 }
2556 #undef RTRADDR
2557                         }
2558                         j++;
2559                         pfr = pfr->pfr_next;
2560                 }
2561                 prl_32->prefix[i].advrtrs = j;
2562                 prl_32->prefix[i].origin = PR_ORIG_RA;
2563                 NDPR_UNLOCK(pr);
2564
2565                 i++;
2566                 pr = pr->ndpr_next;
2567         }
2568         bcopy(prl_32, data, sizeof (*prl_32));
2569         _FREE(prl_32, M_TEMP);
2570         return (0);
2571 }
2572
2573 int
2574 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
2575 {
2576         struct nd_defrouter *dr;
2577         struct nd_prefix *pr;
2578         struct rtentry *rt;
2579         int i, error = 0;
2580
2581         VERIFY(ifp != NULL);
2582         i = ifp->if_index;
2583
2584         switch (cmd) {
2585         case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
2586         case SIOCGDRLST_IN6_64:         /* struct in6_drlist_64 */
2587                 /*
2588                  * obsolete API, use sysctl under net.inet6.icmp6
2589                  */
2590                 lck_mtx_lock(nd6_mutex);
2591                 error = nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
2592                 lck_mtx_unlock(nd6_mutex);
2593                 break;
2594
2595         case SIOCGPRLST_IN6_32:         /* struct in6_prlist_32 */
2596         case SIOCGPRLST_IN6_64:         /* struct in6_prlist_64 */
2597                 /*
2598                  * obsolete API, use sysctl under net.inet6.icmp6
2599                  */
2600                 lck_mtx_lock(nd6_mutex);
2601                 error = nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
2602                 lck_mtx_unlock(nd6_mutex);
2603                 break;
2604
2605         case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
2606         case SIOCGIFINFO_IN6: {         /* struct in6_ondireq */
2607                 u_int32_t linkmtu;
2608                 struct in6_ondireq *ondi = (struct in6_ondireq *)(void *)data;
2609                 struct nd_ifinfo *ndi;
2610                 /*
2611                  * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
2612                  * instead of in6_ndireq, so we treat it as such.
2613                  */
2614                 lck_rw_lock_shared(nd_if_rwlock);
2615                 ndi = ND_IFINFO(ifp);
2616                 if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
2617                     !ndi->initialized) {
2618                         lck_rw_done(nd_if_rwlock);
2619                         error = EINVAL;
2620                         break;
2621                 }
2622                 lck_mtx_lock(&ndi->lock);
2623                 linkmtu = IN6_LINKMTU(ifp);
2624                 bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
2625                 bcopy(&nd_ifinfo[i].maxmtu, &ondi->ndi.maxmtu,
2626                     sizeof (u_int32_t));
2627                 bcopy(&nd_ifinfo[i].basereachable, &ondi->ndi.basereachable,
2628                     sizeof (u_int32_t));
2629                 bcopy(&nd_ifinfo[i].reachable, &ondi->ndi.reachable,
2630                     sizeof (u_int32_t));
2631                 bcopy(&nd_ifinfo[i].retrans, &ondi->ndi.retrans,
2632                     sizeof (u_int32_t));
2633                 bcopy(&nd_ifinfo[i].flags, &ondi->ndi.flags,
2634                     sizeof (u_int32_t));
2635                 bcopy(&nd_ifinfo[i].recalctm, &ondi->ndi.recalctm,
2636                     sizeof (int));
2637                 ondi->ndi.chlim = nd_ifinfo[i].chlim;
2638                 ondi->ndi.receivedra = 0;
2639                 lck_mtx_unlock(&ndi->lock);
2640                 lck_rw_done(nd_if_rwlock);
2641                 break;
2642         }
2643
2644         case SIOCSIFINFO_FLAGS: {       /* struct in6_ndireq */
2645                 struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
2646                 u_int32_t oflags, flags;
2647                 struct nd_ifinfo *ndi;
2648
2649                 /* XXX: almost all other fields of cndi->ndi is unused */
2650                 lck_rw_lock_shared(nd_if_rwlock);
2651                 ndi = ND_IFINFO(ifp);
2652                 if (!nd_ifinfo || i >= nd_ifinfo_indexlim ||
2653                     !ndi->initialized) {
2654                         lck_rw_done(nd_if_rwlock);
2655                         error = EINVAL;
2656                         break;
2657                 }
2658                 lck_mtx_lock(&ndi->lock);
2659                 oflags = nd_ifinfo[i].flags;
2660                 bcopy(&cndi->ndi.flags, &nd_ifinfo[i].flags, sizeof (flags));
2661                 flags = nd_ifinfo[i].flags;
2662                 lck_mtx_unlock(&ndi->lock);
2663                 lck_rw_done(nd_if_rwlock);
2664
2665                 if (oflags == flags)
2666                         break;
2667
2668                 error = nd6_setifinfo(ifp, oflags, flags);
2669                 break;
2670         }
2671
2672         case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
2673                 /* flush default router list */
2674                 /*
2675                  * xxx sumikawa: should not delete route if default
2676                  * route equals to the top of default router list
2677                  */
2678                 lck_mtx_lock(nd6_mutex);
2679                 defrouter_reset();
2680                 defrouter_select(ifp);
2681                 lck_mtx_unlock(nd6_mutex);
2682                 /* xxx sumikawa: flush prefix list */
2683                 break;
2684
2685         case SIOCSPFXFLUSH_IN6: {       /* struct in6_ifreq */
2686                 /* flush all the prefix advertised by routers */
2687                 struct nd_prefix *next;
2688
2689                 lck_mtx_lock(nd6_mutex);
2690                 for (pr = nd_prefix.lh_first; pr; pr = next) {
2691                         struct in6_ifaddr *ia;
2692
2693                         next = pr->ndpr_next;
2694
2695                         NDPR_LOCK(pr);
2696                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
2697                                 NDPR_UNLOCK(pr);
2698                                 continue; /* XXX */
2699                         }
2700                         if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
2701                                 NDPR_UNLOCK(pr);
2702                                 continue;
2703                         }
2704                         /* do we really have to remove addresses as well? */
2705                         NDPR_ADDREF_LOCKED(pr);
2706                         NDPR_UNLOCK(pr);
2707                         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
2708                         ia = in6_ifaddrs;
2709                         while (ia != NULL) {
2710                                 IFA_LOCK(&ia->ia_ifa);
2711                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
2712                                         IFA_UNLOCK(&ia->ia_ifa);
2713                                         ia = ia->ia_next;
2714                                         continue;
2715                                 }
2716
2717                                 if (ia->ia6_ndpr == pr) {
2718                                         IFA_ADDREF_LOCKED(&ia->ia_ifa);
2719                                         IFA_UNLOCK(&ia->ia_ifa);
2720                                         lck_rw_done(&in6_ifaddr_rwlock);
2721                                         lck_mtx_unlock(nd6_mutex);
2722                                         in6_purgeaddr(&ia->ia_ifa);
2723                                         IFA_REMREF(&ia->ia_ifa);
2724                                         lck_mtx_lock(nd6_mutex);
2725                                         lck_rw_lock_exclusive(
2726                                             &in6_ifaddr_rwlock);
2727                                         /*
2728                                          * Purging the address caused
2729                                          * in6_ifaddr_rwlock to be
2730                                          * dropped and
2731                                          * reacquired; therefore search again
2732                                          * from the beginning of in6_ifaddrs.
2733                                          * The same applies for the prefix list.
2734                                          */
2735                                         ia = in6_ifaddrs;
2736                                         next = nd_prefix.lh_first;
2737                                         continue;
2738
2739                                 }
2740                                 IFA_UNLOCK(&ia->ia_ifa);
2741                                 ia = ia->ia_next;
2742                         }
2743                         lck_rw_done(&in6_ifaddr_rwlock);
2744                         NDPR_LOCK(pr);
2745                         prelist_remove(pr);
2746                         NDPR_UNLOCK(pr);
2747                         pfxlist_onlink_check();
2748                         /*
2749                          * If we were trying to restart this loop
2750                          * above by changing the value of 'next', we might
2751                          * end up freeing the only element on the list
2752                          * when we call NDPR_REMREF().
2753                          * When this happens, we also have get out of this
2754                          * loop because we have nothing else to do.
2755                          */
2756                         if (pr == next)
2757                                 next = NULL;
2758                         NDPR_REMREF(pr);
2759                 }
2760                 lck_mtx_unlock(nd6_mutex);
2761                 break;
2762         }
2763
2764         case SIOCSRTRFLUSH_IN6: {       /* struct in6_ifreq */
2765                 /* flush all the default routers */
2766                 struct nd_defrouter *next;
2767
2768                 lck_mtx_lock(nd6_mutex);
2769                 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
2770                         /*
2771                          * The first entry of the list may be stored in
2772                          * the routing table, so we'll delete it later.
2773                          */
2774                         for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
2775                                 next = TAILQ_NEXT(dr, dr_entry);
2776                                 if (ifp == lo_ifp || dr->ifp == ifp)
2777                                         defrtrlist_del(dr);
2778                         }
2779                         if (ifp == lo_ifp ||
2780                             TAILQ_FIRST(&nd_defrouter)->ifp == ifp)
2781                                 defrtrlist_del(TAILQ_FIRST(&nd_defrouter));
2782                 }
2783                 lck_mtx_unlock(nd6_mutex);
2784                 break;
2785         }
2786
2787         case SIOCGNBRINFO_IN6_32: {     /* struct in6_nbrinfo_32 */
2788                 struct llinfo_nd6 *ln;
2789                 struct in6_nbrinfo_32 nbi_32;
2790                 struct in6_addr nb_addr; /* make local for safety */
2791
2792                 bcopy(data, &nbi_32, sizeof (nbi_32));
2793                 nb_addr = nbi_32.addr;
2794                 /*
2795                  * XXX: KAME specific hack for scoped addresses
2796                  *      XXXX: for other scopes than link-local?
2797                  */
2798                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) ||
2799                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) {
2800                         u_int16_t *idp =
2801                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
2802
2803                         if (*idp == 0)
2804                                 *idp = htons(ifp->if_index);
2805                 }
2806
2807                 /* Callee returns a locked route upon success */
2808                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
2809                         error = EINVAL;
2810                         break;
2811                 }
2812                 RT_LOCK_ASSERT_HELD(rt);
2813                 ln = rt->rt_llinfo;
2814                 nbi_32.state = ln->ln_state;
2815                 nbi_32.asked = ln->ln_asked;
2816                 nbi_32.isrouter = ln->ln_router;
2817                 nbi_32.expire = ln_getexpire(ln);
2818                 RT_REMREF_LOCKED(rt);
2819                 RT_UNLOCK(rt);
2820                 bcopy(&nbi_32, data, sizeof (nbi_32));
2821                 break;
2822         }
2823
2824         case SIOCGNBRINFO_IN6_64: {     /* struct in6_nbrinfo_64 */
2825                 struct llinfo_nd6 *ln;
2826                 struct in6_nbrinfo_64 nbi_64;
2827                 struct in6_addr nb_addr; /* make local for safety */
2828
2829                 bcopy(data, &nbi_64, sizeof (nbi_64));
2830                 nb_addr = nbi_64.addr;
2831                 /*
2832                  * XXX: KAME specific hack for scoped addresses
2833                  *      XXXX: for other scopes than link-local?
2834                  */
2835                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) ||
2836                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) {
2837                         u_int16_t *idp =
2838                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
2839
2840                         if (*idp == 0)
2841                                 *idp = htons(ifp->if_index);
2842                 }
2843
2844                 /* Callee returns a locked route upon success */
2845                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
2846                         error = EINVAL;
2847                         break;
2848                 }
2849                 RT_LOCK_ASSERT_HELD(rt);
2850                 ln = rt->rt_llinfo;
2851                 nbi_64.state = ln->ln_state;
2852                 nbi_64.asked = ln->ln_asked;
2853                 nbi_64.isrouter = ln->ln_router;
2854                 nbi_64.expire = ln_getexpire(ln);
2855                 RT_REMREF_LOCKED(rt);
2856                 RT_UNLOCK(rt);
2857                 bcopy(&nbi_64, data, sizeof (nbi_64));
2858                 break;
2859         }
2860
2861         case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
2862         case SIOCGDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
2863                 struct in6_ndifreq_64 *ndif_64 =
2864                     (struct in6_ndifreq_64 *)(void *)data;
2865                 struct in6_ndifreq_32 *ndif_32 =
2866                     (struct in6_ndifreq_32 *)(void *)data;
2867
2868                 if (cmd == SIOCGDEFIFACE_IN6_64) {
2869                         u_int64_t j = nd6_defifindex;
2870                         bcopy(&j, &ndif_64->ifindex, sizeof (j));
2871                 } else {
2872                         bcopy(&nd6_defifindex, &ndif_32->ifindex,
2873                             sizeof (u_int32_t));
2874                 }
2875                 break;
2876         }
2877
2878         case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
2879         case SIOCSDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
2880                 struct in6_ndifreq_64 *ndif_64 =
2881                     (struct in6_ndifreq_64 *)(void *)data;
2882                 struct in6_ndifreq_32 *ndif_32 =
2883                     (struct in6_ndifreq_32 *)(void *)data;
2884                 u_int32_t idx;
2885
2886                 if (cmd == SIOCSDEFIFACE_IN6_64) {
2887                         u_int64_t j;
2888                         bcopy(&ndif_64->ifindex, &j, sizeof (j));
2889                         idx = (u_int32_t)j;
2890                 } else {
2891                         bcopy(&ndif_32->ifindex, &idx, sizeof (idx));
2892                 }
2893
2894                 error = nd6_setdefaultiface(idx);
2895                 return (error);
2896                 /* NOTREACHED */
2897         }
2898         }
2899         return (error);
2900 }
2901
2902 /*
2903  * Create neighbor cache entry and cache link-layer address,
2904  * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
2905  */
2906 void
2907 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
2908     int lladdrlen, int type, int code)
2909 {
2910 #pragma unused(lladdrlen)
2911         struct rtentry *rt = NULL;
2912         struct llinfo_nd6 *ln = NULL;
2913         int is_newentry;
2914         struct sockaddr_dl *sdl = NULL;
2915         int do_update;
2916         int olladdr;
2917         int llchange;
2918         int newstate = 0;
2919         uint64_t timenow;
2920         boolean_t sched_timeout = FALSE;
2921
2922         if (ifp == NULL)
2923                 panic("ifp == NULL in nd6_cache_lladdr");
2924         if (from == NULL)
2925                 panic("from == NULL in nd6_cache_lladdr");
2926
2927         /* nothing must be updated for unspecified address */
2928         if (IN6_IS_ADDR_UNSPECIFIED(from))
2929                 return;
2930
2931         /*
2932          * Validation about ifp->if_addrlen and lladdrlen must be done in
2933          * the caller.
2934          */
2935         timenow = net_uptime();
2936
2937         rt = nd6_lookup(from, 0, ifp, 0);
2938         if (rt == NULL) {
2939                 if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
2940                         return;
2941                 RT_LOCK_ASSERT_HELD(rt);
2942                 is_newentry = 1;
2943         } else {
2944                 RT_LOCK_ASSERT_HELD(rt);
2945                 /* do nothing if static ndp is set */
2946                 if (rt->rt_flags & RTF_STATIC) {
2947                         RT_REMREF_LOCKED(rt);
2948                         RT_UNLOCK(rt);
2949                         return;
2950                 }
2951                 is_newentry = 0;
2952         }
2953
2954         if (rt == NULL)
2955                 return;
2956         if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
2957 fail:
2958                 RT_UNLOCK(rt);
2959                 nd6_free(rt);
2960                 rtfree(rt);
2961                 return;
2962         }
2963         ln = (struct llinfo_nd6 *)rt->rt_llinfo;
2964         if (ln == NULL)
2965                 goto fail;
2966         if (rt->rt_gateway == NULL)
2967                 goto fail;
2968         if (rt->rt_gateway->sa_family != AF_LINK)
2969                 goto fail;
2970         sdl = SDL(rt->rt_gateway);
2971
2972         olladdr = (sdl->sdl_alen) ? 1 : 0;
2973         if (olladdr && lladdr) {
2974                 if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
2975                         llchange = 1;
2976                 else
2977                         llchange = 0;
2978         } else
2979                 llchange = 0;
2980
2981         /*
2982          * newentry olladdr  lladdr  llchange   (*=record)
2983          *      0       n       n       --      (1)
2984          *      0       y       n       --      (2)
2985          *      0       n       y       --      (3) * STALE
2986          *      0       y       y       n       (4) *
2987          *      0       y       y       y       (5) * STALE
2988          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
2989          *      1       --      y       --      (7) * STALE
2990          */
2991
2992         if (lladdr) {           /* (3-5) and (7) */
2993                 /*
2994                  * Record source link-layer address
2995                  * XXX is it dependent to ifp->if_type?
2996                  */
2997                 sdl->sdl_alen = ifp->if_addrlen;
2998                 bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
2999
3000                 /* cache the gateway (sender HW) address */
3001                 nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
3002         }
3003
3004         if (!is_newentry) {
3005                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
3006                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
3007                         do_update = 1;
3008                         newstate = ND6_LLINFO_STALE;
3009                 } else                                  /* (1-2,4) */
3010                         do_update = 0;
3011         } else {
3012                 do_update = 1;
3013                 if (lladdr == NULL)                     /* (6) */
3014                         newstate = ND6_LLINFO_NOSTATE;
3015                 else                                    /* (7) */
3016                         newstate = ND6_LLINFO_STALE;
3017         }
3018
3019         if (do_update) {
3020                 /*
3021                  * Update the state of the neighbor cache.
3022                  */
3023                 ln->ln_state = newstate;
3024
3025                 if (ln->ln_state == ND6_LLINFO_STALE) {
3026                         struct mbuf *m = ln->ln_hold;
3027                         /*
3028                          * XXX: since nd6_output() below will cause
3029                          * state tansition to DELAY and reset the timer,
3030                          * we must set the timer now, although it is actually
3031                          * meaningless.
3032                          */
3033                         ln_setexpire(ln, timenow + nd6_gctimer);
3034                         ln->ln_hold = NULL;
3035
3036                         if (m != NULL) {
3037                                 struct sockaddr_in6 sin6;
3038
3039                                 rtkey_to_sa6(rt, &sin6);
3040                                 /*
3041                                  * we assume ifp is not a p2p here, so just
3042                                  * set the 2nd argument as the 1st one.
3043                                  */
3044                                 RT_UNLOCK(rt);
3045                                 nd6_output(ifp, ifp, m, &sin6, rt, NULL);
3046                                 RT_LOCK(rt);
3047                         }
3048                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
3049                         /* probe right away */
3050                         ln_setexpire(ln, timenow);
3051                         sched_timeout = TRUE;
3052                 }
3053         }
3054
3055         /*
3056          * ICMP6 type dependent behavior.
3057          *
3058          * NS: clear IsRouter if new entry
3059          * RS: clear IsRouter
3060          * RA: set IsRouter if there's lladdr
3061          * redir: clear IsRouter if new entry
3062          *
3063          * RA case, (1):
3064          * The spec says that we must set IsRouter in the following cases:
3065          * - If lladdr exist, set IsRouter.  This means (1-5).
3066          * - If it is old entry (!newentry), set IsRouter.  This means (7).
3067          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
3068          * A quetion arises for (1) case.  (1) case has no lladdr in the
3069          * neighbor cache, this is similar to (6).
3070          * This case is rare but we figured that we MUST NOT set IsRouter.
3071          *
3072          * newentry olladdr  lladdr  llchange       NS  RS      RA      redir
3073          *                                                              D R
3074          *      0       n       n       --      (1)     c       ?       s
3075          *      0       y       n       --      (2)     c       s       s
3076          *      0       n       y       --      (3)     c       s       s
3077          *      0       y       y       n       (4)     c       s       s
3078          *      0       y       y       y       (5)     c       s       s
3079          *      1       --      n       --      (6) c   c               c s
3080          *      1       --      y       --      (7) c   c       s       c s
3081          *
3082          *                                      (c=clear s=set)
3083          */
3084         switch (type & 0xff) {
3085         case ND_NEIGHBOR_SOLICIT:
3086                 /*
3087                  * New entry must have is_router flag cleared.
3088                  */
3089                 if (is_newentry)        /* (6-7) */
3090                         ln->ln_router = 0;
3091                 break;
3092         case ND_REDIRECT:
3093                 /*
3094                  * If the ICMP message is a Redirect to a better router, always
3095                  * set the is_router flag.  Otherwise, if the entry is newly
3096                  * created, then clear the flag.  [RFC 4861, sec 8.3]
3097                  */
3098                 if (code == ND_REDIRECT_ROUTER)
3099                         ln->ln_router = 1;
3100                 else if (is_newentry) /* (6-7) */
3101                         ln->ln_router = 0;
3102                 break;
3103         case ND_ROUTER_SOLICIT:
3104                 /*
3105                  * is_router flag must always be cleared.
3106                  */
3107                 ln->ln_router = 0;
3108                 break;
3109         case ND_ROUTER_ADVERT:
3110                 /*
3111                  * Mark an entry with lladdr as a router.
3112                  */
3113                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
3114                     (is_newentry && lladdr)) {                  /* (7) */
3115                         ln->ln_router = 1;
3116                 }
3117                 break;
3118         }
3119
3120         /*
3121          * When the link-layer address of a router changes, select the
3122          * best router again.  In particular, when the neighbor entry is newly
3123          * created, it might affect the selection policy.
3124          * Question: can we restrict the first condition to the "is_newentry"
3125          * case?
3126          *
3127          * Note: Perform default router selection even when we are a router,
3128          * if Scoped Routing is enabled.
3129          */
3130         if (do_update && ln->ln_router &&
3131             (ip6_doscopedroute || !ip6_forwarding)) {
3132                 RT_REMREF_LOCKED(rt);
3133                 RT_UNLOCK(rt);
3134                 lck_mtx_lock(nd6_mutex);
3135                 defrouter_select(ifp);
3136                 lck_mtx_unlock(nd6_mutex);
3137         } else {
3138                 RT_REMREF_LOCKED(rt);
3139                 RT_UNLOCK(rt);
3140         }
3141         if (sched_timeout) {
3142                 lck_mtx_lock(rnh_lock);
3143                 nd6_sched_timeout(NULL, NULL);
3144                 lck_mtx_unlock(rnh_lock);
3145         }
3146 }
3147
3148 static void
3149 nd6_slowtimo(void *arg)
3150 {
3151 #pragma unused(arg)
3152         int i;
3153         struct nd_ifinfo *nd6if;
3154
3155         lck_rw_lock_shared(nd_if_rwlock);
3156         for (i = 1; i < if_index + 1; i++) {
3157                 if (!nd_ifinfo || i >= nd_ifinfo_indexlim)
3158                         break;
3159                 nd6if = &nd_ifinfo[i];
3160                 if (!nd6if->initialized)
3161                         break;
3162                 lck_mtx_lock(&nd6if->lock);
3163                 if (nd6if->basereachable && /* already initialized */
3164                     (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
3165                         /*
3166                          * Since reachable time rarely changes by router
3167                          * advertisements, we SHOULD insure that a new random
3168                          * value gets recomputed at least once every few hours.
3169                          * (RFC 4861, 6.3.4)
3170                          */
3171                         nd6if->recalctm = nd6_recalc_reachtm_interval;
3172                         nd6if->reachable =
3173                             ND_COMPUTE_RTIME(nd6if->basereachable);
3174                 }
3175                 lck_mtx_unlock(&nd6if->lock);
3176         }
3177         lck_rw_done(nd_if_rwlock);
3178         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
3179 }
3180
3181 #define senderr(e) { error = (e); goto bad; }
3182 int
3183 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
3184     struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
3185 {
3186         struct mbuf *m = m0;
3187         struct rtentry *rt = hint0, *hint = hint0;
3188         struct llinfo_nd6 *ln = NULL;
3189         int error = 0;
3190         uint64_t timenow;
3191         struct rtentry *rtrele = NULL;
3192         struct nd_ifinfo *ndi;
3193
3194         if (rt != NULL) {
3195                 RT_LOCK_SPIN(rt);
3196                 RT_ADDREF_LOCKED(rt);
3197         }
3198
3199         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
3200                 if (rt != NULL)
3201                         RT_UNLOCK(rt);
3202                 goto sendpkt;
3203         }
3204
3205         /*
3206          * Next hop determination.  Because we may involve the gateway route
3207          * in addition to the original route, locking is rather complicated.
3208          * The general concept is that regardless of whether the route points
3209          * to the original route or to the gateway route, this routine takes
3210          * an extra reference on such a route.  This extra reference will be
3211          * released at the end.
3212          *
3213          * Care must be taken to ensure that the "hint0" route never gets freed
3214          * via rtfree(), since the caller may have stored it inside a struct
3215          * route with a reference held for that placeholder.
3216          *
3217          * This logic is similar to, though not exactly the same as the one
3218          * used by route_to_gwroute().
3219          */
3220         if (rt != NULL) {
3221                 /*
3222                  * We have a reference to "rt" by now (or below via rtalloc1),
3223                  * which will either be released or freed at the end of this
3224                  * routine.
3225                  */
3226                 RT_LOCK_ASSERT_HELD(rt);
3227                 if (!(rt->rt_flags & RTF_UP)) {
3228                         RT_REMREF_LOCKED(rt);
3229                         RT_UNLOCK(rt);
3230                         if ((hint = rt = rtalloc1_scoped(SA(dst), 1, 0,
3231                             ifp->if_index)) != NULL) {
3232                                 RT_LOCK_SPIN(rt);
3233                                 if (rt->rt_ifp != ifp) {
3234                                         /* XXX: loop care? */
3235                                         RT_UNLOCK(rt);
3236                                         error = nd6_output(ifp, origifp, m0,
3237                                             dst, rt, adv);
3238                                         rtfree(rt);
3239                                         return (error);
3240                                 }
3241                         } else {
3242                                 senderr(EHOSTUNREACH);
3243                         }
3244                 }
3245
3246                 if (rt->rt_flags & RTF_GATEWAY) {
3247                         struct rtentry *gwrt;
3248                         struct in6_ifaddr *ia6 = NULL;
3249                         struct sockaddr_in6 gw6;
3250
3251                         rtgw_to_sa6(rt, &gw6);
3252                         /*
3253                          * Must drop rt_lock since nd6_is_addr_neighbor()
3254                          * calls nd6_lookup() and acquires rnh_lock.
3255                          */
3256                         RT_UNLOCK(rt);
3257
3258                         /*
3259                          * We skip link-layer address resolution and NUD
3260                          * if the gateway is not a neighbor from ND point
3261                          * of view, regardless of the value of nd_ifinfo.flags.
3262                          * The second condition is a bit tricky; we skip
3263                          * if the gateway is our own address, which is
3264                          * sometimes used to install a route to a p2p link.
3265                          */
3266                         if (!nd6_is_addr_neighbor(&gw6, ifp, 0) ||
3267                             (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
3268                                 /*
3269                                  * We allow this kind of tricky route only
3270                                  * when the outgoing interface is p2p.
3271                                  * XXX: we may need a more generic rule here.
3272                                  */
3273                                 if (ia6 != NULL)
3274                                         IFA_REMREF(&ia6->ia_ifa);
3275                                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
3276                                         senderr(EHOSTUNREACH);
3277                                 goto sendpkt;
3278                         }
3279
3280                         RT_LOCK_SPIN(rt);
3281                         gw6 = *(SIN6(rt->rt_gateway));
3282
3283                         /* If hint is now down, give up */
3284                         if (!(rt->rt_flags & RTF_UP)) {
3285                                 RT_UNLOCK(rt);
3286                                 senderr(EHOSTUNREACH);
3287                         }
3288
3289                         /* If there's no gateway route, look it up */
3290                         if ((gwrt = rt->rt_gwroute) == NULL) {
3291                                 RT_UNLOCK(rt);
3292                                 goto lookup;
3293                         }
3294                         /* Become a regular mutex */
3295                         RT_CONVERT_LOCK(rt);
3296
3297                         /*
3298                          * Take gwrt's lock while holding route's lock;
3299                          * this is okay since gwrt never points back
3300                          * to rt, so no lock ordering issues.
3301                          */
3302                         RT_LOCK_SPIN(gwrt);
3303                         if (!(gwrt->rt_flags & RTF_UP)) {
3304                                 rt->rt_gwroute = NULL;
3305                                 RT_UNLOCK(gwrt);
3306                                 RT_UNLOCK(rt);
3307                                 rtfree(gwrt);
3308 lookup:
3309                                 lck_mtx_lock(rnh_lock);
3310                                 gwrt = rtalloc1_scoped_locked(SA(&gw6), 1, 0,
3311                                     ifp->if_index);
3312
3313                                 RT_LOCK(rt);
3314                                 /*
3315                                  * Bail out if the route is down, no route
3316                                  * to gateway, circular route, or if the
3317                                  * gateway portion of "rt" has changed.
3318                                  */
3319                                 if (!(rt->rt_flags & RTF_UP) ||
3320                                     gwrt == NULL || gwrt == rt ||
3321                                     !equal(SA(&gw6), rt->rt_gateway)) {
3322                                         if (gwrt == rt) {
3323                                                 RT_REMREF_LOCKED(gwrt);
3324                                                 gwrt = NULL;
3325                                         }
3326                                         RT_UNLOCK(rt);
3327                                         if (gwrt != NULL)
3328                                                 rtfree_locked(gwrt);
3329                                         lck_mtx_unlock(rnh_lock);
3330                                         senderr(EHOSTUNREACH);
3331                                 }
3332                                 VERIFY(gwrt != NULL);
3333                                 /*
3334                                  * Set gateway route; callee adds ref to gwrt;
3335                                  * gwrt has an extra ref from rtalloc1() for
3336                                  * this routine.
3337                                  */
3338                                 rt_set_gwroute(rt, rt_key(rt), gwrt);
3339                                 RT_UNLOCK(rt);
3340                                 lck_mtx_unlock(rnh_lock);
3341                                 /* Remember to release/free "rt" at the end */
3342                                 rtrele = rt;
3343                                 rt = gwrt;
3344                         } else {
3345                                 RT_ADDREF_LOCKED(gwrt);
3346                                 RT_UNLOCK(gwrt);
3347                                 RT_UNLOCK(rt);
3348                                 /* Remember to release/free "rt" at the end */
3349                                 rtrele = rt;
3350                                 rt = gwrt;
3351                         }
3352                         VERIFY(rt == gwrt);
3353
3354                         /*
3355                          * This is an opportunity to revalidate the parent
3356                          * route's gwroute, in case it now points to a dead
3357                          * route entry.  Parent route won't go away since the
3358                          * clone (hint) holds a reference to it.  rt == gwrt.
3359                          */
3360                         RT_LOCK_SPIN(hint);
3361                         if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
3362                             (RTF_WASCLONED | RTF_UP)) {
3363                                 struct rtentry *prt = hint->rt_parent;
3364                                 VERIFY(prt != NULL);
3365
3366                                 RT_CONVERT_LOCK(hint);
3367                                 RT_ADDREF(prt);
3368                                 RT_UNLOCK(hint);
3369                                 rt_revalidate_gwroute(prt, rt);
3370                                 RT_REMREF(prt);
3371                         } else {
3372                                 RT_UNLOCK(hint);
3373                         }
3374
3375                         RT_LOCK_SPIN(rt);
3376                         /* rt == gwrt; if it is now down, give up */
3377                         if (!(rt->rt_flags & RTF_UP)) {
3378                                 RT_UNLOCK(rt);
3379                                 rtfree(rt);
3380                                 rt = NULL;
3381                                 /* "rtrele" == original "rt" */
3382                                 senderr(EHOSTUNREACH);
3383                         }
3384                 }
3385
3386                 /* Become a regular mutex */
3387                 RT_CONVERT_LOCK(rt);
3388         }
3389
3390         /*
3391          * Address resolution or Neighbor Unreachability Detection
3392          * for the next hop.
3393          * At this point, the destination of the packet must be a unicast
3394          * or an anycast address(i.e. not a multicast).
3395          */
3396
3397         /* Look up the neighbor cache for the nexthop */
3398         if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
3399                 ln = rt->rt_llinfo;
3400         } else {
3401                 struct sockaddr_in6 sin6;
3402                 /*
3403                  * Clear out Scope ID field in case it is set.
3404                  */
3405                 sin6 = *dst;
3406                 sin6.sin6_scope_id = 0;
3407                 /*
3408                  * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
3409                  * the condition below is not very efficient.  But we believe
3410                  * it is tolerable, because this should be a rare case.
3411                  * Must drop rt_lock since nd6_is_addr_neighbor() calls
3412                  * nd6_lookup() and acquires rnh_lock.
3413                  */
3414                 if (rt != NULL)
3415                         RT_UNLOCK(rt);
3416                 if (nd6_is_addr_neighbor(&sin6, ifp, 0)) {
3417                         /* "rtrele" may have been used, so clean up "rt" now */
3418                         if (rt != NULL) {
3419                                 /* Don't free "hint0" */
3420                                 if (rt == hint0)
3421                                         RT_REMREF(rt);
3422                                 else
3423                                         rtfree(rt);
3424                         }
3425                         /* Callee returns a locked route upon success */
3426                         rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0);
3427                         if (rt != NULL) {
3428                                 RT_LOCK_ASSERT_HELD(rt);
3429                                 ln = rt->rt_llinfo;
3430                         }
3431                 } else if (rt != NULL) {
3432                         RT_LOCK(rt);
3433                 }
3434         }
3435
3436         if (!ln || !rt) {
3437                 if (rt != NULL)
3438                         RT_UNLOCK(rt);
3439                 lck_rw_lock_shared(nd_if_rwlock);
3440                 ndi = ND_IFINFO(ifp);
3441                 VERIFY(ndi != NULL && ndi->initialized);
3442                 lck_mtx_lock(&ndi->lock);
3443                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
3444                     !(ndi->flags & ND6_IFF_PERFORMNUD)) {
3445                         lck_mtx_unlock(&ndi->lock);
3446                         lck_rw_done(nd_if_rwlock);
3447                         log(LOG_DEBUG,
3448                             "nd6_output: can't allocate llinfo for %s "
3449                             "(ln=0x%llx, rt=0x%llx)\n",
3450                             ip6_sprintf(&dst->sin6_addr),
3451                             (uint64_t)VM_KERNEL_ADDRPERM(ln),
3452                             (uint64_t)VM_KERNEL_ADDRPERM(rt));
3453                         senderr(EIO);   /* XXX: good error? */
3454                 }
3455                 lck_mtx_unlock(&ndi->lock);
3456                 lck_rw_done(nd_if_rwlock);
3457
3458                 goto sendpkt;   /* send anyway */
3459         }
3460
3461         net_update_uptime();
3462         timenow = net_uptime();
3463
3464         /* We don't have to do link-layer address resolution on a p2p link. */
3465         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
3466             ln->ln_state < ND6_LLINFO_REACHABLE) {
3467                 ln->ln_state = ND6_LLINFO_STALE;
3468                 ln_setexpire(ln, timenow + nd6_gctimer);
3469         }
3470
3471         /*
3472          * The first time we send a packet to a neighbor whose entry is
3473          * STALE, we have to change the state to DELAY and a sets a timer to
3474          * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
3475          * neighbor unreachability detection on expiration.
3476          * (RFC 4861 7.3.3)
3477          */
3478         if (ln->ln_state == ND6_LLINFO_STALE) {
3479                 ln->ln_asked = 0;
3480                 ln->ln_state = ND6_LLINFO_DELAY;
3481                 ln_setexpire(ln, timenow + nd6_delay);
3482                 /* N.B.: we will re-arm the timer below. */
3483                 _CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE);
3484         }
3485
3486         /*
3487          * If the neighbor cache entry has a state other than INCOMPLETE
3488          * (i.e. its link-layer address is already resolved), just
3489          * send the packet.
3490          */
3491         if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
3492                 RT_UNLOCK(rt);
3493                 /*
3494                  * Move this entry to the head of the queue so that it is
3495                  * less likely for this entry to be a target of forced
3496                  * garbage collection (see nd6_rtrequest()).  Do this only
3497                  * if the entry is non-permanent (as permanent ones will
3498                  * never be purged), and if the number of active entries
3499                  * is at least half of the threshold.
3500                  */
3501                 if (ln->ln_state == ND6_LLINFO_DELAY ||
3502                     (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3503                     nd6_inuse >= (ip6_neighborgcthresh >> 1))) {
3504                         lck_mtx_lock(rnh_lock);
3505                         if (ln->ln_state == ND6_LLINFO_DELAY)
3506                                 nd6_sched_timeout(NULL, NULL);
3507                         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3508                             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3509                                 RT_LOCK_SPIN(rt);
3510                                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3511                                         LN_DEQUEUE(ln);
3512                                         LN_INSERTHEAD(ln);
3513                                 }
3514                                 RT_UNLOCK(rt);
3515                         }
3516                         lck_mtx_unlock(rnh_lock);
3517                 }
3518                 goto sendpkt;
3519         }
3520
3521         /*
3522          * If this is a prefix proxy route, record the inbound interface
3523          * so that it can be excluded from the list of interfaces eligible
3524          * for forwarding the proxied NS in nd6_prproxy_ns_output().
3525          */
3526         if (rt->rt_flags & RTF_PROXY)
3527                 ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp);
3528
3529         /*
3530          * There is a neighbor cache entry, but no ethernet address
3531          * response yet.  Replace the held mbuf (if any) with this
3532          * latest one.
3533          *
3534          * This code conforms to the rate-limiting rule described in Section
3535          * 7.2.2 of RFC 4861, because the timer is set correctly after sending
3536          * an NS below.
3537          */
3538         if (ln->ln_state == ND6_LLINFO_NOSTATE)
3539                 ln->ln_state = ND6_LLINFO_INCOMPLETE;
3540         if (ln->ln_hold)
3541                 m_freem(ln->ln_hold);
3542         ln->ln_hold = m;
3543         if (ln->ln_expire != 0 && ln->ln_asked < nd6_mmaxtries &&
3544             ln->ln_expire <= timenow) {
3545                 ln->ln_asked++;
3546                 lck_rw_lock_shared(nd_if_rwlock);
3547                 ndi = ND_IFINFO(ifp);
3548                 VERIFY(ndi != NULL && ndi->initialized);
3549                 lck_mtx_lock(&ndi->lock);
3550                 ln_setexpire(ln, timenow + ndi->retrans / 1000);
3551                 lck_mtx_unlock(&ndi->lock);
3552                 lck_rw_done(nd_if_rwlock);
3553                 RT_UNLOCK(rt);
3554                 /* We still have a reference on rt (for ln) */
3555                 if (ip6_forwarding)
3556                         nd6_prproxy_ns_output(ifp, origifp, NULL,
3557                             &dst->sin6_addr, ln);
3558                 else
3559                         nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
3560                 lck_mtx_lock(rnh_lock);
3561                 nd6_sched_timeout(NULL, NULL);
3562                 lck_mtx_unlock(rnh_lock);
3563         } else {
3564                 RT_UNLOCK(rt);
3565         }
3566         /*
3567          * Move this entry to the head of the queue so that it is
3568          * less likely for this entry to be a target of forced
3569          * garbage collection (see nd6_rtrequest()).  Do this only
3570          * if the entry is non-permanent (as permanent ones will
3571          * never be purged), and if the number of active entries
3572          * is at least half of the threshold.
3573          */
3574         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3575             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3576                 lck_mtx_lock(rnh_lock);
3577                 RT_LOCK_SPIN(rt);
3578                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3579                         LN_DEQUEUE(ln);
3580                         LN_INSERTHEAD(ln);
3581                 }
3582                 /* Clean up "rt" now while we can */
3583                 if (rt == hint0) {
3584                         RT_REMREF_LOCKED(rt);
3585                         RT_UNLOCK(rt);
3586                 } else {
3587                         RT_UNLOCK(rt);
3588                         rtfree_locked(rt);
3589                 }
3590                 rt = NULL;      /* "rt" has been taken care of */
3591                 lck_mtx_unlock(rnh_lock);
3592         }
3593         error = 0;
3594         goto release;
3595
3596 sendpkt:
3597         if (rt != NULL)
3598                 RT_LOCK_ASSERT_NOTHELD(rt);
3599
3600         /* discard the packet if IPv6 operation is disabled on the interface */
3601         if (ifp->if_eflags & IFEF_IPV6_DISABLED) {
3602                 error = ENETDOWN; /* better error? */
3603                 goto bad;
3604         }
3605
3606         if (ifp->if_flags & IFF_LOOPBACK) {
3607                 /* forwarding rules require the original scope_id */
3608                 m->m_pkthdr.rcvif = origifp;
3609                 error = dlil_output(origifp, PF_INET6, m, (caddr_t)rt,
3610                     SA(dst), 0, adv);
3611                 goto release;
3612         } else {
3613                 /* Do not allow loopback address to wind up on a wire */
3614                 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
3615
3616                 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
3617                     IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
3618                         ip6stat.ip6s_badscope++;
3619                         error = EADDRNOTAVAIL;
3620                         goto bad;
3621                 }
3622         }
3623
3624         if (rt != NULL) {
3625                 RT_LOCK_SPIN(rt);
3626                 /* Mark use timestamp */
3627                 if (rt->rt_llinfo != NULL)
3628                         nd6_llreach_use(rt->rt_llinfo);
3629                 RT_UNLOCK(rt);
3630         }
3631
3632         if (hint != NULL && nstat_collect) {
3633                 int scnt;
3634
3635                 if ((m->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
3636                     (m->m_pkthdr.tso_segsz > 0))
3637                         scnt = m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
3638                 else
3639                         scnt = 1;
3640
3641                 nstat_route_tx(hint, scnt, m->m_pkthdr.len, 0);
3642         }
3643
3644         m->m_pkthdr.rcvif = NULL;
3645         error = dlil_output(ifp, PF_INET6, m, (caddr_t)rt, SA(dst), 0, adv);
3646         goto release;
3647
3648 bad:
3649         if (m != NULL)
3650                 m_freem(m);
3651
3652 release:
3653         /* Clean up "rt" unless it's already been done */
3654         if (rt != NULL) {
3655                 RT_LOCK_SPIN(rt);
3656                 if (rt == hint0) {
3657                         RT_REMREF_LOCKED(rt);
3658                         RT_UNLOCK(rt);
3659                 } else {
3660                         RT_UNLOCK(rt);
3661                         rtfree(rt);
3662                 }
3663         }
3664         /* And now clean up "rtrele" if there is any */
3665         if (rtrele != NULL) {
3666                 RT_LOCK_SPIN(rtrele);
3667                 if (rtrele == hint0) {
3668                         RT_REMREF_LOCKED(rtrele);
3669                         RT_UNLOCK(rtrele);
3670                 } else {
3671                         RT_UNLOCK(rtrele);
3672                         rtfree(rtrele);
3673                 }
3674         }
3675         return (error);
3676 }
3677 #undef senderr
3678
3679 int
3680 nd6_need_cache(struct ifnet *ifp)
3681 {
3682         /*
3683          * XXX: we currently do not make neighbor cache on any interface
3684          * other than ARCnet, Ethernet, FDDI and GIF.
3685          *
3686          * RFC2893 says:
3687          * - unidirectional tunnels needs no ND
3688          */
3689         switch (ifp->if_type) {
3690         case IFT_ARCNET:
3691         case IFT_ETHER:
3692         case IFT_FDDI:
3693         case IFT_IEEE1394:
3694         case IFT_L2VLAN:
3695         case IFT_IEEE8023ADLAG:
3696 #if IFT_IEEE80211
3697         case IFT_IEEE80211:
3698 #endif
3699         case IFT_GIF:           /* XXX need more cases? */
3700         case IFT_PPP:
3701 #if IFT_TUNNEL
3702         case IFT_TUNNEL:
3703 #endif
3704         case IFT_BRIDGE:
3705         case IFT_CELLULAR:
3706                 return (1);
3707         default:
3708                 return (0);
3709         }
3710 }
3711
3712 int
3713 nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
3714     struct sockaddr *dst, u_char *desten)
3715 {
3716         int i;
3717         struct sockaddr_dl *sdl;
3718
3719         if (m->m_flags & M_MCAST) {
3720                 switch (ifp->if_type) {
3721                 case IFT_ETHER:
3722                 case IFT_FDDI:
3723                 case IFT_L2VLAN:
3724                 case IFT_IEEE8023ADLAG:
3725 #if IFT_IEEE80211
3726                 case IFT_IEEE80211:
3727 #endif
3728                 case IFT_BRIDGE:
3729                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten);
3730                         return (1);
3731                 case IFT_IEEE1394:
3732                         for (i = 0; i < ifp->if_addrlen; i++)
3733                                 desten[i] = ~0;
3734                         return (1);
3735                 case IFT_ARCNET:
3736                         *desten = 0;
3737                         return (1);
3738                 default:
3739                         return (0); /* caller will free mbuf */
3740                 }
3741         }
3742
3743         if (rt == NULL) {
3744                 /* this could happen, if we could not allocate memory */
3745                 return (0); /* caller will free mbuf */
3746         }
3747         RT_LOCK(rt);
3748         if (rt->rt_gateway->sa_family != AF_LINK) {
3749                 printf("nd6_storelladdr: something odd happens\n");
3750                 RT_UNLOCK(rt);
3751                 return (0); /* caller will free mbuf */
3752         }
3753         sdl = SDL(rt->rt_gateway);
3754         if (sdl->sdl_alen == 0) {
3755                 /* this should be impossible, but we bark here for debugging */
3756                 printf("nd6_storelladdr: sdl_alen == 0\n");
3757                 RT_UNLOCK(rt);
3758                 return (0); /* caller will free mbuf */
3759         }
3760
3761         bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
3762         RT_UNLOCK(rt);
3763         return (1);
3764 }
3765
3766 /*
3767  * This is the ND pre-output routine; care must be taken to ensure that
3768  * the "hint" route never gets freed via rtfree(), since the caller may
3769  * have stored it inside a struct route with a reference held for that
3770  * placeholder.
3771  */
3772 errno_t
3773 nd6_lookup_ipv6(ifnet_t  ifp, const struct sockaddr_in6 *ip6_dest,
3774     struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
3775     mbuf_t packet)
3776 {
3777         route_t route = hint;
3778         errno_t result = 0;
3779         struct sockaddr_dl *sdl = NULL;
3780         size_t  copy_len;
3781
3782         if (ip6_dest->sin6_family != AF_INET6)
3783                 return (EAFNOSUPPORT);
3784
3785         if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
3786                 return (ENETDOWN);
3787
3788         if (hint != NULL) {
3789                 /*
3790                  * Callee holds a reference on the route and returns
3791                  * with the route entry locked, upon success.
3792                  */
3793                 result = route_to_gwroute((const struct sockaddr *)ip6_dest,
3794                     hint, &route);
3795                 if (result != 0)
3796                         return (result);
3797                 if (route != NULL)
3798                         RT_LOCK_ASSERT_HELD(route);
3799         }
3800
3801         if ((packet->m_flags & M_MCAST) != 0) {
3802                 if (route != NULL)
3803                         RT_UNLOCK(route);
3804                 result = dlil_resolve_multi(ifp,
3805                     (const struct sockaddr *)ip6_dest,
3806                     SA(ll_dest), ll_dest_len);
3807                 if (route != NULL)
3808                         RT_LOCK(route);
3809                 goto release;
3810         }
3811
3812         if (route == NULL) {
3813                 /*
3814                  * This could happen, if we could not allocate memory or
3815                  * if route_to_gwroute() didn't return a route.
3816                  */
3817                 result = ENOBUFS;
3818                 goto release;
3819         }
3820
3821         if (route->rt_gateway->sa_family != AF_LINK) {
3822                 printf("%s: route %s on %s%d gateway address not AF_LINK\n",
3823                     __func__, ip6_sprintf(&ip6_dest->sin6_addr),
3824                     route->rt_ifp->if_name, route->rt_ifp->if_unit);
3825                 result = EADDRNOTAVAIL;
3826                 goto release;
3827         }
3828
3829         sdl = SDL(route->rt_gateway);
3830         if (sdl->sdl_alen == 0) {
3831                 /* this should be impossible, but we bark here for debugging */
3832                 printf("%s: route %s on %s%d sdl_alen == 0\n", __func__,
3833                     ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name,
3834                     route->rt_ifp->if_unit);
3835                 result = EHOSTUNREACH;
3836                 goto release;
3837         }
3838
3839         copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
3840         bcopy(sdl, ll_dest, copy_len);
3841
3842 release:
3843         if (route != NULL) {
3844                 if (route == hint) {
3845                         RT_REMREF_LOCKED(route);
3846                         RT_UNLOCK(route);
3847                 } else {
3848                         RT_UNLOCK(route);
3849                         rtfree(route);
3850                 }
3851         }
3852         return (result);
3853 }
3854
3855 int
3856 nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
3857 {
3858         uint32_t b, a;
3859         int err = 0;
3860
3861         /*
3862          * Handle ND6_IFF_IFDISABLED
3863          */
3864         if ((before & ND6_IFF_IFDISABLED) ||
3865             (after & ND6_IFF_IFDISABLED)) {
3866                 b = (before & ND6_IFF_IFDISABLED);
3867                 a = (after & ND6_IFF_IFDISABLED);
3868
3869                 if (b != a && (err = nd6_if_disable(ifp,
3870                      ((int32_t)(a - b) > 0))) != 0)
3871                         goto done;
3872         }
3873
3874         /*
3875          * Handle ND6_IFF_PROXY_PREFIXES
3876          */
3877         if ((before & ND6_IFF_PROXY_PREFIXES) ||
3878             (after & ND6_IFF_PROXY_PREFIXES)) {
3879                 b = (before & ND6_IFF_PROXY_PREFIXES);
3880                 a = (after & ND6_IFF_PROXY_PREFIXES);
3881
3882                 if (b != a && (err = nd6_if_prproxy(ifp,
3883                      ((int32_t)(a - b) > 0))) != 0)
3884                         goto done;
3885         }
3886 done:
3887         return (err);
3888 }
3889
3890 /*
3891  * Enable/disable IPv6 on an interface, called as part of
3892  * setting/clearing ND6_IFF_IFDISABLED, or during DAD failure.
3893  */
3894 int
3895 nd6_if_disable(struct ifnet *ifp, boolean_t enable)
3896 {
3897         ifnet_lock_shared(ifp);
3898         if (enable)
3899                 ifp->if_eflags |= IFEF_IPV6_DISABLED;
3900         else
3901                 ifp->if_eflags &= ~IFEF_IPV6_DISABLED;
3902         ifnet_lock_done(ifp);
3903
3904         return (0);
3905 }
3906
3907 static int
3908 nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
3909 {
3910 #pragma unused(oidp, arg1, arg2)
3911         char pbuf[MAX_IPv6_STR_LEN];
3912         struct nd_defrouter *dr;
3913         int error = 0;
3914
3915         if (req->newptr != USER_ADDR_NULL)
3916                 return (EPERM);
3917
3918         lck_mtx_lock(nd6_mutex);
3919         if (proc_is64bit(req->p)) {
3920                 struct in6_defrouter_64 d;
3921
3922                 bzero(&d, sizeof (d));
3923                 d.rtaddr.sin6_family = AF_INET6;
3924                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
3925
3926                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
3927                         d.rtaddr.sin6_addr = dr->rtaddr;
3928                         if (in6_recoverscope(&d.rtaddr,
3929                             &dr->rtaddr, dr->ifp) != 0)
3930                                 log(LOG_ERR, "scope error in default router "
3931                                     "list (%s)\n", inet_ntop(AF_INET6,
3932                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
3933                         d.flags = dr->flags;
3934                         d.stateflags = dr->stateflags;
3935                         d.stateflags &= ~NDDRF_PROCESSED;
3936                         d.rtlifetime = dr->rtlifetime;
3937                         d.expire = nddr_getexpire(dr);
3938                         d.if_index = dr->ifp->if_index;
3939                         error = SYSCTL_OUT(req, &d, sizeof (d));
3940                         if (error != 0)
3941                                 break;
3942                 }
3943         } else {
3944                 struct in6_defrouter_32 d;
3945
3946                 bzero(&d, sizeof (d));
3947                 d.rtaddr.sin6_family = AF_INET6;
3948                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
3949
3950                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
3951                         d.rtaddr.sin6_addr = dr->rtaddr;
3952                         if (in6_recoverscope(&d.rtaddr,
3953                             &dr->rtaddr, dr->ifp) != 0)
3954                                 log(LOG_ERR, "scope error in default router "
3955                                     "list (%s)\n", inet_ntop(AF_INET6,
3956                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
3957                         d.flags = dr->flags;
3958                         d.stateflags = dr->stateflags;
3959                         d.stateflags &= ~NDDRF_PROCESSED;
3960                         d.rtlifetime = dr->rtlifetime;
3961                         d.expire = nddr_getexpire(dr);
3962                         d.if_index = dr->ifp->if_index;
3963                         error = SYSCTL_OUT(req, &d, sizeof (d));
3964                         if (error != 0)
3965                                 break;
3966                 }
3967         }
3968         lck_mtx_unlock(nd6_mutex);
3969         return (error);
3970 }
3971
3972 static int
3973 nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
3974 {
3975 #pragma unused(oidp, arg1, arg2)
3976         char pbuf[MAX_IPv6_STR_LEN];
3977         struct nd_pfxrouter *pfr;
3978         struct sockaddr_in6 s6;
3979         struct nd_prefix *pr;
3980         int error = 0;
3981
3982         if (req->newptr != USER_ADDR_NULL)
3983                 return (EPERM);
3984
3985         bzero(&s6, sizeof (s6));
3986         s6.sin6_family = AF_INET6;
3987         s6.sin6_len = sizeof (s6);
3988
3989         lck_mtx_lock(nd6_mutex);
3990         if (proc_is64bit(req->p)) {
3991                 struct in6_prefix_64 p;
3992
3993                 bzero(&p, sizeof (p));
3994                 p.origin = PR_ORIG_RA;
3995
3996                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
3997                         NDPR_LOCK(pr);
3998                         p.prefix = pr->ndpr_prefix;
3999                         if (in6_recoverscope(&p.prefix,
4000                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4001                                 log(LOG_ERR, "scope error in "
4002                                     "prefix list (%s)\n", inet_ntop(AF_INET6,
4003                                     &p.prefix.sin6_addr, pbuf, sizeof (pbuf)));
4004                         p.raflags = pr->ndpr_raf;
4005                         p.prefixlen = pr->ndpr_plen;
4006                         p.vltime = pr->ndpr_vltime;
4007                         p.pltime = pr->ndpr_pltime;
4008                         p.if_index = pr->ndpr_ifp->if_index;
4009                         p.expire = ndpr_getexpire(pr);
4010                         p.refcnt = pr->ndpr_addrcnt;
4011                         p.flags = pr->ndpr_stateflags;
4012                         p.advrtrs = 0;
4013                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4014                                 p.advrtrs++;
4015                         error = SYSCTL_OUT(req, &p, sizeof (p));
4016                         if (error != 0) {
4017                                 NDPR_UNLOCK(pr);
4018                                 break;
4019                         }
4020                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4021                                 s6.sin6_addr = pfr->router->rtaddr;
4022                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4023                                     pfr->router->ifp) != 0)
4024                                         log(LOG_ERR,
4025                                             "scope error in prefix list (%s)\n",
4026                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4027                                             pbuf, sizeof (pbuf)));
4028                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4029                                 if (error != 0)
4030                                         break;
4031                         }
4032                         NDPR_UNLOCK(pr);
4033                         if (error != 0)
4034                                 break;
4035                 }
4036         } else {
4037                 struct in6_prefix_32 p;
4038
4039                 bzero(&p, sizeof (p));
4040                 p.origin = PR_ORIG_RA;
4041
4042                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
4043                         NDPR_LOCK(pr);
4044                         p.prefix = pr->ndpr_prefix;
4045                         if (in6_recoverscope(&p.prefix,
4046                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4047                                 log(LOG_ERR,
4048                                     "scope error in prefix list (%s)\n",
4049                                     inet_ntop(AF_INET6, &p.prefix.sin6_addr,
4050                                     pbuf, sizeof (pbuf)));
4051                         p.raflags = pr->ndpr_raf;
4052                         p.prefixlen = pr->ndpr_plen;
4053                         p.vltime = pr->ndpr_vltime;
4054                         p.pltime = pr->ndpr_pltime;
4055                         p.if_index = pr->ndpr_ifp->if_index;
4056                         p.expire = ndpr_getexpire(pr);
4057                         p.refcnt = pr->ndpr_addrcnt;
4058                         p.flags = pr->ndpr_stateflags;
4059                         p.advrtrs = 0;
4060                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4061                                 p.advrtrs++;
4062                         error = SYSCTL_OUT(req, &p, sizeof (p));
4063                         if (error != 0) {
4064                                 NDPR_UNLOCK(pr);
4065                                 break;
4066                         }
4067                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4068                                 s6.sin6_addr = pfr->router->rtaddr;
4069                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4070                                     pfr->router->ifp) != 0)
4071                                         log(LOG_ERR,
4072                                             "scope error in prefix list (%s)\n",
4073                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4074                                             pbuf, sizeof (pbuf)));
4075                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4076                                 if (error != 0)
4077                                         break;
4078                         }
4079                         NDPR_UNLOCK(pr);
4080                         if (error != 0)
4081                                 break;
4082                 }
4083         }
4084         lck_mtx_unlock(nd6_mutex);
4085
4086         return (error);
4087 }