bsd/netinet6/nd6.c

   1 /*
   2  * Copyright (c) 2000-2018 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 /*
  30  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
  31  * All rights reserved.
  32  *
  33  * Redistribution and use in source and binary forms, with or without
  34  * modification, are permitted provided that the following conditions
  35  * are met:
  36  * 1. Redistributions of source code must retain the above copyright
  37  *    notice, this list of conditions and the following disclaimer.
  38  * 2. Redistributions in binary form must reproduce the above copyright
  39  *    notice, this list of conditions and the following disclaimer in the
  40  *    documentation and/or other materials provided with the distribution.
  41  * 3. Neither the name of the project nor the names of its contributors
  42  *    may be used to endorse or promote products derived from this software
  43  *    without specific prior written permission.
  44  *
  45  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  55  * SUCH DAMAGE.
  56  */
  57
  58 /*
  59  * XXX
  60  * KAME 970409 note:
  61  * BSD/OS version heavily modifies this code, related to llinfo.
  62  * Since we don't have BSD/OS version of net/route.c in our hand,
  63  * I left the code mostly as it was in 970310.  -- itojun
  64  */
  65
  66 #include <sys/param.h>
  67 #include <sys/systm.h>
  68 #include <sys/malloc.h>
  69 #include <sys/mbuf.h>
  70 #include <sys/socket.h>
  71 #include <sys/sockio.h>
  72 #include <sys/time.h>
  73 #include <sys/kernel.h>
  74 #include <sys/sysctl.h>
  75 #include <sys/errno.h>
  76 #include <sys/syslog.h>
  77 #include <sys/protosw.h>
  78 #include <sys/proc.h>
  79 #include <sys/mcache.h>
  80
  81 #include <dev/random/randomdev.h>
  82
  83 #include <kern/queue.h>
  84 #include <kern/zalloc.h>
  85
  86 #include <net/if.h>
  87 #include <net/if_dl.h>
  88 #include <net/if_types.h>
  89 #include <net/if_llreach.h>
  90 #include <net/route.h>
  91 #include <net/dlil.h>
  92 #include <net/ntstat.h>
  93 #include <net/net_osdep.h>
  94 #include <net/nwk_wq.h>
  95
  96 #include <netinet/in.h>
  97 #include <netinet/in_arp.h>
  98 #include <netinet/if_ether.h>
  99 #include <netinet6/in6_var.h>
 100 #include <netinet/ip6.h>
 101 #include <netinet6/ip6_var.h>
 102 #include <netinet6/nd6.h>
 103 #include <netinet6/scope6_var.h>
 104 #include <netinet/icmp6.h>
 105
 106 #include "loop.h"
 107
  108 #define ND6_SLOWTIMER_INTERVAL          (60 * 60)       /* 1 hour (seconds; scaled by hz when scheduled) */
  109 #define ND6_RECALC_REACHTM_INTERVAL     (60 * 120)      /* 2 hours */
  110
      /*
       * Compare two sockaddrs byte-for-byte over the first argument's sa_len.
       * Note that a1 is evaluated twice.
       */
  111 #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
  112
  113 /* timer values */
  114 int     nd6_prune       = 1;    /* walk list every 1 second */
  115 int     nd6_prune_lazy  = 5;    /* lazily walk list every 5 seconds */
  116 int     nd6_delay       = 5;    /* delay before the first probe: 5 seconds */
  117 int     nd6_umaxtries   = 3;    /* maximum unicast query */
  118 int     nd6_mmaxtries   = 3;    /* maximum multicast query */
  119 int     nd6_useloopback = 1;    /* use loopback interface for local traffic */
  120 int     nd6_gctimer     = (60 * 60 * 24); /* 1 day: garbage collection timer */
  121
  122 /* preventing too many loops in ND option parsing */
  123 int nd6_maxndopt = 10;  /* max # of ND options allowed */
  124
  125 int nd6_maxqueuelen = 1; /* max # of packets cached in unresolved ND entries */
  126
      /* Default log verbosity; tested by the nd6log() family of macros */
  127 #if ND6_DEBUG
  128 int nd6_debug = 1;
  129 #else
  130 int nd6_debug = 0;
  131 #endif
  132
      /* Optimistic DAD is enabled by default for every address class listed */
  133 int nd6_optimistic_dad =
  134         (ND6_OPTIMISTIC_DAD_LINKLOCAL|ND6_OPTIMISTIC_DAD_AUTOCONF|
  135         ND6_OPTIMISTIC_DAD_TEMPORARY|ND6_OPTIMISTIC_DAD_DYNAMIC|
  136         ND6_OPTIMISTIC_DAD_SECURED|ND6_OPTIMISTIC_DAD_MANUAL);
  137
  138 /* for debugging? */
  139 static int nd6_inuse, nd6_allocated;
 140
 141 /*
 142  * Synchronization notes:
 143  *
  144  * The global list of ND entries is stored in llinfo_nd6; an entry
 145  * gets inserted into the list when the route is created and gets
 146  * removed from the list when it is deleted; this is done as part
 147  * of RTM_ADD/RTM_RESOLVE/RTM_DELETE in nd6_rtrequest().
 148  *
 149  * Because rnh_lock and rt_lock for the entry are held during those
 150  * operations, the same locks (and thus lock ordering) must be used
 151  * elsewhere to access the relevant data structure fields:
 152  *
 153  * ln_next, ln_prev, ln_rt
 154  *
 155  *      - Routing lock (rnh_lock)
 156  *
 157  * ln_hold, ln_asked, ln_expire, ln_state, ln_router, ln_flags,
 158  * ln_llreach, ln_lastused
 159  *
 160  *      - Routing entry lock (rt_lock)
 161  *
 162  * Due to the dependency on rt_lock, llinfo_nd6 has the same lifetime
 163  * as the route entry itself.  When a route is deleted (RTM_DELETE),
 164  * it is simply removed from the global list but the memory is not
 165  * freed until the route itself is freed.
 166  */
  167 struct llinfo_nd6 llinfo_nd6 = {
              /*
               * List head of the circular, doubly-linked ND entry list.
               * Both links initially point back at the head itself, i.e.
               * the list starts out empty.
               */
  168         .ln_next = &llinfo_nd6,
  169         .ln_prev = &llinfo_nd6,
  170 };
  171
      /*
       * Lock group and attributes for the per-interface nd_ifinfo lock;
       * allocated in nd6_init() and used by nd6_ifattach().
       */
  172 static lck_grp_attr_t   *nd_if_lock_grp_attr = NULL;
  173 static lck_grp_t        *nd_if_lock_grp = NULL;
  174 static lck_attr_t       *nd_if_lock_attr = NULL;
  175
  176 /* Protected by nd6_mutex */
  177 struct nd_drhead nd_defrouter;  /* default router list; TAILQ_INIT'd in nd6_init() */
  178 struct nd_prhead nd_prefix = { 0 };
 179
 180 /*
 181  * nd6_timeout() is scheduled on a demand basis.  nd6_timeout_run is used
 182  * to indicate whether or not a timeout has been scheduled.  The rnh_lock
 183  * mutex is used to protect this scheduling; it is a natural choice given
 184  * the work done in the timer callback.  Unfortunately, there are cases
 185  * when nd6_timeout() needs to be scheduled while rnh_lock cannot be easily
 186  * held, due to lock ordering.  In those cases, we utilize a "demand" counter
 187  * nd6_sched_timeout_want which can be atomically incremented without
  188  * having to hold rnh_lock.  In places where we acquire rnh_lock, such as
  189  * nd6_rtrequest(), we check this counter and schedule the timer if it is
  190  * non-zero.  The increment happens in various places where we allocate
  191  * new ND entries, default routers, prefixes and addresses.
 192  */
  193 static int nd6_timeout_run;             /* nd6_timeout is scheduled to run */
  194 static void nd6_timeout(void *);
  195 int nd6_sched_timeout_want;             /* demand count for timer to be scheduled */
  196 static boolean_t nd6_fast_timer_on = FALSE;
  197
  198 /* Serialization variables for nd6_service(), protected by rnh_lock */
  199 static boolean_t nd6_service_busy;      /* TRUE while a thread is running nd6_service() */
  200 static void *nd6_service_wc = &nd6_service_busy; /* msleep() wait channel for blocked callers */
  201 static int nd6_service_waiters = 0;     /* incremented before each sleep on the wait channel */
  202
  203 int nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
  204 static struct sockaddr_in6 all1_sa;     /* all-ones IPv6 address; filled in by nd6_init() */
  205
  206 static int regen_tmpaddr(struct in6_ifaddr *);
  207 extern lck_mtx_t *nd6_mutex;
  208
      /* llinfo_nd6 allocation, teardown and reporting helpers */
  209 static struct llinfo_nd6 *nd6_llinfo_alloc(int);
  210 static void nd6_llinfo_free(void *);
  211 static void nd6_llinfo_purge(struct rtentry *);
  212 static void nd6_llinfo_get_ri(struct rtentry *, struct rt_reach_info *);
  213 static void nd6_llinfo_get_iflri(struct rtentry *, struct ifnet_llreach_info *);
  214 static void nd6_llinfo_refresh(struct rtentry *);
  215 static uint64_t ln_getexpire(struct llinfo_nd6 *);
  216
  217 static void nd6_service(void *);
  218 static void nd6_slowtimo(void *);
  219 static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, struct ifnet *);
  220 static int nd6_siocgdrlst(void *, int);
  221 static int nd6_siocgprlst(void *, int);
  222
      /* Handlers registered with SYSCTL_PROC() for the nd6_drlist/nd6_prlist nodes */
  223 static int nd6_sysctl_drlist SYSCTL_HANDLER_ARGS;
  224 static int nd6_sysctl_prlist SYSCTL_HANDLER_ARGS;
 225
  226 /*
  227  * Insertion and removal from llinfo_nd6 must be done with rnh_lock held.
       * Both macros also assert that the lock of the entry's route (ln_rt)
       * is held.
       *
       * LN_DEQUEUE unlinks _ln from the circular list, resets both of its
       * link pointers to NULL and clears ND6_LNF_IN_USE.
  228  */
  229 #define LN_DEQUEUE(_ln) do {                                            \
  230         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
  231         RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
  232         (_ln)->ln_next->ln_prev = (_ln)->ln_prev;                       \
  233         (_ln)->ln_prev->ln_next = (_ln)->ln_next;                       \
  234         (_ln)->ln_prev = (_ln)->ln_next = NULL;                         \
  235         (_ln)->ln_flags &= ~ND6_LNF_IN_USE;                             \
  236 } while (0)
  237
      /*
       * LN_INSERTHEAD links _ln immediately after the list head llinfo_nd6
       * and sets ND6_LNF_IN_USE.
       */
  238 #define LN_INSERTHEAD(_ln) do {                                         \
  239         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);                 \
  240         RT_LOCK_ASSERT_HELD((_ln)->ln_rt);                              \
  241         (_ln)->ln_next = llinfo_nd6.ln_next;                            \
  242         llinfo_nd6.ln_next = (_ln);                                     \
  243         (_ln)->ln_prev = &llinfo_nd6;                                   \
  244         (_ln)->ln_next->ln_prev = (_ln);                                \
  245         (_ln)->ln_flags |= ND6_LNF_IN_USE;                              \
  246 } while (0)
 247
  248 static struct zone *llinfo_nd6_zone;    /* backing zone for llinfo_nd6; created in nd6_init() */
  249 #define LLINFO_ND6_ZONE_MAX     256             /* maximum elements in zone */
  250 #define LLINFO_ND6_ZONE_NAME    "llinfo_nd6"    /* name for zone */
  251
  252 extern int tvtohz(struct timeval *);
  253
      /* Set to 1 at the end of nd6_init(), which VERIFYs it is still clear on entry */
  254 static int nd6_init_done;
  255
  256 SYSCTL_DECL(_net_inet6_icmp6);
  257
      /* Read-only node served by nd6_sysctl_drlist() */
  258 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
  259         CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
  260         nd6_sysctl_drlist, "S,in6_defrouter", "");
  261
      /*
       * Read-only node served by nd6_sysctl_prlist().
       * NOTE(review): the format string names in6_defrouter here as well,
       * although this node is the prefix list -- confirm whether intended.
       */
  262 SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
  263         CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED, 0, 0,
  264         nd6_sysctl_prlist, "S,in6_defrouter", "");
  265
  266 SYSCTL_DECL(_net_inet6_ip6);
  267
      /* Read-write integer exported as the "maxchainsent" node; starts at 0 */
  268 static int ip6_maxchainsent = 0;
  269 SYSCTL_INT(_net_inet6_ip6, OID_AUTO, maxchainsent,
  270         CTLFLAG_RW | CTLFLAG_LOCKED, &ip6_maxchainsent, 0,
  271         "use dlil_output_list");
 272
 273 void
 274 nd6_init(void)
 275 {
 276         int i;
 277
 278         VERIFY(!nd6_init_done);
 279
 280         all1_sa.sin6_family = AF_INET6;
 281         all1_sa.sin6_len = sizeof (struct sockaddr_in6);
 282         for (i = 0; i < sizeof (all1_sa.sin6_addr); i++)
 283                 all1_sa.sin6_addr.s6_addr[i] = 0xff;
 284
 285         /* initialization of the default router list */
 286         TAILQ_INIT(&nd_defrouter);
 287
 288         nd_if_lock_grp_attr = lck_grp_attr_alloc_init();
 289         nd_if_lock_grp = lck_grp_alloc_init("nd_if_lock", nd_if_lock_grp_attr);
 290         nd_if_lock_attr = lck_attr_alloc_init();
 291
 292         llinfo_nd6_zone = zinit(sizeof (struct llinfo_nd6),
 293             LLINFO_ND6_ZONE_MAX * sizeof (struct llinfo_nd6), 0,
 294             LLINFO_ND6_ZONE_NAME);
 295         if (llinfo_nd6_zone == NULL)
 296                 panic("%s: failed allocating llinfo_nd6_zone", __func__);
 297
 298         zone_change(llinfo_nd6_zone, Z_EXPAND, TRUE);
 299         zone_change(llinfo_nd6_zone, Z_CALLERACCT, FALSE);
 300
 301         nd6_nbr_init();
 302         nd6_rtr_init();
 303         nd6_prproxy_init();
 304
 305         nd6_init_done = 1;
 306
 307         /* start timer */
 308         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
 309 }
 310
 311 static struct llinfo_nd6 *
 312 nd6_llinfo_alloc(int how)
 313 {
 314         struct llinfo_nd6 *ln;
 315
 316         ln = (how == M_WAITOK) ? zalloc(llinfo_nd6_zone) :
 317             zalloc_noblock(llinfo_nd6_zone);
 318         if (ln != NULL)
 319                 bzero(ln, sizeof (*ln));
 320
 321         return (ln);
 322 }
 323
 324 static void
 325 nd6_llinfo_free(void *arg)
 326 {
 327         struct llinfo_nd6 *ln = arg;
 328
 329         if (ln->ln_next != NULL || ln->ln_prev != NULL) {
 330                 panic("%s: trying to free %p when it is in use", __func__, ln);
 331                 /* NOTREACHED */
 332         }
 333
 334         /* Just in case there's anything there, free it */
 335         if (ln->ln_hold != NULL) {
 336                 m_freem_list(ln->ln_hold);
 337                 ln->ln_hold = NULL;
 338         }
 339
 340         /* Purge any link-layer info caching */
 341         VERIFY(ln->ln_rt->rt_llinfo == ln);
 342         if (ln->ln_rt->rt_llinfo_purge != NULL)
 343                 ln->ln_rt->rt_llinfo_purge(ln->ln_rt);
 344
 345         zfree(llinfo_nd6_zone, ln);
 346 }
 347
 348 static void
 349 nd6_llinfo_purge(struct rtentry *rt)
 350 {
 351         struct llinfo_nd6 *ln = rt->rt_llinfo;
 352
 353         RT_LOCK_ASSERT_HELD(rt);
 354         VERIFY(rt->rt_llinfo_purge == nd6_llinfo_purge && ln != NULL);
 355
 356         if (ln->ln_llreach != NULL) {
 357                 RT_CONVERT_LOCK(rt);
 358                 ifnet_llreach_free(ln->ln_llreach);
 359                 ln->ln_llreach = NULL;
 360         }
 361         ln->ln_lastused = 0;
 362 }
 363
 364 static void
 365 nd6_llinfo_get_ri(struct rtentry *rt, struct rt_reach_info *ri)
 366 {
 367         struct llinfo_nd6 *ln = rt->rt_llinfo;
 368         struct if_llreach *lr = ln->ln_llreach;
 369
 370         if (lr == NULL) {
 371                 bzero(ri, sizeof (*ri));
 372                 ri->ri_rssi = IFNET_RSSI_UNKNOWN;
 373                 ri->ri_lqm = IFNET_LQM_THRESH_OFF;
 374                 ri->ri_npm = IFNET_NPM_THRESH_UNKNOWN;
 375         } else {
 376                 IFLR_LOCK(lr);
 377                 /* Export to rt_reach_info structure */
 378                 ifnet_lr2ri(lr, ri);
 379                 /* Export ND6 send expiration (calendar) time */
 380                 ri->ri_snd_expire =
 381                     ifnet_llreach_up2calexp(lr, ln->ln_lastused);
 382                 IFLR_UNLOCK(lr);
 383         }
 384 }
 385
 386 static void
 387 nd6_llinfo_get_iflri(struct rtentry *rt, struct ifnet_llreach_info *iflri)
 388 {
 389         struct llinfo_nd6 *ln = rt->rt_llinfo;
 390         struct if_llreach *lr = ln->ln_llreach;
 391
 392         if (lr == NULL) {
 393                 bzero(iflri, sizeof (*iflri));
 394                 iflri->iflri_rssi = IFNET_RSSI_UNKNOWN;
 395                 iflri->iflri_lqm = IFNET_LQM_THRESH_OFF;
 396                 iflri->iflri_npm = IFNET_NPM_THRESH_UNKNOWN;
 397         } else {
 398                 IFLR_LOCK(lr);
 399                 /* Export to ifnet_llreach_info structure */
 400                 ifnet_lr2iflri(lr, iflri);
 401                 /* Export ND6 send expiration (uptime) time */
 402                 iflri->iflri_snd_expire =
 403                     ifnet_llreach_up2upexp(lr, ln->ln_lastused);
 404                 IFLR_UNLOCK(lr);
 405         }
 406 }
 407
 408 static void
 409 nd6_llinfo_refresh(struct rtentry *rt)
 410 {
 411         struct llinfo_nd6 *ln = rt->rt_llinfo;
 412         uint64_t timenow = net_uptime();
 413         /*
 414          * Can't refresh permanent, static or entries that are
 415          * not direct host entries
 416          */
 417         if (!ln || ln->ln_expire == 0 ||
 418             (rt->rt_flags & RTF_STATIC) ||
 419             !(rt->rt_flags & RTF_LLINFO)) {
 420                 return;
 421         }
 422
 423         if ((ln->ln_state > ND6_LLINFO_INCOMPLETE) &&
 424             (ln->ln_state < ND6_LLINFO_PROBE)) {
 425                 if (ln->ln_expire > timenow) {
 426                         ln_setexpire(ln, timenow);
 427                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PROBE);
 428                 }
 429         }
 430         return;
 431 }
 432
 433 const char *
 434 ndcache_state2str(short ndp_state)
 435 {
 436         const char *ndp_state_str = "UNKNOWN";
 437         switch (ndp_state) {
 438         case ND6_LLINFO_PURGE:
 439                 ndp_state_str = "ND6_LLINFO_PURGE";
 440                 break;
 441         case ND6_LLINFO_NOSTATE:
 442                 ndp_state_str = "ND6_LLINFO_NOSTATE";
 443                 break;
 444         case ND6_LLINFO_INCOMPLETE:
 445                 ndp_state_str = "ND6_LLINFO_INCOMPLETE";
 446                 break;
 447         case ND6_LLINFO_REACHABLE:
 448                 ndp_state_str = "ND6_LLINFO_REACHABLE";
 449                 break;
 450         case ND6_LLINFO_STALE:
 451                 ndp_state_str = "ND6_LLINFO_STALE";
 452                 break;
 453         case ND6_LLINFO_DELAY:
 454                 ndp_state_str = "ND6_LLINFO_DELAY";
 455                 break;
 456         case ND6_LLINFO_PROBE:
 457                 ndp_state_str = "ND6_LLINFO_PROBE";
 458                 break;
 459         default:
 460                 /* Init'd to UNKNOWN */
 461                 break;
 462         }
 463         return  ndp_state_str;
 464 }
 465
  466 void
  467 ln_setexpire(struct llinfo_nd6 *ln, uint64_t expiry)
  468 {
              /*
               * Record the entry's expiration time.  The value is stored
               * verbatim; other code in this file compares ln_expire against
               * net_uptime() and treats 0 as "no expiration".
               */
  469         ln->ln_expire = expiry;
  470 }
 471
 472 static uint64_t
 473 ln_getexpire(struct llinfo_nd6 *ln)
 474 {
 475         struct timeval caltime;
 476         uint64_t expiry;
 477
 478         if (ln->ln_expire != 0) {
 479                 struct rtentry *rt = ln->ln_rt;
 480
 481                 VERIFY(rt != NULL);
 482                 /* account for system time change */
 483                 getmicrotime(&caltime);
 484
 485                 rt->base_calendartime +=
 486                     NET_CALCULATE_CLOCKSKEW(caltime,
 487                     rt->base_calendartime, net_uptime(), rt->base_uptime);
 488
 489                 expiry = rt->base_calendartime +
 490                     ln->ln_expire - rt->base_uptime;
 491         } else {
 492                 expiry = 0;
 493         }
 494         return (expiry);
 495 }
 496
  497 void
  498 nd6_ifreset(struct ifnet *ifp)
  499 {
              /*
               * Reset the interface's ND parameters to their defaults.
               * The nd_ifinfo must exist and already be initialized, and
               * the caller must hold its lock (all three are asserted).
               */
  500         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
  501         VERIFY(NULL != ndi);
  502         VERIFY(ndi->initialized);
  503
  504         LCK_MTX_ASSERT(&ndi->lock, LCK_MTX_ASSERT_OWNED);
              /* link MTU follows the interface's current MTU */
  505         ndi->linkmtu = ifp->if_mtu;
  506         ndi->chlim = IPV6_DEFHLIM;
  507         ndi->basereachable = REACHABLE_TIME;
              /* reachable time is derived from the base value just set */
  508         ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
  509         ndi->retrans = RETRANS_TIMER;
  510 }
 511
 512 void
 513 nd6_ifattach(struct ifnet *ifp)
 514 {
 515         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 516
 517         VERIFY(NULL != ndi);
 518         if (!ndi->initialized) {
 519                 lck_mtx_init(&ndi->lock, nd_if_lock_grp, nd_if_lock_attr);
 520                 ndi->flags = ND6_IFF_PERFORMNUD;
 521                 ndi->flags |= ND6_IFF_DAD;
 522                 ndi->initialized = TRUE;
 523         }
 524
 525         lck_mtx_lock(&ndi->lock);
 526
 527         if (!(ifp->if_flags & IFF_MULTICAST)) {
 528                 ndi->flags |= ND6_IFF_IFDISABLED;
 529         }
 530
 531         nd6_ifreset(ifp);
 532         lck_mtx_unlock(&ndi->lock);
 533         nd6_setmtu(ifp);
 534
 535         nd6log0((LOG_INFO, ": ",
 536             "%s Reinit'd ND information for interface %s\n",
 537             if_name(ifp)));
 538         return;
 539 }
 540
 541 #if 0
 542 /*
 543  * XXX Look more into this. Especially since we recycle ifnets and do delayed
 544  * cleanup
 545  */
 546 void
 547 nd6_ifdetach(struct nd_ifinfo *nd)
 548 {
 549         /* XXX destroy nd's lock? */
 550         FREE(nd, M_IP6NDP);
 551 }
 552 #endif
 553
 554 void
 555 nd6_setmtu(struct ifnet *ifp)
 556 {
 557         struct nd_ifinfo *ndi = ND_IFINFO(ifp);
 558         u_int32_t oldmaxmtu, maxmtu;
 559
 560         if ((NULL == ndi) || (FALSE == ndi->initialized)) {
 561                 return;
 562         }
 563
 564         lck_mtx_lock(&ndi->lock);
 565         oldmaxmtu = ndi->maxmtu;
 566
 567         /*
 568          * The ND level maxmtu is somewhat redundant to the interface MTU
 569          * and is an implementation artifact of KAME.  Instead of hard-
 570          * limiting the maxmtu based on the interface type here, we simply
 571          * take the if_mtu value since SIOCSIFMTU would have taken care of
 572          * the sanity checks related to the maximum MTU allowed for the
 573          * interface (a value that is known only by the interface layer),
 574          * by sending the request down via ifnet_ioctl().  The use of the
 575          * ND level maxmtu and linkmtu are done via IN6_LINKMTU() which
 576          * does further checking against if_mtu.
 577          */
 578         maxmtu = ndi->maxmtu = ifp->if_mtu;
 579
 580         /*
 581          * Decreasing the interface MTU under IPV6 minimum MTU may cause
 582          * undesirable situation.  We thus notify the operator of the change
 583          * explicitly.  The check for oldmaxmtu is necessary to restrict the
 584          * log to the case of changing the MTU, not initializing it.
 585          */
 586         if (oldmaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) {
 587                 log(LOG_NOTICE, "nd6_setmtu: "
 588                     "new link MTU on %s (%u) is too small for IPv6\n",
 589                     if_name(ifp), (uint32_t)ndi->maxmtu);
 590         }
 591         ndi->linkmtu = ifp->if_mtu;
 592         lck_mtx_unlock(&ndi->lock);
 593
 594         /* also adjust in6_maxmtu if necessary. */
 595         if (maxmtu > in6_maxmtu) {
 596                 in6_setmaxmtu();
 597         }
 598 }
 599
 600 void
 601 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
 602 {
 603         bzero(ndopts, sizeof (*ndopts));
 604         ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
 605         ndopts->nd_opts_last =
 606             (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
 607
 608         if (icmp6len == 0) {
 609                 ndopts->nd_opts_done = 1;
 610                 ndopts->nd_opts_search = NULL;
 611         }
 612 }
 613
 614 /*
 615  * Take one ND option.
 616  */
 617 struct nd_opt_hdr *
 618 nd6_option(union nd_opts *ndopts)
 619 {
 620         struct nd_opt_hdr *nd_opt;
 621         int olen;
 622
 623         if (!ndopts)
 624                 panic("ndopts == NULL in nd6_option\n");
 625         if (!ndopts->nd_opts_last)
 626                 panic("uninitialized ndopts in nd6_option\n");
 627         if (!ndopts->nd_opts_search)
 628                 return (NULL);
 629         if (ndopts->nd_opts_done)
 630                 return (NULL);
 631
 632         nd_opt = ndopts->nd_opts_search;
 633
 634         /* make sure nd_opt_len is inside the buffer */
 635         if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
 636                 bzero(ndopts, sizeof (*ndopts));
 637                 return (NULL);
 638         }
 639
 640         olen = nd_opt->nd_opt_len << 3;
 641         if (olen == 0) {
 642                 /*
 643                  * Message validation requires that all included
 644                  * options have a length that is greater than zero.
 645                  */
 646                 bzero(ndopts, sizeof (*ndopts));
 647                 return (NULL);
 648         }
 649
 650         ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
 651         if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
 652                 /* option overruns the end of buffer, invalid */
 653                 bzero(ndopts, sizeof (*ndopts));
 654                 return (NULL);
 655         } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
 656                 /* reached the end of options chain */
 657                 ndopts->nd_opts_done = 1;
 658                 ndopts->nd_opts_search = NULL;
 659         }
 660         return (nd_opt);
 661 }
 662
 663 /*
 664  * Parse multiple ND options.
 665  * This function is much easier to use, for ND routines that do not need
 666  * multiple options of the same type.
 667  */
 668 int
 669 nd6_options(union nd_opts *ndopts)
 670 {
 671         struct nd_opt_hdr *nd_opt;
 672         int i = 0;
 673
 674         if (ndopts == NULL)
 675                 panic("ndopts == NULL in nd6_options");
 676         if (ndopts->nd_opts_last == NULL)
 677                 panic("uninitialized ndopts in nd6_options");
 678         if (ndopts->nd_opts_search == NULL)
 679                 return (0);
 680
 681         while (1) {
 682                 nd_opt = nd6_option(ndopts);
 683                 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
 684                         /*
 685                          * Message validation requires that all included
 686                          * options have a length that is greater than zero.
 687                          */
 688                         icmp6stat.icp6s_nd_badopt++;
 689                         bzero(ndopts, sizeof (*ndopts));
 690                         return (-1);
 691                 }
 692
 693                 if (nd_opt == NULL)
 694                         goto skip1;
 695
 696                 switch (nd_opt->nd_opt_type) {
 697                 case ND_OPT_SOURCE_LINKADDR:
 698                 case ND_OPT_TARGET_LINKADDR:
 699                 case ND_OPT_MTU:
 700                 case ND_OPT_REDIRECTED_HEADER:
 701                 case ND_OPT_NONCE:
 702                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
 703                                 nd6log((LOG_INFO,
 704                                     "duplicated ND6 option found (type=%d)\n",
 705                                     nd_opt->nd_opt_type));
 706                                 /* XXX bark? */
 707                         } else {
 708                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 709                                     nd_opt;
 710                         }
 711                         break;
 712                 case ND_OPT_PREFIX_INFORMATION:
 713                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) {
 714                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] =
 715                                     nd_opt;
 716                         }
 717                         ndopts->nd_opts_pi_end =
 718                             (struct nd_opt_prefix_info *)nd_opt;
 719                         break;
 720                 case ND_OPT_RDNSS:
 721                 case ND_OPT_DNSSL:
 722                         /* ignore */
 723                         break;
 724                 default:
 725                         /*
 726                          * Unknown options must be silently ignored,
 727                          * to accomodate future extension to the protocol.
 728                          */
 729                         nd6log((LOG_DEBUG,
 730                             "nd6_options: unsupported option %d - "
 731                             "option ignored\n", nd_opt->nd_opt_type));
 732                 }
 733
 734 skip1:
 735                 i++;
 736                 if (i > nd6_maxndopt) {
 737                         icmp6stat.icp6s_nd_toomanyopt++;
 738                         nd6log((LOG_INFO, "too many loop in nd opt\n"));
 739                         break;
 740                 }
 741
 742                 if (ndopts->nd_opts_done)
 743                         break;
 744         }
 745
 746         return (0);
 747 }
 748
  749 struct nd6svc_arg {
              /*
               * Argument block handed to nd6_service() through its void *
               * parameter.
               */
  750         int draining;           /* non-zero: caller is the "drainer" rather than the "timer" */
              /* NOTE(review): the counters below are presumably filled in by nd6_service(); their exact meaning is not visible here -- confirm */
  751         uint32_t killed;
  752         uint32_t aging_lazy;
  753         uint32_t aging;
  754         uint32_t sticky;
  755         uint32_t found;
  756 };
 757
 758 /*
 759  * ND6 service routine to expire default route list and prefix list
 760  */
 761 static void
 762 nd6_service(void *arg)
 763 {
 764         struct nd6svc_arg *ap = arg;
 765         struct llinfo_nd6 *ln;
 766         struct nd_defrouter *dr = NULL;
 767         struct nd_prefix *pr = NULL;
 768         struct ifnet *ifp = NULL;
 769         struct in6_ifaddr *ia6, *nia6;
 770         uint64_t timenow;
 771         boolean_t send_nc_failure_kev = FALSE;
 772         struct nd_drhead nd_defrouter_tmp;
 773         struct nd_defrouter *ndr = NULL;
 774         struct radix_node_head  *rnh = rt_tables[AF_INET6];
 775
 776         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
 777         /*
 778          * Since we may drop rnh_lock and nd6_mutex below, we want
 779          * to run this entire operation single threaded.
 780          */
 781         while (nd6_service_busy) {
 782                 nd6log2((LOG_DEBUG, "%s: %s is blocked by %d waiters\n",
 783                     __func__, ap->draining ? "drainer" : "timer",
 784                     nd6_service_waiters));
 785                 nd6_service_waiters++;
 786                 (void) msleep(nd6_service_wc, rnh_lock, (PZERO-1),
 787                     __func__, NULL);
 788                 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
 789         }
 790
 791         /* We are busy now; tell everyone else to go away */
 792         nd6_service_busy = TRUE;
 793
 794         net_update_uptime();
 795         timenow = net_uptime();
 796 again:
 797         /*
 798          * send_nc_failure_kev gets set when default router's IPv6 address
 799          * can't be resolved.
 800          * That can happen either:
 801          * 1. When the entry has resolved once but can't be
 802          * resolved later and the neighbor cache entry for gateway is deleted
 803          * after max probe attempts.
 804          *
 805          * 2. When the entry is in ND6_LLINFO_INCOMPLETE but can not be resolved
 806          * after max neighbor address resolution attempts.
 807          *
 808          * Both set send_nc_failure_kev to true. ifp is also set to the previous
 809          * neighbor cache entry's route's ifp.
 810          * Once we are done sending the notification, set send_nc_failure_kev
 811          * to false to stop sending false notifications for non default router
 812          * neighbors.
 813          *
 814          * We may want to send more information like Gateway's IP that could not be
 815          * resolved, however right now we do not install more than one default
 816          * route per interface in the routing table.
 817          */
 818         if (send_nc_failure_kev && ifp != NULL &&
 819             ifp->if_addrlen == IF_LLREACH_MAXLEN) {
 820                 struct kev_msg ev_msg;
 821                 struct kev_nd6_ndfailure nd6_ndfailure;
 822                 bzero(&ev_msg, sizeof(ev_msg));
 823                 bzero(&nd6_ndfailure, sizeof(nd6_ndfailure));
 824                 ev_msg.vendor_code      = KEV_VENDOR_APPLE;
 825                 ev_msg.kev_class        = KEV_NETWORK_CLASS;
 826                 ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
 827                 ev_msg.event_code       = KEV_ND6_NDFAILURE;
 828
 829                 nd6_ndfailure.link_data.if_family = ifp->if_family;
 830                 nd6_ndfailure.link_data.if_unit = ifp->if_unit;
 831                 strlcpy(nd6_ndfailure.link_data.if_name,
 832                     ifp->if_name,
 833                     sizeof(nd6_ndfailure.link_data.if_name));
 834                 ev_msg.dv[0].data_ptr = &nd6_ndfailure;
 835                 ev_msg.dv[0].data_length =
 836                         sizeof(nd6_ndfailure);
 837                 dlil_post_complete_msg(NULL, &ev_msg);
 838         }
 839
 840         send_nc_failure_kev = FALSE;
 841         ifp = NULL;
 842         /*
 843          * The global list llinfo_nd6 is modified by nd6_request() and is
 844          * therefore protected by rnh_lock.  For obvious reasons, we cannot
 845          * hold rnh_lock across calls that might lead to code paths which
 846          * attempt to acquire rnh_lock, else we deadlock.  Hence for such
 847          * cases we drop rt_lock and rnh_lock, make the calls, and repeat the
 848          * loop.  To ensure that we don't process the same entry more than
 849          * once in a single timeout, we mark the "already-seen" entries with
 850          * ND6_LNF_TIMER_SKIP flag.  At the end of the loop, we do a second
 851          * pass thru the entries and clear the flag so they can be processed
 852          * during the next timeout.
 853          */
 854         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
 855
 856         ln = llinfo_nd6.ln_next;
 857         while (ln != NULL && ln != &llinfo_nd6) {
 858                 struct rtentry *rt;
 859                 struct sockaddr_in6 *dst;
 860                 struct llinfo_nd6 *next;
 861                 u_int32_t retrans, flags;
 862                 struct nd_ifinfo *ndi = NULL;
 863                 boolean_t is_router = FALSE;
 864
 865                 /* ln_next/prev/rt is protected by rnh_lock */
 866                 next = ln->ln_next;
 867                 rt = ln->ln_rt;
 868                 RT_LOCK(rt);
 869
 870                 /* We've seen this already; skip it */
 871                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP) {
 872                         RT_UNLOCK(rt);
 873                         ln = next;
 874                         continue;
 875                 }
 876                 ap->found++;
 877
 878                 /* rt->rt_ifp should never be NULL */
 879                 if ((ifp = rt->rt_ifp) == NULL) {
 880                         panic("%s: ln(%p) rt(%p) rt_ifp == NULL", __func__,
 881                             ln, rt);
 882                         /* NOTREACHED */
 883                 }
 884
 885                 /* rt_llinfo must always be equal to ln */
 886                 if ((struct llinfo_nd6 *)rt->rt_llinfo != ln) {
 887                         panic("%s: rt_llinfo(%p) is not equal to ln(%p)",
 888                             __func__, rt->rt_llinfo, ln);
 889                         /* NOTREACHED */
 890                 }
 891
 892                 /* rt_key should never be NULL */
 893                 dst = SIN6(rt_key(rt));
 894                 if (dst == NULL) {
 895                         panic("%s: rt(%p) key is NULL ln(%p)", __func__,
 896                             rt, ln);
 897                         /* NOTREACHED */
 898                 }
 899
 900                 /* Set the flag in case we jump to "again" */
 901                 ln->ln_flags |= ND6_LNF_TIMER_SKIP;
 902
 903                 if (ln->ln_expire == 0 || (rt->rt_flags & RTF_STATIC)) {
 904                         ap->sticky++;
 905                 } else if (ap->draining && (rt->rt_refcnt == 0)) {
 906                         /*
 907                          * If we are draining, immediately purge non-static
 908                          * entries without outstanding route refcnt.
 909                          */
 910                         if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
 911                                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE);
 912                         else
 913                                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PURGE);
 914                         ln_setexpire(ln, timenow);
 915                 }
 916
 917                 /*
 918                  * If the entry has not expired, skip it.  Take note on the
 919                  * state, as entries that are in the STALE state are simply
 920                  * waiting to be garbage collected, in which case we can
 921                  * relax the callout scheduling (use nd6_prune_lazy).
 922                  */
 923                 if (ln->ln_expire > timenow) {
 924                         switch (ln->ln_state) {
 925                         case ND6_LLINFO_STALE:
 926                                 ap->aging_lazy++;
 927                                 break;
 928                         default:
 929                                 ap->aging++;
 930                                 break;
 931                         }
 932                         RT_UNLOCK(rt);
 933                         ln = next;
 934                         continue;
 935                 }
 936
 937                 ndi = ND_IFINFO(ifp);
 938                 VERIFY(ndi->initialized);
 939                 retrans = ndi->retrans;
 940                 flags = ndi->flags;
 941
 942                 RT_LOCK_ASSERT_HELD(rt);
 943                 is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;
 944
 945                 switch (ln->ln_state) {
 946                 case ND6_LLINFO_INCOMPLETE:
 947                         if (ln->ln_asked < nd6_mmaxtries) {
 948                                 struct ifnet *exclifp = ln->ln_exclifp;
 949                                 ln->ln_asked++;
 950                                 ln_setexpire(ln, timenow + retrans / 1000);
 951                                 RT_ADDREF_LOCKED(rt);
 952                                 RT_UNLOCK(rt);
 953                                 lck_mtx_unlock(rnh_lock);
 954                                 if (ip6_forwarding) {
 955                                         nd6_prproxy_ns_output(ifp, exclifp,
 956                                             NULL, &dst->sin6_addr, ln);
 957                                 } else {
 958                                         nd6_ns_output(ifp, NULL,
 959                                             &dst->sin6_addr, ln, NULL);
 960                                 }
 961                                 RT_REMREF(rt);
 962                                 ap->aging++;
 963                                 lck_mtx_lock(rnh_lock);
 964                         } else {
 965                                 struct mbuf *m = ln->ln_hold;
 966                                 ln->ln_hold = NULL;
 967                                 send_nc_failure_kev = is_router;
 968                                 if (m != NULL) {
 969                                         RT_ADDREF_LOCKED(rt);
 970                                         RT_UNLOCK(rt);
 971                                         lck_mtx_unlock(rnh_lock);
 972
 973                                         struct mbuf *mnext;
 974                                         while (m) {
 975                                                 mnext = m->m_nextpkt;
 976                                                 m->m_nextpkt = NULL;
 977                                                 m->m_pkthdr.rcvif = ifp;
 978                                                 icmp6_error_flag(m, ICMP6_DST_UNREACH,
 979                                                     ICMP6_DST_UNREACH_ADDR, 0, 0);
 980                                                 m = mnext;
 981                                         }
 982                                 } else {
 983                                         RT_ADDREF_LOCKED(rt);
 984                                         RT_UNLOCK(rt);
 985                                         lck_mtx_unlock(rnh_lock);
 986                                 }
 987
 988                                 /*
 989                                  * Enqueue work item to invoke callback for
 990                                  * this route entry
 991                                  */
 992                                 route_event_enqueue_nwk_wq_entry(rt, NULL,
 993                                     ROUTE_LLENTRY_UNREACH, NULL, FALSE);
 994                                 nd6_free(rt);
 995                                 ap->killed++;
 996                                 lck_mtx_lock(rnh_lock);
 997                                 /*
 998                                  * nd6_free above would flush out the routing table of
 999                                  * any cloned routes with same next-hop.
1000                                  * Walk the tree anyways as there could be static routes
1001                                  * left.
1002                                  *
1003                                  * We also already have a reference to rt that gets freed right
1004                                  * after the block below executes. Don't need an extra reference
1005                                  * on rt here.
1006                                  */
1007                                 if (is_router) {
1008                                         struct route_event rt_ev;
1009                                         route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_UNREACH);
1010                                         (void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
1011                                 }
1012                                 rtfree_locked(rt);
1013                         }
1014                         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1015                         goto again;
1016
1017                 case ND6_LLINFO_REACHABLE:
1018                         if (ln->ln_expire != 0) {
1019                                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE);
1020                                 ln_setexpire(ln, timenow + nd6_gctimer);
1021                                 ap->aging_lazy++;
1022                                 /*
1023                                  * Enqueue work item to invoke callback for
1024                                  * this route entry
1025                                  */
1026                                 route_event_enqueue_nwk_wq_entry(rt, NULL,
1027                                     ROUTE_LLENTRY_STALE, NULL, TRUE);
1028
1029                                 RT_ADDREF_LOCKED(rt);
1030                                 RT_UNLOCK(rt);
1031                                 if (is_router) {
1032                                         struct route_event rt_ev;
1033                                         route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_STALE);
1034                                         (void) rnh->rnh_walktree(rnh, route_event_walktree, (void *)&rt_ev);
1035                                 }
1036                                 rtfree_locked(rt);
1037                         } else {
1038                                 RT_UNLOCK(rt);
1039                         }
1040                         break;
1041
1042                 case ND6_LLINFO_STALE:
1043                 case ND6_LLINFO_PURGE:
1044                         /* Garbage Collection(RFC 4861 5.3) */
1045                         if (ln->ln_expire != 0) {
1046                                 RT_ADDREF_LOCKED(rt);
1047                                 RT_UNLOCK(rt);
1048                                 lck_mtx_unlock(rnh_lock);
1049                                 nd6_free(rt);
1050                                 ap->killed++;
1051                                 lck_mtx_lock(rnh_lock);
1052                                 rtfree_locked(rt);
1053                                 goto again;
1054                         } else {
1055                                 RT_UNLOCK(rt);
1056                         }
1057                         break;
1058
1059                 case ND6_LLINFO_DELAY:
1060                         if ((flags & ND6_IFF_PERFORMNUD) != 0) {
1061                                 /* We need NUD */
1062                                 ln->ln_asked = 1;
1063                                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_PROBE);
1064                                 ln_setexpire(ln, timenow + retrans / 1000);
1065                                 RT_ADDREF_LOCKED(rt);
1066                                 RT_UNLOCK(rt);
1067                                 lck_mtx_unlock(rnh_lock);
1068                                 nd6_ns_output(ifp, &dst->sin6_addr,
1069                                     &dst->sin6_addr, ln, NULL);
1070                                 RT_REMREF(rt);
1071                                 ap->aging++;
1072                                 lck_mtx_lock(rnh_lock);
1073                                 goto again;
1074                         }
1075                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE); /* XXX */
1076                         ln_setexpire(ln, timenow + nd6_gctimer);
1077                         RT_UNLOCK(rt);
1078                         ap->aging_lazy++;
1079                         break;
1080
1081                 case ND6_LLINFO_PROBE:
1082                         if (ln->ln_asked < nd6_umaxtries) {
1083                                 ln->ln_asked++;
1084                                 ln_setexpire(ln, timenow + retrans / 1000);
1085                                 RT_ADDREF_LOCKED(rt);
1086                                 RT_UNLOCK(rt);
1087                                 lck_mtx_unlock(rnh_lock);
1088                                 nd6_ns_output(ifp, &dst->sin6_addr,
1089                                     &dst->sin6_addr, ln, NULL);
1090                                 RT_REMREF(rt);
1091                                 ap->aging++;
1092                                 lck_mtx_lock(rnh_lock);
1093                         } else {
1094                                 is_router = (rt->rt_flags & RTF_ROUTER) ? TRUE : FALSE;
1095                                 send_nc_failure_kev = is_router;
1096                                 RT_ADDREF_LOCKED(rt);
1097                                 RT_UNLOCK(rt);
1098                                 lck_mtx_unlock(rnh_lock);
1099                                 nd6_free(rt);
1100                                 ap->killed++;
1101
1102                                 /*
1103                                  * Enqueue work item to invoke callback for
1104                                  * this route entry
1105                                  */
1106                                 route_event_enqueue_nwk_wq_entry(rt, NULL,
1107                                     ROUTE_LLENTRY_UNREACH, NULL, FALSE);
1108
1109                                 lck_mtx_lock(rnh_lock);
1110                                 /*
1111                                  * nd6_free above would flush out the routing table of
1112                                  * any cloned routes with same next-hop.
1113                                  * Walk the tree anyways as there could be static routes
1114                                  * left.
1115                                  *
1116                                  * We also already have a reference to rt that gets freed right
1117                                  * after the block below executes. Don't need an extra reference
1118                                  * on rt here.
1119                                  */
1120                                 if (is_router) {
1121                                         struct route_event rt_ev;
1122                                         route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_UNREACH);
1123                                         (void) rnh->rnh_walktree(rnh,
1124                                             route_event_walktree, (void *)&rt_ev);
1125                                 }
1126                                 rtfree_locked(rt);
1127                         }
1128                         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1129                         goto again;
1130
1131                 default:
1132                         RT_UNLOCK(rt);
1133                         break;
1134                 }
1135                 ln = next;
1136         }
1137         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1138
1139         /* Now clear the flag from all entries */
1140         ln = llinfo_nd6.ln_next;
1141         while (ln != NULL && ln != &llinfo_nd6) {
1142                 struct rtentry *rt = ln->ln_rt;
1143                 struct llinfo_nd6 *next = ln->ln_next;
1144
1145                 RT_LOCK_SPIN(rt);
1146                 if (ln->ln_flags & ND6_LNF_TIMER_SKIP)
1147                         ln->ln_flags &= ~ND6_LNF_TIMER_SKIP;
1148                 RT_UNLOCK(rt);
1149                 ln = next;
1150         }
1151         lck_mtx_unlock(rnh_lock);
1152
1153         /* expire default router list */
1154         TAILQ_INIT(&nd_defrouter_tmp);
1155
1156         lck_mtx_lock(nd6_mutex);
1157         TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
1158                 ap->found++;
1159                 if (dr->expire != 0 && dr->expire < timenow) {
1160                         VERIFY(dr->ifp != NULL);
1161                         in6_ifstat_inc(dr->ifp, ifs6_defrtr_expiry_cnt);
1162                         in6_event_enqueue_nwk_wq_entry(IN6_NDP_RTR_EXPIRY, dr->ifp,
1163                             &dr->rtaddr, dr->rtlifetime);
1164                         if (dr->ifp != NULL &&
1165                             dr->ifp->if_type == IFT_CELLULAR) {
1166                                 /*
1167                                  * Some buggy cellular gateways may not send
1168                                  * periodic router advertisements.
1169                                  * Or they may send it with router lifetime
1170                                  * value that is less than the configured Max and Min
1171                                  * Router Advertisement interval.
1172                                  * To top that an idle device may not wake up
1173                                  * when periodic RA is received on cellular
1174                                  * interface.
1175                                  * We could send RS on every wake but RFC
1176                                  * 4861 precludes that.
1177                                  * The addresses are of infinite lifetimes
1178                                  * and are tied to the lifetime of the bearer,
1179                                  * so keeping the addresses and just getting rid of
1180                                  * the router does not help us anyways.
1181                                  * If there's network renumbering, a lifetime with
1182                                  * value 0 would remove the default router.
1183                                  * Also it will get deleted as part of purge when
1184                                  * the PDP context is torn down and configured again.
1185                                  * For that reason, do not expire the default router
1186                                  * learned on cellular interface. Ever.
1187                                  */
1188                                 dr->expire += dr->rtlifetime;
1189                                 nd6log2((LOG_DEBUG,
1190                                     "%s: Refreshing expired default router entry "
1191                                     "%s for interface %s\n", __func__,
1192                                     ip6_sprintf(&dr->rtaddr), if_name(dr->ifp)));
1193                         } else {
1194                                 ap->killed++;
1195                                 /*
1196                                  * Remove the entry from default router list
1197                                  * and add it to the temp list.
1198                                  * nd_defrouter_tmp will be a local temporary
1199                                  * list as no one else can get the same
1200                                  * removed entry once it is removed from default
1201                                  * router list.
1202                                  * Remove the reference after calling defrtrlist_del
1203                                  */
1204                                 TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1205                                 TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
1206                         }
1207                 } else {
1208                         if (dr->expire == 0 || (dr->stateflags & NDDRF_STATIC))
1209                                 ap->sticky++;
1210                         else
1211                                 ap->aging_lazy++;
1212                 }
1213         }
1214
1215         /*
1216          * Keep the following  separate from the above
1217          * iteration of nd_defrouter because it's not safe
1218          * to call defrtrlist_del while iterating global default
1219          * router list. Global list has to be traversed
1220          * while holding nd6_mutex throughout.
1221          *
1222          * The following call to defrtrlist_del should be
1223          * safe as we are iterating a local list of
1224          * default routers.
1225          */
1226         TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, ndr) {
1227                 TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
1228                 defrtrlist_del(dr);
1229                 NDDR_REMREF(dr);        /* remove list reference */
1230         }
1231         lck_mtx_unlock(nd6_mutex);
1232
1233         /*
1234          * expire interface addresses.
1235          * in the past the loop was inside prefix expiry processing.
1236          * However, from a stricter spec-conformance standpoint, we should
1237          * rather separate address lifetimes and prefix lifetimes.
1238          */
1239 addrloop:
1240         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1241         for (ia6 = in6_ifaddrs; ia6; ia6 = nia6) {
1242                 int oldflags = ia6->ia6_flags;
1243                 ap->found++;
1244                 nia6 = ia6->ia_next;
1245                 IFA_LOCK(&ia6->ia_ifa);
1246                 /*
1247                  * Extra reference for ourselves; it's no-op if
1248                  * we don't have to regenerate temporary address,
1249                  * otherwise it protects the address from going
1250                  * away since we drop in6_ifaddr_rwlock below.
1251                  */
1252                 IFA_ADDREF_LOCKED(&ia6->ia_ifa);
1253                 /* check address lifetime */
1254                 if (IFA6_IS_INVALID(ia6, timenow)) {
1255                         /*
1256                          * If the expiring address is temporary, try
1257                          * regenerating a new one.  This would be useful when
1258                          * we suspended a laptop PC, then turned it on after a
1259                          * period that could invalidate all temporary
1260                          * addresses.  Although we may have to restart the
1261                          * loop (see below), it must be after purging the
1262                          * address.  Otherwise, we'd see an infinite loop of
1263                          * regeneration.
1264                          */
1265                         if (ip6_use_tempaddr &&
1266                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1267                                 /*
1268                                  * NOTE: We have to drop the lock here
1269                                  * because regen_tmpaddr() eventually calls
1270                                  * in6_update_ifa(), which must take the lock
1271                                  * and would otherwise cause a hang.  This is
1272                                  * safe because the goto addrloop leads to a
1273                                  * re-evaluation of the in6_ifaddrs list
1274                                  */
1275                                 IFA_UNLOCK(&ia6->ia_ifa);
1276                                 lck_rw_done(&in6_ifaddr_rwlock);
1277                                 (void) regen_tmpaddr(ia6);
1278                         } else {
1279                                 IFA_UNLOCK(&ia6->ia_ifa);
1280                                 lck_rw_done(&in6_ifaddr_rwlock);
1281                         }
1282
1283                         /*
1284                          * Purging the address would have caused
1285                          * in6_ifaddr_rwlock to be dropped and reacquired;
1286                          * therefore search again from the beginning
1287                          * of in6_ifaddrs list.
1288                          */
1289                         in6_purgeaddr(&ia6->ia_ifa);
1290                         ap->killed++;
1291
1292                         if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) == 0) {
1293                                 in6_ifstat_inc(ia6->ia_ifa.ifa_ifp, ifs6_addr_expiry_cnt);
1294                                 in6_event_enqueue_nwk_wq_entry(IN6_NDP_ADDR_EXPIRY,
1295                                     ia6->ia_ifa.ifa_ifp, &ia6->ia_addr.sin6_addr,
1296                                     0);
1297                         }
1298                         /* Release extra reference taken above */
1299                         IFA_REMREF(&ia6->ia_ifa);
1300                         goto addrloop;
1301                 }
1302                 /*
1303                  * The lazy timer runs every nd6_prune_lazy seconds with at
1304                  * most "2 * nd6_prune_lazy - 1" leeway. We consider the worst
1305                  * case here and make sure we schedule the regular timer if an
1306                  * interface address is about to expire.
1307                  */
1308                 if (IFA6_IS_INVALID(ia6, timenow + 3 * nd6_prune_lazy))
1309                         ap->aging++;
1310                 else
1311                         ap->aging_lazy++;
1312                 IFA_LOCK_ASSERT_HELD(&ia6->ia_ifa);
1313                 if (IFA6_IS_DEPRECATED(ia6, timenow)) {
1314                         ia6->ia6_flags |= IN6_IFF_DEPRECATED;
1315
1316                         if((oldflags & IN6_IFF_DEPRECATED) == 0) {
1317                                 /*
1318                                  * Only enqueue the Deprecated event when the address just
1319                                  * becomes deprecated.
1320                                  * Keep it limited to the stable address as it is common for
1321                                  * older temporary addresses to get deprecated while we generate
1322                                  * new ones.
1323                                  */
1324                                 if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) == 0) {
1325                                         in6_event_enqueue_nwk_wq_entry(IN6_ADDR_MARKED_DEPRECATED,
1326                                             ia6->ia_ifa.ifa_ifp, &ia6->ia_addr.sin6_addr,
1327                                             0);
1328                                 }
1329                         }
1330                         /*
1331                          * If a temporary address has just become deprecated,
1332                          * regenerate a new one if possible.
1333                          */
1334                         if (ip6_use_tempaddr &&
1335                             (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1336                             (oldflags & IN6_IFF_DEPRECATED) == 0) {
1337
1338                                 /* see NOTE above */
1339                                 IFA_UNLOCK(&ia6->ia_ifa);
1340                                 lck_rw_done(&in6_ifaddr_rwlock);
1341                                 if (regen_tmpaddr(ia6) == 0) {
1342                                         /*
1343                                          * A new temporary address is
1344                                          * generated.
1345                                          * XXX: this means the address chain
1346                                          * has changed while we are still in
1347                                          * the loop.  Although the change
1348                                          * would not cause disaster (because
1349                                          * it's not a deletion, but an
1350                                          * addition,) we'd rather restart the
1351                                          * loop just for safety.  Or does this
1352                                          * significantly reduce performance??
1353                                          */
1354                                         /* Release extra reference */
1355                                         IFA_REMREF(&ia6->ia_ifa);
1356                                         goto addrloop;
1357                                 }
1358                                 lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
1359                         } else {
1360                                 IFA_UNLOCK(&ia6->ia_ifa);
1361                         }
1362                 } else {
1363                         /*
1364                          * A new RA might have made a deprecated address
1365                          * preferred.
1366                          */
1367                         ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
1368                         IFA_UNLOCK(&ia6->ia_ifa);
1369                 }
1370                 LCK_RW_ASSERT(&in6_ifaddr_rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1371                 /* Release extra reference taken above */
1372                 IFA_REMREF(&ia6->ia_ifa);
1373         }
1374         lck_rw_done(&in6_ifaddr_rwlock);
1375
1376         lck_mtx_lock(nd6_mutex);
1377         /* expire prefix list */
1378         pr = nd_prefix.lh_first;
1379         while (pr != NULL) {
1380                 ap->found++;
1381                 /*
1382                  * check prefix lifetime.
1383                  * since pltime is just for autoconf, pltime processing for
1384                  * prefix is not necessary.
1385                  */
1386                 NDPR_LOCK(pr);
1387                 if (pr->ndpr_stateflags & NDPRF_PROCESSED_SERVICE ||
1388                     pr->ndpr_stateflags & NDPRF_DEFUNCT) {
1389                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1390                         NDPR_UNLOCK(pr);
1391                         pr = pr->ndpr_next;
1392                         continue;
1393                 }
1394                 if (pr->ndpr_expire != 0 && pr->ndpr_expire < timenow) {
1395                         /*
1396                          * address expiration and prefix expiration are
1397                          * separate. NEVER perform in6_purgeaddr here.
1398                          */
1399                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1400                         NDPR_ADDREF_LOCKED(pr);
1401                         prelist_remove(pr);
1402                         NDPR_UNLOCK(pr);
1403
1404                         in6_ifstat_inc(pr->ndpr_ifp, ifs6_pfx_expiry_cnt);
1405                         in6_event_enqueue_nwk_wq_entry(IN6_NDP_PFX_EXPIRY,
1406                             pr->ndpr_ifp, &pr->ndpr_prefix.sin6_addr,
1407                             0);
1408                         NDPR_REMREF(pr);
1409                         pfxlist_onlink_check();
1410                         pr = nd_prefix.lh_first;
1411                         ap->killed++;
1412                 } else {
1413                         if (pr->ndpr_expire == 0 ||
1414                             (pr->ndpr_stateflags & NDPRF_STATIC))
1415                                 ap->sticky++;
1416                         else
1417                                 ap->aging_lazy++;
1418                         pr->ndpr_stateflags |= NDPRF_PROCESSED_SERVICE;
1419                         NDPR_UNLOCK(pr);
1420                         pr = pr->ndpr_next;
1421                 }
1422         }
1423         LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
1424                 NDPR_LOCK(pr);
1425                 pr->ndpr_stateflags &= ~NDPRF_PROCESSED_SERVICE;
1426                 NDPR_UNLOCK(pr);
1427         }
1428         lck_mtx_unlock(nd6_mutex);
1429
1430         lck_mtx_lock(rnh_lock);
1431         /* We're done; let others enter */
1432         nd6_service_busy = FALSE;
1433         if (nd6_service_waiters > 0) {
1434                 nd6_service_waiters = 0;
1435                 wakeup(nd6_service_wc);
1436         }
1437 }
1438
1439
1440 static int nd6_need_draining = 0;
1441
1442 void
1443 nd6_drain(void *arg)
1444 {
1445 #pragma unused(arg)
1446         nd6log2((LOG_DEBUG, "%s: draining ND6 entries\n", __func__));
1447
1448         lck_mtx_lock(rnh_lock);
1449         nd6_need_draining = 1;
1450         nd6_sched_timeout(NULL, NULL);
1451         lck_mtx_unlock(rnh_lock);
1452 }
1453
1454 /*
1455  * We use the ``arg'' variable to decide whether or not the timer we're
1456  * running is the fast timer. We do this to reset the nd6_fast_timer_on
1457  * variable so that later we don't end up ignoring a ``fast timer''
1458  * request if the 5 second timer is running (see nd6_sched_timeout).
1459  */
1460 static void
1461 nd6_timeout(void *arg)
1462 {
1463         struct nd6svc_arg sarg;
1464         uint32_t buf;
1465
1466         lck_mtx_lock(rnh_lock);
1467         bzero(&sarg, sizeof (sarg));
1468         if (nd6_need_draining != 0) {
1469                 nd6_need_draining = 0;
1470                 sarg.draining = 1;
1471         }
1472         nd6_service(&sarg);
1473         nd6log2((LOG_DEBUG, "%s: found %u, aging_lazy %u, aging %u, "
1474             "sticky %u, killed %u\n", __func__, sarg.found, sarg.aging_lazy,
1475             sarg.aging, sarg.sticky, sarg.killed));
1476         /* re-arm the timer if there's work to do */
1477         nd6_timeout_run--;
1478         VERIFY(nd6_timeout_run >= 0 && nd6_timeout_run < 2);
1479         if (arg == &nd6_fast_timer_on)
1480                 nd6_fast_timer_on = FALSE;
1481         if (sarg.aging_lazy > 0 || sarg.aging > 0 || nd6_sched_timeout_want) {
1482                 struct timeval atv, ltv, *leeway;
1483                 int lazy = nd6_prune_lazy;
1484
1485                 if (sarg.aging > 0 || lazy < 1) {
1486                         atv.tv_usec = 0;
1487                         atv.tv_sec = nd6_prune;
1488                         leeway = NULL;
1489                 } else {
1490                         VERIFY(lazy >= 1);
1491                         atv.tv_usec = 0;
1492                         atv.tv_sec = MAX(nd6_prune, lazy);
1493                         ltv.tv_usec = 0;
1494                         read_frandom(&buf, sizeof(buf));
1495                         ltv.tv_sec = MAX(buf % lazy, 1) * 2;
1496                         leeway = &ltv;
1497                 }
1498                 nd6_sched_timeout(&atv, leeway);
1499         } else if (nd6_debug) {
1500                 nd6log2((LOG_DEBUG, "%s: not rescheduling timer\n", __func__));
1501         }
1502         lck_mtx_unlock(rnh_lock);
1503 }
1504
1505 void
1506 nd6_sched_timeout(struct timeval *atv, struct timeval *ltv)
1507 {
1508         struct timeval tv;
1509
1510         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1511         if (atv == NULL) {
1512                 tv.tv_usec = 0;
1513                 tv.tv_sec = MAX(nd6_prune, 1);
1514                 atv = &tv;
1515                 ltv = NULL;     /* ignore leeway */
1516         }
1517         /* see comments on top of this file */
1518         if (nd6_timeout_run == 0) {
1519                 if (ltv == NULL) {
1520                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1521                             "T+%llus.%lluu (demand %d)\n", __func__,
1522                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1523                             nd6_sched_timeout_want));
1524                         nd6_fast_timer_on = TRUE;
1525                         timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1526                 } else {
1527                         nd6log2((LOG_DEBUG, "%s: timer scheduled in "
1528                             "T+%llus.%lluu with %llus.%lluu leeway "
1529                             "(demand %d)\n", __func__, (uint64_t)atv->tv_sec,
1530                             (uint64_t)atv->tv_usec, (uint64_t)ltv->tv_sec,
1531                             (uint64_t)ltv->tv_usec, nd6_sched_timeout_want));
1532                         nd6_fast_timer_on = FALSE;
1533                         timeout_with_leeway(nd6_timeout, NULL,
1534                             tvtohz(atv), tvtohz(ltv));
1535                 }
1536                 nd6_timeout_run++;
1537                 nd6_sched_timeout_want = 0;
1538         } else if (nd6_timeout_run == 1 && ltv == NULL &&
1539             nd6_fast_timer_on == FALSE) {
1540                 nd6log2((LOG_DEBUG, "%s: fast timer scheduled in "
1541                     "T+%llus.%lluu (demand %d)\n", __func__,
1542                     (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1543                     nd6_sched_timeout_want));
1544                 nd6_fast_timer_on = TRUE;
1545                 nd6_sched_timeout_want = 0;
1546                 nd6_timeout_run++;
1547                 timeout(nd6_timeout, &nd6_fast_timer_on, tvtohz(atv));
1548         } else {
1549                 if (ltv == NULL) {
1550                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1551                             "timers %d, fast_timer %d, T+%llus.%lluu\n",
1552                             __func__, nd6_timeout_run, nd6_fast_timer_on,
1553                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec));
1554                 } else {
1555                         nd6log2((LOG_DEBUG, "%s: not scheduling timer: "
1556                             "timers %d, fast_timer %d, T+%llus.%lluu "
1557                             "with %llus.%lluu leeway\n", __func__,
1558                             nd6_timeout_run, nd6_fast_timer_on,
1559                             (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec,
1560                             (uint64_t)ltv->tv_sec, (uint64_t)ltv->tv_usec));
1561                 }
1562         }
1563 }
1564
1565 /*
1566  * ND6 router advertisement kernel notification
1567  */
1568 void
1569 nd6_post_msg(u_int32_t code, struct nd_prefix_list *prefix_list,
1570     u_int32_t list_length, u_int32_t mtu)
1571 {
1572         struct kev_msg ev_msg;
1573         struct kev_nd6_ra_data nd6_ra_msg_data;
1574         struct nd_prefix_list *itr = prefix_list;
1575
1576         bzero(&ev_msg, sizeof (struct kev_msg));
1577         ev_msg.vendor_code      = KEV_VENDOR_APPLE;
1578         ev_msg.kev_class        = KEV_NETWORK_CLASS;
1579         ev_msg.kev_subclass     = KEV_ND6_SUBCLASS;
1580         ev_msg.event_code       = code;
1581
1582         bzero(&nd6_ra_msg_data, sizeof (nd6_ra_msg_data));
1583
1584         if (mtu > 0 && mtu >= IPV6_MMTU) {
1585                 nd6_ra_msg_data.mtu = mtu;
1586                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_MTU;
1587         }
1588
1589         if (list_length > 0 && prefix_list != NULL) {
1590                 nd6_ra_msg_data.list_length = list_length;
1591                 nd6_ra_msg_data.flags |= KEV_ND6_DATA_VALID_PREFIX;
1592         }
1593
1594         while (itr != NULL && nd6_ra_msg_data.list_index < list_length) {
1595                 bcopy(&itr->pr.ndpr_prefix, &nd6_ra_msg_data.prefix.prefix,
1596                     sizeof (nd6_ra_msg_data.prefix.prefix));
1597                 nd6_ra_msg_data.prefix.raflags = itr->pr.ndpr_raf;
1598                 nd6_ra_msg_data.prefix.prefixlen = itr->pr.ndpr_plen;
1599                 nd6_ra_msg_data.prefix.origin = PR_ORIG_RA;
1600                 nd6_ra_msg_data.prefix.vltime = itr->pr.ndpr_vltime;
1601                 nd6_ra_msg_data.prefix.pltime = itr->pr.ndpr_pltime;
1602                 nd6_ra_msg_data.prefix.expire = ndpr_getexpire(&itr->pr);
1603                 nd6_ra_msg_data.prefix.flags = itr->pr.ndpr_stateflags;
1604                 nd6_ra_msg_data.prefix.refcnt = itr->pr.ndpr_addrcnt;
1605                 nd6_ra_msg_data.prefix.if_index = itr->pr.ndpr_ifp->if_index;
1606
1607                 /* send the message up */
1608                 ev_msg.dv[0].data_ptr           = &nd6_ra_msg_data;
1609                 ev_msg.dv[0].data_length        = sizeof (nd6_ra_msg_data);
1610                 ev_msg.dv[1].data_length        = 0;
1611                 dlil_post_complete_msg(NULL, &ev_msg);
1612
1613                 /* clean up for the next prefix */
1614                 bzero(&nd6_ra_msg_data.prefix, sizeof (nd6_ra_msg_data.prefix));
1615                 itr = itr->next;
1616                 nd6_ra_msg_data.list_index++;
1617         }
1618 }
1619
1620 /*
1621  * Regenerate deprecated/invalidated temporary address
1622  */
1623 static int
1624 regen_tmpaddr(struct in6_ifaddr *ia6)
1625 {
1626         struct ifaddr *ifa;
1627         struct ifnet *ifp;
1628         struct in6_ifaddr *public_ifa6 = NULL;
1629         uint64_t timenow = net_uptime();
1630
1631         ifp = ia6->ia_ifa.ifa_ifp;
1632         ifnet_lock_shared(ifp);
1633         TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
1634                 struct in6_ifaddr *it6;
1635
1636                 IFA_LOCK(ifa);
1637                 if (ifa->ifa_addr->sa_family != AF_INET6) {
1638                         IFA_UNLOCK(ifa);
1639                         continue;
1640                 }
1641                 it6 = (struct in6_ifaddr *)ifa;
1642
1643                 /* ignore no autoconf addresses. */
1644                 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
1645                         IFA_UNLOCK(ifa);
1646                         continue;
1647                 }
1648                 /* ignore autoconf addresses with different prefixes. */
1649                 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) {
1650                         IFA_UNLOCK(ifa);
1651                         continue;
1652                 }
1653                 /*
1654                  * Now we are looking at an autoconf address with the same
1655                  * prefix as ours.  If the address is temporary and is still
1656                  * preferred, do not create another one.  It would be rare, but
1657                  * could happen, for example, when we resume a laptop PC after
1658                  * a long period.
1659                  */
1660                 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
1661                     !IFA6_IS_DEPRECATED(it6, timenow)) {
1662                         IFA_UNLOCK(ifa);
1663                         if (public_ifa6 != NULL)
1664                                 IFA_REMREF(&public_ifa6->ia_ifa);
1665                         public_ifa6 = NULL;
1666                         break;
1667                 }
1668
1669                 /*
1670                  * This is a public autoconf address that has the same prefix
1671                  * as ours.  If it is preferred, keep it.  We can't break the
1672                  * loop here, because there may be a still-preferred temporary
1673                  * address with the prefix.
1674                  */
1675                 if (!IFA6_IS_DEPRECATED(it6, timenow)) {
1676                         IFA_ADDREF_LOCKED(ifa); /* for public_ifa6 */
1677                         IFA_UNLOCK(ifa);
1678                         if (public_ifa6 != NULL)
1679                                 IFA_REMREF(&public_ifa6->ia_ifa);
1680                         public_ifa6 = it6;
1681                 } else {
1682                         IFA_UNLOCK(ifa);
1683                 }
1684         }
1685         ifnet_lock_done(ifp);
1686
1687         if (public_ifa6 != NULL) {
1688                 int e;
1689
1690                 if ((e = in6_tmpifadd(public_ifa6, 0)) != 0) {
1691                         log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
1692                             " tmp addr,errno=%d\n", e);
1693                         IFA_REMREF(&public_ifa6->ia_ifa);
1694                         return (-1);
1695                 }
1696                 IFA_REMREF(&public_ifa6->ia_ifa);
1697                 return (0);
1698         }
1699
1700         return (-1);
1701 }
1702
1703 /*
1704  * Nuke neighbor cache/prefix/default router management table, right before
1705  * ifp goes away.
1706  */
1707 void
1708 nd6_purge(struct ifnet *ifp)
1709 {
1710         struct llinfo_nd6 *ln;
1711         struct nd_defrouter *dr, *ndr;
1712         struct nd_prefix *pr, *npr;
1713         boolean_t removed;
1714         struct nd_drhead nd_defrouter_tmp;
1715
1716         TAILQ_INIT(&nd_defrouter_tmp);
1717
1718         /* Nuke default router list entries toward ifp */
1719         lck_mtx_lock(nd6_mutex);
1720         TAILQ_FOREACH_SAFE(dr, &nd_defrouter, dr_entry, ndr) {
1721                 if (dr->ifp != ifp)
1722                         continue;
1723                 /*
1724                  * Remove the entry from default router list
1725                  * and add it to the temp list.
1726                  * nd_defrouter_tmp will be a local temporary
1727                  * list as no one else can get the same
1728                  * removed entry once it is removed from default
1729                  * router list.
1730                  * Remove the reference after calling defrtrlist_del.
1731                  *
1732                  * The uninstalled entries have to be iterated first
1733                  * when we call defrtrlist_del.
1734                  * This is to ensure that we don't end up calling
1735                  * default router  selection when there are other
1736                  * uninstalled candidate default routers on
1737                  * the interface.
1738                  * If we don't respect that order, we may end
1739                  * up missing out on some entries.
1740                  *
1741                  * For that reason, installed ones must be inserted
1742                  * at the tail and uninstalled ones at the head
1743                  */
1744                 TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
1745
1746                 if (dr->stateflags & NDDRF_INSTALLED)
1747                         TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
1748                 else
1749                         TAILQ_INSERT_HEAD(&nd_defrouter_tmp, dr, dr_entry);
1750         }
1751
1752         /*
1753          * The following call to defrtrlist_del should be
1754          * safe as we are iterating a local list of
1755          * default routers.
1756          *
1757          * We don't really need nd6_mutex here but keeping
1758          * it as it is to avoid changing assertios held in
1759          * the functions in the call-path.
1760          */
1761         TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, ndr) {
1762                 TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
1763                 defrtrlist_del(dr);
1764                 NDDR_REMREF(dr);        /* remove list reference */
1765         }
1766
1767         /* Nuke prefix list entries toward ifp */
1768         removed = FALSE;
1769         for (pr = nd_prefix.lh_first; pr; pr = npr) {
1770                 NDPR_LOCK(pr);
1771                 npr = pr->ndpr_next;
1772                 if (pr->ndpr_ifp == ifp &&
1773                     !(pr->ndpr_stateflags & NDPRF_DEFUNCT)) {
1774                         /*
1775                          * Because if_detach() does *not* release prefixes
1776                          * while purging addresses the reference count will
1777                          * still be above zero. We therefore reset it to
1778                          * make sure that the prefix really gets purged.
1779                          */
1780                         pr->ndpr_addrcnt = 0;
1781
1782                         /*
1783                          * Previously, pr->ndpr_addr is removed as well,
1784                          * but I strongly believe we don't have to do it.
1785                          * nd6_purge() is only called from in6_ifdetach(),
1786                          * which removes all the associated interface addresses
1787                          * by itself.
1788                          * (jinmei@kame.net 20010129)
1789                          */
1790                         NDPR_ADDREF_LOCKED(pr);
1791                         prelist_remove(pr);
1792                         NDPR_UNLOCK(pr);
1793                         NDPR_REMREF(pr);
1794                         removed = TRUE;
1795                         npr = nd_prefix.lh_first;
1796                 } else {
1797                         NDPR_UNLOCK(pr);
1798                 }
1799         }
1800         if (removed)
1801                 pfxlist_onlink_check();
1802         lck_mtx_unlock(nd6_mutex);
1803
1804         /* cancel default outgoing interface setting */
1805         if (nd6_defifindex == ifp->if_index) {
1806                 nd6_setdefaultiface(0);
1807         }
1808
1809         /*
1810          * Perform default router selection even when we are a router,
1811          * if Scoped Routing is enabled.
1812          */
1813         lck_mtx_lock(nd6_mutex);
1814         /* refresh default router list */
1815         defrouter_select(ifp);
1816         lck_mtx_unlock(nd6_mutex);
1817
1818         /*
1819          * Nuke neighbor cache entries for the ifp.
1820          * Note that rt->rt_ifp may not be the same as ifp,
1821          * due to KAME goto ours hack.  See RTM_RESOLVE case in
1822          * nd6_rtrequest(), and ip6_input().
1823          */
1824 again:
1825         lck_mtx_lock(rnh_lock);
1826         ln = llinfo_nd6.ln_next;
1827         while (ln != NULL && ln != &llinfo_nd6) {
1828                 struct rtentry *rt;
1829                 struct llinfo_nd6 *nln;
1830
1831                 nln = ln->ln_next;
1832                 rt = ln->ln_rt;
1833                 RT_LOCK(rt);
1834                 if (rt->rt_gateway != NULL &&
1835                     rt->rt_gateway->sa_family == AF_LINK &&
1836                     SDL(rt->rt_gateway)->sdl_index == ifp->if_index) {
1837                         RT_ADDREF_LOCKED(rt);
1838                         RT_UNLOCK(rt);
1839                         lck_mtx_unlock(rnh_lock);
1840                         /*
1841                          * See comments on nd6_service() for reasons why
1842                          * this loop is repeated; we bite the costs of
1843                          * going thru the same llinfo_nd6 more than once
1844                          * here, since this purge happens during detach,
1845                          * and that unlike the timer case, it's possible
1846                          * there's more than one purges happening at the
1847                          * same time (thus a flag wouldn't buy anything).
1848                          */
1849                         nd6_free(rt);
1850                         RT_REMREF(rt);
1851                         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
1852                         goto again;
1853                 } else {
1854                         RT_UNLOCK(rt);
1855                 }
1856                 ln = nln;
1857         }
1858         lck_mtx_unlock(rnh_lock);
1859 }
1860
1861 /*
1862  * Upon success, the returned route will be locked and the caller is
1863  * responsible for releasing the reference and doing RT_UNLOCK(rt).
1864  * This routine does not require rnh_lock to be held by the caller,
1865  * although it needs to be indicated of such a case in order to call
1866  * the correct variant of the relevant routing routines.
1867  */
1868 struct rtentry *
1869 nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp, int rt_locked)
1870 {
1871         struct rtentry *rt;
1872         struct sockaddr_in6 sin6;
1873         unsigned int ifscope;
1874
1875         bzero(&sin6, sizeof (sin6));
1876         sin6.sin6_len = sizeof (struct sockaddr_in6);
1877         sin6.sin6_family = AF_INET6;
1878         sin6.sin6_addr = *addr6;
1879
1880         ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
1881         if (rt_locked) {
1882                 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1883                 rt = rtalloc1_scoped_locked(SA(&sin6), create, 0, ifscope);
1884         } else {
1885                 rt = rtalloc1_scoped(SA(&sin6), create, 0, ifscope);
1886         }
1887
1888         if (rt != NULL) {
1889                 RT_LOCK(rt);
1890                 if ((rt->rt_flags & RTF_LLINFO) == 0) {
1891                         /*
1892                          * This is the case for the default route.
1893                          * If we want to create a neighbor cache for the
1894                          * address, we should free the route for the
1895                          * destination and allocate an interface route.
1896                          */
1897                         if (create) {
1898                                 RT_UNLOCK(rt);
1899                                 if (rt_locked)
1900                                         rtfree_locked(rt);
1901                                 else
1902                                         rtfree(rt);
1903                                 rt = NULL;
1904                         }
1905                 }
1906         }
1907         if (rt == NULL) {
1908                 if (create && ifp) {
1909                         struct ifaddr *ifa;
1910                         u_int32_t ifa_flags;
1911                         int e;
1912
1913                         /*
1914                          * If no route is available and create is set,
1915                          * we allocate a host route for the destination
1916                          * and treat it like an interface route.
1917                          * This hack is necessary for a neighbor which can't
1918                          * be covered by our own prefix.
1919                          */
1920                         ifa = ifaof_ifpforaddr(SA(&sin6), ifp);
1921                         if (ifa == NULL)
1922                                 return (NULL);
1923
1924                         /*
1925                          * Create a new route.  RTF_LLINFO is necessary
1926                          * to create a Neighbor Cache entry for the
1927                          * destination in nd6_rtrequest which will be
1928                          * called in rtrequest via ifa->ifa_rtrequest.
1929                          */
1930                         if (!rt_locked)
1931                                 lck_mtx_lock(rnh_lock);
1932                         IFA_LOCK_SPIN(ifa);
1933                         ifa_flags = ifa->ifa_flags;
1934                         IFA_UNLOCK(ifa);
1935                         if ((e = rtrequest_scoped_locked(RTM_ADD,
1936                             SA(&sin6), ifa->ifa_addr, SA(&all1_sa),
1937                             (ifa_flags | RTF_HOST | RTF_LLINFO) &
1938                             ~RTF_CLONING, &rt, ifscope)) != 0) {
1939                                 if (e != EEXIST)
1940                                         log(LOG_ERR, "%s: failed to add route "
1941                                             "for a neighbor(%s), errno=%d\n",
1942                                             __func__, ip6_sprintf(addr6), e);
1943                         }
1944                         if (!rt_locked)
1945                                 lck_mtx_unlock(rnh_lock);
1946                         IFA_REMREF(ifa);
1947                         if (rt == NULL)
1948                                 return (NULL);
1949
1950                         RT_LOCK(rt);
1951                         if (rt->rt_llinfo) {
1952                                 struct llinfo_nd6 *ln = rt->rt_llinfo;
1953                                 struct nd_ifinfo *ndi = ND_IFINFO(rt->rt_ifp);
1954
1955                                 VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
1956                                 /*
1957                                  * For interface's that do not perform NUD
1958                                  * neighbor cache entres must always be marked
1959                                  * reachable with no expiry
1960                                  */
1961                                 if (ndi->flags & ND6_IFF_PERFORMNUD) {
1962                                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_NOSTATE);
1963                                 } else {
1964                                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
1965                                         ln_setexpire(ln, 0);
1966                                 }
1967                         }
1968                 } else {
1969                         return (NULL);
1970                 }
1971         }
1972         RT_LOCK_ASSERT_HELD(rt);
1973         /*
1974          * Validation for the entry.
1975          * Note that the check for rt_llinfo is necessary because a cloned
1976          * route from a parent route that has the L flag (e.g. the default
1977          * route to a p2p interface) may have the flag, too, while the
1978          * destination is not actually a neighbor.
1979          * XXX: we can't use rt->rt_ifp to check for the interface, since
1980          *      it might be the loopback interface if the entry is for our
1981          *      own address on a non-loopback interface. Instead, we should
1982          *      use rt->rt_ifa->ifa_ifp, which would specify the REAL
1983          *      interface.
1984          * Note also that ifa_ifp and ifp may differ when we connect two
1985          * interfaces to a same link, install a link prefix to an interface,
1986          * and try to install a neighbor cache on an interface that does not
1987          * have a route to the prefix.
1988          *
1989          * If the address is from a proxied prefix, the ifa_ifp and ifp might
1990          * not match, because nd6_na_input() could have modified the ifp
1991          * of the route to point to the interface where the NA arrived on,
1992          * hence the test for RTF_PROXY.
1993          */
1994         if ((rt->rt_flags & RTF_GATEWAY) || (rt->rt_flags & RTF_LLINFO) == 0 ||
1995             rt->rt_gateway->sa_family != AF_LINK || rt->rt_llinfo == NULL ||
1996             (ifp && rt->rt_ifa->ifa_ifp != ifp &&
1997             !(rt->rt_flags & RTF_PROXY))) {
1998                 RT_REMREF_LOCKED(rt);
1999                 RT_UNLOCK(rt);
2000                 if (create) {
2001                         log(LOG_DEBUG, "%s: failed to lookup %s "
2002                             "(if = %s)\n", __func__, ip6_sprintf(addr6),
2003                             ifp ? if_name(ifp) : "unspec");
2004                         /* xxx more logs... kazu */
2005                 }
2006                 return (NULL);
2007         }
2008         /*
2009          * Caller needs to release reference and call RT_UNLOCK(rt).
2010          */
2011         return (rt);
2012 }
2013
2014 /*
2015  * Test whether a given IPv6 address is a neighbor or not, ignoring
2016  * the actual neighbor cache.  The neighbor cache is ignored in order
2017  * to not reenter the routing code from within itself.
2018  */
2019 static int
2020 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
2021 {
2022         struct nd_prefix *pr;
2023         struct ifaddr *dstaddr;
2024
2025         LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2026
2027         /*
2028          * A link-local address is always a neighbor.
2029          * XXX: a link does not necessarily specify a single interface.
2030          */
2031         if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
2032                 struct sockaddr_in6 sin6_copy;
2033                 u_int32_t zone;
2034
2035                 /*
2036                  * We need sin6_copy since sa6_recoverscope() may modify the
2037                  * content (XXX).
2038                  */
2039                 sin6_copy = *addr;
2040                 if (sa6_recoverscope(&sin6_copy, FALSE))
2041                         return (0); /* XXX: should be impossible */
2042                 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
2043                         return (0);
2044                 if (sin6_copy.sin6_scope_id == zone)
2045                         return (1);
2046                 else
2047                         return (0);
2048         }
2049
2050         /*
2051          * If the address matches one of our addresses,
2052          * it should be a neighbor.
2053          * If the address matches one of our on-link prefixes, it should be a
2054          * neighbor.
2055          */
2056         for (pr = nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
2057                 NDPR_LOCK(pr);
2058                 if (pr->ndpr_ifp != ifp) {
2059                         NDPR_UNLOCK(pr);
2060                         continue;
2061                 }
2062                 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) {
2063                         NDPR_UNLOCK(pr);
2064                         continue;
2065                 }
2066                 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
2067                     &addr->sin6_addr, &pr->ndpr_mask)) {
2068                         NDPR_UNLOCK(pr);
2069                         return (1);
2070                 }
2071                 NDPR_UNLOCK(pr);
2072         }
2073
2074         /*
2075          * If the address is assigned on the node of the other side of
2076          * a p2p interface, the address should be a neighbor.
2077          */
2078         dstaddr = ifa_ifwithdstaddr(SA(addr));
2079         if (dstaddr != NULL) {
2080                 if (dstaddr->ifa_ifp == ifp) {
2081                         IFA_REMREF(dstaddr);
2082                         return (1);
2083                 }
2084                 IFA_REMREF(dstaddr);
2085                 dstaddr = NULL;
2086         }
2087
2088         return (0);
2089 }
2090
2091
2092 /*
2093  * Detect if a given IPv6 address identifies a neighbor on a given link.
2094  * XXX: should take care of the destination of a p2p link?
2095  */
2096 int
2097 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp,
2098     int rt_locked)
2099 {
2100         struct rtentry *rt;
2101
2102         LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_NOTOWNED);
2103         lck_mtx_lock(nd6_mutex);
2104         if (nd6_is_new_addr_neighbor(addr, ifp)) {
2105                 lck_mtx_unlock(nd6_mutex);
2106                 return (1);
2107         }
2108         lck_mtx_unlock(nd6_mutex);
2109
2110         /*
2111          * Even if the address matches none of our addresses, it might be
2112          * in the neighbor cache.
2113          */
2114         if ((rt = nd6_lookup(&addr->sin6_addr, 0, ifp, rt_locked)) != NULL) {
2115                 RT_LOCK_ASSERT_HELD(rt);
2116                 RT_REMREF_LOCKED(rt);
2117                 RT_UNLOCK(rt);
2118                 return (1);
2119         }
2120
2121         return (0);
2122 }
2123
2124 /*
2125  * Free an nd6 llinfo entry.
2126  * Since the function would cause significant changes in the kernel, DO NOT
2127  * make it global, unless you have a strong reason for the change, and are sure
2128  * that the change is safe.
2129  */
2130 void
2131 nd6_free(struct rtentry *rt)
2132 {
2133         struct llinfo_nd6 *ln;
2134         struct in6_addr in6;
2135         struct nd_defrouter *dr;
2136
2137         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2138         RT_LOCK_ASSERT_NOTHELD(rt);
2139         lck_mtx_lock(nd6_mutex);
2140
2141         RT_LOCK(rt);
2142         RT_ADDREF_LOCKED(rt);   /* Extra ref */
2143         ln = rt->rt_llinfo;
2144         in6 = SIN6(rt_key(rt))->sin6_addr;
2145
2146         /*
2147          * Prevent another thread from modifying rt_key, rt_gateway
2148          * via rt_setgate() after the rt_lock is dropped by marking
2149          * the route as defunct.
2150          */
2151         rt->rt_flags |= RTF_CONDEMNED;
2152
2153         /*
2154          * We used to have pfctlinput(PRC_HOSTDEAD) here.  Even though it is
2155          * not harmful, it was not really necessary.  Perform default router
2156          * selection even when we are a router, if Scoped Routing is enabled.
2157          */
2158         dr = defrouter_lookup(&SIN6(rt_key(rt))->sin6_addr, rt->rt_ifp);
2159
2160         if ((ln && ln->ln_router) || dr) {
2161                 /*
2162                  * rt6_flush must be called whether or not the neighbor
2163                  * is in the Default Router List.
2164                  * See a corresponding comment in nd6_na_input().
2165                  */
2166                 RT_UNLOCK(rt);
2167                 lck_mtx_unlock(nd6_mutex);
2168                 rt6_flush(&in6, rt->rt_ifp);
2169                 lck_mtx_lock(nd6_mutex);
2170         } else {
2171                 RT_UNLOCK(rt);
2172         }
2173
2174         if (dr) {
2175                 NDDR_REMREF(dr);
2176                 /*
2177                  * Unreachablity of a router might affect the default
2178                  * router selection and on-link detection of advertised
2179                  * prefixes.
2180                  */
2181
2182                 /*
2183                  * Temporarily fake the state to choose a new default
2184                  * router and to perform on-link determination of
2185                  * prefixes correctly.
2186                  * Below the state will be set correctly,
2187                  * or the entry itself will be deleted.
2188                  */
2189                 RT_LOCK_SPIN(rt);
2190                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
2191
2192                 /*
2193                  * Since defrouter_select() does not affect the
2194                  * on-link determination and MIP6 needs the check
2195                  * before the default router selection, we perform
2196                  * the check now.
2197                  */
2198                 RT_UNLOCK(rt);
2199                 pfxlist_onlink_check();
2200
2201                 /*
2202                  * refresh default router list
2203                  */
2204                 defrouter_select(rt->rt_ifp);
2205         }
2206         RT_LOCK_ASSERT_NOTHELD(rt);
2207         lck_mtx_unlock(nd6_mutex);
2208         /*
2209          * Detach the route from the routing tree and the list of neighbor
2210          * caches, and disable the route entry not to be used in already
2211          * cached routes.
2212          */
2213         (void) rtrequest(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt), 0, NULL);
2214
2215         /* Extra ref held above; now free it */
2216         rtfree(rt);
2217 }
2218
2219 void
2220 nd6_rtrequest(int req, struct rtentry *rt, struct sockaddr *sa)
2221 {
2222 #pragma unused(sa)
2223         struct sockaddr *gate = rt->rt_gateway;
2224         struct llinfo_nd6 *ln = rt->rt_llinfo;
2225         static struct sockaddr_dl null_sdl =
2226             { .sdl_len = sizeof (null_sdl), .sdl_family = AF_LINK };
2227         struct ifnet *ifp = rt->rt_ifp;
2228         struct ifaddr *ifa;
2229         uint64_t timenow;
2230         char buf[MAX_IPv6_STR_LEN];
2231         struct nd_ifinfo *ndi = ND_IFINFO(rt->rt_ifp);
2232
2233         VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
2234         VERIFY(nd6_init_done);
2235         LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2236         RT_LOCK_ASSERT_HELD(rt);
2237
2238         /*
2239          * We have rnh_lock held, see if we need to schedule the timer;
2240          * we might do this again below during RTM_RESOLVE, but doing it
2241          * now handles all other cases.
2242          */
2243         if (nd6_sched_timeout_want)
2244                 nd6_sched_timeout(NULL, NULL);
2245
2246         if (rt->rt_flags & RTF_GATEWAY)
2247                 return;
2248
2249         if (!nd6_need_cache(ifp) && !(rt->rt_flags & RTF_HOST)) {
2250                 /*
2251                  * This is probably an interface direct route for a link
2252                  * which does not need neighbor caches (e.g. fe80::%lo0/64).
2253                  * We do not need special treatment below for such a route.
2254                  * Moreover, the RTF_LLINFO flag which would be set below
2255                  * would annoy the ndp(8) command.
2256                  */
2257                 return;
2258         }
2259
2260         if (req == RTM_RESOLVE) {
2261                 int no_nd_cache;
2262
2263                 if (!nd6_need_cache(ifp)) {     /* stf case */
2264                         no_nd_cache = 1;
2265                 } else {
2266                         struct sockaddr_in6 sin6;
2267
2268                         rtkey_to_sa6(rt, &sin6);
2269                         /*
2270                          * nd6_is_addr_neighbor() may call nd6_lookup(),
2271                          * therefore we drop rt_lock to avoid deadlock
2272                          * during the lookup.
2273                          */
2274                         RT_ADDREF_LOCKED(rt);
2275                         RT_UNLOCK(rt);
2276                         no_nd_cache = !nd6_is_addr_neighbor(&sin6, ifp, 1);
2277                         RT_LOCK(rt);
2278                         RT_REMREF_LOCKED(rt);
2279                 }
2280
2281                 /*
2282                  * FreeBSD and BSD/OS often make a cloned host route based
2283                  * on a less-specific route (e.g. the default route).
2284                  * If the less specific route does not have a "gateway"
2285                  * (this is the case when the route just goes to a p2p or an
2286                  * stf interface), we'll mistakenly make a neighbor cache for
2287                  * the host route, and will see strange neighbor solicitation
2288                  * for the corresponding destination.  In order to avoid the
2289                  * confusion, we check if the destination of the route is
2290                  * a neighbor in terms of neighbor discovery, and stop the
2291                  * process if not.  Additionally, we remove the LLINFO flag
2292                  * so that ndp(8) will not try to get the neighbor information
2293                  * of the destination.
2294                  */
2295                 if (no_nd_cache) {
2296                         rt->rt_flags &= ~RTF_LLINFO;
2297                         return;
2298                 }
2299         }
2300
2301         timenow = net_uptime();
2302
2303         switch (req) {
2304         case RTM_ADD:
2305                 /*
2306                  * There is no backward compatibility :)
2307                  *
2308                  * if ((rt->rt_flags & RTF_HOST) == 0 &&
2309                  *      SIN(rt_mask(rt))->sin_addr.s_addr != 0xffffffff)
2310                  *              rt->rt_flags |= RTF_CLONING;
2311                  */
2312                 if ((rt->rt_flags & RTF_CLONING) ||
2313                     ((rt->rt_flags & RTF_LLINFO) && ln == NULL)) {
2314                         /*
2315                          * Case 1: This route should come from a route to
2316                          * interface (RTF_CLONING case) or the route should be
2317                          * treated as on-link but is currently not
2318                          * (RTF_LLINFO && ln == NULL case).
2319                          */
2320                         if (rt_setgate(rt, rt_key(rt), SA(&null_sdl)) == 0) {
2321                                 gate = rt->rt_gateway;
2322                                 SDL(gate)->sdl_type = ifp->if_type;
2323                                 SDL(gate)->sdl_index = ifp->if_index;
2324                                 /*
2325                                  * In case we're called before 1.0 sec.
2326                                  * has elapsed.
2327                                  */
2328                                 if (ln != NULL) {
2329                                         ln_setexpire(ln,
2330                                             (ifp->if_eflags & IFEF_IPV6_ND6ALT)
2331                                             ? 0 : MAX(timenow, 1));
2332                                 }
2333                         }
2334                         if (rt->rt_flags & RTF_CLONING)
2335                                 break;
2336                 }
2337                 /*
2338                  * In IPv4 code, we try to annonuce new RTF_ANNOUNCE entry here.
2339                  * We don't do that here since llinfo is not ready yet.
2340                  *
2341                  * There are also couple of other things to be discussed:
2342                  * - unsolicited NA code needs improvement beforehand
2343                  * - RFC4861 says we MAY send multicast unsolicited NA
2344                  *   (7.2.6 paragraph 4), however, it also says that we
2345                  *   SHOULD provide a mechanism to prevent multicast NA storm.
2346                  *   we don't have anything like it right now.
2347                  *   note that the mechanism needs a mutual agreement
2348                  *   between proxies, which means that we need to implement
2349                  *   a new protocol, or a new kludge.
2350                  * - from RFC4861 6.2.4, host MUST NOT send an unsolicited RA.
2351                  *   we need to check ip6forwarding before sending it.
2352                  *   (or should we allow proxy ND configuration only for
2353                  *   routers?  there's no mention about proxy ND from hosts)
2354                  */
2355                 /* FALLTHROUGH */
2356         case RTM_RESOLVE:
2357                 if (!(ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK))) {
2358                         /*
2359                          * Address resolution isn't necessary for a point to
2360                          * point link, so we can skip this test for a p2p link.
2361                          */
2362                         if (gate->sa_family != AF_LINK ||
2363                             gate->sa_len < sizeof (null_sdl)) {
2364                                 /* Don't complain in case of RTM_ADD */
2365                                 if (req == RTM_RESOLVE) {
2366                                         log(LOG_ERR, "%s: route to %s has bad "
2367                                             "gateway address (sa_family %u "
2368                                             "sa_len %u) on %s\n", __func__,
2369                                             inet_ntop(AF_INET6,
2370                                             &SIN6(rt_key(rt))->sin6_addr, buf,
2371                                             sizeof (buf)), gate->sa_family,
2372                                             gate->sa_len, if_name(ifp));
2373                                 }
2374                                 break;
2375                         }
2376                         SDL(gate)->sdl_type = ifp->if_type;
2377                         SDL(gate)->sdl_index = ifp->if_index;
2378                 }
2379                 if (ln != NULL)
2380                         break;  /* This happens on a route change */
2381                 /*
2382                  * Case 2: This route may come from cloning, or a manual route
2383                  * add with a LL address.
2384                  */
2385                 rt->rt_llinfo = ln = nd6_llinfo_alloc(M_WAITOK);
2386                 if (ln == NULL)
2387                         break;
2388
2389                 nd6_allocated++;
2390                 rt->rt_llinfo_get_ri    = nd6_llinfo_get_ri;
2391                 rt->rt_llinfo_get_iflri = nd6_llinfo_get_iflri;
2392                 rt->rt_llinfo_purge     = nd6_llinfo_purge;
2393                 rt->rt_llinfo_free      = nd6_llinfo_free;
2394                 rt->rt_llinfo_refresh   = nd6_llinfo_refresh;
2395                 rt->rt_flags |= RTF_LLINFO;
2396                 ln->ln_rt = rt;
2397                 /* this is required for "ndp" command. - shin */
2398                 /*
2399                  * For interface's that do not perform NUD
2400                  * neighbor cache entries must always be marked
2401                  * reachable with no expiry
2402                  */
2403                 if ((req == RTM_ADD) ||
2404                     !(ndi->flags & ND6_IFF_PERFORMNUD)) {
2405                         /*
2406                          * gate should have some valid AF_LINK entry,
2407                          * and ln->ln_expire should have some lifetime
2408                          * which is specified by ndp command.
2409                          */
2410                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2411                         ln_setexpire(ln, 0);
2412                 } else {
2413                         /*
2414                          * When req == RTM_RESOLVE, rt is created and
2415                          * initialized in rtrequest(), so rt_expire is 0.
2416                          */
2417                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_NOSTATE);
2418                         /* In case we're called before 1.0 sec. has elapsed */
2419                         ln_setexpire(ln, (ifp->if_eflags & IFEF_IPV6_ND6ALT) ?
2420                             0 : MAX(timenow, 1));
2421                 }
2422                 LN_INSERTHEAD(ln);
2423                 nd6_inuse++;
2424
2425                 /* We have at least one entry; arm the timer if not already */
2426                 nd6_sched_timeout(NULL, NULL);
2427
2428                 /*
2429                  * If we have too many cache entries, initiate immediate
2430                  * purging for some "less recently used" entries.  Note that
2431                  * we cannot directly call nd6_free() here because it would
2432                  * cause re-entering rtable related routines triggering an LOR
2433                  * problem.
2434                  */
2435                 if (ip6_neighborgcthresh > 0 &&
2436                     nd6_inuse >= ip6_neighborgcthresh) {
2437                         int i;
2438
2439                         for (i = 0; i < 10 && llinfo_nd6.ln_prev != ln; i++) {
2440                                 struct llinfo_nd6 *ln_end = llinfo_nd6.ln_prev;
2441                                 struct rtentry *rt_end = ln_end->ln_rt;
2442
2443                                 /* Move this entry to the head */
2444                                 RT_LOCK(rt_end);
2445                                 LN_DEQUEUE(ln_end);
2446                                 LN_INSERTHEAD(ln_end);
2447
2448                                 if (ln_end->ln_expire == 0) {
2449                                         RT_UNLOCK(rt_end);
2450                                         continue;
2451                                 }
2452                                 if (ln_end->ln_state > ND6_LLINFO_INCOMPLETE)
2453                                         ND6_CACHE_STATE_TRANSITION(ln_end, ND6_LLINFO_STALE);
2454                                 else
2455                                         ND6_CACHE_STATE_TRANSITION(ln_end, ND6_LLINFO_PURGE);
2456                                 ln_setexpire(ln_end, timenow);
2457                                 RT_UNLOCK(rt_end);
2458                         }
2459                 }
2460
2461                 /*
2462                  * check if rt_key(rt) is one of my address assigned
2463                  * to the interface.
2464                  */
2465                 ifa = (struct ifaddr *)in6ifa_ifpwithaddr(rt->rt_ifp,
2466                     &SIN6(rt_key(rt))->sin6_addr);
2467                 if (ifa != NULL) {
2468                         caddr_t macp = nd6_ifptomac(ifp);
2469                         ln_setexpire(ln, 0);
2470                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2471                         if (macp != NULL) {
2472                                 Bcopy(macp, LLADDR(SDL(gate)), ifp->if_addrlen);
2473                                 SDL(gate)->sdl_alen = ifp->if_addrlen;
2474                         }
2475                         if (nd6_useloopback) {
2476                                 if (rt->rt_ifp != lo_ifp) {
2477                                         /*
2478                                          * Purge any link-layer info caching.
2479                                          */
2480                                         if (rt->rt_llinfo_purge != NULL)
2481                                                 rt->rt_llinfo_purge(rt);
2482
2483                                         /*
2484                                          * Adjust route ref count for the
2485                                          * interfaces.
2486                                          */
2487                                         if (rt->rt_if_ref_fn != NULL) {
2488                                                 rt->rt_if_ref_fn(lo_ifp, 1);
2489                                                 rt->rt_if_ref_fn(rt->rt_ifp,
2490                                                     -1);
2491                                         }
2492                                 }
2493                                 rt->rt_ifp = lo_ifp;
2494                                 /*
2495                                  * If rmx_mtu is not locked, update it
2496                                  * to the MTU used by the new interface.
2497                                  */
2498                                 if (!(rt->rt_rmx.rmx_locks & RTV_MTU))
2499                                         rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
2500                                 /*
2501                                  * Make sure rt_ifa be equal to the ifaddr
2502                                  * corresponding to the address.
2503                                  * We need this because when we refer
2504                                  * rt_ifa->ia6_flags in ip6_input, we assume
2505                                  * that the rt_ifa points to the address instead
2506                                  * of the loopback address.
2507                                  */
2508                                 if (ifa != rt->rt_ifa) {
2509                                         rtsetifa(rt, ifa);
2510                                 }
2511                         }
2512                         IFA_REMREF(ifa);
2513                 } else if (rt->rt_flags & RTF_ANNOUNCE) {
2514                         ln_setexpire(ln, 0);
2515                         ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_REACHABLE);
2516
2517                         /* join solicited node multicast for proxy ND */
2518                         if (ifp->if_flags & IFF_MULTICAST) {
2519                                 struct in6_addr llsol;
2520                                 struct in6_multi *in6m;
2521                                 int error;
2522
2523                                 llsol = SIN6(rt_key(rt))->sin6_addr;
2524                                 llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2525                                 llsol.s6_addr32[1] = 0;
2526                                 llsol.s6_addr32[2] = htonl(1);
2527                                 llsol.s6_addr8[12] = 0xff;
2528                                 if (in6_setscope(&llsol, ifp, NULL))
2529                                         break;
2530                                 error = in6_mc_join(ifp, &llsol,
2531                                     NULL, &in6m, 0);
2532                                 if (error) {
2533                                         nd6log((LOG_ERR, "%s: failed to join "
2534                                             "%s (errno=%d)\n", if_name(ifp),
2535                                             ip6_sprintf(&llsol), error));
2536                                 } else {
2537                                         IN6M_REMREF(in6m);
2538                                 }
2539                         }
2540                 }
2541                 break;
2542
2543         case RTM_DELETE:
2544                 if (ln == NULL)
2545                         break;
2546                 /* leave from solicited node multicast for proxy ND */
2547                 if ((rt->rt_flags & RTF_ANNOUNCE) &&
2548                     (ifp->if_flags & IFF_MULTICAST)) {
2549                         struct in6_addr llsol;
2550                         struct in6_multi *in6m;
2551
2552                         llsol = SIN6(rt_key(rt))->sin6_addr;
2553                         llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
2554                         llsol.s6_addr32[1] = 0;
2555                         llsol.s6_addr32[2] = htonl(1);
2556                         llsol.s6_addr8[12] = 0xff;
2557                         if (in6_setscope(&llsol, ifp, NULL) == 0) {
2558                                 in6_multihead_lock_shared();
2559                                 IN6_LOOKUP_MULTI(&llsol, ifp, in6m);
2560                                 in6_multihead_lock_done();
2561                                 if (in6m != NULL) {
2562                                         in6_mc_leave(in6m, NULL);
2563                                         IN6M_REMREF(in6m);
2564                                 }
2565                         }
2566                 }
2567                 nd6_inuse--;
2568                 /*
2569                  * Unchain it but defer the actual freeing until the route
2570                  * itself is to be freed.  rt->rt_llinfo still points to
2571                  * llinfo_nd6, and likewise, ln->ln_rt stil points to this
2572                  * route entry, except that RTF_LLINFO is now cleared.
2573                  */
2574                 if (ln->ln_flags & ND6_LNF_IN_USE)
2575                         LN_DEQUEUE(ln);
2576
2577                 /*
2578                  * Purge any link-layer info caching.
2579                  */
2580                 if (rt->rt_llinfo_purge != NULL)
2581                         rt->rt_llinfo_purge(rt);
2582
2583                 rt->rt_flags &= ~RTF_LLINFO;
2584                 if (ln->ln_hold != NULL) {
2585                         m_freem_list(ln->ln_hold);
2586                         ln->ln_hold = NULL;
2587                 }
2588         }
2589 }
2590
2591 static int
2592 nd6_siocgdrlst(void *data, int data_is_64)
2593 {
2594         struct in6_drlist_32 *drl_32;
2595         struct nd_defrouter *dr;
2596         int i = 0;
2597
2598         LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2599
2600         dr = TAILQ_FIRST(&nd_defrouter);
2601
2602         /* XXX Handle mapped defrouter entries */
2603         /* For 64-bit process */
2604         if (data_is_64) {
2605                 struct in6_drlist_64 *drl_64;
2606
2607                 drl_64 = _MALLOC(sizeof (*drl_64), M_TEMP, M_WAITOK|M_ZERO);
2608                 if (drl_64 == NULL)
2609                         return (ENOMEM);
2610
2611                 /* preserve the interface name */
2612                 bcopy(data, drl_64, sizeof (drl_64->ifname));
2613
2614                 while (dr && i < DRLSTSIZ) {
2615                         drl_64->defrouter[i].rtaddr = dr->rtaddr;
2616                         if (IN6_IS_ADDR_LINKLOCAL(
2617                             &drl_64->defrouter[i].rtaddr)) {
2618                                 /* XXX: need to this hack for KAME stack */
2619                                 drl_64->defrouter[i].rtaddr.s6_addr16[1] = 0;
2620                         } else {
2621                                 log(LOG_ERR,
2622                                     "default router list contains a "
2623                                     "non-linklocal address(%s)\n",
2624                                     ip6_sprintf(&drl_64->defrouter[i].rtaddr));
2625                         }
2626                         drl_64->defrouter[i].flags = dr->flags;
2627                         drl_64->defrouter[i].rtlifetime = dr->rtlifetime;
2628                         drl_64->defrouter[i].expire = nddr_getexpire(dr);
2629                         drl_64->defrouter[i].if_index = dr->ifp->if_index;
2630                         i++;
2631                         dr = TAILQ_NEXT(dr, dr_entry);
2632                 }
2633                 bcopy(drl_64, data, sizeof (*drl_64));
2634                 _FREE(drl_64, M_TEMP);
2635                 return (0);
2636         }
2637
2638         /* For 32-bit process */
2639         drl_32 = _MALLOC(sizeof (*drl_32), M_TEMP, M_WAITOK|M_ZERO);
2640         if (drl_32 == NULL)
2641                 return (ENOMEM);
2642
2643         /* preserve the interface name */
2644         bcopy(data, drl_32, sizeof (drl_32->ifname));
2645
2646         while (dr != NULL && i < DRLSTSIZ) {
2647                 drl_32->defrouter[i].rtaddr = dr->rtaddr;
2648                 if (IN6_IS_ADDR_LINKLOCAL(&drl_32->defrouter[i].rtaddr)) {
2649                         /* XXX: need to this hack for KAME stack */
2650                         drl_32->defrouter[i].rtaddr.s6_addr16[1] = 0;
2651                 } else {
2652                         log(LOG_ERR,
2653                             "default router list contains a "
2654                             "non-linklocal address(%s)\n",
2655                             ip6_sprintf(&drl_32->defrouter[i].rtaddr));
2656                 }
2657                 drl_32->defrouter[i].flags = dr->flags;
2658                 drl_32->defrouter[i].rtlifetime = dr->rtlifetime;
2659                 drl_32->defrouter[i].expire = nddr_getexpire(dr);
2660                 drl_32->defrouter[i].if_index = dr->ifp->if_index;
2661                 i++;
2662                 dr = TAILQ_NEXT(dr, dr_entry);
2663         }
2664         bcopy(drl_32, data, sizeof (*drl_32));
2665         _FREE(drl_32, M_TEMP);
2666         return (0);
2667 }
2668
2669 /*
2670  * XXX meaning of fields, especialy "raflags", is very
2671  * differnet between RA prefix list and RR/static prefix list.
2672  * how about separating ioctls into two?
2673  */
2674 static int
2675 nd6_siocgprlst(void *data, int data_is_64)
2676 {
2677         struct in6_prlist_32 *prl_32;
2678         struct nd_prefix *pr;
2679         int i = 0;
2680
2681         LCK_MTX_ASSERT(nd6_mutex, LCK_MTX_ASSERT_OWNED);
2682
2683         pr = nd_prefix.lh_first;
2684
2685         /* XXX Handle mapped defrouter entries */
2686         /* For 64-bit process */
2687         if (data_is_64) {
2688                 struct in6_prlist_64 *prl_64;
2689
2690                 prl_64 = _MALLOC(sizeof (*prl_64), M_TEMP, M_WAITOK|M_ZERO);
2691                 if (prl_64 == NULL)
2692                         return (ENOMEM);
2693
2694                 /* preserve the interface name */
2695                 bcopy(data, prl_64, sizeof (prl_64->ifname));
2696
2697                 while (pr && i < PRLSTSIZ) {
2698                         struct nd_pfxrouter *pfr;
2699                         int j;
2700
2701                         NDPR_LOCK(pr);
2702                         (void) in6_embedscope(&prl_64->prefix[i].prefix,
2703                             &pr->ndpr_prefix, NULL, NULL, NULL);
2704                         prl_64->prefix[i].raflags = pr->ndpr_raf;
2705                         prl_64->prefix[i].prefixlen = pr->ndpr_plen;
2706                         prl_64->prefix[i].vltime = pr->ndpr_vltime;
2707                         prl_64->prefix[i].pltime = pr->ndpr_pltime;
2708                         prl_64->prefix[i].if_index = pr->ndpr_ifp->if_index;
2709                         prl_64->prefix[i].expire = ndpr_getexpire(pr);
2710
2711                         pfr = pr->ndpr_advrtrs.lh_first;
2712                         j = 0;
2713                         while (pfr) {
2714                                 if (j < DRLSTSIZ) {
2715 #define RTRADDR prl_64->prefix[i].advrtr[j]
2716                                         RTRADDR = pfr->router->rtaddr;
2717                                         if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2718                                                 /* XXX: hack for KAME */
2719                                                 RTRADDR.s6_addr16[1] = 0;
2720                                         } else {
2721                                                 log(LOG_ERR,
2722                                                     "a router(%s) advertises "
2723                                                     "a prefix with "
2724                                                     "non-link local address\n",
2725                                                     ip6_sprintf(&RTRADDR));
2726                                         }
2727 #undef RTRADDR
2728                                 }
2729                                 j++;
2730                                 pfr = pfr->pfr_next;
2731                         }
2732                         prl_64->prefix[i].advrtrs = j;
2733                         prl_64->prefix[i].origin = PR_ORIG_RA;
2734                         NDPR_UNLOCK(pr);
2735
2736                         i++;
2737                         pr = pr->ndpr_next;
2738                 }
2739                 bcopy(prl_64, data, sizeof (*prl_64));
2740                 _FREE(prl_64, M_TEMP);
2741                 return (0);
2742         }
2743
2744         /* For 32-bit process */
2745         prl_32 = _MALLOC(sizeof (*prl_32), M_TEMP, M_WAITOK|M_ZERO);
2746         if (prl_32 == NULL)
2747                 return (ENOMEM);
2748
2749         /* preserve the interface name */
2750         bcopy(data, prl_32, sizeof (prl_32->ifname));
2751
2752         while (pr && i < PRLSTSIZ) {
2753                 struct nd_pfxrouter *pfr;
2754                 int j;
2755
2756                 NDPR_LOCK(pr);
2757                 (void) in6_embedscope(&prl_32->prefix[i].prefix,
2758                     &pr->ndpr_prefix, NULL, NULL, NULL);
2759                 prl_32->prefix[i].raflags = pr->ndpr_raf;
2760                 prl_32->prefix[i].prefixlen = pr->ndpr_plen;
2761                 prl_32->prefix[i].vltime = pr->ndpr_vltime;
2762                 prl_32->prefix[i].pltime = pr->ndpr_pltime;
2763                 prl_32->prefix[i].if_index = pr->ndpr_ifp->if_index;
2764                 prl_32->prefix[i].expire = ndpr_getexpire(pr);
2765
2766                 pfr = pr->ndpr_advrtrs.lh_first;
2767                 j = 0;
2768                 while (pfr) {
2769                         if (j < DRLSTSIZ) {
2770 #define RTRADDR prl_32->prefix[i].advrtr[j]
2771                                 RTRADDR = pfr->router->rtaddr;
2772                                 if (IN6_IS_ADDR_LINKLOCAL(&RTRADDR)) {
2773                                         /* XXX: hack for KAME */
2774                                         RTRADDR.s6_addr16[1] = 0;
2775                                 } else {
2776                                         log(LOG_ERR,
2777                                             "a router(%s) advertises "
2778                                             "a prefix with "
2779                                             "non-link local address\n",
2780                                             ip6_sprintf(&RTRADDR));
2781                                 }
2782 #undef RTRADDR
2783                         }
2784                         j++;
2785                         pfr = pfr->pfr_next;
2786                 }
2787                 prl_32->prefix[i].advrtrs = j;
2788                 prl_32->prefix[i].origin = PR_ORIG_RA;
2789                 NDPR_UNLOCK(pr);
2790
2791                 i++;
2792                 pr = pr->ndpr_next;
2793         }
2794         bcopy(prl_32, data, sizeof (*prl_32));
2795         _FREE(prl_32, M_TEMP);
2796         return (0);
2797 }
2798
2799 int
2800 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
2801 {
2802         struct nd_defrouter *dr;
2803         struct nd_prefix *pr;
2804         struct rtentry *rt;
2805         int error = 0;
2806
2807         VERIFY(ifp != NULL);
2808
2809         switch (cmd) {
2810         case SIOCGDRLST_IN6_32:         /* struct in6_drlist_32 */
2811         case SIOCGDRLST_IN6_64:         /* struct in6_drlist_64 */
2812                 /*
2813                  * obsolete API, use sysctl under net.inet6.icmp6
2814                  */
2815                 lck_mtx_lock(nd6_mutex);
2816                 error = nd6_siocgdrlst(data, cmd == SIOCGDRLST_IN6_64);
2817                 lck_mtx_unlock(nd6_mutex);
2818                 break;
2819
2820         case SIOCGPRLST_IN6_32:         /* struct in6_prlist_32 */
2821         case SIOCGPRLST_IN6_64:         /* struct in6_prlist_64 */
2822                 /*
2823                  * obsolete API, use sysctl under net.inet6.icmp6
2824                  */
2825                 lck_mtx_lock(nd6_mutex);
2826                 error = nd6_siocgprlst(data, cmd == SIOCGPRLST_IN6_64);
2827                 lck_mtx_unlock(nd6_mutex);
2828                 break;
2829
2830         case OSIOCGIFINFO_IN6:          /* struct in6_ondireq */
2831         case SIOCGIFINFO_IN6: {         /* struct in6_ondireq */
2832                 u_int32_t linkmtu;
2833                 struct in6_ondireq *ondi = (struct in6_ondireq *)(void *)data;
2834                 struct nd_ifinfo *ndi;
2835                 /*
2836                  * SIOCGIFINFO_IN6 ioctl is encoded with in6_ondireq
2837                  * instead of in6_ndireq, so we treat it as such.
2838                  */
2839                 ndi = ND_IFINFO(ifp);
2840                 if ((NULL == ndi) || (FALSE == ndi->initialized)){
2841                         error = EINVAL;
2842                         break;
2843                 }
2844                 lck_mtx_lock(&ndi->lock);
2845                 linkmtu = IN6_LINKMTU(ifp);
2846                 bcopy(&linkmtu, &ondi->ndi.linkmtu, sizeof (linkmtu));
2847                 bcopy(&ndi->maxmtu, &ondi->ndi.maxmtu,
2848                     sizeof (u_int32_t));
2849                 bcopy(&ndi->basereachable, &ondi->ndi.basereachable,
2850                     sizeof (u_int32_t));
2851                 bcopy(&ndi->reachable, &ondi->ndi.reachable,
2852                     sizeof (u_int32_t));
2853                 bcopy(&ndi->retrans, &ondi->ndi.retrans,
2854                     sizeof (u_int32_t));
2855                 bcopy(&ndi->flags, &ondi->ndi.flags,
2856                     sizeof (u_int32_t));
2857                 bcopy(&ndi->recalctm, &ondi->ndi.recalctm,
2858                     sizeof (int));
2859                 ondi->ndi.chlim = ndi->chlim;
2860                 ondi->ndi.receivedra = 0;
2861                 lck_mtx_unlock(&ndi->lock);
2862                 break;
2863         }
2864
2865         case SIOCSIFINFO_FLAGS: {       /* struct in6_ndireq */
2866                 /*
2867                  * XXX BSD has a bunch of checks here to ensure
2868                  * that interface disabled flag is not reset if
2869                  * link local address has failed DAD.
2870                  * Investigate that part.
2871                  */
2872                 struct in6_ndireq *cndi = (struct in6_ndireq *)(void *)data;
2873                 u_int32_t oflags, flags;
2874                 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
2875
2876                 /* XXX: almost all other fields of cndi->ndi is unused */
2877                 if ((NULL == ndi) || !ndi->initialized) {
2878                         error = EINVAL;
2879                         break;
2880                 }
2881
2882                 lck_mtx_lock(&ndi->lock);
2883                 oflags = ndi->flags;
2884                 bcopy(&cndi->ndi.flags, &(ndi->flags), sizeof (flags));
2885                 flags = ndi->flags;
2886                 lck_mtx_unlock(&ndi->lock);
2887
2888                 if (oflags == flags) {
2889                         break;
2890                 }
2891
2892                 error = nd6_setifinfo(ifp, oflags, flags);
2893                 break;
2894         }
2895
2896         case SIOCSNDFLUSH_IN6:          /* struct in6_ifreq */
2897                 /* flush default router list */
2898                 /*
2899                  * xxx sumikawa: should not delete route if default
2900                  * route equals to the top of default router list
2901                  */
2902                 lck_mtx_lock(nd6_mutex);
2903                 defrouter_reset();
2904                 defrouter_select(ifp);
2905                 lck_mtx_unlock(nd6_mutex);
2906                 /* xxx sumikawa: flush prefix list */
2907                 break;
2908
2909         case SIOCSPFXFLUSH_IN6: {       /* struct in6_ifreq */
2910                 /* flush all the prefix advertised by routers */
2911                 struct nd_prefix *next = NULL;
2912
2913                 lck_mtx_lock(nd6_mutex);
2914                 for (pr = nd_prefix.lh_first; pr; pr = next) {
2915                         struct in6_ifaddr *ia = NULL;
2916                         bool iterate_pfxlist_again = false;
2917
2918                         next = pr->ndpr_next;
2919
2920                         NDPR_LOCK(pr);
2921                         if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) {
2922                                 NDPR_UNLOCK(pr);
2923                                 continue; /* XXX */
2924                         }
2925                         if (ifp != lo_ifp && pr->ndpr_ifp != ifp) {
2926                                 NDPR_UNLOCK(pr);
2927                                 continue;
2928                         }
2929                         /* do we really have to remove addresses as well? */
2930                         NDPR_ADDREF_LOCKED(pr);
2931                         NDPR_UNLOCK(pr);
2932                         lck_rw_lock_exclusive(&in6_ifaddr_rwlock);
2933                         ia = in6_ifaddrs;
2934                         while (ia != NULL) {
2935                                 IFA_LOCK(&ia->ia_ifa);
2936                                 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) {
2937                                         IFA_UNLOCK(&ia->ia_ifa);
2938                                         ia = ia->ia_next;
2939                                         continue;
2940                                 }
2941
2942                                 if (ia->ia6_ndpr == pr) {
2943                                         IFA_ADDREF_LOCKED(&ia->ia_ifa);
2944                                         IFA_UNLOCK(&ia->ia_ifa);
2945                                         lck_rw_done(&in6_ifaddr_rwlock);
2946                                         lck_mtx_unlock(nd6_mutex);
2947                                         in6_purgeaddr(&ia->ia_ifa);
2948                                         IFA_REMREF(&ia->ia_ifa);
2949                                         lck_mtx_lock(nd6_mutex);
2950                                         lck_rw_lock_exclusive(
2951                                             &in6_ifaddr_rwlock);
2952                                         /*
2953                                          * Purging the address caused
2954                                          * in6_ifaddr_rwlock to be
2955                                          * dropped and
2956                                          * reacquired; therefore search again
2957                                          * from the beginning of in6_ifaddrs.
2958                                          * The same applies for the prefix list.
2959                                          */
2960                                         ia = in6_ifaddrs;
2961                                         iterate_pfxlist_again = true;
2962                                         continue;
2963                                 }
2964                                 IFA_UNLOCK(&ia->ia_ifa);
2965                                 ia = ia->ia_next;
2966                         }
2967                         lck_rw_done(&in6_ifaddr_rwlock);
2968                         NDPR_LOCK(pr);
2969                         prelist_remove(pr);
2970                         NDPR_UNLOCK(pr);
2971                         pfxlist_onlink_check();
2972                         NDPR_REMREF(pr);
2973                         if (iterate_pfxlist_again) {
2974                                 next = nd_prefix.lh_first;
2975                         }
2976                 }
2977                 lck_mtx_unlock(nd6_mutex);
2978                 break;
2979         }
2980
2981         case SIOCSRTRFLUSH_IN6: {       /* struct in6_ifreq */
2982                 /* flush all the default routers */
2983                 struct nd_defrouter *next;
2984                 struct nd_drhead nd_defrouter_tmp;
2985
2986                 TAILQ_INIT(&nd_defrouter_tmp);
2987                 lck_mtx_lock(nd6_mutex);
2988                 if ((dr = TAILQ_FIRST(&nd_defrouter)) != NULL) {
2989                         /*
2990                          * The first entry of the list may be stored in
2991                          * the routing table, so we'll delete it later.
2992                          */
2993                         for (dr = TAILQ_NEXT(dr, dr_entry); dr; dr = next) {
2994                                 next = TAILQ_NEXT(dr, dr_entry);
2995                                 if (ifp == lo_ifp || dr->ifp == ifp) {
2996                                         /*
2997                                          * Remove the entry from default router list
2998                                          * and add it to the temp list.
2999                                          * nd_defrouter_tmp will be a local temporary
3000                                          * list as no one else can get the same
3001                                          * removed entry once it is removed from default
3002                                          * router list.
3003                                          * Remove the reference after calling defrtrlist_de
3004                                          */
3005                                         TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
3006                                         TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
3007                                 }
3008                         }
3009
3010                         dr = TAILQ_FIRST(&nd_defrouter);
3011                         if (ifp == lo_ifp ||
3012                             dr->ifp == ifp) {
3013                                 TAILQ_REMOVE(&nd_defrouter, dr, dr_entry);
3014                                 TAILQ_INSERT_TAIL(&nd_defrouter_tmp, dr, dr_entry);
3015                         }
3016                 }
3017
3018                 /*
3019                  * Keep the following separate from the above iteration of
3020                  * nd_defrouter because it's not safe to call
3021                  * defrtrlist_del while iterating global default
3022                  * router list. Global list has to be traversed
3023                  * while holding nd6_mutex throughout.
3024                  *
3025                  * The following call to defrtrlist_del should be
3026                  * safe as we are iterating a local list of
3027                  * default routers.
3028                  */
3029                 TAILQ_FOREACH_SAFE(dr, &nd_defrouter_tmp, dr_entry, next) {
3030                         TAILQ_REMOVE(&nd_defrouter_tmp, dr, dr_entry);
3031                         defrtrlist_del(dr);
3032                         NDDR_REMREF(dr);        /* remove list reference */
3033                 }
3034                 lck_mtx_unlock(nd6_mutex);
3035                 break;
3036         }
3037
3038         case SIOCGNBRINFO_IN6_32: {     /* struct in6_nbrinfo_32 */
3039                 struct llinfo_nd6 *ln;
3040                 struct in6_nbrinfo_32 nbi_32;
3041                 struct in6_addr nb_addr; /* make local for safety */
3042
3043                 bcopy(data, &nbi_32, sizeof (nbi_32));
3044                 nb_addr = nbi_32.addr;
3045                 /*
3046                  * XXX: KAME specific hack for scoped addresses
3047                  *      XXXX: for other scopes than link-local?
3048                  */
3049                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_32.addr) ||
3050                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_32.addr)) {
3051                         u_int16_t *idp =
3052                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
3053
3054                         if (*idp == 0)
3055                                 *idp = htons(ifp->if_index);
3056                 }
3057
3058                 /* Callee returns a locked route upon success */
3059                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
3060                         error = EINVAL;
3061                         break;
3062                 }
3063                 RT_LOCK_ASSERT_HELD(rt);
3064                 ln = rt->rt_llinfo;
3065                 nbi_32.state = ln->ln_state;
3066                 nbi_32.asked = ln->ln_asked;
3067                 nbi_32.isrouter = ln->ln_router;
3068                 nbi_32.expire = ln_getexpire(ln);
3069                 RT_REMREF_LOCKED(rt);
3070                 RT_UNLOCK(rt);
3071                 bcopy(&nbi_32, data, sizeof (nbi_32));
3072                 break;
3073         }
3074
3075         case SIOCGNBRINFO_IN6_64: {     /* struct in6_nbrinfo_64 */
3076                 struct llinfo_nd6 *ln;
3077                 struct in6_nbrinfo_64 nbi_64;
3078                 struct in6_addr nb_addr; /* make local for safety */
3079
3080                 bcopy(data, &nbi_64, sizeof (nbi_64));
3081                 nb_addr = nbi_64.addr;
3082                 /*
3083                  * XXX: KAME specific hack for scoped addresses
3084                  *      XXXX: for other scopes than link-local?
3085                  */
3086                 if (IN6_IS_ADDR_LINKLOCAL(&nbi_64.addr) ||
3087                     IN6_IS_ADDR_MC_LINKLOCAL(&nbi_64.addr)) {
3088                         u_int16_t *idp =
3089                             (u_int16_t *)(void *)&nb_addr.s6_addr[2];
3090
3091                         if (*idp == 0)
3092                                 *idp = htons(ifp->if_index);
3093                 }
3094
3095                 /* Callee returns a locked route upon success */
3096                 if ((rt = nd6_lookup(&nb_addr, 0, ifp, 0)) == NULL) {
3097                         error = EINVAL;
3098                         break;
3099                 }
3100                 RT_LOCK_ASSERT_HELD(rt);
3101                 ln = rt->rt_llinfo;
3102                 nbi_64.state = ln->ln_state;
3103                 nbi_64.asked = ln->ln_asked;
3104                 nbi_64.isrouter = ln->ln_router;
3105                 nbi_64.expire = ln_getexpire(ln);
3106                 RT_REMREF_LOCKED(rt);
3107                 RT_UNLOCK(rt);
3108                 bcopy(&nbi_64, data, sizeof (nbi_64));
3109                 break;
3110         }
3111
3112         case SIOCGDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
3113         case SIOCGDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
3114                 struct in6_ndifreq_64 *ndif_64 =
3115                     (struct in6_ndifreq_64 *)(void *)data;
3116                 struct in6_ndifreq_32 *ndif_32 =
3117                     (struct in6_ndifreq_32 *)(void *)data;
3118
3119                 if (cmd == SIOCGDEFIFACE_IN6_64) {
3120                         u_int64_t j = nd6_defifindex;
3121                         bcopy(&j, &ndif_64->ifindex, sizeof (j));
3122                 } else {
3123                         bcopy(&nd6_defifindex, &ndif_32->ifindex,
3124                             sizeof (u_int32_t));
3125                 }
3126                 break;
3127         }
3128
3129         case SIOCSDEFIFACE_IN6_32:      /* struct in6_ndifreq_32 */
3130         case SIOCSDEFIFACE_IN6_64: {    /* struct in6_ndifreq_64 */
3131                 struct in6_ndifreq_64 *ndif_64 =
3132                     (struct in6_ndifreq_64 *)(void *)data;
3133                 struct in6_ndifreq_32 *ndif_32 =
3134                     (struct in6_ndifreq_32 *)(void *)data;
3135                 u_int32_t idx;
3136
3137                 if (cmd == SIOCSDEFIFACE_IN6_64) {
3138                         u_int64_t j;
3139                         bcopy(&ndif_64->ifindex, &j, sizeof (j));
3140                         idx = (u_int32_t)j;
3141                 } else {
3142                         bcopy(&ndif_32->ifindex, &idx, sizeof (idx));
3143                 }
3144
3145                 error = nd6_setdefaultiface(idx);
3146                 return (error);
3147                 /* NOTREACHED */
3148         }
3149         case SIOCGIFCGAPREP_IN6:
3150         case SIOCSIFCGAPREP_IN6:
3151         {
3152                 struct in6_cgareq *p_cgareq =
3153                     (struct in6_cgareq *)(void *)data;
3154                 struct nd_ifinfo *ndi = ND_IFINFO(ifp);
3155
3156                 struct in6_cga_modifier *req_cga_mod =
3157                     &(p_cgareq->cgar_cgaprep.cga_modifier);
3158                 struct in6_cga_modifier *ndi_cga_mod = NULL;
3159
3160                 if ((NULL == ndi) || !ndi->initialized) {
3161                         error = EINVAL;
3162                         break;
3163                 }
3164
3165                 lck_mtx_lock(&ndi->lock);
3166                 ndi_cga_mod = &(ndi->local_cga_modifier);
3167
3168                 if (cmd == SIOCSIFCGAPREP_IN6) {
3169                         bcopy(req_cga_mod, ndi_cga_mod, sizeof(*ndi_cga_mod));
3170                         ndi->cga_initialized = TRUE;
3171                 } else
3172                         bcopy(ndi_cga_mod, req_cga_mod, sizeof(*req_cga_mod));
3173
3174                 lck_mtx_unlock(&ndi->lock);
3175                 return (error);
3176                 /* NOTREACHED */
3177         }
3178         }
3179         return (error);
3180 }
3181
3182 /*
3183  * Create neighbor cache entry and cache link-layer address,
3184  * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
3185  */
3186 void
3187 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
3188     int lladdrlen, int type, int code)
3189 {
3190 #pragma unused(lladdrlen)
3191         struct rtentry *rt = NULL;
3192         struct llinfo_nd6 *ln = NULL;
3193         int is_newentry;
3194         struct sockaddr_dl *sdl = NULL;
3195         int do_update;
3196         int olladdr;
3197         int llchange;
3198         int newstate = 0;
3199         uint64_t timenow;
3200         boolean_t sched_timeout = FALSE;
3201         struct nd_ifinfo *ndi = NULL;
3202
3203         if (ifp == NULL)
3204                 panic("ifp == NULL in nd6_cache_lladdr");
3205         if (from == NULL)
3206                 panic("from == NULL in nd6_cache_lladdr");
3207
3208         /* nothing must be updated for unspecified address */
3209         if (IN6_IS_ADDR_UNSPECIFIED(from))
3210                 return;
3211
3212         /*
3213          * Validation about ifp->if_addrlen and lladdrlen must be done in
3214          * the caller.
3215          */
3216         timenow = net_uptime();
3217
3218         rt = nd6_lookup(from, 0, ifp, 0);
3219         if (rt == NULL) {
3220                 if ((rt = nd6_lookup(from, 1, ifp, 0)) == NULL)
3221                         return;
3222                 RT_LOCK_ASSERT_HELD(rt);
3223                 is_newentry = 1;
3224         } else {
3225                 RT_LOCK_ASSERT_HELD(rt);
3226                 /* do nothing if static ndp is set */
3227                 if (rt->rt_flags & RTF_STATIC) {
3228                         RT_REMREF_LOCKED(rt);
3229                         RT_UNLOCK(rt);
3230                         return;
3231                 }
3232                 is_newentry = 0;
3233         }
3234
3235         if (rt == NULL)
3236                 return;
3237         if ((rt->rt_flags & (RTF_GATEWAY | RTF_LLINFO)) != RTF_LLINFO) {
3238 fail:
3239                 RT_UNLOCK(rt);
3240                 nd6_free(rt);
3241                 rtfree(rt);
3242                 return;
3243         }
3244         ln = (struct llinfo_nd6 *)rt->rt_llinfo;
3245         if (ln == NULL)
3246                 goto fail;
3247         if (rt->rt_gateway == NULL)
3248                 goto fail;
3249         if (rt->rt_gateway->sa_family != AF_LINK)
3250                 goto fail;
3251         sdl = SDL(rt->rt_gateway);
3252
3253         olladdr = (sdl->sdl_alen) ? 1 : 0;
3254         if (olladdr && lladdr) {
3255                 if (bcmp(lladdr, LLADDR(sdl), ifp->if_addrlen))
3256                         llchange = 1;
3257                 else
3258                         llchange = 0;
3259         } else
3260                 llchange = 0;
3261
3262         /*
3263          * newentry olladdr  lladdr  llchange   (*=record)
3264          *      0       n       n       --      (1)
3265          *      0       y       n       --      (2)
3266          *      0       n       y       --      (3) * STALE
3267          *      0       y       y       n       (4) *
3268          *      0       y       y       y       (5) * STALE
3269          *      1       --      n       --      (6)   NOSTATE(= PASSIVE)
3270          *      1       --      y       --      (7) * STALE
3271          */
3272
3273         if (lladdr != NULL) {           /* (3-5) and (7) */
3274                 /*
3275                  * Record source link-layer address
3276                  * XXX is it dependent to ifp->if_type?
3277                  */
3278                 sdl->sdl_alen = ifp->if_addrlen;
3279                 bcopy(lladdr, LLADDR(sdl), ifp->if_addrlen);
3280
3281                 /* cache the gateway (sender HW) address */
3282                 nd6_llreach_alloc(rt, ifp, LLADDR(sdl), sdl->sdl_alen, FALSE);
3283         }
3284
3285         if (is_newentry == 0) {
3286                 if ((!olladdr && lladdr != NULL) ||     /* (3) */
3287                     (olladdr && lladdr != NULL && llchange)) {  /* (5) */
3288                         do_update = 1;
3289                         newstate = ND6_LLINFO_STALE;
3290                 } else                                  /* (1-2,4) */
3291                         do_update = 0;
3292         } else {
3293                 do_update = 1;
3294                 if (lladdr == NULL)                     /* (6) */
3295                         newstate = ND6_LLINFO_NOSTATE;
3296                 else                                    /* (7) */
3297                         newstate = ND6_LLINFO_STALE;
3298         }
3299
3300         /*
3301          * For interface's that do not perform NUD
3302          * neighbor cache entres must always be marked
3303          * reachable with no expiry
3304          */
3305         ndi = ND_IFINFO(ifp);
3306         VERIFY((NULL != ndi) && (TRUE == ndi->initialized));
3307
3308         if (ndi && !(ndi->flags & ND6_IFF_PERFORMNUD)) {
3309                 newstate = ND6_LLINFO_REACHABLE;
3310                 ln_setexpire(ln, 0);
3311         }
3312
3313         if (do_update) {
3314                 /*
3315                  * Update the state of the neighbor cache.
3316                  */
3317                 ND6_CACHE_STATE_TRANSITION(ln, newstate);
3318
3319                 if ((ln->ln_state == ND6_LLINFO_STALE) ||
3320                    (ln->ln_state == ND6_LLINFO_REACHABLE)) {
3321                         struct mbuf *m = ln->ln_hold;
3322                         /*
3323                          * XXX: since nd6_output() below will cause
3324                          * state tansition to DELAY and reset the timer,
3325                          * we must set the timer now, although it is actually
3326                          * meaningless.
3327                          */
3328                         if (ln->ln_state == ND6_LLINFO_STALE)
3329                                 ln_setexpire(ln, timenow + nd6_gctimer);
3330
3331                         ln->ln_hold = NULL;
3332                         if (m != NULL) {
3333                                 struct sockaddr_in6 sin6;
3334
3335                                 rtkey_to_sa6(rt, &sin6);
3336                                 /*
3337                                  * we assume ifp is not a p2p here, so just
3338                                  * set the 2nd argument as the 1st one.
3339                                  */
3340                                 RT_UNLOCK(rt);
3341                                 nd6_output_list(ifp, ifp, m, &sin6, rt, NULL);
3342                                 RT_LOCK(rt);
3343                         }
3344                 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
3345                         /* probe right away */
3346                         ln_setexpire(ln, timenow);
3347                         sched_timeout = TRUE;
3348                 }
3349         }
3350
3351         /*
3352          * ICMP6 type dependent behavior.
3353          *
3354          * NS: clear IsRouter if new entry
3355          * RS: clear IsRouter
3356          * RA: set IsRouter if there's lladdr
3357          * redir: clear IsRouter if new entry
3358          *
3359          * RA case, (1):
3360          * The spec says that we must set IsRouter in the following cases:
3361          * - If lladdr exist, set IsRouter.  This means (1-5).
3362          * - If it is old entry (!newentry), set IsRouter.  This means (7).
3363          * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
3364          * A quetion arises for (1) case.  (1) case has no lladdr in the
3365          * neighbor cache, this is similar to (6).
3366          * This case is rare but we figured that we MUST NOT set IsRouter.
3367          *
3368          * newentry olladdr  lladdr  llchange       NS  RS      RA      redir
3369          *                                                              D R
3370          *      0       n       n       --      (1)     c       ?       s
3371          *      0       y       n       --      (2)     c       s       s
3372          *      0       n       y       --      (3)     c       s       s
3373          *      0       y       y       n       (4)     c       s       s
3374          *      0       y       y       y       (5)     c       s       s
3375          *      1       --      n       --      (6) c   c               c s
3376          *      1       --      y       --      (7) c   c       s       c s
3377          *
3378          *                                      (c=clear s=set)
3379          */
3380         switch (type & 0xff) {
3381         case ND_NEIGHBOR_SOLICIT:
3382                 /*
3383                  * New entry must have is_router flag cleared.
3384                  */
3385                 if (is_newentry)        /* (6-7) */
3386                         ln->ln_router = 0;
3387                 break;
3388         case ND_REDIRECT:
3389                 /*
3390                  * If the ICMP message is a Redirect to a better router, always
3391                  * set the is_router flag.  Otherwise, if the entry is newly
3392                  * created, then clear the flag.  [RFC 4861, sec 8.3]
3393                  */
3394                 if (code == ND_REDIRECT_ROUTER)
3395                         ln->ln_router = 1;
3396                 else if (is_newentry) /* (6-7) */
3397                         ln->ln_router = 0;
3398                 break;
3399         case ND_ROUTER_SOLICIT:
3400                 /*
3401                  * is_router flag must always be cleared.
3402                  */
3403                 ln->ln_router = 0;
3404                 break;
3405         case ND_ROUTER_ADVERT:
3406                 /*
3407                  * Mark an entry with lladdr as a router.
3408                  */
3409                 if ((!is_newentry && (olladdr || lladdr)) ||    /* (2-5) */
3410                     (is_newentry && lladdr)) {                  /* (7) */
3411                         ln->ln_router = 1;
3412                 }
3413                 break;
3414         }
3415
3416         if (do_update) {
3417                 int route_ev_code = 0;
3418
3419                 if (llchange)
3420                         route_ev_code = ROUTE_LLENTRY_CHANGED;
3421                 else
3422                         route_ev_code = ROUTE_LLENTRY_RESOLVED;
3423
3424                 /* Enqueue work item to invoke callback for this route entry */
3425                 route_event_enqueue_nwk_wq_entry(rt, NULL, route_ev_code, NULL, TRUE);
3426
3427                 if (ln->ln_router || (rt->rt_flags & RTF_ROUTER)) {
3428                         struct radix_node_head  *rnh = NULL;
3429                         struct route_event rt_ev;
3430                         route_event_init(&rt_ev, rt, NULL, llchange ? ROUTE_LLENTRY_CHANGED :
3431                             ROUTE_LLENTRY_RESOLVED);
3432                         /*
3433                          * We already have a valid reference on rt.
3434                          * The function frees that before returning.
3435                          * We therefore don't need an extra reference here
3436                          */
3437                         RT_UNLOCK(rt);
3438                         lck_mtx_lock(rnh_lock);
3439
3440                         rnh = rt_tables[AF_INET6];
3441                         if (rnh != NULL)
3442                                 (void) rnh->rnh_walktree(rnh, route_event_walktree,
3443                                     (void *)&rt_ev);
3444                         lck_mtx_unlock(rnh_lock);
3445                         RT_LOCK(rt);
3446                 }
3447         }
3448
3449         /*
3450          * When the link-layer address of a router changes, select the
3451          * best router again.  In particular, when the neighbor entry is newly
3452          * created, it might affect the selection policy.
3453          * Question: can we restrict the first condition to the "is_newentry"
3454          * case?
3455          *
3456          * Note: Perform default router selection even when we are a router,
3457          * if Scoped Routing is enabled.
3458          */
3459         if (do_update && ln->ln_router) {
3460                 RT_REMREF_LOCKED(rt);
3461                 RT_UNLOCK(rt);
3462                 lck_mtx_lock(nd6_mutex);
3463                 defrouter_select(ifp);
3464                 lck_mtx_unlock(nd6_mutex);
3465         } else {
3466                 RT_REMREF_LOCKED(rt);
3467                 RT_UNLOCK(rt);
3468         }
3469         if (sched_timeout) {
3470                 lck_mtx_lock(rnh_lock);
3471                 nd6_sched_timeout(NULL, NULL);
3472                 lck_mtx_unlock(rnh_lock);
3473         }
3474 }
3475
3476 static void
3477 nd6_slowtimo(void *arg)
3478 {
3479 #pragma unused(arg)
3480         struct nd_ifinfo *nd6if = NULL;
3481         struct ifnet *ifp = NULL;
3482
3483         ifnet_head_lock_shared();
3484         for (ifp = ifnet_head.tqh_first; ifp;
3485             ifp = ifp->if_link.tqe_next) {
3486                 nd6if = ND_IFINFO(ifp);
3487                 if ((NULL == nd6if) || (FALSE == nd6if->initialized)) {
3488                         continue;
3489                 }
3490
3491                 lck_mtx_lock(&nd6if->lock);
3492                 if (nd6if->basereachable && /* already initialized */
3493                     (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
3494                         /*
3495                          * Since reachable time rarely changes by router
3496                          * advertisements, we SHOULD insure that a new random
3497                          * value gets recomputed at least once every few hours.
3498                          * (RFC 4861, 6.3.4)
3499                          */
3500                         nd6if->recalctm = nd6_recalc_reachtm_interval;
3501                         nd6if->reachable =
3502                             ND_COMPUTE_RTIME(nd6if->basereachable);
3503                 }
3504                 lck_mtx_unlock(&nd6if->lock);
3505         }
3506         ifnet_head_done();
3507         timeout(nd6_slowtimo, NULL, ND6_SLOWTIMER_INTERVAL * hz);
3508 }
3509
/*
 * Convenience entry point: forwards all of its arguments unchanged to
 * nd6_output_list() and returns whatever that routine returns.
 */
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
    struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
{
	int error;

	error = nd6_output_list(ifp, origifp, m0, dst, hint0, adv);
	return error;
}
3516
3517 /*
3518  * nd6_output_list()
3519  *
3520  * Assumption: route determination for first packet can be correctly applied to
3521  * all packets in the chain.
3522  */
3523 #define senderr(e) { error = (e); goto bad; }
3524 int
3525 nd6_output_list(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
3526     struct sockaddr_in6 *dst, struct rtentry *hint0, struct flowadv *adv)
3527 {
3528         struct rtentry *rt = hint0, *hint = hint0;
3529         struct llinfo_nd6 *ln = NULL;
3530         int error = 0;
3531         uint64_t timenow;
3532         struct rtentry *rtrele = NULL;
3533         struct nd_ifinfo *ndi = NULL;
3534
3535         if (rt != NULL) {
3536                 RT_LOCK_SPIN(rt);
3537                 RT_ADDREF_LOCKED(rt);
3538         }
3539
3540         if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr) || !nd6_need_cache(ifp)) {
3541                 if (rt != NULL)
3542                         RT_UNLOCK(rt);
3543                 goto sendpkt;
3544         }
3545
3546         /*
3547          * Next hop determination.  Because we may involve the gateway route
3548          * in addition to the original route, locking is rather complicated.
3549          * The general concept is that regardless of whether the route points
3550          * to the original route or to the gateway route, this routine takes
3551          * an extra reference on such a route.  This extra reference will be
3552          * released at the end.
3553          *
3554          * Care must be taken to ensure that the "hint0" route never gets freed
3555          * via rtfree(), since the caller may have stored it inside a struct
3556          * route with a reference held for that placeholder.
3557          *
3558          * This logic is similar to, though not exactly the same as the one
3559          * used by route_to_gwroute().
3560          */
3561         if (rt != NULL) {
3562                 /*
3563                  * We have a reference to "rt" by now (or below via rtalloc1),
3564                  * which will either be released or freed at the end of this
3565                  * routine.
3566                  */
3567                 RT_LOCK_ASSERT_HELD(rt);
3568                 if (!(rt->rt_flags & RTF_UP)) {
3569                         RT_REMREF_LOCKED(rt);
3570                         RT_UNLOCK(rt);
3571                         if ((hint = rt = rtalloc1_scoped(SA(dst), 1, 0,
3572                             ifp->if_index)) != NULL) {
3573                                 RT_LOCK_SPIN(rt);
3574                                 if (rt->rt_ifp != ifp) {
3575                                         /* XXX: loop care? */
3576                                         RT_UNLOCK(rt);
3577                                         error = nd6_output_list(ifp, origifp, m0,
3578                                             dst, rt, adv);
3579                                         rtfree(rt);
3580                                         return (error);
3581                                 }
3582                         } else {
3583                                 senderr(EHOSTUNREACH);
3584                         }
3585                 }
3586
3587                 if (rt->rt_flags & RTF_GATEWAY) {
3588                         struct rtentry *gwrt;
3589                         struct in6_ifaddr *ia6 = NULL;
3590                         struct sockaddr_in6 gw6;
3591
3592                         rtgw_to_sa6(rt, &gw6);
3593                         /*
3594                          * Must drop rt_lock since nd6_is_addr_neighbor()
3595                          * calls nd6_lookup() and acquires rnh_lock.
3596                          */
3597                         RT_UNLOCK(rt);
3598
3599                         /*
3600                          * We skip link-layer address resolution and NUD
3601                          * if the gateway is not a neighbor from ND point
3602                          * of view, regardless of the value of nd_ifinfo.flags.
3603                          * The second condition is a bit tricky; we skip
3604                          * if the gateway is our own address, which is
3605                          * sometimes used to install a route to a p2p link.
3606                          */
3607                         if (!nd6_is_addr_neighbor(&gw6, ifp, 0) ||
3608                             (ia6 = in6ifa_ifpwithaddr(ifp, &gw6.sin6_addr))) {
3609                                 /*
3610                                  * We allow this kind of tricky route only
3611                                  * when the outgoing interface is p2p.
3612                                  * XXX: we may need a more generic rule here.
3613                                  */
3614                                 if (ia6 != NULL)
3615                                         IFA_REMREF(&ia6->ia_ifa);
3616                                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
3617                                         senderr(EHOSTUNREACH);
3618                                 goto sendpkt;
3619                         }
3620
3621                         RT_LOCK_SPIN(rt);
3622                         gw6 = *(SIN6(rt->rt_gateway));
3623
3624                         /* If hint is now down, give up */
3625                         if (!(rt->rt_flags & RTF_UP)) {
3626                                 RT_UNLOCK(rt);
3627                                 senderr(EHOSTUNREACH);
3628                         }
3629
3630                         /* If there's no gateway route, look it up */
3631                         if ((gwrt = rt->rt_gwroute) == NULL) {
3632                                 RT_UNLOCK(rt);
3633                                 goto lookup;
3634                         }
3635                         /* Become a regular mutex */
3636                         RT_CONVERT_LOCK(rt);
3637
3638                         /*
3639                          * Take gwrt's lock while holding route's lock;
3640                          * this is okay since gwrt never points back
3641                          * to rt, so no lock ordering issues.
3642                          */
3643                         RT_LOCK_SPIN(gwrt);
3644                         if (!(gwrt->rt_flags & RTF_UP)) {
3645                                 rt->rt_gwroute = NULL;
3646                                 RT_UNLOCK(gwrt);
3647                                 RT_UNLOCK(rt);
3648                                 rtfree(gwrt);
3649 lookup:
3650                                 lck_mtx_lock(rnh_lock);
3651                                 gwrt = rtalloc1_scoped_locked(SA(&gw6), 1, 0,
3652                                     ifp->if_index);
3653
3654                                 RT_LOCK(rt);
3655                                 /*
3656                                  * Bail out if the route is down, no route
3657                                  * to gateway, circular route, or if the
3658                                  * gateway portion of "rt" has changed.
3659                                  */
3660                                 if (!(rt->rt_flags & RTF_UP) ||
3661                                     gwrt == NULL || gwrt == rt ||
3662                                     !equal(SA(&gw6), rt->rt_gateway)) {
3663                                         if (gwrt == rt) {
3664                                                 RT_REMREF_LOCKED(gwrt);
3665                                                 gwrt = NULL;
3666                                         }
3667                                         RT_UNLOCK(rt);
3668                                         if (gwrt != NULL)
3669                                                 rtfree_locked(gwrt);
3670                                         lck_mtx_unlock(rnh_lock);
3671                                         senderr(EHOSTUNREACH);
3672                                 }
3673                                 VERIFY(gwrt != NULL);
3674                                 /*
3675                                  * Set gateway route; callee adds ref to gwrt;
3676                                  * gwrt has an extra ref from rtalloc1() for
3677                                  * this routine.
3678                                  */
3679                                 rt_set_gwroute(rt, rt_key(rt), gwrt);
3680                                 RT_UNLOCK(rt);
3681                                 lck_mtx_unlock(rnh_lock);
3682                                 /* Remember to release/free "rt" at the end */
3683                                 rtrele = rt;
3684                                 rt = gwrt;
3685                         } else {
3686                                 RT_ADDREF_LOCKED(gwrt);
3687                                 RT_UNLOCK(gwrt);
3688                                 RT_UNLOCK(rt);
3689                                 /* Remember to release/free "rt" at the end */
3690                                 rtrele = rt;
3691                                 rt = gwrt;
3692                         }
3693                         VERIFY(rt == gwrt);
3694
3695                         /*
3696                          * This is an opportunity to revalidate the parent
3697                          * route's gwroute, in case it now points to a dead
3698                          * route entry.  Parent route won't go away since the
3699                          * clone (hint) holds a reference to it.  rt == gwrt.
3700                          */
3701                         RT_LOCK_SPIN(hint);
3702                         if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
3703                             (RTF_WASCLONED | RTF_UP)) {
3704                                 struct rtentry *prt = hint->rt_parent;
3705                                 VERIFY(prt != NULL);
3706
3707                                 RT_CONVERT_LOCK(hint);
3708                                 RT_ADDREF(prt);
3709                                 RT_UNLOCK(hint);
3710                                 rt_revalidate_gwroute(prt, rt);
3711                                 RT_REMREF(prt);
3712                         } else {
3713                                 RT_UNLOCK(hint);
3714                         }
3715
3716                         RT_LOCK_SPIN(rt);
3717                         /* rt == gwrt; if it is now down, give up */
3718                         if (!(rt->rt_flags & RTF_UP)) {
3719                                 RT_UNLOCK(rt);
3720                                 rtfree(rt);
3721                                 rt = NULL;
3722                                 /* "rtrele" == original "rt" */
3723                                 senderr(EHOSTUNREACH);
3724                         }
3725                 }
3726
3727                 /* Become a regular mutex */
3728                 RT_CONVERT_LOCK(rt);
3729         }
3730
3731         /*
3732          * Address resolution or Neighbor Unreachability Detection
3733          * for the next hop.
3734          * At this point, the destination of the packet must be a unicast
3735          * or an anycast address(i.e. not a multicast).
3736          */
3737
3738         /* Look up the neighbor cache for the nexthop */
3739         if (rt && (rt->rt_flags & RTF_LLINFO) != 0) {
3740                 ln = rt->rt_llinfo;
3741         } else {
3742                 struct sockaddr_in6 sin6;
3743                 /*
3744                  * Clear out Scope ID field in case it is set.
3745                  */
3746                 sin6 = *dst;
3747                 sin6.sin6_scope_id = 0;
3748                 /*
3749                  * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
3750                  * the condition below is not very efficient.  But we believe
3751                  * it is tolerable, because this should be a rare case.
3752                  * Must drop rt_lock since nd6_is_addr_neighbor() calls
3753                  * nd6_lookup() and acquires rnh_lock.
3754                  */
3755                 if (rt != NULL)
3756                         RT_UNLOCK(rt);
3757                 if (nd6_is_addr_neighbor(&sin6, ifp, 0)) {
3758                         /* "rtrele" may have been used, so clean up "rt" now */
3759                         if (rt != NULL) {
3760                                 /* Don't free "hint0" */
3761                                 if (rt == hint0)
3762                                         RT_REMREF(rt);
3763                                 else
3764                                         rtfree(rt);
3765                         }
3766                         /* Callee returns a locked route upon success */
3767                         rt = nd6_lookup(&dst->sin6_addr, 1, ifp, 0);
3768                         if (rt != NULL) {
3769                                 RT_LOCK_ASSERT_HELD(rt);
3770                                 ln = rt->rt_llinfo;
3771                         }
3772                 } else if (rt != NULL) {
3773                         RT_LOCK(rt);
3774                 }
3775         }
3776
3777         if (!ln || !rt) {
3778                 if (rt != NULL) {
3779                         RT_UNLOCK(rt);
3780                 }
3781                 ndi = ND_IFINFO(ifp);
3782                 VERIFY(ndi != NULL && ndi->initialized);
3783                 lck_mtx_lock(&ndi->lock);
3784                 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
3785                     !(ndi->flags & ND6_IFF_PERFORMNUD)) {
3786                         lck_mtx_unlock(&ndi->lock);
3787                         log(LOG_DEBUG,
3788                             "nd6_output: can't allocate llinfo for %s "
3789                             "(ln=0x%llx, rt=0x%llx)\n",
3790                             ip6_sprintf(&dst->sin6_addr),
3791                             (uint64_t)VM_KERNEL_ADDRPERM(ln),
3792                             (uint64_t)VM_KERNEL_ADDRPERM(rt));
3793                         senderr(EIO);   /* XXX: good error? */
3794                 }
3795                 lck_mtx_unlock(&ndi->lock);
3796
3797                 goto sendpkt;   /* send anyway */
3798         }
3799
3800         net_update_uptime();
3801         timenow = net_uptime();
3802
3803         /* We don't have to do link-layer address resolution on a p2p link. */
3804         if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
3805             ln->ln_state < ND6_LLINFO_REACHABLE) {
3806                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_STALE);
3807                 ln_setexpire(ln, timenow + nd6_gctimer);
3808         }
3809
3810         /*
3811          * The first time we send a packet to a neighbor whose entry is
3812          * STALE, we have to change the state to DELAY and a sets a timer to
3813          * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do
3814          * neighbor unreachability detection on expiration.
3815          * (RFC 4861 7.3.3)
3816          */
3817         if (ln->ln_state == ND6_LLINFO_STALE) {
3818                 ln->ln_asked = 0;
3819                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_DELAY);
3820                 ln_setexpire(ln, timenow + nd6_delay);
3821                 /* N.B.: we will re-arm the timer below. */
3822                 _CASSERT(ND6_LLINFO_DELAY > ND6_LLINFO_INCOMPLETE);
3823         }
3824
3825         /*
3826          * If the neighbor cache entry has a state other than INCOMPLETE
3827          * (i.e. its link-layer address is already resolved), just
3828          * send the packet.
3829          */
3830         if (ln->ln_state > ND6_LLINFO_INCOMPLETE) {
3831                 RT_UNLOCK(rt);
3832                 /*
3833                  * Move this entry to the head of the queue so that it is
3834                  * less likely for this entry to be a target of forced
3835                  * garbage collection (see nd6_rtrequest()).  Do this only
3836                  * if the entry is non-permanent (as permanent ones will
3837                  * never be purged), and if the number of active entries
3838                  * is at least half of the threshold.
3839                  */
3840                 if (ln->ln_state == ND6_LLINFO_DELAY ||
3841                     (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3842                     nd6_inuse >= (ip6_neighborgcthresh >> 1))) {
3843                         lck_mtx_lock(rnh_lock);
3844                         if (ln->ln_state == ND6_LLINFO_DELAY)
3845                                 nd6_sched_timeout(NULL, NULL);
3846                         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3847                             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3848                                 RT_LOCK_SPIN(rt);
3849                                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3850                                         LN_DEQUEUE(ln);
3851                                         LN_INSERTHEAD(ln);
3852                                 }
3853                                 RT_UNLOCK(rt);
3854                         }
3855                         lck_mtx_unlock(rnh_lock);
3856                 }
3857                 goto sendpkt;
3858         }
3859
3860         /*
3861          * If this is a prefix proxy route, record the inbound interface
3862          * so that it can be excluded from the list of interfaces eligible
3863          * for forwarding the proxied NS in nd6_prproxy_ns_output().
3864          */
3865         if (rt->rt_flags & RTF_PROXY)
3866                 ln->ln_exclifp = ((origifp == ifp) ? NULL : origifp);
3867
3868         /*
3869          * There is a neighbor cache entry, but no ethernet address
3870          * response yet.  Replace the held mbuf (if any) with this
3871          * latest one.
3872          *
3873          * This code conforms to the rate-limiting rule described in Section
3874          * 7.2.2 of RFC 4861, because the timer is set correctly after sending
3875          * an NS below.
3876          */
3877         if (ln->ln_state == ND6_LLINFO_NOSTATE)
3878                 ND6_CACHE_STATE_TRANSITION(ln, ND6_LLINFO_INCOMPLETE);
3879         if (ln->ln_hold)
3880                 m_freem_list(ln->ln_hold);
3881         ln->ln_hold = m0;
3882         if (!ND6_LLINFO_PERMANENT(ln) && ln->ln_asked == 0) {
3883                 ln->ln_asked++;
3884                 ndi = ND_IFINFO(ifp);
3885                 VERIFY(ndi != NULL && ndi->initialized);
3886                 lck_mtx_lock(&ndi->lock);
3887                 ln_setexpire(ln, timenow + ndi->retrans / 1000);
3888                 lck_mtx_unlock(&ndi->lock);
3889                 RT_UNLOCK(rt);
3890                 /* We still have a reference on rt (for ln) */
3891                 if (ip6_forwarding)
3892                         nd6_prproxy_ns_output(ifp, origifp, NULL,
3893                             &dst->sin6_addr, ln);
3894                 else
3895                         nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, NULL);
3896                 lck_mtx_lock(rnh_lock);
3897                 nd6_sched_timeout(NULL, NULL);
3898                 lck_mtx_unlock(rnh_lock);
3899         } else {
3900                 RT_UNLOCK(rt);
3901         }
3902         /*
3903          * Move this entry to the head of the queue so that it is
3904          * less likely for this entry to be a target of forced
3905          * garbage collection (see nd6_rtrequest()).  Do this only
3906          * if the entry is non-permanent (as permanent ones will
3907          * never be purged), and if the number of active entries
3908          * is at least half of the threshold.
3909          */
3910         if (ln->ln_expire != 0 && ip6_neighborgcthresh > 0 &&
3911             nd6_inuse >= (ip6_neighborgcthresh >> 1)) {
3912                 lck_mtx_lock(rnh_lock);
3913                 RT_LOCK_SPIN(rt);
3914                 if (ln->ln_flags & ND6_LNF_IN_USE) {
3915                         LN_DEQUEUE(ln);
3916                         LN_INSERTHEAD(ln);
3917                 }
3918                 /* Clean up "rt" now while we can */
3919                 if (rt == hint0) {
3920                         RT_REMREF_LOCKED(rt);
3921                         RT_UNLOCK(rt);
3922                 } else {
3923                         RT_UNLOCK(rt);
3924                         rtfree_locked(rt);
3925                 }
3926                 rt = NULL;      /* "rt" has been taken care of */
3927                 lck_mtx_unlock(rnh_lock);
3928         }
3929         error = 0;
3930         goto release;
3931
3932 sendpkt:
3933         if (rt != NULL)
3934                 RT_LOCK_ASSERT_NOTHELD(rt);
3935
3936         /* discard the packet if IPv6 operation is disabled on the interface */
3937         if (ifp->if_eflags & IFEF_IPV6_DISABLED) {
3938                 error = ENETDOWN; /* better error? */
3939                 goto bad;
3940         }
3941
3942         if (ifp->if_flags & IFF_LOOPBACK) {
3943                 /* forwarding rules require the original scope_id */
3944                 m0->m_pkthdr.rcvif = origifp;
3945                 error = dlil_output(origifp, PF_INET6, m0, (caddr_t)rt,
3946                     SA(dst), 0, adv);
3947                 goto release;
3948         } else {
3949                 /* Do not allow loopback address to wind up on a wire */
3950                 struct ip6_hdr *ip6 = mtod(m0, struct ip6_hdr *);
3951
3952                 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
3953                     IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst))) {
3954                         ip6stat.ip6s_badscope++;
3955                         error = EADDRNOTAVAIL;
3956                         goto bad;
3957                 }
3958         }
3959
3960         if (rt != NULL) {
3961                 RT_LOCK_SPIN(rt);
3962                 /* Mark use timestamp */
3963                 if (rt->rt_llinfo != NULL)
3964                         nd6_llreach_use(rt->rt_llinfo);
3965                 RT_UNLOCK(rt);
3966         }
3967
3968         struct mbuf *mcur = m0;
3969         uint32_t pktcnt = 0;
3970
3971         while (mcur) {
3972                 if (hint != NULL && nstat_collect) {
3973                         int scnt;
3974
3975                         if ((mcur->m_pkthdr.csum_flags & CSUM_TSO_IPV6) &&
3976                                         (mcur->m_pkthdr.tso_segsz > 0))
3977                                 scnt = mcur->m_pkthdr.len / mcur->m_pkthdr.tso_segsz;
3978                         else
3979                                 scnt = 1;
3980
3981                         nstat_route_tx(hint, scnt, mcur->m_pkthdr.len, 0);
3982                 }
3983                 pktcnt++;
3984
3985                 mcur->m_pkthdr.rcvif = NULL;
3986                 mcur = mcur->m_nextpkt;
3987         }
3988         if (pktcnt > ip6_maxchainsent)
3989                 ip6_maxchainsent = pktcnt;
3990         error = dlil_output(ifp, PF_INET6, m0, (caddr_t)rt, SA(dst), 0, adv);
3991         goto release;
3992
3993 bad:
3994         if (m0 != NULL)
3995                 m_freem_list(m0);
3996
3997 release:
3998         /* Clean up "rt" unless it's already been done */
3999         if (rt != NULL) {
4000                 RT_LOCK_SPIN(rt);
4001                 if (rt == hint0) {
4002                         RT_REMREF_LOCKED(rt);
4003                         RT_UNLOCK(rt);
4004                 } else {
4005                         RT_UNLOCK(rt);
4006                         rtfree(rt);
4007                 }
4008         }
4009         /* And now clean up "rtrele" if there is any */
4010         if (rtrele != NULL) {
4011                 RT_LOCK_SPIN(rtrele);
4012                 if (rtrele == hint0) {
4013                         RT_REMREF_LOCKED(rtrele);
4014                         RT_UNLOCK(rtrele);
4015                 } else {
4016                         RT_UNLOCK(rtrele);
4017                         rtfree(rtrele);
4018                 }
4019         }
4020         return (error);
4021 }
4022 #undef senderr
4023
4024 int
4025 nd6_need_cache(struct ifnet *ifp)
4026 {
4027         /*
4028          * XXX: we currently do not make neighbor cache on any interface
4029          * other than ARCnet, Ethernet, FDDI and GIF.
4030          *
4031          * RFC2893 says:
4032          * - unidirectional tunnels needs no ND
4033          */
4034         switch (ifp->if_type) {
4035         case IFT_ARCNET:
4036         case IFT_ETHER:
4037         case IFT_FDDI:
4038         case IFT_IEEE1394:
4039         case IFT_L2VLAN:
4040         case IFT_IEEE8023ADLAG:
4041 #if IFT_IEEE80211
4042         case IFT_IEEE80211:
4043 #endif
4044         case IFT_GIF:           /* XXX need more cases? */
4045         case IFT_PPP:
4046 #if IFT_TUNNEL
4047         case IFT_TUNNEL:
4048 #endif
4049         case IFT_BRIDGE:
4050         case IFT_CELLULAR:
4051                 return (1);
4052         default:
4053                 return (0);
4054         }
4055 }
4056
4057 int
4058 nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt, struct mbuf *m,
4059     struct sockaddr *dst, u_char *desten)
4060 {
4061         int i;
4062         struct sockaddr_dl *sdl;
4063
4064         if (m->m_flags & M_MCAST) {
4065                 switch (ifp->if_type) {
4066                 case IFT_ETHER:
4067                 case IFT_FDDI:
4068                 case IFT_L2VLAN:
4069                 case IFT_IEEE8023ADLAG:
4070 #if IFT_IEEE80211
4071                 case IFT_IEEE80211:
4072 #endif
4073                 case IFT_BRIDGE:
4074                         ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, desten);
4075                         return (1);
4076                 case IFT_IEEE1394:
4077                         for (i = 0; i < ifp->if_addrlen; i++)
4078                                 desten[i] = ~0;
4079                         return (1);
4080                 case IFT_ARCNET:
4081                         *desten = 0;
4082                         return (1);
4083                 default:
4084                         return (0); /* caller will free mbuf */
4085                 }
4086         }
4087
4088         if (rt == NULL) {
4089                 /* this could happen, if we could not allocate memory */
4090                 return (0); /* caller will free mbuf */
4091         }
4092         RT_LOCK(rt);
4093         if (rt->rt_gateway->sa_family != AF_LINK) {
4094                 printf("nd6_storelladdr: something odd happens\n");
4095                 RT_UNLOCK(rt);
4096                 return (0); /* caller will free mbuf */
4097         }
4098         sdl = SDL(rt->rt_gateway);
4099         if (sdl->sdl_alen == 0) {
4100                 /* this should be impossible, but we bark here for debugging */
4101                 printf("nd6_storelladdr: sdl_alen == 0\n");
4102                 RT_UNLOCK(rt);
4103                 return (0); /* caller will free mbuf */
4104         }
4105
4106         bcopy(LLADDR(sdl), desten, sdl->sdl_alen);
4107         RT_UNLOCK(rt);
4108         return (1);
4109 }
4110
4111 /*
4112  * This is the ND pre-output routine; care must be taken to ensure that
4113  * the "hint" route never gets freed via rtfree(), since the caller may
4114  * have stored it inside a struct route with a reference held for that
4115  * placeholder.
4116  */
4117 errno_t
4118 nd6_lookup_ipv6(ifnet_t  ifp, const struct sockaddr_in6 *ip6_dest,
4119     struct sockaddr_dl *ll_dest, size_t ll_dest_len, route_t hint,
4120     mbuf_t packet)
4121 {
4122         route_t route = hint;
4123         errno_t result = 0;
4124         struct sockaddr_dl *sdl = NULL;
4125         size_t  copy_len;
4126
4127         if (ifp == NULL || ip6_dest == NULL)
4128                 return (EINVAL);
4129
4130         if (ip6_dest->sin6_family != AF_INET6)
4131                 return (EAFNOSUPPORT);
4132
4133         if ((ifp->if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING))
4134                 return (ENETDOWN);
4135
4136         if (hint != NULL) {
4137                 /*
4138                  * Callee holds a reference on the route and returns
4139                  * with the route entry locked, upon success.
4140                  */
4141                 result = route_to_gwroute((const struct sockaddr *)ip6_dest,
4142                     hint, &route);
4143                 if (result != 0)
4144                         return (result);
4145                 if (route != NULL)
4146                         RT_LOCK_ASSERT_HELD(route);
4147         }
4148
4149         if ((packet != NULL && (packet->m_flags & M_MCAST) != 0) ||
4150             ((ifp->if_flags & IFF_MULTICAST) &&
4151             IN6_IS_ADDR_MULTICAST(&ip6_dest->sin6_addr))) {
4152                 if (route != NULL)
4153                         RT_UNLOCK(route);
4154                 result = dlil_resolve_multi(ifp,
4155                     (const struct sockaddr *)ip6_dest,
4156                     SA(ll_dest), ll_dest_len);
4157                 if (route != NULL)
4158                         RT_LOCK(route);
4159                 goto release;
4160         } else if (route == NULL) {
4161                 /*
4162                  * rdar://24596652
4163                  * For unicast, lookup existing ND6 entries but
4164                  * do not trigger a resolution
4165                  */
4166                 lck_mtx_lock(rnh_lock);
4167                 route = rt_lookup(TRUE,
4168                     __DECONST(struct sockaddr *, ip6_dest), NULL,
4169                     rt_tables[AF_INET6], ifp->if_index);
4170                 lck_mtx_unlock(rnh_lock);
4171
4172                 if (route != NULL) {
4173                          RT_LOCK(route);
4174                 }
4175         }
4176
4177         if (route == NULL) {
4178                 /*
4179                  * This could happen, if we could not allocate memory or
4180                  * if route_to_gwroute() didn't return a route.
4181                  */
4182                 result = ENOBUFS;
4183                 goto release;
4184         }
4185
4186         if (route->rt_gateway->sa_family != AF_LINK) {
4187                 printf("%s: route %s on %s%d gateway address not AF_LINK\n",
4188                     __func__, ip6_sprintf(&ip6_dest->sin6_addr),
4189                     route->rt_ifp->if_name, route->rt_ifp->if_unit);
4190                 result = EADDRNOTAVAIL;
4191                 goto release;
4192         }
4193
4194         sdl = SDL(route->rt_gateway);
4195         if (sdl->sdl_alen == 0) {
4196                 /* this should be impossible, but we bark here for debugging */
4197                 printf("%s: route %s on %s%d sdl_alen == 0\n", __func__,
4198                     ip6_sprintf(&ip6_dest->sin6_addr), route->rt_ifp->if_name,
4199                     route->rt_ifp->if_unit);
4200                 result = EHOSTUNREACH;
4201                 goto release;
4202         }
4203
4204         copy_len = sdl->sdl_len <= ll_dest_len ? sdl->sdl_len : ll_dest_len;
4205         bcopy(sdl, ll_dest, copy_len);
4206
4207 release:
4208         if (route != NULL) {
4209                 if (route == hint) {
4210                         RT_REMREF_LOCKED(route);
4211                         RT_UNLOCK(route);
4212                 } else {
4213                         RT_UNLOCK(route);
4214                         rtfree(route);
4215                 }
4216         }
4217         return (result);
4218 }
4219
4220 #if (DEVELOPMENT || DEBUG)
4221
4222 static int sysctl_nd6_lookup_ipv6 SYSCTL_HANDLER_ARGS;
4223 SYSCTL_PROC(_net_inet6_icmp6, OID_AUTO, nd6_lookup_ipv6,
4224         CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_LOCKED, 0, 0,
4225         sysctl_nd6_lookup_ipv6, "S", "");
4226
4227 int
4228 sysctl_nd6_lookup_ipv6 SYSCTL_HANDLER_ARGS
4229 {
4230 #pragma unused(oidp, arg1, arg2)
4231         int error = 0;
4232         struct nd6_lookup_ipv6_args nd6_lookup_ipv6_args;
4233         ifnet_t ifp = NULL;
4234
4235         /*
4236          * Only root can lookup MAC addresses
4237          */
4238         error = proc_suser(current_proc());
4239         if (error != 0) {
4240                 printf("%s: proc_suser() error %d\n",
4241                     __func__, error);
4242                 goto done;
4243         }
4244         if (req->oldptr == USER_ADDR_NULL) {
4245                 req->oldidx = sizeof(struct nd6_lookup_ipv6_args);
4246         }
4247         if (req->newptr == USER_ADDR_NULL) {
4248                 goto done;
4249         }
4250         if (req->oldlen != sizeof(struct nd6_lookup_ipv6_args) ||
4251             req->newlen != sizeof(struct nd6_lookup_ipv6_args)) {
4252                 error = EINVAL;
4253                 printf("%s: bad req, error %d\n",
4254                     __func__, error);
4255                 goto done;
4256         }
4257         error = SYSCTL_IN(req, &nd6_lookup_ipv6_args,
4258             sizeof(struct nd6_lookup_ipv6_args));
4259         if (error != 0) {
4260                 printf("%s: SYSCTL_IN() error %d\n",
4261                     __func__, error);
4262                 goto done;
4263         }
4264         /* Make sure to terminate the string */
4265         nd6_lookup_ipv6_args.ifname[IFNAMSIZ - 1] = 0;
4266
4267         error = ifnet_find_by_name(nd6_lookup_ipv6_args.ifname, &ifp);
4268         if (error != 0) {
4269                 printf("%s: ifnet_find_by_name() error %d\n",
4270                     __func__, error);
4271                 goto done;
4272         }
4273
4274         error = nd6_lookup_ipv6(ifp, &nd6_lookup_ipv6_args.ip6_dest,
4275             &nd6_lookup_ipv6_args.ll_dest_._sdl,
4276             nd6_lookup_ipv6_args.ll_dest_len, NULL, NULL);
4277         if (error != 0) {
4278                 printf("%s: nd6_lookup_ipv6() error %d\n",
4279                     __func__, error);
4280                 goto done;
4281         }
4282
4283         error = SYSCTL_OUT(req, &nd6_lookup_ipv6_args,
4284             sizeof(struct nd6_lookup_ipv6_args));
4285         if (error != 0) {
4286                 printf("%s: SYSCTL_OUT() error %d\n",
4287                     __func__, error);
4288                 goto done;
4289         }
4290 done:
4291         return (error);
4292 }
4293
#endif /* (DEVELOPMENT || DEBUG) */
4295
4296 int
4297 nd6_setifinfo(struct ifnet *ifp, u_int32_t before, u_int32_t after)
4298 {
4299         uint32_t b, a;
4300         int err = 0;
4301
4302         /*
4303          * Handle ND6_IFF_IFDISABLED
4304          */
4305         if ((before & ND6_IFF_IFDISABLED) ||
4306             (after & ND6_IFF_IFDISABLED)) {
4307                 b = (before & ND6_IFF_IFDISABLED);
4308                 a = (after & ND6_IFF_IFDISABLED);
4309
4310                 if (b != a && (err = nd6_if_disable(ifp,
4311                      ((int32_t)(a - b) > 0))) != 0)
4312                         goto done;
4313         }
4314
4315         /*
4316          * Handle ND6_IFF_PROXY_PREFIXES
4317          */
4318         if ((before & ND6_IFF_PROXY_PREFIXES) ||
4319             (after & ND6_IFF_PROXY_PREFIXES)) {
4320                 b = (before & ND6_IFF_PROXY_PREFIXES);
4321                 a = (after & ND6_IFF_PROXY_PREFIXES);
4322
4323                 if (b != a && (err = nd6_if_prproxy(ifp,
4324                      ((int32_t)(a - b) > 0))) != 0)
4325                         goto done;
4326         }
4327 done:
4328         return (err);
4329 }
4330
4331 /*
4332  * Enable/disable IPv6 on an interface, called as part of
4333  * setting/clearing ND6_IFF_IFDISABLED, or during DAD failure.
4334  */
4335 int
4336 nd6_if_disable(struct ifnet *ifp, boolean_t enable)
4337 {
4338         ifnet_lock_shared(ifp);
4339         if (enable)
4340                 ifp->if_eflags |= IFEF_IPV6_DISABLED;
4341         else
4342                 ifp->if_eflags &= ~IFEF_IPV6_DISABLED;
4343         ifnet_lock_done(ifp);
4344
4345         return (0);
4346 }
4347
4348 static int
4349 nd6_sysctl_drlist SYSCTL_HANDLER_ARGS
4350 {
4351 #pragma unused(oidp, arg1, arg2)
4352         char pbuf[MAX_IPv6_STR_LEN];
4353         struct nd_defrouter *dr;
4354         int error = 0;
4355
4356         if (req->newptr != USER_ADDR_NULL)
4357                 return (EPERM);
4358
4359         /* XXX Handle mapped defrouter entries */
4360         lck_mtx_lock(nd6_mutex);
4361         if (proc_is64bit(req->p)) {
4362                 struct in6_defrouter_64 d;
4363
4364                 bzero(&d, sizeof (d));
4365                 d.rtaddr.sin6_family = AF_INET6;
4366                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
4367
4368                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
4369                         d.rtaddr.sin6_addr = dr->rtaddr;
4370                         if (in6_recoverscope(&d.rtaddr,
4371                             &dr->rtaddr, dr->ifp) != 0)
4372                                 log(LOG_ERR, "scope error in default router "
4373                                     "list (%s)\n", inet_ntop(AF_INET6,
4374                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
4375                         d.flags = dr->flags;
4376                         d.stateflags = dr->stateflags;
4377                         d.rtlifetime = dr->rtlifetime;
4378                         d.expire = nddr_getexpire(dr);
4379                         d.if_index = dr->ifp->if_index;
4380                         error = SYSCTL_OUT(req, &d, sizeof (d));
4381                         if (error != 0)
4382                                 break;
4383                 }
4384         } else {
4385                 struct in6_defrouter_32 d;
4386
4387                 bzero(&d, sizeof (d));
4388                 d.rtaddr.sin6_family = AF_INET6;
4389                 d.rtaddr.sin6_len = sizeof (d.rtaddr);
4390
4391                 TAILQ_FOREACH(dr, &nd_defrouter, dr_entry) {
4392                         d.rtaddr.sin6_addr = dr->rtaddr;
4393                         if (in6_recoverscope(&d.rtaddr,
4394                             &dr->rtaddr, dr->ifp) != 0)
4395                                 log(LOG_ERR, "scope error in default router "
4396                                     "list (%s)\n", inet_ntop(AF_INET6,
4397                                     &dr->rtaddr, pbuf, sizeof (pbuf)));
4398                         d.flags = dr->flags;
4399                         d.stateflags = dr->stateflags;
4400                         d.rtlifetime = dr->rtlifetime;
4401                         d.expire = nddr_getexpire(dr);
4402                         d.if_index = dr->ifp->if_index;
4403                         error = SYSCTL_OUT(req, &d, sizeof (d));
4404                         if (error != 0)
4405                                 break;
4406                 }
4407         }
4408         lck_mtx_unlock(nd6_mutex);
4409         return (error);
4410 }
4411
4412 static int
4413 nd6_sysctl_prlist SYSCTL_HANDLER_ARGS
4414 {
4415 #pragma unused(oidp, arg1, arg2)
4416         char pbuf[MAX_IPv6_STR_LEN];
4417         struct nd_pfxrouter *pfr;
4418         struct sockaddr_in6 s6;
4419         struct nd_prefix *pr;
4420         int error = 0;
4421
4422         if (req->newptr != USER_ADDR_NULL)
4423                 return (EPERM);
4424
4425         bzero(&s6, sizeof (s6));
4426         s6.sin6_family = AF_INET6;
4427         s6.sin6_len = sizeof (s6);
4428
4429         /* XXX Handle mapped defrouter entries */
4430         lck_mtx_lock(nd6_mutex);
4431         if (proc_is64bit(req->p)) {
4432                 struct in6_prefix_64 p;
4433
4434                 bzero(&p, sizeof (p));
4435                 p.origin = PR_ORIG_RA;
4436
4437                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
4438                         NDPR_LOCK(pr);
4439                         p.prefix = pr->ndpr_prefix;
4440                         if (in6_recoverscope(&p.prefix,
4441                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4442                                 log(LOG_ERR, "scope error in "
4443                                     "prefix list (%s)\n", inet_ntop(AF_INET6,
4444                                     &p.prefix.sin6_addr, pbuf, sizeof (pbuf)));
4445                         p.raflags = pr->ndpr_raf;
4446                         p.prefixlen = pr->ndpr_plen;
4447                         p.vltime = pr->ndpr_vltime;
4448                         p.pltime = pr->ndpr_pltime;
4449                         p.if_index = pr->ndpr_ifp->if_index;
4450                         p.expire = ndpr_getexpire(pr);
4451                         p.refcnt = pr->ndpr_addrcnt;
4452                         p.flags = pr->ndpr_stateflags;
4453                         p.advrtrs = 0;
4454                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4455                                 p.advrtrs++;
4456                         error = SYSCTL_OUT(req, &p, sizeof (p));
4457                         if (error != 0) {
4458                                 NDPR_UNLOCK(pr);
4459                                 break;
4460                         }
4461                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4462                                 s6.sin6_addr = pfr->router->rtaddr;
4463                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4464                                     pfr->router->ifp) != 0)
4465                                         log(LOG_ERR,
4466                                             "scope error in prefix list (%s)\n",
4467                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4468                                             pbuf, sizeof (pbuf)));
4469                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4470                                 if (error != 0)
4471                                         break;
4472                         }
4473                         NDPR_UNLOCK(pr);
4474                         if (error != 0)
4475                                 break;
4476                 }
4477         } else {
4478                 struct in6_prefix_32 p;
4479
4480                 bzero(&p, sizeof (p));
4481                 p.origin = PR_ORIG_RA;
4482
4483                 LIST_FOREACH(pr, &nd_prefix, ndpr_entry) {
4484                         NDPR_LOCK(pr);
4485                         p.prefix = pr->ndpr_prefix;
4486                         if (in6_recoverscope(&p.prefix,
4487                             &pr->ndpr_prefix.sin6_addr, pr->ndpr_ifp) != 0)
4488                                 log(LOG_ERR,
4489                                     "scope error in prefix list (%s)\n",
4490                                     inet_ntop(AF_INET6, &p.prefix.sin6_addr,
4491                                     pbuf, sizeof (pbuf)));
4492                         p.raflags = pr->ndpr_raf;
4493                         p.prefixlen = pr->ndpr_plen;
4494                         p.vltime = pr->ndpr_vltime;
4495                         p.pltime = pr->ndpr_pltime;
4496                         p.if_index = pr->ndpr_ifp->if_index;
4497                         p.expire = ndpr_getexpire(pr);
4498                         p.refcnt = pr->ndpr_addrcnt;
4499                         p.flags = pr->ndpr_stateflags;
4500                         p.advrtrs = 0;
4501                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry)
4502                                 p.advrtrs++;
4503                         error = SYSCTL_OUT(req, &p, sizeof (p));
4504                         if (error != 0) {
4505                                 NDPR_UNLOCK(pr);
4506                                 break;
4507                         }
4508                         LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) {
4509                                 s6.sin6_addr = pfr->router->rtaddr;
4510                                 if (in6_recoverscope(&s6, &pfr->router->rtaddr,
4511                                     pfr->router->ifp) != 0)
4512                                         log(LOG_ERR,
4513                                             "scope error in prefix list (%s)\n",
4514                                             inet_ntop(AF_INET6, &s6.sin6_addr,
4515                                             pbuf, sizeof (pbuf)));
4516                                 error = SYSCTL_OUT(req, &s6, sizeof (s6));
4517                                 if (error != 0)
4518                                         break;
4519                         }
4520                         NDPR_UNLOCK(pr);
4521                         if (error != 0)
4522                                 break;
4523                 }
4524         }
4525         lck_mtx_unlock(nd6_mutex);
4526
4527         return (error);
4528 }
4529
4530 void
4531 in6_ifaddr_set_dadprogress(struct in6_ifaddr *ia)
4532 {
4533         struct ifnet* ifp = ia->ia_ifp;
4534         uint32_t flags = IN6_IFF_TENTATIVE;
4535         uint32_t optdad = nd6_optimistic_dad;
4536         struct nd_ifinfo *ndi = NULL;
4537
4538         ndi = ND_IFINFO(ifp);
4539         VERIFY((NULL != ndi)  && (TRUE == ndi->initialized));
4540         if (!(ndi->flags & ND6_IFF_DAD))
4541                 return;
4542
4543         if (optdad) {
4544                 if ((ifp->if_eflags & IFEF_IPV6_ROUTER) != 0) {
4545                         optdad = 0;
4546                 } else {
4547                         lck_mtx_lock(&ndi->lock);
4548                         if ((ndi->flags & ND6_IFF_REPLICATED) != 0) {
4549                                 optdad = 0;
4550                         }
4551                         lck_mtx_unlock(&ndi->lock);
4552                 }
4553         }
4554
4555         if (optdad) {
4556                 if ((optdad & ND6_OPTIMISTIC_DAD_LINKLOCAL) &&
4557                     IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr))
4558                         flags = IN6_IFF_OPTIMISTIC;
4559                 else if ((optdad & ND6_OPTIMISTIC_DAD_AUTOCONF) &&
4560                     (ia->ia6_flags & IN6_IFF_AUTOCONF)) {
4561                         if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
4562                                 if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY)
4563                                         flags = IN6_IFF_OPTIMISTIC;
4564                         } else if (ia->ia6_flags & IN6_IFF_SECURED) {
4565                                 if (optdad & ND6_OPTIMISTIC_DAD_SECURED)
4566                                         flags = IN6_IFF_OPTIMISTIC;
4567                         } else {
4568                                 /*
4569                                  * Keeping the behavior for temp and CGA
4570                                  * SLAAC addresses to have a knob for optimistic
4571                                  * DAD.
4572                                  * Other than that if ND6_OPTIMISTIC_DAD_AUTOCONF
4573                                  * is set, we should default to optimistic
4574                                  * DAD.
4575                                  * For now this means SLAAC addresses with interface
4576                                  * identifier derived from modified EUI-64 bit
4577                                  * identifiers.
4578                                  */
4579                                 flags = IN6_IFF_OPTIMISTIC;
4580                         }
4581                 } else if ((optdad & ND6_OPTIMISTIC_DAD_DYNAMIC) &&
4582                     (ia->ia6_flags & IN6_IFF_DYNAMIC)) {
4583                         if (ia->ia6_flags & IN6_IFF_TEMPORARY) {
4584                                 if (optdad & ND6_OPTIMISTIC_DAD_TEMPORARY)
4585                                         flags = IN6_IFF_OPTIMISTIC;
4586                         } else {
4587                                 flags = IN6_IFF_OPTIMISTIC;
4588                         }
4589                 } else if ((optdad & ND6_OPTIMISTIC_DAD_MANUAL) &&
4590                     (ia->ia6_flags & IN6_IFF_OPTIMISTIC)) {
4591                         /*
4592                          * rdar://17483438
4593                          * Bypass tentative for address assignments
4594                          * not covered above (e.g. manual) upon request
4595                          */
4596                         if (!IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr) &&
4597                             !(ia->ia6_flags & IN6_IFF_AUTOCONF) &&
4598                             !(ia->ia6_flags & IN6_IFF_DYNAMIC))
4599                                 flags = IN6_IFF_OPTIMISTIC;
4600                 }
4601         }
4602
4603         ia->ia6_flags &= ~(IN6_IFF_DUPLICATED | IN6_IFF_DADPROGRESS);
4604         ia->ia6_flags |= flags;
4605
4606         nd6log2((LOG_DEBUG, "%s - %s ifp %s ia6_flags 0x%x\n",
4607                                 __func__,
4608                                 ip6_sprintf(&ia->ia_addr.sin6_addr),
4609                                 if_name(ia->ia_ifp),
4610                                 ia->ia6_flags));
4611 }
4612