2  * Copyright (c) 2000-2018 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1980, 1986, 1991, 1993 
  30  *      The Regents of the University of California.  All rights reserved. 
  32  * Redistribution and use in source and binary forms, with or without 
  33  * modification, are permitted provided that the following conditions 
  35  * 1. Redistributions of source code must retain the above copyright 
  36  *    notice, this list of conditions and the following disclaimer. 
  37  * 2. Redistributions in binary form must reproduce the above copyright 
  38  *    notice, this list of conditions and the following disclaimer in the 
  39  *    documentation and/or other materials provided with the distribution. 
  40  * 3. All advertising materials mentioning features or use of this software 
  41  *    must display the following acknowledgement: 
  42  *      This product includes software developed by the University of 
  43  *      California, Berkeley and its contributors. 
  44  * 4. Neither the name of the University nor the names of its contributors 
  45  *    may be used to endorse or promote products derived from this software 
  46  *    without specific prior written permission. 
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  60  *      @(#)route.c     8.2 (Berkeley) 11/15/93 
  61  * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $ 
  64 #include <sys/param.h> 
  65 #include <sys/sysctl.h> 
  66 #include <sys/systm.h> 
  67 #include <sys/malloc.h> 
  69 #include <sys/socket.h> 
  70 #include <sys/domain.h> 
  73 #include <sys/vnode.h> 
  74 #include <sys/syslog.h> 
  75 #include <sys/queue.h> 
  76 #include <sys/mcache.h> 
  78 #include <sys/protosw.h> 
  79 #include <sys/kernel.h> 
  80 #include <kern/locks.h> 
  81 #include <kern/zalloc.h> 
  85 #include <net/route.h> 
  86 #include <net/ntstat.h> 
  87 #include <net/nwk_wq.h> 
  92 #include <netinet/in.h> 
  93 #include <netinet/in_var.h> 
  94 #include <netinet/ip_var.h> 
  95 #include <netinet/ip.h> 
  96 #include <netinet/ip6.h> 
  97 #include <netinet/in_arp.h> 
 100 #include <netinet6/ip6_var.h> 
 101 #include <netinet6/in6_var.h> 
 102 #include <netinet6/nd6.h> 
 105 #include <net/if_dl.h> 
 107 #include <libkern/OSAtomic.h> 
 108 #include <libkern/OSDebug.h> 
 110 #include <pexpert/pexpert.h> 
 113 #include <sys/kauth.h> 
 117  * Synchronization notes: 
 119  * Routing entries fall under two locking domains: the global routing table 
 120  * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that 
 121  * resides (statically defined) in the rtentry structure. 
 123  * The locking domains for routing are defined as follows: 
 125  * The global routing lock is used to serialize all accesses to the radix 
 126  * trees defined by rt_tables[], as well as the tree of masks.  This includes 
 127  * lookups, insertions and removals of nodes to/from the respective tree. 
 128  * It is also used to protect certain fields in the route entry that aren't 
 129  * often modified and/or require global serialization (more details below.) 
 131  * The per-route entry lock is used to serialize accesses to several routing 
 132  * entry fields (more details below.)  Acquiring and releasing this lock is 
 133  * done via RT_LOCK() and RT_UNLOCK() routines. 
 135  * In cases where both rnh_lock and rt_lock must be held, the former must be 
 136  * acquired first in order to maintain lock ordering.  It is not a requirement 
 137  * that rnh_lock be acquired first before rt_lock, but in case both must be 
 138  * acquired in succession, the correct lock ordering must be followed. 
 140  * The fields of the rtentry structure are protected in the following way: 
 144  *      - Routing table lock (rnh_lock). 
 146  * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid 
 148  *      - Set once during creation and never changes; no locks to read. 
 150  * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute 
 152  *      - Routing entry lock (rt_lock) for read/write access. 
 154  *      - Some values of rt_flags are either set once at creation time, 
 155  *        or aren't currently used, and thus checking against them can 
 156  *        be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC, 
 157  *        RTF_DONE,  RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE, 
 158  *        RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL, 
 159  *        RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE, RTF_IFREF. 
 161  * rt_key, rt_gateway, rt_ifp, rt_ifa 
 163  *      - Always written/modified with both rnh_lock and rt_lock held. 
 165  *      - May be read freely with rnh_lock held, else must hold rt_lock 
 166  *        for read access; holding both locks for read is also okay. 
 168  *      - In the event rnh_lock is not acquired, or is not possible to be 
 169  *        acquired across the operation, setting RTF_CONDEMNED on a route 
 170  *        entry will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa 
 171  *        from being modified.  This is typically done on a route that 
 172  *        has been chosen for a removal (from the tree) prior to dropping 
 173  *        the rt_lock, so that those values will remain the same until 
 174  *        the route is freed. 
 176  *        When rnh_lock is held rt_setgate(), rt_setif(), and rtsetifa() are 
 177  *        single-threaded, thus exclusive.  This flag will also prevent the 
 178  *        route from being looked up via rt_lookup(). 
 182  *      - Assumes that 32-bit writes are atomic; no locks. 
 186  *      - Currently unused; no locks. 
 188  * Operations on a route entry can be described as follows: 
 190  * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE. 
 192  * INSERTION of an entry into the radix tree holds the rnh_lock, checks 
 193  * for duplicates and then adds the entry.  rtrequest returns the entry 
 194  * after bumping up the reference count to 1 (for the caller). 
 196  * LOOKUP of an entry holds the rnh_lock and bumps up the reference count 
 197  * before returning; it is valid to also bump up the reference count using 
 198  * RT_ADDREF after the lookup has returned an entry. 
 200  * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the 
 201  * entry but does not decrement the reference count.  Removal happens when 
 202  * the route is explicitly deleted (RTM_DELETE) or when it is in the cached 
 203  * state and it expires.  The route is said to be "down" when it is no 
 204  * longer present in the tree.  Freeing the entry will happen on the last 
 205  * reference release of such a "down" route. 
 207  * RT_ADDREF/RT_REMREF operate on the routing entry, incrementing/ 
 208  * decrementing the reference count, rt_refcnt, atomically on the rtentry. 
 209  * rt_refcnt is modified only using these routines.  The general rule is to 
 210  * do RT_ADDREF in the function that is passing the entry as an argument, 
 211  * in order to prevent the entry from being freed by the callee. 
 214 #define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0) 
 216 extern void kdp_set_gateway_mac(void *gatewaymac
); 
 218 __private_extern__ 
struct rtstat rtstat  
= { 0, 0, 0, 0, 0, 0 }; 
 219 struct radix_node_head 
*rt_tables
[AF_MAX
+1]; 
 221 decl_lck_mtx_data(, rnh_lock_data
);     /* global routing tables mutex */ 
 222 lck_mtx_t               
*rnh_lock 
= &rnh_lock_data
; 
 223 static lck_attr_t       
*rnh_lock_attr
; 
 224 static lck_grp_t        
*rnh_lock_grp
; 
 225 static lck_grp_attr_t   
*rnh_lock_grp_attr
; 
 227 /* Lock group and attribute for routing entry locks */ 
 228 static lck_attr_t       
*rte_mtx_attr
; 
 229 static lck_grp_t        
*rte_mtx_grp
; 
 230 static lck_grp_attr_t   
*rte_mtx_grp_attr
; 
 232 int rttrash 
= 0;                /* routes not in table but not freed */ 
 234 unsigned int rte_debug 
= 0; 
 236 /* Possible flags for rte_debug */ 
 237 #define RTD_DEBUG       0x1     /* enable or disable rtentry debug facility */ 
 238 #define RTD_TRACE       0x2     /* trace alloc, free, refcnt and lock */ 
 239 #define RTD_NO_FREE     0x4     /* don't free (good to catch corruptions) */ 
 241 #define RTE_NAME                "rtentry"       /* name for zone and rt_lock */ 
 243 static struct zone 
*rte_zone
;                   /* special zone for rtentry */ 
 244 #define RTE_ZONE_MAX            65536           /* maximum elements in zone */ 
 245 #define RTE_ZONE_NAME           RTE_NAME        /* name of rtentry zone */ 
 247 #define RTD_INUSE               0xFEEDFACE      /* entry is in use */ 
 248 #define RTD_FREED               0xDEADBEEF      /* entry is freed */ 
 250 #define MAX_SCOPE_ADDR_STR_LEN  (MAX_IPv6_STR_LEN + 6) 
 253 __private_extern__ 
unsigned int ctrace_stack_size 
= CTRACE_STACK_SIZE
; 
 254 __private_extern__ 
unsigned int ctrace_hist_size 
= CTRACE_HIST_SIZE
; 
 257  * Debug variant of rtentry structure. 
 260         struct rtentry  rtd_entry
;                      /* rtentry */ 
 261         struct rtentry  rtd_entry_saved
;                /* saved rtentry */ 
 262         uint32_t        rtd_inuse
;                      /* in use pattern */ 
 263         uint16_t        rtd_refhold_cnt
;                /* # of rtref */ 
 264         uint16_t        rtd_refrele_cnt
;                /* # of rtunref */ 
 265         uint32_t        rtd_lock_cnt
;                   /* # of locks */ 
 266         uint32_t        rtd_unlock_cnt
;                 /* # of unlocks */ 
 268          * Alloc and free callers. 
 273          * Circular lists of rtref and rtunref callers. 
 275         ctrace_t        rtd_refhold
[CTRACE_HIST_SIZE
]; 
 276         ctrace_t        rtd_refrele
[CTRACE_HIST_SIZE
]; 
 278          * Circular lists of locks and unlocks. 
 280         ctrace_t        rtd_lock
[CTRACE_HIST_SIZE
]; 
 281         ctrace_t        rtd_unlock
[CTRACE_HIST_SIZE
]; 
 285         TAILQ_ENTRY(rtentry_dbg
) rtd_trash_link
; 
 288 /* List of trash route entries protected by rnh_lock */ 
 289 static TAILQ_HEAD(, rtentry_dbg
) rttrash_head
; 
 291 static void rte_lock_init(struct rtentry 
*); 
 292 static void rte_lock_destroy(struct rtentry 
*); 
 293 static inline struct rtentry 
*rte_alloc_debug(void); 
 294 static inline void rte_free_debug(struct rtentry 
*); 
 295 static inline void rte_lock_debug(struct rtentry_dbg 
*); 
 296 static inline void rte_unlock_debug(struct rtentry_dbg 
*); 
 297 static void rt_maskedcopy(const struct sockaddr 
*, 
 298             struct sockaddr 
*, const struct sockaddr 
*); 
 299 static void rtable_init(void **); 
 300 static inline void rtref_audit(struct rtentry_dbg 
*); 
 301 static inline void rtunref_audit(struct rtentry_dbg 
*); 
 302 static struct rtentry 
*rtalloc1_common_locked(struct sockaddr 
*, int, uint32_t, 
 304 static int rtrequest_common_locked(int, struct sockaddr 
*, 
 305     struct sockaddr 
*, struct sockaddr 
*, int, struct rtentry 
**, 
 307 static struct rtentry 
*rtalloc1_locked(struct sockaddr 
*, int, uint32_t); 
 308 static void rtalloc_ign_common_locked(struct route 
*, uint32_t, unsigned int); 
 309 static inline void sin6_set_ifscope(struct sockaddr 
*, unsigned int); 
 310 static inline void sin6_set_embedded_ifscope(struct sockaddr 
*, unsigned int); 
 311 static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr 
*); 
 312 static struct sockaddr 
*ma_copy(int, struct sockaddr 
*, 
 313     struct sockaddr_storage 
*, unsigned int); 
 314 static struct sockaddr 
*sa_trim(struct sockaddr 
*, int); 
 315 static struct radix_node 
*node_lookup(struct sockaddr 
*, struct sockaddr 
*, 
 317 static struct radix_node 
*node_lookup_default(int); 
 318 static struct rtentry 
*rt_lookup_common(boolean_t
, boolean_t
, struct sockaddr 
*, 
 319     struct sockaddr 
*, struct radix_node_head 
*, unsigned int); 
 320 static int rn_match_ifscope(struct radix_node 
*, void *); 
 321 static struct ifaddr 
*ifa_ifwithroute_common_locked(int, 
 322     const struct sockaddr 
*, const struct sockaddr 
*, unsigned int); 
 323 static struct rtentry 
*rte_alloc(void); 
 324 static void rte_free(struct rtentry 
*); 
 325 static void rtfree_common(struct rtentry 
*, boolean_t
); 
 326 static void rte_if_ref(struct ifnet 
*, int); 
 327 static void rt_set_idleref(struct rtentry 
*); 
 328 static void rt_clear_idleref(struct rtentry 
*); 
 329 static void route_event_callback(void *); 
 330 static void rt_str4(struct rtentry 
*, char *, uint32_t, char *, uint32_t); 
 332 static void rt_str6(struct rtentry 
*, char *, uint32_t, char *, uint32_t); 
 335 uint32_t route_genid_inet 
= 0; 
 337 uint32_t route_genid_inet6 
= 0; 
 340 #define ASSERT_SINIFSCOPE(sa) {                                         \ 
 341         if ((sa)->sa_family != AF_INET ||                               \ 
 342             (sa)->sa_len < sizeof (struct sockaddr_in))                 \ 
 343                 panic("%s: bad sockaddr_in %p\n", __func__, sa);        \ 
 346 #define ASSERT_SIN6IFSCOPE(sa) {                                        \ 
 347         if ((sa)->sa_family != AF_INET6 ||                              \ 
 348             (sa)->sa_len < sizeof (struct sockaddr_in6))                \ 
 349                 panic("%s: bad sockaddr_in6 %p\n", __func__, sa);       \ 
 353  * Argument to leaf-matching routine; at present it is scoped routing 
 354  * specific but can be expanded in future to include other search filters. 
 356 struct matchleaf_arg 
{ 
 357         unsigned int    ifscope
;        /* interface scope */ 
 361  * For looking up the non-scoped default route (sockaddr instead 
 362  * of sockaddr_in for convenience). 
 364 static struct sockaddr sin_def 
= { 
 365         sizeof (struct sockaddr_in
), AF_INET
, { 0, } 
 368 static struct sockaddr_in6 sin6_def 
= { 
 369         sizeof (struct sockaddr_in6
), AF_INET6
, 0, 0, IN6ADDR_ANY_INIT
, 0 
 373  * Interface index (scope) of the primary interface; determined at 
 374  * the time when the default, non-scoped route gets added, changed 
 375  * or deleted.  Protected by rnh_lock. 
 377 static unsigned int primary_ifscope 
= IFSCOPE_NONE
; 
 378 static unsigned int primary6_ifscope 
= IFSCOPE_NONE
; 
 380 #define INET_DEFAULT(sa)        \ 
 381         ((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0) 
 383 #define INET6_DEFAULT(sa)                                               \ 
 384         ((sa)->sa_family == AF_INET6 &&                                 \ 
 385         IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr)) 
 387 #define SA_DEFAULT(sa)  (INET_DEFAULT(sa) || INET6_DEFAULT(sa)) 
 388 #define RT(r)           ((struct rtentry *)r) 
 389 #define RN(r)           ((struct radix_node *)r) 
 390 #define RT_HOST(r)      (RT(r)->rt_flags & RTF_HOST) 
 392 unsigned int rt_verbose 
= 0; 
 393 #if (DEVELOPMENT || DEBUG) 
 394 SYSCTL_DECL(_net_route
); 
 395 SYSCTL_UINT(_net_route
, OID_AUTO
, verbose
, CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 397 #endif /* (DEVELOPMENT || DEBUG) */ 
 /*
  * Walk the `domains' list and, for each domain that provides a
  * dom_rtattach callback, invoke it on the slot of `table' indexed by
  * that domain's dom_family.  domain_proto_mtx_lock_assert_held() is
  * called first, so the caller is expected to hold that mutex.
  *
  * NOTE(review): the trailing argument(s) of the dom_rtattach() call are
  * not visible in this listing -- confirm against the full source.
  */
 400 rtable_init(void **table
) 
 404         domain_proto_mtx_lock_assert_held(); 
 406         TAILQ_FOREACH(dom
, &domains
, dom_entry
) { 
 407                 if (dom
->dom_rtattach 
!= NULL
) 
 408                         dom
->dom_rtattach(&table
[dom
->dom_family
], 
 414  * Called by route_dinit(). 
 422         _CASSERT(offsetof(struct route
, ro_rt
) == 
 423             offsetof(struct route_in6
, ro_rt
)); 
 424         _CASSERT(offsetof(struct route
, ro_lle
) == 
 425             offsetof(struct route_in6
, ro_lle
)); 
 426         _CASSERT(offsetof(struct route
, ro_srcia
) == 
 427             offsetof(struct route_in6
, ro_srcia
)); 
 428         _CASSERT(offsetof(struct route
, ro_flags
) == 
 429             offsetof(struct route_in6
, ro_flags
)); 
 430         _CASSERT(offsetof(struct route
, ro_dst
) == 
 431             offsetof(struct route_in6
, ro_dst
)); 
 434         PE_parse_boot_argn("rte_debug", &rte_debug
, sizeof (rte_debug
)); 
 436                 rte_debug 
|= RTD_DEBUG
; 
 438         rnh_lock_grp_attr 
= lck_grp_attr_alloc_init(); 
 439         rnh_lock_grp 
= lck_grp_alloc_init("route", rnh_lock_grp_attr
); 
 440         rnh_lock_attr 
= lck_attr_alloc_init(); 
 441         lck_mtx_init(rnh_lock
, rnh_lock_grp
, rnh_lock_attr
); 
 443         rte_mtx_grp_attr 
= lck_grp_attr_alloc_init(); 
 444         rte_mtx_grp 
= lck_grp_alloc_init(RTE_NAME
, rte_mtx_grp_attr
); 
 445         rte_mtx_attr 
= lck_attr_alloc_init(); 
 447         lck_mtx_lock(rnh_lock
); 
 448         rn_init();      /* initialize all zeroes, all ones, mask table */ 
 449         lck_mtx_unlock(rnh_lock
); 
 450         rtable_init((void **)rt_tables
); 
 452         if (rte_debug 
& RTD_DEBUG
) 
 453                 size 
= sizeof (struct rtentry_dbg
); 
 455                 size 
= sizeof (struct rtentry
); 
 457         rte_zone 
= zinit(size
, RTE_ZONE_MAX 
* size
, 0, RTE_ZONE_NAME
); 
 458         if (rte_zone 
== NULL
) { 
 459                 panic("%s: failed allocating rte_zone", __func__
); 
 462         zone_change(rte_zone
, Z_EXPAND
, TRUE
); 
 463         zone_change(rte_zone
, Z_CALLERACCT
, FALSE
); 
 464         zone_change(rte_zone
, Z_NOENCRYPT
, TRUE
); 
 466         TAILQ_INIT(&rttrash_head
); 
 470  * Given a route, determine whether or not it is the non-scoped default 
 471  * route; dst typically comes from rt_key(rt) but may be coming from 
 472  * a separate place when rt is in the process of being created. 
 /*
  * Result is non-zero only when both conditions hold:
  *   - dst satisfies SA_DEFAULT(), i.e. it is an AF_INET address whose
  *     s_addr is 0 or an AF_INET6 address that is unspecified; and
  *   - rt does not have RTF_IFSCOPE set in rt_flags.
  * No lock is taken here; rt_flags is read as-is.
  */
 475 rt_primary_default(struct rtentry 
*rt
, struct sockaddr 
*dst
) 
 477         return (SA_DEFAULT(dst
) && !(rt
->rt_flags 
& RTF_IFSCOPE
)); 
 481  * Set the ifscope of the primary interface; caller holds rnh_lock. 
 /*
  * Store ifscope into one of the two file-scope variables
  * primary_ifscope / primary6_ifscope.
  *
  * NOTE(review): the condition selecting between the two assignments is
  * not visible in this listing; presumably af == AF_INET selects
  * primary_ifscope, mirroring get_primary_ifscope() -- confirm.
  */
 484 set_primary_ifscope(int af
, unsigned int ifscope
) 
 487                 primary_ifscope 
= ifscope
; 
 489                 primary6_ifscope 
= ifscope
; 
 493  * Return the ifscope of the primary interface; caller holds rnh_lock. 
 /*
  * AF_INET yields primary_ifscope; any other value of af (not only
  * AF_INET6) yields primary6_ifscope.  af is not validated here.
  */
 496 get_primary_ifscope(int af
) 
 498         return (af 
== AF_INET 
? primary_ifscope 
: primary6_ifscope
); 
 502  * Set the scope ID of a given sockaddr_in. 
 /*
  * ASSERT_SINIFSCOPE() panics unless sa->sa_family is AF_INET and
  * sa->sa_len is at least sizeof (struct sockaddr_in).  After that check
  * ifscope is written to the sin_scope_id member reached via SINIFSCOPE().
  */
 505 sin_set_ifscope(struct sockaddr 
*sa
, unsigned int ifscope
) 
 507         /* Caller must pass in sockaddr_in */ 
 508         ASSERT_SINIFSCOPE(sa
); 
 510         SINIFSCOPE(sa
)->sin_scope_id 
= ifscope
; 
 514  * Set the scope ID of a given sockaddr_in6. 
 /*
  * ASSERT_SIN6IFSCOPE() panics unless sa->sa_family is AF_INET6 and
  * sa->sa_len is at least sizeof (struct sockaddr_in6).  After that check
  * ifscope is written to sin6_scope_id via SIN6IFSCOPE().  The address
  * bytes themselves are not touched by this function.
  */
 517 sin6_set_ifscope(struct sockaddr 
*sa
, unsigned int ifscope
) 
 519         /* Caller must pass in sockaddr_in6 */ 
 520         ASSERT_SIN6IFSCOPE(sa
); 
 522         SIN6IFSCOPE(sa
)->sin6_scope_id 
= ifscope
; 
 526  * Given a sockaddr_in, return the scope ID to the caller. 
 /*
  * Read-only counterpart of sin_set_ifscope(): after the same
  * ASSERT_SINIFSCOPE() check (panic on a non-AF_INET or too-short
  * sockaddr), the value of sin_scope_id is handed back unchanged.
  */
 529 sin_get_ifscope(struct sockaddr 
*sa
) 
 531         /* Caller must pass in sockaddr_in */ 
 532         ASSERT_SINIFSCOPE(sa
); 
 534         return (SINIFSCOPE(sa
)->sin_scope_id
); 
 538  * Given a sockaddr_in6, return the scope ID to the caller. 
 /*
  * Read-only counterpart of sin6_set_ifscope(): after the same
  * ASSERT_SIN6IFSCOPE() check (panic on a non-AF_INET6 or too-short
  * sockaddr), the value of sin6_scope_id is handed back unchanged.
  * Any scope embedded in the address bytes is not consulted here.
  */
 541 sin6_get_ifscope(struct sockaddr 
*sa
) 
 543         /* Caller must pass in sockaddr_in6 */ 
 544         ASSERT_SIN6IFSCOPE(sa
); 
 546         return (SIN6IFSCOPE(sa
)->sin6_scope_id
); 
 /*
  * Write ifscope into the IPv6 address bytes themselves (as opposed to
  * sin6_scope_id): the value is passed through htons() and stored in
  * s6_addr16[1] of sin6_addr.
  *
  * Preconditions enforced here:
  *   - ASSERT_SIN6IFSCOPE(): sa must be AF_INET6 with sa_len of at least
  *     sizeof (struct sockaddr_in6), otherwise panic;
  *   - VERIFY(): the address must satisfy IN6_IS_SCOPE_EMBED().
  *
  * NOTE(review): s6_addr16[1] is presumably a 16-bit slot, so ifscope
  * values above 0xffff would not fit -- confirm callers' range.
  */
 550 sin6_set_embedded_ifscope(struct sockaddr 
*sa
, unsigned int ifscope
) 
 552         /* Caller must pass in sockaddr_in6 */ 
 553         ASSERT_SIN6IFSCOPE(sa
); 
 554         VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa
)->sin6_addr
))); 
 556         SIN6(sa
)->sin6_addr
.s6_addr16
[1] = htons(ifscope
); 
 /*
  * Fetch the scope value stored inside the IPv6 address bytes:
  * s6_addr16[1] of sin6_addr, converted with ntohs().
  *
  * Only ASSERT_SIN6IFSCOPE() is applied; unlike the setter, this function
  * does not itself check IN6_IS_SCOPE_EMBED(), so the result is meaningful
  * only if the caller has established that (as sa_copy() does before
  * calling it).
  */
 559 static inline unsigned int 
 560 sin6_get_embedded_ifscope(struct sockaddr 
*sa
) 
 562         /* Caller must pass in sockaddr_in6 */ 
 563         ASSERT_SIN6IFSCOPE(sa
); 
 565         return (ntohs(SIN6(sa
)->sin6_addr
.s6_addr16
[1])); 
 569  * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst. 
 571  * To clear the scope ID, pass in a NULL pifscope.  To set the scope ID, pass 
 572  * in a non-NULL pifscope with non-zero ifscope.  Otherwise if pifscope is 
 573  * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact. 
 574  * In any case, the effective scope ID value is returned to the caller via 
 575  * pifscope, if it is non-NULL. 
 578 sa_copy(struct sockaddr 
*src
, struct sockaddr_storage 
*dst
, 
 579     unsigned int *pifscope
) 
 581         int af 
= src
->sa_family
; 
 582         unsigned int ifscope 
= (pifscope 
!= NULL
) ? *pifscope 
: IFSCOPE_NONE
; 
 584         VERIFY(af 
== AF_INET 
|| af 
== AF_INET6
); 
 586         bzero(dst
, sizeof (*dst
)); 
 589                 bcopy(src
, dst
, sizeof (struct sockaddr_in
)); 
 590                 if (pifscope 
== NULL 
|| ifscope 
!= IFSCOPE_NONE
) 
 591                         sin_set_ifscope(SA(dst
), ifscope
); 
 593                 bcopy(src
, dst
, sizeof (struct sockaddr_in6
)); 
 594                 if (pifscope 
!= NULL 
&& 
 595                     IN6_IS_SCOPE_EMBED(&SIN6(dst
)->sin6_addr
)) { 
 596                         unsigned int eifscope
; 
 598                          * If the address contains the embedded scope ID, 
 599                          * use that as the value for sin6_scope_id as long as 
 600                          * the caller doesn't insist on clearing it (by 
 601                          * passing NULL) or setting it. 
 603                         eifscope 
= sin6_get_embedded_ifscope(SA(dst
)); 
 604                         if (eifscope 
!= IFSCOPE_NONE 
&& ifscope 
== IFSCOPE_NONE
) 
 606                         if (ifscope 
!= IFSCOPE_NONE
) { 
 607                                 /* Set ifscope from pifscope or eifscope */ 
 608                                 sin6_set_ifscope(SA(dst
), ifscope
); 
 610                                 /* If sin6_scope_id has a value, use that one */ 
 611                                 ifscope 
= sin6_get_ifscope(SA(dst
)); 
 614                          * If sin6_scope_id is set but the address doesn't 
 615                          * contain the equivalent embedded value, set it. 
 617                         if (ifscope 
!= IFSCOPE_NONE 
&& eifscope 
!= ifscope
) 
 618                                 sin6_set_embedded_ifscope(SA(dst
), ifscope
); 
 619                 } else if (pifscope 
== NULL 
|| ifscope 
!= IFSCOPE_NONE
) { 
 620                         sin6_set_ifscope(SA(dst
), ifscope
); 
 624         if (pifscope 
!= NULL
) { 
 625                 *pifscope 
= (af 
== AF_INET
) ? sin_get_ifscope(SA(dst
)) : 
 626                     sin6_get_ifscope(SA(dst
)); 
 633  * Copy a mask from src to a dst storage and set scope ID into dst. 
 /*
  * Parameters:
  *   af      - must be AF_INET or AF_INET6 (VERIFY otherwise fails).
  *   src     - source mask; passed to rt_maskedcopy() as both the source
  *             and the mask argument.
  *   dst     - caller-supplied storage; fully zeroed before the copy.
  *   ifscope - value stored in the {sin,sin6}_scope_id field of dst.
  *
  * After the copy, the length byte of dst is overwritten so that the
  * sockaddr extends through the end of the scope ID field
  * (offsetof(..., scope_id) + sizeof (scope_id)).
  *
  * NOTE(review): the af test choosing between the AF_INET and AF_INET6
  * assignments, and the return statement, are not visible in this
  * listing -- confirm against the full source.
  */
 635 static struct sockaddr 
* 
 636 ma_copy(int af
, struct sockaddr 
*src
, struct sockaddr_storage 
*dst
, 
 637     unsigned int ifscope
) 
 639         VERIFY(af 
== AF_INET 
|| af 
== AF_INET6
); 
 641         bzero(dst
, sizeof (*dst
)); 
 642         rt_maskedcopy(src
, SA(dst
), src
); 
 645          * The length of the mask sockaddr would need to be adjusted 
 646          * to cover the additional {sin,sin6}_ifscope field; when ifscope 
 647          * is IFSCOPE_NONE, we'd end up clearing the scope ID field on 
 648          * the destination mask in addition to extending the length 
 649          * of the sockaddr, as a side effect.  This is okay, as any 
 650          * trailing zeroes would be skipped by rn_addmask prior to 
 651          * inserting or looking up the mask in the mask tree. 
 654                 SINIFSCOPE(dst
)->sin_scope_id 
= ifscope
; 
 655                 SINIFSCOPE(dst
)->sin_len 
= 
 656                     offsetof(struct sockaddr_inifscope
, sin_scope_id
) + 
 657                     sizeof (SINIFSCOPE(dst
)->sin_scope_id
); 
 659                 SIN6IFSCOPE(dst
)->sin6_scope_id 
= ifscope
; 
 660                 SIN6IFSCOPE(dst
)->sin6_len 
= 
 661                     offsetof(struct sockaddr_in6
, sin6_scope_id
) + 
 662                     sizeof (SIN6IFSCOPE(dst
)->sin6_scope_id
); 
 669  * Trim trailing zeroes on a sockaddr and update its length. 
 /*
  * Parameters:
  *   sa   - sockaddr whose sa_len is rewritten in place.
  *   skip - number of leading bytes of sa that are never examined; the
  *          scan for trailing zero bytes covers only [skip, sa_len).
  *
  * The loop walks cp backwards from the end of the sockaddr while the
  * preceding byte is zero and cp is still above base (sa + skip); sa_len
  * then becomes the number of bytes left plus skip.  A resulting sa_len
  * below skip is treated as impossible and panics.
  *
  * NOTE(review): the statement guarded by the initial `sa_len <= skip'
  * test, the loop body, the remaining panic arguments, the body of the
  * `sa_len == skip' branch and the return statement are not visible in
  * this listing -- confirm against the full source.
  */
 671 static struct sockaddr 
* 
 672 sa_trim(struct sockaddr 
*sa
, int skip
) 
 674         caddr_t cp
, base 
= (caddr_t
)sa 
+ skip
; 
 676         if (sa
->sa_len 
<= skip
) 
 679         for (cp 
= base 
+ (sa
->sa_len 
- skip
); cp 
> base 
&& cp
[-1] == 0; ) 
 682         sa
->sa_len 
= (cp 
- base
) + skip
; 
 683         if (sa
->sa_len 
< skip
) { 
 684                 /* Must not happen, and if so, panic */ 
 685                 panic("%s: broken logic (sa_len %d < skip %d )", __func__
, 
 688         } else if (sa
->sa_len 
== skip
) { 
 689                 /* If we end up with all zeroes, then there's no mask */ 
 697  * Called by rtm_msg{1,2} routines to "scrub" socket address structures of 
 698  * kernel private information, so that clients of the routing socket will 
 699  * not be confused by the presence of the information, or the side effect of 
 700  * the increased length due to that.  The source sockaddr is not modified; 
 701  * instead, the scrubbing happens on the destination sockaddr storage that 
 702  * is passed in by the caller. 
 705  *   - removing embedded scope identifiers from network mask and destination 
 706  *     IPv4 and IPv6 socket addresses 
 707  *   - optionally removing global scope interface hardware addresses from 
 708  *     link-layer interface addresses when the MAC framework check fails. 
 711 rtm_scrub(int type
, int idx
, struct sockaddr 
*hint
, struct sockaddr 
*sa
, 
 712     void *buf
, uint32_t buflen
, kauth_cred_t 
*credp
) 
 714         struct sockaddr_storage 
*ss 
= (struct sockaddr_storage 
*)buf
; 
 715         struct sockaddr 
*ret 
= sa
; 
 717         VERIFY(buf 
!= NULL 
&& buflen 
>= sizeof (*ss
)); 
 723                  * If this is for an AF_INET/AF_INET6 destination address, 
 724                  * call sa_copy() to clear the scope ID field. 
 726                 if (sa
->sa_family 
== AF_INET 
&& 
 727                     SINIFSCOPE(sa
)->sin_scope_id 
!= IFSCOPE_NONE
) { 
 728                         ret 
= sa_copy(sa
, ss
, NULL
); 
 729                 } else if (sa
->sa_family 
== AF_INET6 
&& 
 730                     SIN6IFSCOPE(sa
)->sin6_scope_id 
!= IFSCOPE_NONE
) { 
 731                         ret 
= sa_copy(sa
, ss
, NULL
); 
 738                  * If this is for a mask, we can't tell whether or not there 
 739                  * is a valid scope ID value, as the span of bytes between 
 740                  * sa_len and the beginning of the mask (offset of sin_addr in 
 741                  * the case of AF_INET, or sin6_addr for AF_INET6) may be 
 742                  * filled with all-ones by rn_addmask(), and hence we cannot 
 743                  * rely on sa_family.  Because of this, we use the sa_family 
 744                  * of the hint sockaddr (RTAX_{DST,IFA}) as indicator as to 
 745                  * whether or not the mask is to be treated as one for AF_INET 
 746                  * or AF_INET6.  Clearing the scope ID field involves setting 
 747                  * it to IFSCOPE_NONE followed by calling sa_trim() to trim 
 748                  * trailing zeroes from the storage sockaddr, which reverses 
 749                  * what was done earlier by ma_copy() on the source sockaddr. 
 752                     ((af 
= hint
->sa_family
) != AF_INET 
&& af 
!= AF_INET6
)) 
 753                         break;  /* nothing to do */ 
 755                 skip 
= (af 
== AF_INET
) ? 
 756                     offsetof(struct sockaddr_in
, sin_addr
) : 
 757                     offsetof(struct sockaddr_in6
, sin6_addr
); 
 759                 if (sa
->sa_len 
> skip 
&& sa
->sa_len 
<= sizeof (*ss
)) { 
 760                         bcopy(sa
, ss
, sa
->sa_len
); 
 762                          * Don't use {sin,sin6}_set_ifscope() as sa_family 
 763                          * and sa_len for the netmask might not be set to 
 764                          * the corresponding expected values of the hint. 
 766                         if (hint
->sa_family 
== AF_INET
) 
 767                                 SINIFSCOPE(ss
)->sin_scope_id 
= IFSCOPE_NONE
; 
 769                                 SIN6IFSCOPE(ss
)->sin6_scope_id 
= IFSCOPE_NONE
; 
 770                         ret 
= sa_trim(SA(ss
), skip
); 
 773                          * For AF_INET6 mask, set sa_len appropriately unless 
 774                          * this is requested via sysctl_dumpentry(), in which 
 775                          * case we return the raw value. 
 777                         if (hint
->sa_family 
== AF_INET6 
&& 
 778                             type 
!= RTM_GET 
&& type 
!= RTM_GET2
) 
 779                                 SA(ret
)->sa_len 
= sizeof (struct sockaddr_in6
); 
 785                  * Break if the gateway is not AF_LINK type (indirect routes) 
 787                  * Otherwise, check whether it is resolved.  If not yet resolved, 
 788                  * simply break; else scrub the link-layer address. 
 790                 if ((sa
->sa_family 
!= AF_LINK
) || (SDL(sa
)->sdl_alen 
== 0)) 
 795                 if (sa
->sa_family 
== AF_LINK 
&& credp
) { 
 796                         struct sockaddr_dl 
*sdl 
= SDL(buf
); 
 800                         /* caller should handle worst case: SOCK_MAXADDRLEN */ 
 801                         VERIFY(buflen 
>= sa
->sa_len
); 
 803                         bcopy(sa
, sdl
, sa
->sa_len
); 
 804                         bytes 
= dlil_ifaddr_bytes(sdl
, &size
, credp
); 
 805                         if (bytes 
!= CONST_LLADDR(sdl
)) { 
 806                                 VERIFY(sdl
->sdl_alen 
== size
); 
 807                                 bcopy(bytes
, LLADDR(sdl
), size
); 
 809                         ret 
= (struct sockaddr 
*)sdl
; 
 821  * Callback leaf-matching routine for rn_matchaddr_args used 
 822  * for looking up an exact match for a scoped route entry. 
 825 rn_match_ifscope(struct radix_node 
*rn
, void *arg
) 
 827         struct rtentry 
*rt 
= (struct rtentry 
*)rn
; 
 828         struct matchleaf_arg 
*ma 
= arg
; 
 829         int af 
= rt_key(rt
)->sa_family
; 
 831         if (!(rt
->rt_flags 
& RTF_IFSCOPE
) || (af 
!= AF_INET 
&& af 
!= AF_INET6
)) 
 834         return (af 
== AF_INET 
? 
 835             (SINIFSCOPE(rt_key(rt
))->sin_scope_id 
== ma
->ifscope
) : 
 836             (SIN6IFSCOPE(rt_key(rt
))->sin6_scope_id 
== ma
->ifscope
)); 
 /*
  * Advance the route generation counter of every address family: IPv4
  * always, IPv6 when INET6 is configured.  Each per-family helper does
  * its own atomic increment.
  */
void
routegenid_update(void)
{
	routegenid_inet_update();
#if INET6
	routegenid_inet6_update();
#endif /* INET6 */
}
 852 routegenid_inet_update(void) 
 854         atomic_add_32(&route_genid_inet
, 1); 
 /* Atomically add one to the IPv6 route generation counter. */
#if INET6
void
routegenid_inet6_update(void)
{
	atomic_add_32(&route_genid_inet6, 1);
}
#endif /* INET6 */
 /*
  * rtalloc: packet-routing entry point that takes only a struct route.
  *
  * NOTE(review): the return type and the whole body of this function are
  * absent from this listing, so its behavior cannot be confirmed here.
  * Presumably it forwards to rtalloc_ign() with no ignore flags, in the
  * same way rtalloc_scoped() forwards to rtalloc_scoped_ign() -- TODO
  * confirm against an intact copy of the file.
  */
 866  * Packet routing routines. 
 869 rtalloc(struct route 
*ro
) 
 /*
  * Scoped route allocation: resolve the route cached in ro within the
  * interface scope ifscope, passing an ignore mask of zero to
  * rtalloc_scoped_ign().
  */
void
rtalloc_scoped(struct route *ro, unsigned int ifscope)
{
	rtalloc_scoped_ign(ro, 0, ifscope);
}
 /*
  * Common worker behind rtalloc_ign() and rtalloc_scoped_ign(); both call
  * it with rnh_lock held.
  *
  * If ro already caches a route, that route is examined: the visible test
  * checks that it has an interface (rt_ifp != NULL) and that
  * ROUTE_UNUSABLE(ro) is false.  Otherwise the cached route is released
  * with ROUTE_RELEASE_LOCKED().  A fresh lookup is then made with
  * rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope) and stored in
  * ro->ro_rt; on success the route's generation ID is synchronized
  * (RT_GENID_SYNC) and the route lock is asserted not to be held.
  *
  * NOTE(review): the return type, the declaration of rt and the
  * statements between original lines 886 and 893 (presumably the route
  * lock/unlock calls and the early return for a still-usable cached
  * route) are missing from this listing -- TODO confirm.
  */
 881 rtalloc_ign_common_locked(struct route 
*ro
, uint32_t ignore
, 
 882     unsigned int ifscope
) 
 886         if ((rt 
= ro
->ro_rt
) != NULL
) { 
 888                 if (rt
->rt_ifp 
!= NULL 
&& !ROUTE_UNUSABLE(ro
)) { 
 893                 ROUTE_RELEASE_LOCKED(ro
);       /* rnh_lock already held */ 
 895         ro
->ro_rt 
= rtalloc1_common_locked(&ro
->ro_dst
, 1, ignore
, ifscope
); 
 896         if (ro
->ro_rt 
!= NULL
) { 
 897                 RT_GENID_SYNC(ro
->ro_rt
); 
 898                 RT_LOCK_ASSERT_NOTHELD(ro
->ro_rt
); 
 903 rtalloc_ign(struct route 
*ro
, uint32_t ignore
) 
 905         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
 906         lck_mtx_lock(rnh_lock
); 
 907         rtalloc_ign_common_locked(ro
, ignore
, IFSCOPE_NONE
); 
 908         lck_mtx_unlock(rnh_lock
); 
 912 rtalloc_scoped_ign(struct route 
*ro
, uint32_t ignore
, unsigned int ifscope
) 
 914         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
 915         lck_mtx_lock(rnh_lock
); 
 916         rtalloc_ign_common_locked(ro
, ignore
, ifscope
); 
 917         lck_mtx_unlock(rnh_lock
); 
 920 static struct rtentry 
* 
 921 rtalloc1_locked(struct sockaddr 
*dst
, int report
, uint32_t ignflags
) 
 923         return (rtalloc1_common_locked(dst
, report
, ignflags
, IFSCOPE_NONE
)); 
 /*
  * Scoped route lookup: forwards to rtalloc1_common_locked() with the
  * caller's ifscope and returns whatever that lookup returns.
  */
struct rtentry *
rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
	struct rtentry *rt;

	rt = rtalloc1_common_locked(dst, report, ignflags, ifscope);
	return (rt);
}
 /*
  * Core route lookup shared by rtalloc1_locked() and
  * rtalloc1_scoped_locked().
  *
  * dst      destination to look up; its sa_family selects the table.
  * report   non-zero when the caller is adding; enables cloning and, per
  *          the in-line comments, failure reporting (zero for delete).
  * ignflags route flags masked out of rt_flags before the cloning test.
  * ifscope  interface scope handed to rt_lookup().
  *
  * Visible behavior: the route is found with rt_lookup().  When report is
  * set and the masked flags include RTF_CLONING or RTF_PRCLONING, a clone
  * is requested through rtrequest_locked(RTM_RESOLVE).  If the clone has
  * both RTF_HOST and RTF_LLINFO, a scoped lookup of an all-zero key of
  * the same length and family is made on the clone's interface, and the
  * clone is flagged RTF_ROUTER when its key equals that route's gateway.
  * A clone flagged RTF_XRESOLVE switches the message type to RTM_RESOLVE.
  * On the failure path rtstat.rts_unreach is incremented and
  * rt_missmsg() is posted with dst as RTAX_DST.
  *
  * NOTE(review): the return type, the declaration of nflags, the error
  * branches, the labels and every return statement are missing from this
  * listing, so the control flow linking the fragments below cannot be
  * confirmed here -- TODO check against an intact copy.
  */
 934 rtalloc1_common_locked(struct sockaddr 
*dst
, int report
, uint32_t ignflags
, 
 935     unsigned int ifscope
) 
 937         struct radix_node_head 
*rnh 
= rt_tables
[dst
->sa_family
]; 
 938         struct rtentry 
*rt
, *newrt 
= NULL
; 
 939         struct rt_addrinfo info
; 
 941         int  err 
= 0, msgtype 
= RTM_MISS
; 
 947          * Find the longest prefix or exact (in the scoped case) address match; 
 948          * callee adds a reference to entry and checks for root node as well 
 950         rt 
= rt_lookup(FALSE
, dst
, NULL
, rnh
, ifscope
); 
 956         nflags 
= rt
->rt_flags 
& ~ignflags
; 
 958         if (report 
&& (nflags 
& (RTF_CLONING 
| RTF_PRCLONING
))) { 
 960                  * We are apparently adding (report = 0 in delete). 
 961                  * If it requires that it be cloned, do so. 
 962                  * (This implies it wasn't a HOST route.) 
 964                 err 
= rtrequest_locked(RTM_RESOLVE
, dst
, NULL
, NULL
, 0, &newrt
); 
 967                          * If the cloning didn't succeed, maybe what we 
 968                          * have from lookup above will do.  Return that; 
 969                          * no need to hold another reference since it's 
 977                  * We cloned it; drop the original route found during lookup. 
 978                  * The resulted cloned route (newrt) would now have an extra 
 979                  * reference held during rtrequest. 
 984                  * If the newly created cloned route is a direct host route 
 985                  * then also check if it is to a router or not. 
 986                  * If it is, then set the RTF_ROUTER flag on the host route 
 989                  * XXX It is possible for the default route to be created post 
 990                  * cloned route creation of router's IP. 
 991                  * We can handle that corner case by special handing for RTM_ADD 
 994                 if ((newrt
->rt_flags 
& (RTF_HOST 
| RTF_LLINFO
)) == 
 995                     (RTF_HOST 
| RTF_LLINFO
)) { 
 996                         struct rtentry 
*defrt 
= NULL
; 
 997                         struct sockaddr_storage def_key
; 
 999                         bzero(&def_key
, sizeof(def_key
)); 
1000                         def_key
.ss_len 
= rt_key(newrt
)->sa_len
; 
1001                         def_key
.ss_family 
= rt_key(newrt
)->sa_family
; 
1003                         defrt 
= rtalloc1_scoped_locked((struct sockaddr 
*)&def_key
, 
1004                                         0, 0, newrt
->rt_ifp
->if_index
); 
1007                                 if (equal(rt_key(newrt
), defrt
->rt_gateway
)) { 
1008                                         newrt
->rt_flags 
|= RTF_ROUTER
; 
1010                                 rtfree_locked(defrt
); 
1014                 if ((rt 
= newrt
) && (rt
->rt_flags 
& RTF_XRESOLVE
)) { 
1016                          * If the new route specifies it be 
1017                          * externally resolved, then go do that. 
1019                         msgtype 
= RTM_RESOLVE
; 
1027          * Either we hit the root or couldn't find any match, 
1028          * Which basically means "cant get there from here" 
1030         rtstat
.rts_unreach
++; 
1035                  * If required, report the failure to the supervising 
1037                  * For a delete, this is not an error. (report == 0) 
1039                 bzero((caddr_t
)&info
, sizeof(info
)); 
1040                 info
.rti_info
[RTAX_DST
] = dst
; 
1041                 rt_missmsg(msgtype
, &info
, 0, err
); 
1048 rtalloc1(struct sockaddr 
*dst
, int report
, uint32_t ignflags
) 
1050         struct rtentry 
*entry
; 
1051         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
1052         lck_mtx_lock(rnh_lock
); 
1053         entry 
= rtalloc1_locked(dst
, report
, ignflags
); 
1054         lck_mtx_unlock(rnh_lock
); 
1059 rtalloc1_scoped(struct sockaddr 
*dst
, int report
, uint32_t ignflags
, 
1060     unsigned int ifscope
) 
1062         struct rtentry 
*entry
; 
1063         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
1064         lck_mtx_lock(rnh_lock
); 
1065         entry 
= rtalloc1_scoped_locked(dst
, report
, ignflags
, ifscope
); 
1066         lck_mtx_unlock(rnh_lock
); 
1071  * Remove a reference count from an rtentry. 
1072  * If the count gets low enough, take it out of the routing table 
1075 rtfree_locked(struct rtentry 
*rt
) 
1077         rtfree_common(rt
, TRUE
); 
/*
 * Shared implementation of rtfree() (locked == FALSE) and rtfree_locked()
 * (locked == TRUE).
 *
 * rnh_lock ownership is asserted to agree with `locked`.  One reference
 * is dropped through rtunref(); if references remain nothing more is
 * done.  When the caller does not hold rnh_lock, an extra reference is
 * taken, rnh_lock is acquired and the count is re-checked, since another
 * thread may have picked the route up in the meantime.
 *
 * With the count at zero and rnh_lock owned, the table's rnh_close hook
 * (if any) is invoked.  If the route is then no longer RTF_UP it is
 * marked RTF_DEAD and torn down: rttrash accounting is undone, the
 * parent route reference is released with rtfree_locked(), link-layer
 * info is freed via rt_llinfo_free or R_Free, the event handler context
 * and the route lock are destroyed, and route statistics are detached.
 * A route that is still RTF_UP stays in the tree in the cached state.
 *
 * NOTE(review): the return type, the rt_lock lock/unlock calls, the
 * early returns, the ifaddr release and the final frees of the key and
 * of the rtentry itself are missing from this listing; the summary above
 * relies on the surviving statements and comments only -- TODO confirm.
 */
1081 rtfree_common(struct rtentry 
*rt
, boolean_t locked
) 
1083         struct radix_node_head 
*rnh
; 
1085         LCK_MTX_ASSERT(rnh_lock
, locked 
? 
1086             LCK_MTX_ASSERT_OWNED 
: LCK_MTX_ASSERT_NOTOWNED
); 
1089          * Atomically decrement the reference count and if it reaches 0, 
1090          * and there is a close function defined, call the close function. 
1093         if (rtunref(rt
) > 0) { 
1099          * To avoid violating lock ordering, we must drop rt_lock before 
1100          * trying to acquire the global rnh_lock.  If we are called with 
1101          * rnh_lock held, then we already have exclusive access; otherwise 
1102          * we do the lock dance. 
1106                  * Note that we check it again below after grabbing rnh_lock, 
1107                  * since it is possible that another thread doing a lookup wins 
1108                  * the race, grabs the rnh_lock first, and bumps up reference 
1109                  * count in which case the route should be left alone as it is 
1110                  * still in use.  It's also possible that another thread frees 
1111                  * the route after we drop rt_lock; to prevent the route from 
1112                  * being freed, we hold an extra reference. 
1114                 RT_ADDREF_LOCKED(rt
); 
1116                 lck_mtx_lock(rnh_lock
); 
1118                 if (rtunref(rt
) > 0) { 
1119                         /* We've lost the race, so abort */ 
1126          * We may be blocked on other lock(s) as part of freeing 
1127          * the entry below, so convert from spin to full mutex. 
1129         RT_CONVERT_LOCK(rt
); 
1131         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
1133         /* Negative refcnt must never happen */ 
1134         if (rt
->rt_refcnt 
!= 0) { 
1135                 panic("rt %p invalid refcnt %d", rt
, rt
->rt_refcnt
); 
1138         /* Idle refcnt must have been dropped during rtunref() */ 
1139         VERIFY(!(rt
->rt_flags 
& RTF_IFREF
)); 
1142          * find the tree for that address family 
1143          * Note: in the case of igmp packets, there might not be an rnh 
1145         rnh 
= rt_tables
[rt_key(rt
)->sa_family
]; 
1148          * On last reference give the "close method" a chance to cleanup 
1149          * private state.  This also permits (for IPv4 and IPv6) a chance 
1150          * to decide if the routing table entry should be purged immediately 
1151          * or at a later time.  When an immediate purge is to happen the 
1152          * close routine typically issues RTM_DELETE which clears the RTF_UP 
1153          * flag on the entry so that the code below reclaims the storage. 
1155         if (rnh 
!= NULL 
&& rnh
->rnh_close 
!= NULL
) 
1156                 rnh
->rnh_close((struct radix_node 
*)rt
, rnh
); 
1159          * If we are no longer "up" (and ref == 0) then we can free the 
1160          * resources associated with the route. 
1162         if (!(rt
->rt_flags 
& RTF_UP
)) { 
1163                 struct rtentry 
*rt_parent
; 
1164                 struct ifaddr 
*rt_ifa
; 
1166                 rt
->rt_flags 
|= RTF_DEAD
; 
1167                 if (rt
->rt_nodes
->rn_flags 
& (RNF_ACTIVE 
| RNF_ROOT
)) { 
1168                         panic("rt %p freed while in radix tree\n", rt
); 
1172                  * the rtentry must have been removed from the routing table 
1173                  * so it is represented in rttrash; remove that now. 
1175                 (void) OSDecrementAtomic(&rttrash
); 
1176                 if (rte_debug 
& RTD_DEBUG
) { 
1177                         TAILQ_REMOVE(&rttrash_head
, (struct rtentry_dbg 
*)rt
, 
1182                  * release references on items we hold them on.. 
1183                  * e.g other routes and ifaddrs. 
1185                 if ((rt_parent 
= rt
->rt_parent
) != NULL
) 
1186                         rt
->rt_parent 
= NULL
; 
1188                 if ((rt_ifa 
= rt
->rt_ifa
) != NULL
) 
1192                  * Now free any attached link-layer info. 
1194                 if (rt
->rt_llinfo 
!= NULL
) { 
1195                         if (rt
->rt_llinfo_free 
!= NULL
) 
1196                                 (*rt
->rt_llinfo_free
)(rt
->rt_llinfo
); 
1198                                 R_Free(rt
->rt_llinfo
); 
1199                         rt
->rt_llinfo 
= NULL
; 
1202                 /* Destroy eventhandler lists context */ 
1203                 eventhandler_lists_ctxt_destroy(&rt
->rt_evhdlr_ctxt
); 
1206                  * Route is no longer in the tree and refcnt is 0; 
1207                  * we have exclusive access, so destroy it. 
1210                 rte_lock_destroy(rt
); 
1212                 if (rt_parent 
!= NULL
) 
1213                         rtfree_locked(rt_parent
); 
1219                  * The key is separately alloc'd so free it (see rt_setgate()). 
1220                  * This also frees the gateway, as they are always malloc'd 
1226                  * Free any statistics that may have been allocated 
1228                 nstat_route_detach(rt
); 
1231                  * and the rtentry itself of course 
1236                  * The "close method" has been called, but the route is 
1237                  * still in the radix tree with zero refcnt, i.e. "up" 
1238                  * and in the cached state. 
1244                 lck_mtx_unlock(rnh_lock
); 
1248 rtfree(struct rtentry 
*rt
) 
1250         rtfree_common(rt
, FALSE
); 
1254  * Decrements the refcount but does not free the route when 
1255  * the refcount reaches zero. Unless you have really good reason, 
1256  * use rtfree not rtunref. 
1259 rtunref(struct rtentry 
*p
) 
1261         RT_LOCK_ASSERT_HELD(p
); 
1263         if (p
->rt_refcnt 
== 0) { 
1264                 panic("%s(%p) bad refcnt\n", __func__
, p
); 
1266         } else if (--p
->rt_refcnt 
== 0) { 
1268                  * Release any idle reference count held on the interface; 
1269                  * if the route is eligible, still UP and the refcnt becomes 
1270                  * non-zero at some point in future before it is purged from 
1271                  * the routing table, rt_set_idleref() will undo this. 
1273                 rt_clear_idleref(p
); 
1276         if (rte_debug 
& RTD_DEBUG
) 
1277                 rtunref_audit((struct rtentry_dbg 
*)p
); 
1279         /* Return new value */ 
1280         return (p
->rt_refcnt
); 
1284 rtunref_audit(struct rtentry_dbg 
*rte
) 
1288         if (rte
->rtd_inuse 
!= RTD_INUSE
) { 
1289                 panic("rtunref: on freed rte=%p\n", rte
); 
1292         idx 
= atomic_add_16_ov(&rte
->rtd_refrele_cnt
, 1) % CTRACE_HIST_SIZE
; 
1293         if (rte_debug 
& RTD_TRACE
) 
1294                 ctrace_record(&rte
->rtd_refrele
[idx
]); 
/*
 * rtref: take one reference on a route entry.
 *
 * The route lock must be held (asserted) and the route must not be
 * flagged RTF_DEAD (verified).  The count is incremented; an increment
 * that lands on zero panics.  When RTD_DEBUG is set the hold is recorded
 * through rtref_audit().
 *
 * NOTE(review): the return type and the statement executed when the
 * count becomes 1 are missing from this listing.  The surviving comment
 * speaks of holding an idle reference on the interface, and the comment
 * in rtunref() names rt_set_idleref(), so presumably that is what is
 * called here -- TODO confirm.
 */
1298  * Add a reference count from an rtentry. 
1301 rtref(struct rtentry 
*p
) 
1303         RT_LOCK_ASSERT_HELD(p
); 
1305         VERIFY((p
->rt_flags 
& RTF_DEAD
) == 0); 
1306         if (++p
->rt_refcnt 
== 0) { 
1307                 panic("%s(%p) bad refcnt\n", __func__
, p
); 
1309         } else if (p
->rt_refcnt 
== 1) { 
1311                  * Hold an idle reference count on the interface, 
1312                  * if the route is eligible for it. 
1317         if (rte_debug 
& RTD_DEBUG
) 
1318                 rtref_audit((struct rtentry_dbg 
*)p
); 
1322 rtref_audit(struct rtentry_dbg 
*rte
) 
1326         if (rte
->rtd_inuse 
!= RTD_INUSE
) { 
1327                 panic("rtref_audit: on freed rte=%p\n", rte
); 
1330         idx 
= atomic_add_16_ov(&rte
->rtd_refhold_cnt
, 1) % CTRACE_HIST_SIZE
; 
1331         if (rte_debug 
& RTD_TRACE
) 
1332                 ctrace_record(&rte
->rtd_refhold
[idx
]); 
/*
 * rtsetifa: change the interface address associated with a route.
 *
 * Requires rnh_lock to be owned and the route lock to be held (both
 * asserted).  The visible code compares rt->rt_ifa with ifa first, then
 * converts the route lock to a regular mutex, drops the reference on
 * the old ifa (IFA_REMREF) and takes one on the new ifa (IFA_ADDREF).
 *
 * NOTE(review): the return type, the early return for the unchanged
 * case, the NULL guards around the reference calls and the assignment
 * of rt->rt_ifa itself are missing from this listing -- TODO confirm.
 */
1336 rtsetifa(struct rtentry 
*rt
, struct ifaddr 
*ifa
) 
1338         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
1340         RT_LOCK_ASSERT_HELD(rt
); 
1342         if (rt
->rt_ifa 
== ifa
) 
1345         /* Become a regular mutex, just in case */ 
1346         RT_CONVERT_LOCK(rt
); 
1348         /* Release the old ifa */ 
1350                 IFA_REMREF(rt
->rt_ifa
); 
1355         /* Take a reference to the ifa */ 
1357                 IFA_ADDREF(rt
->rt_ifa
); 
/*
 * rtredirect: process a routing redirect for dst, steering it through
 * gateway.
 *
 * ifp      interface the redirect arrived on; when non-NULL its
 *          if_index becomes the lookup scope, otherwise IFSCOPE_NONE.
 * dst      destination taken from the original IP header.
 * gateway  new gateway proposed by the redirect.
 * netmask  netmask passed on when a new route is added.
 * flags    route flags; RTF_DONE skips the sender sanity check.
 * src      sender of the redirect; its address family selects which
 *          route generation counter is bumped.
 * rtp      NOTE(review): its use is not visible in this listing.
 *
 * Visible behavior: rnh_lock must not be owned on entry and is taken
 * here.  src is converted with sa_copy() for comparison with
 * rt_gateway.  The gateway must be reachable within the scope
 * (ifa_ifwithnet_scoped), else ENETUNREACH.  The route to dst is looked
 * up with cloning flags ignored.  For an RTF_GATEWAY route, a redirect
 * to a host over a network route adds a new scoped host route flagged
 * RTF_GATEWAY | RTF_DYNAMIC; otherwise the existing route is flagged
 * RTF_MODIFIED and its gateway replaced with rt_setgate().  Bad
 * redirects are counted in rtstat.rts_badredirect.  After rnh_lock is
 * dropped, an RTM_REDIRECT message carrying dst, gateway, netmask and
 * src is posted with rt_missmsg().
 *
 * NOTE(review): the return type, the declarations of error and stat,
 * the labels, gotos, lock/unlock calls, preprocessor conditionals and
 * the final return are missing from this listing, so the exact control
 * flow between the fragments cannot be confirmed here -- TODO check
 * against an intact copy.
 */
1361  * Force a routing table entry to the specified 
1362  * destination to go through the given gateway. 
1363  * Normally called as a result of a routing redirect 
1364  * message from the network layer. 
1367 rtredirect(struct ifnet 
*ifp
, struct sockaddr 
*dst
, struct sockaddr 
*gateway
, 
1368     struct sockaddr 
*netmask
, int flags
, struct sockaddr 
*src
, 
1369     struct rtentry 
**rtp
) 
1371         struct rtentry 
*rt 
= NULL
; 
1374         struct rt_addrinfo info
; 
1375         struct ifaddr 
*ifa 
= NULL
; 
1376         unsigned int ifscope 
= (ifp 
!= NULL
) ? ifp
->if_index 
: IFSCOPE_NONE
; 
1377         struct sockaddr_storage ss
; 
1378         int af 
= src
->sa_family
; 
1380         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
1381         lck_mtx_lock(rnh_lock
); 
1384          * Transform src into the internal routing table form for 
1385          * comparison against rt_gateway below. 
1388         if ((af 
== AF_INET
) || (af 
== AF_INET6
)) { 
1390         if (af 
== AF_INET
) { 
1392                 src 
= sa_copy(src
, &ss
, &ifscope
); 
1396          * Verify the gateway is directly reachable; if scoped routing 
1397          * is enabled, verify that it is reachable from the interface 
1398          * where the ICMP redirect arrived on. 
1400         if ((ifa 
= ifa_ifwithnet_scoped(gateway
, ifscope
)) == NULL
) { 
1401                 error 
= ENETUNREACH
; 
1405         /* Lookup route to the destination (from the original IP header) */ 
1406         rt 
= rtalloc1_scoped_locked(dst
, 0, RTF_CLONING
|RTF_PRCLONING
, ifscope
); 
1411          * If the redirect isn't from our current router for this dst, 
1412          * it's either old or wrong.  If it redirects us to ourselves, 
1413          * we have a routing loop, perhaps as a result of an interface 
1414          * going down recently.  Holding rnh_lock here prevents the 
1415          * possibility of rt_ifa/ifa's ifa_addr from changing (e.g. 
1416          * in_ifinit), so okay to access ifa_addr without locking. 
1418         if (!(flags 
& RTF_DONE
) && rt 
!= NULL 
&& 
1419             (!equal(src
, rt
->rt_gateway
) || !equal(rt
->rt_ifa
->ifa_addr
, 
1424                 if ((ifa 
= ifa_ifwithaddr(gateway
))) { 
1427                         error 
= EHOSTUNREACH
; 
1443          * Create a new entry if we just got back a wildcard entry 
1444          * or the the lookup failed.  This is necessary for hosts 
1445          * which use routing redirects generated by smart gateways 
1446          * to dynamically build the routing tables. 
1448         if ((rt 
== NULL
) || (rt_mask(rt
) != NULL 
&& rt_mask(rt
)->sa_len 
< 2)) 
1451          * Don't listen to the redirect if it's 
1452          * for a route to an interface. 
1454         RT_LOCK_ASSERT_HELD(rt
); 
1455         if (rt
->rt_flags 
& RTF_GATEWAY
) { 
1456                 if (((rt
->rt_flags 
& RTF_HOST
) == 0) && (flags 
& RTF_HOST
)) { 
1458                          * Changing from route to net => route to host. 
1459                          * Create new route, rather than smashing route 
1460                          * to net; similar to cloned routes, the newly 
1461                          * created host route is scoped as well. 
1466                         flags 
|=  RTF_GATEWAY 
| RTF_DYNAMIC
; 
1467                         error 
= rtrequest_scoped_locked(RTM_ADD
, dst
, 
1468                             gateway
, netmask
, flags
, NULL
, ifscope
); 
1469                         stat 
= &rtstat
.rts_dynamic
; 
1472                          * Smash the current notion of the gateway to 
1473                          * this destination.  Should check about netmask!!! 
1475                         rt
->rt_flags 
|= RTF_MODIFIED
; 
1476                         flags 
|= RTF_MODIFIED
; 
1477                         stat 
= &rtstat
.rts_newgateway
; 
1479                          * add the key and gateway (in one malloc'd chunk). 
1481                         error 
= rt_setgate(rt
, rt_key(rt
), gateway
); 
1486                 error 
= EHOSTUNREACH
; 
1490                 RT_LOCK_ASSERT_NOTHELD(rt
); 
1492                         /* Enqueue event to refresh flow route entries */ 
1493                         route_event_enqueue_nwk_wq_entry(rt
, NULL
, ROUTE_ENTRY_REFRESH
, NULL
, FALSE
); 
1504                 rtstat
.rts_badredirect
++; 
1510                         routegenid_inet_update(); 
1512                 else if (af 
== AF_INET6
) 
1513                         routegenid_inet6_update(); 
1516         lck_mtx_unlock(rnh_lock
); 
1517         bzero((caddr_t
)&info
, sizeof(info
)); 
1518         info
.rti_info
[RTAX_DST
] = dst
; 
1519         info
.rti_info
[RTAX_GATEWAY
] = gateway
; 
1520         info
.rti_info
[RTAX_NETMASK
] = netmask
; 
1521         info
.rti_info
[RTAX_AUTHOR
] = src
; 
1522         rt_missmsg(RTM_REDIRECT
, &info
, flags
, error
); 
/*
 * rtioctl: routing table ioctl entry point.
 *
 * All three parameters (req, data, p) are declared unused through the
 * pragma below, so the function does not depend on its arguments.
 *
 * NOTE(review): the return type and the return statement are missing
 * from this listing, so the value handed back to the caller cannot be
 * confirmed here -- TODO check against an intact copy.
 */
1526 * Routing table ioctl interface. 
1529 rtioctl(unsigned long req
, caddr_t data
, struct proc 
*p
) 
1531 #pragma unused(p, req, data) 
/*
 * Wrapper that takes rnh_lock around ifa_ifwithroute_locked(flags, dst,
 * gateway) and stores the result in ifa.
 *
 * NOTE(review): the lines carrying the return type, the function name,
 * the first parameter (flags), the declaration of ifa and the return
 * statement are missing from this listing.  Presumably this is the
 * unlocked ifa_ifwithroute() entry point and it returns ifa -- TODO
 * confirm against an intact copy.
 */
1538         const struct sockaddr   
*dst
, 
1539         const struct sockaddr 
*gateway
) 
1543         lck_mtx_lock(rnh_lock
); 
1544         ifa 
= ifa_ifwithroute_locked(flags
, dst
, gateway
); 
1545         lck_mtx_unlock(rnh_lock
); 
1551 ifa_ifwithroute_locked(int flags
, const struct sockaddr 
*dst
, 
1552     const struct sockaddr 
*gateway
) 
1554         return (ifa_ifwithroute_common_locked((flags 
& ~RTF_IFSCOPE
), dst
, 
1555             gateway
, IFSCOPE_NONE
)); 
1559 ifa_ifwithroute_scoped_locked(int flags
, const struct sockaddr 
*dst
, 
1560     const struct sockaddr 
*gateway
, unsigned int ifscope
) 
1562         if (ifscope 
!= IFSCOPE_NONE
) 
1563                 flags 
|= RTF_IFSCOPE
; 
1565                 flags 
&= ~RTF_IFSCOPE
; 
1567         return (ifa_ifwithroute_common_locked(flags
, dst
, gateway
, ifscope
)); 
/*
 * Common worker behind ifa_ifwithroute_locked() and
 * ifa_ifwithroute_scoped_locked(): choose the interface address to use
 * for a route described by flags, dst and gw, optionally limited to
 * ifscope.
 *
 * rnh_lock must be owned (asserted).  Per the in-line comment, dst and
 * gw are copied with sa_copy() so that any scope ID they carry is
 * cleared, because interface addresses are not scoped.
 *
 * Visible selection steps: without RTF_GATEWAY, a host route is matched
 * with ifa_ifwithdstaddr(dst) and the local address with
 * ifa_ifwithaddr_scoped(gw, ifscope); with RTF_GATEWAY,
 * ifa_ifwithdstaddr(gw) is tried.  ifa_ifwithnet_scoped(gw, ifscope) and
 * a scoped route lookup on dst also appear.  If the chosen ifa's family
 * differs from dst's, ifaof_ifpforaddr() is asked for a better one on
 * the same interface.  A routing-table entry for gw may then be
 * consulted.  Finally, with RTF_IFSCOPE the ifa's interface index must
 * equal ifscope, and the ifa's family must match dst's.
 *
 * NOTE(review): the conditions linking these calls, the reference
 * count handling on ifa, the route lock calls, the preprocessor
 * conditionals and the return statement are missing from this listing,
 * so the order and fall-through between the steps cannot be confirmed
 * here -- TODO check against an intact copy.
 */
1570 static struct ifaddr 
* 
1571 ifa_ifwithroute_common_locked(int flags
, const struct sockaddr 
*dst
, 
1572     const struct sockaddr 
*gw
, unsigned int ifscope
) 
1574         struct ifaddr 
*ifa 
= NULL
; 
1575         struct rtentry 
*rt 
= NULL
; 
1576         struct sockaddr_storage dst_ss
, gw_ss
; 
1578         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
1581          * Just in case the sockaddr passed in by the caller 
1582          * contains a scope ID, make sure to clear it since 
1583          * interface addresses aren't scoped. 
1587             ((dst
->sa_family 
== AF_INET
) || 
1588             (dst
->sa_family 
== AF_INET6
))) 
1590         if (dst 
!= NULL 
&& dst
->sa_family 
== AF_INET
) 
1592                 dst 
= sa_copy(SA((uintptr_t)dst
), &dst_ss
, NULL
); 
1596             ((gw
->sa_family 
== AF_INET
) || 
1597             (gw
->sa_family 
== AF_INET6
))) 
1599         if (gw 
!= NULL 
&& gw
->sa_family 
== AF_INET
) 
1601                 gw 
= sa_copy(SA((uintptr_t)gw
), &gw_ss
, NULL
); 
1603         if (!(flags 
& RTF_GATEWAY
)) { 
1605                  * If we are adding a route to an interface, 
1606                  * and the interface is a pt to pt link 
1607                  * we should search for the destination 
1608                  * as our clue to the interface.  Otherwise 
1609                  * we can use the local address. 
1611                 if (flags 
& RTF_HOST
) { 
1612                         ifa 
= ifa_ifwithdstaddr(dst
); 
1615                         ifa 
= ifa_ifwithaddr_scoped(gw
, ifscope
); 
1618                  * If we are adding a route to a remote net 
1619                  * or host, the gateway may still be on the 
1620                  * other end of a pt to pt link. 
1622                 ifa 
= ifa_ifwithdstaddr(gw
); 
1625                 ifa 
= ifa_ifwithnet_scoped(gw
, ifscope
); 
1627                 /* Workaround to avoid gcc warning regarding const variable */ 
1628                 rt 
= rtalloc1_scoped_locked((struct sockaddr 
*)(size_t)dst
, 
1634                                 /* Become a regular mutex */ 
1635                                 RT_CONVERT_LOCK(rt
); 
1638                         RT_REMREF_LOCKED(rt
); 
1644          * Holding rnh_lock here prevents the possibility of ifa from 
1645          * changing (e.g. in_ifinit), so it is safe to access its 
1646          * ifa_addr (here and down below) without locking. 
1648         if (ifa 
!= NULL 
&& ifa
->ifa_addr
->sa_family 
!= dst
->sa_family
) { 
1649                 struct ifaddr 
*newifa
; 
1650                 /* Callee adds reference to newifa upon success */ 
1651                 newifa 
= ifaof_ifpforaddr(dst
, ifa
->ifa_ifp
); 
1652                 if (newifa 
!= NULL
) { 
1658          * If we are adding a gateway, it is quite possible that the 
1659          * routing table has a static entry in place for the gateway, 
1660          * that may not agree with info garnered from the interfaces. 
1661          * The routing table should carry more precedence than the 
1662          * interfaces in this matter.  Must be careful not to stomp 
1663          * on new entries from rtinit, hence (ifa->ifa_addr != gw). 
1666             !equal(ifa
->ifa_addr
, (struct sockaddr 
*)(size_t)gw
)) && 
1667             (rt 
= rtalloc1_scoped_locked((struct sockaddr 
*)(size_t)gw
, 
1668             0, 0, ifscope
)) != NULL
) { 
1674                         /* Become a regular mutex */ 
1675                         RT_CONVERT_LOCK(rt
); 
1678                 RT_REMREF_LOCKED(rt
); 
1682          * If an interface scope was specified, the interface index of 
1683          * the found ifaddr must be equivalent to that of the scope; 
1684          * otherwise there is no match. 
1686         if ((flags 
& RTF_IFSCOPE
) && 
1687             ifa 
!= NULL 
&& ifa
->ifa_ifp
->if_index 
!= ifscope
) { 
1693          * ifa's address family must match destination's address family 
1694          * after all is said and done. 
1697             ifa
->ifa_addr
->sa_family 
!= dst
->sa_family
) { 
/*
 * Forward declarations of two file-local functions that take a radix
 * node and an opaque argument and return int (presumably radix tree
 * walker callbacks -- TODO confirm at their call sites).
 *
 * The two member declarations that follow belong to a struct whose
 * opening and closing lines are missing from this listing: rt0 is a
 * route entry pointer and rnh a radix node head pointer.
 */
1705 static int rt_fixdelete(struct radix_node 
*, void *); 
1706 static int rt_fixchange(struct radix_node 
*, void *); 
1709         struct rtentry 
*rt0
; 
1710         struct radix_node_head 
*rnh
; 
1714 rtrequest_locked(int req
, struct sockaddr 
*dst
, struct sockaddr 
*gateway
, 
1715     struct sockaddr 
*netmask
, int flags
, struct rtentry 
**ret_nrt
) 
1717         return (rtrequest_common_locked(req
, dst
, gateway
, netmask
, 
1718             (flags 
& ~RTF_IFSCOPE
), ret_nrt
, IFSCOPE_NONE
)); 
1722 rtrequest_scoped_locked(int req
, struct sockaddr 
*dst
, 
1723     struct sockaddr 
*gateway
, struct sockaddr 
*netmask
, int flags
, 
1724     struct rtentry 
**ret_nrt
, unsigned int ifscope
) 
1726         if (ifscope 
!= IFSCOPE_NONE
) 
1727                 flags 
|= RTF_IFSCOPE
; 
1729                 flags 
&= ~RTF_IFSCOPE
; 
1731         return (rtrequest_common_locked(req
, dst
, gateway
, netmask
, 
1732             flags
, ret_nrt
, ifscope
)); 
1736  * Do appropriate manipulations of a routing tree given all the bits of 
1739  * Storing the scope ID in the radix key is an internal job that should be 
1740  * left to routines in this module.  Callers should specify the scope value 
1741  * to the "scoped" variants of route routines instead of manipulating the 
1742  * key itself.  This is typically done when creating a scoped route, e.g. 
1743  * rtrequest(RTM_ADD).  Once such a route is created and marked with the 
1744  * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it 
1745  * (RTM_RESOLVE) or to remove it (RTM_DELETE).  An exception to this is 
1746  * during certain routing socket operations where the search key might be 
1747  * derived from the routing message itself, in which case the caller must 
1748  * specify the destination address and scope value for RTM_ADD/RTM_DELETE. 
1751 rtrequest_common_locked(int req
, struct sockaddr 
*dst0
, 
1752     struct sockaddr 
*gateway
, struct sockaddr 
*netmask
, int flags
, 
1753     struct rtentry 
**ret_nrt
, unsigned int ifscope
) 
1757         struct radix_node 
*rn
; 
1758         struct radix_node_head 
*rnh
; 
1759         struct ifaddr 
*ifa 
= NULL
; 
1760         struct sockaddr 
*ndst
, *dst 
= dst0
; 
1761         struct sockaddr_storage ss
, mask
; 
1762         struct timeval caltime
; 
1763         int af 
= dst
->sa_family
; 
1764         void (*ifa_rtrequest
)(int, struct rtentry 
*, struct sockaddr 
*); 
1766 #define senderr(x) { error = x; goto bad; } 
1768         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
1770          * Find the correct routing tree to use for this Address Family 
1772         if ((rnh 
= rt_tables
[af
]) == NULL
) 
1775          * If we are adding a host route then we don't want to put 
1776          * a netmask in the tree 
1778         if (flags 
& RTF_HOST
) 
1782          * If Scoped Routing is enabled, use a local copy of the destination 
1783          * address to store the scope ID into.  This logic is repeated below 
1784          * in the RTM_RESOLVE handler since the caller does not normally 
1785          * specify such a flag during a resolve, as well as for the handling 
1786          * of IPv4 link-local address; instead, it passes in the route used for 
1787          * cloning for which the scope info is derived from.  Note also that 
1788          * in the case of RTM_DELETE, the address passed in by the caller 
1789          * might already contain the scope ID info when it is the key itself, 
1790          * thus making RTF_IFSCOPE unnecessary; one instance where it is 
1791          * explicitly set is inside route_output() as part of handling a 
1792          * routing socket request. 
1795         if (req 
!= RTM_RESOLVE 
&& ((af 
== AF_INET
) || (af 
== AF_INET6
))) { 
1797         if (req 
!= RTM_RESOLVE 
&& af 
== AF_INET
) { 
1799                 /* Transform dst into the internal routing table form */ 
1800                 dst 
= sa_copy(dst
, &ss
, &ifscope
); 
1802                 /* Transform netmask into the internal routing table form */ 
1803                 if (netmask 
!= NULL
) 
1804                         netmask 
= ma_copy(af
, netmask
, &mask
, ifscope
); 
1806                 if (ifscope 
!= IFSCOPE_NONE
) 
1807                         flags 
|= RTF_IFSCOPE
; 
1808         } else if ((flags 
& RTF_IFSCOPE
) && 
1809             (af 
!= AF_INET 
&& af 
!= AF_INET6
)) { 
1813         if (ifscope 
== IFSCOPE_NONE
) 
1814                 flags 
&= ~RTF_IFSCOPE
; 
1818                 struct rtentry 
*gwrt 
= NULL
; 
1819                 boolean_t was_router 
= FALSE
; 
1820                 uint32_t old_rt_refcnt 
= 0; 
1822                  * Remove the item from the tree and return it. 
1823                  * Complain if it is not there and do no more processing. 
1825                 if ((rn 
= rnh
->rnh_deladdr(dst
, netmask
, rnh
)) == NULL
) 
1827                 if (rn
->rn_flags 
& (RNF_ACTIVE 
| RNF_ROOT
)) { 
1828                         panic("rtrequest delete"); 
1831                 rt 
= (struct rtentry 
*)rn
; 
1834                 old_rt_refcnt 
= rt
->rt_refcnt
; 
1835                 rt
->rt_flags 
&= ~RTF_UP
; 
1837                  * Release any idle reference count held on the interface 
1838                  * as this route is no longer externally visible. 
1840                 rt_clear_idleref(rt
); 
1842                  * Take an extra reference to handle the deletion of a route 
1843                  * entry whose reference count is already 0; e.g. an expiring 
1844                  * cloned route entry or an entry that was added to the table 
1845                  * with 0 reference. If the caller is interested in this route, 
1846                  * we will return it with the reference intact. Otherwise we 
1847                  * will decrement the reference via rtfree_locked() and then 
1848                  * possibly deallocate it. 
1850                 RT_ADDREF_LOCKED(rt
); 
1853                  * For consistency, in case the caller didn't set the flag. 
1855                 rt
->rt_flags 
|= RTF_CONDEMNED
; 
1858                  * Clear RTF_ROUTER if it's set. 
1860                 if (rt
->rt_flags 
& RTF_ROUTER
) { 
1862                         VERIFY(rt
->rt_flags 
& RTF_HOST
); 
1863                         rt
->rt_flags 
&= ~RTF_ROUTER
; 
1867                  * Enqueue work item to invoke callback for this route entry 
1869                  * If the old count is 0, it implies that last reference is being 
1870                  * removed and there's no one listening for this route event. 
1872                 if (old_rt_refcnt 
!= 0) 
1873                         route_event_enqueue_nwk_wq_entry(rt
, NULL
, 
1874                             ROUTE_ENTRY_DELETED
, NULL
, TRUE
); 
1877                  * Now search what's left of the subtree for any cloned 
1878                  * routes which might have been formed from this node. 
1880                 if ((rt
->rt_flags 
& (RTF_CLONING 
| RTF_PRCLONING
)) && 
1883                         rnh
->rnh_walktree_from(rnh
, dst
, rt_mask(rt
), 
1889                         struct route_event rt_ev
; 
1890                         route_event_init(&rt_ev
, rt
, NULL
, ROUTE_LLENTRY_DELETED
); 
1892                         (void) rnh
->rnh_walktree(rnh
, 
1893                             route_event_walktree
, (void *)&rt_ev
); 
1898                  * Remove any external references we may have. 
1900                 if ((gwrt 
= rt
->rt_gwroute
) != NULL
) 
1901                         rt
->rt_gwroute 
= NULL
; 
1904                  * give the protocol a chance to keep things in sync. 
1906                 if ((ifa 
= rt
->rt_ifa
) != NULL
) { 
1908                         ifa_rtrequest 
= ifa
->ifa_rtrequest
; 
1910                         if (ifa_rtrequest 
!= NULL
) 
1911                                 ifa_rtrequest(RTM_DELETE
, rt
, NULL
); 
1912                         /* keep reference on rt_ifa */ 
1917                  * one more rtentry floating around that is not 
1918                  * linked to the routing table. 
1920                 (void) OSIncrementAtomic(&rttrash
); 
1921                 if (rte_debug 
& RTD_DEBUG
) { 
1922                         TAILQ_INSERT_TAIL(&rttrash_head
, 
1923                             (struct rtentry_dbg 
*)rt
, rtd_trash_link
); 
1927                  * If this is the (non-scoped) default route, clear 
1928                  * the interface index used for the primary ifscope. 
1930                 if (rt_primary_default(rt
, rt_key(rt
))) { 
1931                         set_primary_ifscope(rt_key(rt
)->sa_family
, 
1937                  * If this is a change in a default route, update 
1938                  * necp client watchers to re-evaluate 
1940                 if (SA_DEFAULT(rt_key(rt
))) { 
1941                         if (rt
->rt_ifp 
!= NULL
) { 
1942                                 ifnet_touch_lastupdown(rt
->rt_ifp
); 
1944                         necp_update_all_clients(); 
1951                  * This might result in another rtentry being freed if 
1952                  * we held its last reference.  Do this after the rtentry 
1953                  * lock is dropped above, as it could lead to the same 
1954                  * lock being acquired if gwrt is a clone of rt. 
1957                         rtfree_locked(gwrt
); 
1960                  * If the caller wants it, then it can have it, 
1961                  * but it's up to it to free the rtentry as we won't be 
1964                 if (ret_nrt 
!= NULL
) { 
1965                         /* Return the route to caller with reference intact */ 
1968                         /* Dereference or deallocate the route */ 
1972                         routegenid_inet_update(); 
1974                 else if (af 
== AF_INET6
) 
1975                         routegenid_inet6_update(); 
1980                 if (ret_nrt 
== NULL 
|| (rt 
= *ret_nrt
) == NULL
) 
1983                  * According to the UNIX conformance tests, we need to return 
1984                  * ENETUNREACH when the parent route is RTF_REJECT. 
1985                  * However, there isn't any point in cloning RTF_REJECT 
1986                  * routes, so we immediately return an error. 
1988                 if (rt
->rt_flags 
& RTF_REJECT
) { 
1989                         if (rt
->rt_flags 
& RTF_HOST
) { 
1990                                 senderr(EHOSTUNREACH
); 
1992                                 senderr(ENETUNREACH
); 
1996                  * If cloning, we have the parent route given by the caller 
1997                  * and will use its rt_gateway, rt_rmx as part of the cloning 
1998                  * process below.  Since rnh_lock is held at this point, the 
1999                  * parent's rt_ifa and rt_gateway will not change, and its 
2000                  * relevant rt_flags will not change as well.  The only thing 
2001                  * that could change are the metrics, and thus we hold the 
2002                  * parent route's rt_lock later on during the actual copying 
2007                 flags 
= rt
->rt_flags 
& 
2008                     ~(RTF_CLONING 
| RTF_PRCLONING 
| RTF_STATIC
); 
2009                 flags 
|= RTF_WASCLONED
; 
2010                 gateway 
= rt
->rt_gateway
; 
2011                 if ((netmask 
= rt
->rt_genmask
) == NULL
) 
2015                 if (af 
!= AF_INET 
&& af 
!= AF_INET6
) 
2022                  * When scoped routing is enabled, cloned entries are 
2023                  * always scoped according to the interface portion of 
2024                  * the parent route.  The exception to this are IPv4 
2025                  * link local addresses, or those routes that are cloned 
2026                  * from a RTF_PROXY route.  For the latter, the clone 
2027                  * gets to keep the RTF_PROXY flag. 
2029                 if ((af 
== AF_INET 
&& 
2030                     IN_LINKLOCAL(ntohl(SIN(dst
)->sin_addr
.s_addr
))) || 
2031                     (rt
->rt_flags 
& RTF_PROXY
)) { 
2032                         ifscope 
= IFSCOPE_NONE
; 
2033                         flags 
&= ~RTF_IFSCOPE
; 
2035                          * These types of cloned routes aren't currently 
2036                          * eligible for idle interface reference counting. 
2038                         flags 
|= RTF_NOIFREF
; 
2040                         if (flags 
& RTF_IFSCOPE
) { 
2041                                 ifscope 
= (af 
== AF_INET
) ? 
2042                                     sin_get_ifscope(rt_key(rt
)) : 
2043                                     sin6_get_ifscope(rt_key(rt
)); 
2045                                 ifscope 
= rt
->rt_ifp
->if_index
; 
2046                                 flags 
|= RTF_IFSCOPE
; 
2048                         VERIFY(ifscope 
!= IFSCOPE_NONE
); 
2052                  * Transform dst into the internal routing table form, 
2053                  * clearing out the scope ID field if ifscope isn't set. 
2055                 dst 
= sa_copy(dst
, &ss
, (ifscope 
== IFSCOPE_NONE
) ? 
2058                 /* Transform netmask into the internal routing table form */ 
2059                 if (netmask 
!= NULL
) 
2060                         netmask 
= ma_copy(af
, netmask
, &mask
, ifscope
); 
2065                 if ((flags 
& RTF_GATEWAY
) && !gateway
) { 
2066                         panic("rtrequest: RTF_GATEWAY but no gateway"); 
2069                 if (flags 
& RTF_IFSCOPE
) { 
2070                         ifa 
= ifa_ifwithroute_scoped_locked(flags
, dst0
, 
2073                         ifa 
= ifa_ifwithroute_locked(flags
, dst0
, gateway
); 
2076                         senderr(ENETUNREACH
); 
2079                  * We land up here for both RTM_RESOLVE and RTM_ADD 
2080                  * when we decide to create a route. 
2082                 if ((rt 
= rte_alloc()) == NULL
) 
2084                 Bzero(rt
, sizeof(*rt
)); 
2086                 eventhandler_lists_ctxt_init(&rt
->rt_evhdlr_ctxt
); 
2087                 getmicrotime(&caltime
); 
2088                 rt
->base_calendartime 
= caltime
.tv_sec
; 
2089                 rt
->base_uptime 
= net_uptime(); 
2091                 rt
->rt_flags 
= RTF_UP 
| flags
; 
2094                  * Point the generation ID to the tree's. 
2098                         rt
->rt_tree_genid 
= &route_genid_inet
; 
2102                         rt
->rt_tree_genid 
= &route_genid_inet6
; 
2110                  * Add the gateway. Possibly re-malloc-ing the storage for it 
2111                  * also add the rt_gwroute if possible. 
2113                 if ((error 
= rt_setgate(rt
, dst
, gateway
)) != 0) { 
2116                         nstat_route_detach(rt
); 
2117                         rte_lock_destroy(rt
); 
2123                  * point to the (possibly newly malloc'd) dest address. 
2128                  * make sure it contains the value we want (masked if needed). 
2131                         rt_maskedcopy(dst
, ndst
, netmask
); 
2133                         Bcopy(dst
, ndst
, dst
->sa_len
); 
2136                  * Note that we now have a reference to the ifa. 
2137                  * This moved from below so that rnh->rnh_addaddr() can 
2138                  * examine the ifa and  ifa->ifa_ifp if it so desires. 
2141                 rt
->rt_ifp 
= rt
->rt_ifa
->ifa_ifp
; 
2143                 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 
2145                 rn 
= rnh
->rnh_addaddr((caddr_t
)ndst
, (caddr_t
)netmask
, 
2148                         struct rtentry 
*rt2
; 
2150                          * Uh-oh, we already have one of these in the tree. 
2151                          * We do a special hack: if the route that's already 
2152                          * there was generated by the protocol-cloning 
2153                          * mechanism, then we just blow it away and retry 
2154                          * the insertion of the new one. 
2156                         if (flags 
& RTF_IFSCOPE
) { 
2157                                 rt2 
= rtalloc1_scoped_locked(dst0
, 0, 
2158                                     RTF_CLONING 
| RTF_PRCLONING
, ifscope
); 
2160                                 rt2 
= rtalloc1_locked(dst
, 0, 
2161                                     RTF_CLONING 
| RTF_PRCLONING
); 
2163                         if (rt2 
&& rt2
->rt_parent
) { 
2165                                  * rnh_lock is held here, so rt_key and 
2166                                  * rt_gateway of rt2 will not change. 
2168                                 (void) rtrequest_locked(RTM_DELETE
, rt_key(rt2
), 
2169                                     rt2
->rt_gateway
, rt_mask(rt2
), 
2172                                 rn 
= rnh
->rnh_addaddr((caddr_t
)ndst
, 
2173                                     (caddr_t
)netmask
, rnh
, rt
->rt_nodes
); 
2175                                 /* undo the extra ref we got */ 
2181                  * If it still failed to go into the tree, 
2182                  * then un-make it (this should be a function) 
2185                         /* Clear gateway route */ 
2186                         rt_set_gwroute(rt
, rt_key(rt
), NULL
); 
2188                                 IFA_REMREF(rt
->rt_ifa
); 
2193                         nstat_route_detach(rt
); 
2194                         rte_lock_destroy(rt
); 
2199                 rt
->rt_parent 
= NULL
; 
2202                  * If we got here from RESOLVE, then we are cloning so clone 
2203                  * the rest, and note that we are a clone (and increment the 
2204                  * parent's references).  rnh_lock is still held, which prevents 
2205                  * a lookup from returning the newly-created route.  Hence 
2206                  * holding and releasing the parent's rt_lock while still 
2207                  * holding the route's rt_lock is safe since the new route 
2208                  * is not yet externally visible. 
2210                 if (req 
== RTM_RESOLVE
) { 
2211                         RT_LOCK_SPIN(*ret_nrt
); 
2212                         VERIFY((*ret_nrt
)->rt_expire 
== 0 || 
2213                             (*ret_nrt
)->rt_rmx
.rmx_expire 
!= 0); 
2214                         VERIFY((*ret_nrt
)->rt_expire 
!= 0 || 
2215                             (*ret_nrt
)->rt_rmx
.rmx_expire 
== 0); 
2216                         rt
->rt_rmx 
= (*ret_nrt
)->rt_rmx
; 
2217                         rt_setexpire(rt
, (*ret_nrt
)->rt_expire
); 
2218                         if ((*ret_nrt
)->rt_flags 
& 
2219                             (RTF_CLONING 
| RTF_PRCLONING
)) { 
2220                                 rt
->rt_parent 
= (*ret_nrt
); 
2221                                 RT_ADDREF_LOCKED(*ret_nrt
); 
2223                         RT_UNLOCK(*ret_nrt
); 
2227                  * if this protocol has something to add to this then 
2228                  * allow it to do that as well. 
2231                 ifa_rtrequest 
= ifa
->ifa_rtrequest
; 
2233                 if (ifa_rtrequest 
!= NULL
) 
2234                         ifa_rtrequest(req
, rt
, SA(ret_nrt 
? *ret_nrt 
: NULL
)); 
2239                  * If this is the (non-scoped) default route, record 
2240                  * the interface index used for the primary ifscope. 
2242                 if (rt_primary_default(rt
, rt_key(rt
))) { 
2243                         set_primary_ifscope(rt_key(rt
)->sa_family
, 
2244                             rt
->rt_ifp
->if_index
); 
2249                  * If this is a change in a default route, update 
2250                  * necp client watchers to re-evaluate 
2252                 if (SA_DEFAULT(rt_key(rt
))) { 
2253                         if (rt
->rt_ifp 
!= NULL
) { 
2254                                 ifnet_touch_lastupdown(rt
->rt_ifp
); 
2256                         necp_update_all_clients(); 
2261                  * actually return a resultant rtentry and 
2262                  * give the caller a single reference. 
2266                         RT_ADDREF_LOCKED(rt
); 
2270                         routegenid_inet_update(); 
2272                 else if (af 
== AF_INET6
) 
2273                         routegenid_inet6_update(); 
2279                  * We repeat the same procedures from rt_setgate() here 
2280                  * because they weren't completed when we called it earlier, 
2281                  * since the node was embryonic. 
2283                 if ((rt
->rt_flags 
& RTF_GATEWAY
) && rt
->rt_gwroute 
!= NULL
) 
2284                         rt_set_gwroute(rt
, rt_key(rt
), rt
->rt_gwroute
); 
2286                 if (req 
== RTM_ADD 
&& 
2287                     !(rt
->rt_flags 
& RTF_HOST
) && rt_mask(rt
) != NULL
) { 
2288                         struct rtfc_arg arg
; 
2292                         rnh
->rnh_walktree_from(rnh
, rt_key(rt
), rt_mask(rt
), 
2293                             rt_fixchange
, &arg
); 
2298                 nstat_route_new_entry(rt
); 
2309 rtrequest(int req
, struct sockaddr 
*dst
, struct sockaddr 
*gateway
, 
2310     struct sockaddr 
*netmask
, int flags
, struct rtentry 
**ret_nrt
) 
2313         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
2314         lck_mtx_lock(rnh_lock
); 
2315         error 
= rtrequest_locked(req
, dst
, gateway
, netmask
, flags
, ret_nrt
); 
2316         lck_mtx_unlock(rnh_lock
); 
2321 rtrequest_scoped(int req
, struct sockaddr 
*dst
, struct sockaddr 
*gateway
, 
2322     struct sockaddr 
*netmask
, int flags
, struct rtentry 
**ret_nrt
, 
2323     unsigned int ifscope
) 
2326         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
2327         lck_mtx_lock(rnh_lock
); 
2328         error 
= rtrequest_scoped_locked(req
, dst
, gateway
, netmask
, flags
, 
2330         lck_mtx_unlock(rnh_lock
); 
/*
 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
 * (i.e., the routes related to it by the operation of cloning).  This
 * routine is iterated over all potential former-child-routes by way of
 * rnh->rnh_walktree_from() above, and those that actually are children of
 * the late parent (passed in as VP here) are themselves deleted.
 *
 * rn is the candidate node, viewed as a struct rtentry; vp is the parent
 * route that has just been deleted.  Returns the result of the nested
 * RTM_DELETE request when the candidate is removed, and 0 when it is
 * left alone.  rnh_lock must be held by the caller.
 */
static int
rt_fixdelete(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtentry *rt0 = vp;

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	/*
	 * Only entries whose rt_parent is rt0 are removed, and entries
	 * that are themselves cloning routes are never removed here.
	 */
	if (rt->rt_parent == rt0 &&
	    !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
		/*
		 * Safe to drop rt_lock and use rt_key, since holding
		 * rnh_lock here prevents another thread from calling
		 * rt_setgate() on this route.
		 */
		RT_UNLOCK(rt);
		return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
		    rt_mask(rt), rt->rt_flags, NULL));
	}
	RT_UNLOCK(rt);
	return (0);
}
/*
 * This routine is called from rt_setgate() to do the analogous thing for
 * adds and changes.  There is the added complication in this case of a
 * middle insert; i.e., insertion of a new network route between an older
 * network route and (cloned) host routes.  For this reason, a simple check
 * of rt->rt_parent is insufficient; each candidate route must be tested
 * against the (mask, value) of the new route (passed as before in vp)
 * to see if the new route matches it.
 *
 * rn is the candidate node, viewed as a struct rtentry; vp points to a
 * struct rtfc_arg carrying the new or changed route (rt0) and the radix
 * tree head (rnh).  Returns 0 when the candidate is left alone, otherwise
 * the result of the nested RTM_DELETE request.  rnh_lock must be held.
 *
 * XXX - it may be possible to do fixdelete() for changes and reserve this
 * routine just for adds.  I'm not sure why I thought it was necessary to do
 * changes this way.
 */
static int
rt_fixchange(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtfc_arg *ap = vp;
	struct rtentry *rt0 = ap->rt0;
	struct radix_node_head *rnh = ap->rnh;
	u_char *xk1, *xm1, *xk2, *xmp;
	int i, len;

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);

	/* Only clones are candidates: skip parentless and cloning routes. */
	if (!rt->rt_parent ||
	    (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
		RT_UNLOCK(rt);
		return (0);
	}

	/* A direct child of rt0 needs no (mask, value) comparison. */
	if (rt->rt_parent == rt0)
		goto delete_rt;

	/*
	 * There probably is a function somewhere which does this...
	 * if not, there should be.
	 */
	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);

	xk1 = (u_char *)rt_key(rt0);
	xm1 = (u_char *)rt_mask(rt0);
	xk2 = (u_char *)rt_key(rt);

	/*
	 * Avoid applying a less specific route; do this only if the parent
	 * route (rt->rt_parent) is a network route, since otherwise its mask
	 * will be NULL if it is a cloning host route.
	 */
	if ((xmp = (u_char *)rt_mask(rt->rt_parent)) != NULL) {
		int mlen = rt_mask(rt->rt_parent)->sa_len;
		if (mlen > rt_mask(rt0)->sa_len) {
			RT_UNLOCK(rt);
			return (0);
		}

		for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
			/*
			 * The left-hand side reduces to (xmp[i] & xm1[i]),
			 * so this fires when the parent's mask has a bit
			 * set that rt0's mask does not.
			 */
			if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
				RT_UNLOCK(rt);
				return (0);
			}
		}
	}

	/* rt's key, masked with rt0's mask, must equal rt0's key. */
	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
		if ((xk2[i] & xm1[i]) != xk1[i]) {
			RT_UNLOCK(rt);
			return (0);
		}
	}

	/*
	 * OK, this node is a clone, and matches the node currently being
	 * changed/added under the node's mask.  So, get rid of it.
	 */
delete_rt:
	/*
	 * Safe to drop rt_lock and use rt_key, since holding rnh_lock here
	 * prevents another thread from calling rt_setgate() on this route.
	 */
	RT_UNLOCK(rt);
	return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
	    rt_mask(rt), rt->rt_flags, NULL));
}
/*
 * Round a sockaddr length up to the next multiple of 32 bytes; a value
 * that is already a multiple of 32 (including 0) is returned unchanged.
 * The result has type uintptr_t.
 *
 * The padding reduces or even eliminates the need to re-allocate the
 * chunk of memory used for rt_key and rt_gateway in the event the
 * gateway portion changes.  Certain code paths (e.g. IPSec) are notorious
 * for caching the address of rt_gateway; this rounding-up would help
 * ensure that the gateway portion never gets deallocated (though it may
 * change contents) and thus greatly simplifies things.
 */
#define	SA_SIZE(x) ((((uintptr_t)(x)) + (uintptr_t)31) & ~(uintptr_t)31)
/*
 * Sets the gateway and/or gateway route portion of a route; may be
 * called on an existing route to modify the gateway portion.  Both
 * rt_key and rt_gateway are allocated out of the same memory chunk.
 * Route entry lock must be held by caller; this routine will return
 * with the lock held.
 *
 * Returns 0 on success, otherwise:
 *	EINVAL		dst is neither AF_INET nor AF_INET6
 *	EBUSY		the route is marked RTF_CONDEMNED
 *	EADDRNOTAVAIL	a gateway route whose destination equals its gateway
 *	EADDRINUSE	the route to the gateway is the route itself
 *	EHOSTUNREACH/ENETUNREACH
 *			scoped route whose gateway route uses another interface
 *	ENOBUFS		the key/gateway storage could not be allocated
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
	int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len);
	struct radix_node_head *rnh = NULL;
	boolean_t loop = FALSE;

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		return (EINVAL);
	}

	rnh = rt_tables[dst->sa_family];
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * If this is for a route that is on its way of being removed,
	 * or is temporarily frozen, reject the modification request.
	 */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return (EBUSY);
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/*
	 * Work out whether the destination and the gateway are the same
	 * address.  When the lengths match, both are first run through
	 * sa_copy() into scratch storage and the copies are compared.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if ((dst->sa_len == gate->sa_len) &&
		    (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) {
			struct sockaddr_storage dst_ss, gate_ss;

			(void) sa_copy(dst, &dst_ss, NULL);
			(void) sa_copy(gate, &gate_ss, NULL);

			loop = equal(SA(&dst_ss), SA(&gate_ss));
		} else {
			loop = (dst->sa_len == gate->sa_len &&
			    equal(dst, gate));
		}
	}

	/*
	 * A (cloning) network route with the destination equal to the gateway
	 * will create an endless loop (see notes below), so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    RTF_GATEWAY) && loop) {
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * A host route with the destination equal to the gateway
	 * will interfere with keeping LLINFO in the routing
	 * table, so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    (RTF_HOST|RTF_GATEWAY)) && loop) {
		/*
		 * The route might already exist if this is an RTM_CHANGE
		 * or a routing redirect, so try to delete it.
		 */
		if (rt_key(rt) != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, rt_gateway,
			 * since holding rnh_lock here prevents another thread
			 * from calling rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			(void) rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			RT_LOCK(rt);
		}
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * The destination is not directly reachable.  Get a route
	 * to the next-hop gateway and store it in rt_gwroute.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;
		unsigned int ifscope;

		if (dst->sa_family == AF_INET)
			ifscope = sin_get_ifscope(dst);
		else if (dst->sa_family == AF_INET6)
			ifscope = sin6_get_ifscope(dst);
		else
			ifscope = IFSCOPE_NONE;

		RT_UNLOCK(rt);
		/*
		 * Don't ignore RTF_CLONING, since we prefer that rt_gwroute
		 * points to a clone rather than a cloning route; see above
		 * check for cloning loop avoidance (dst == gate).
		 */
		gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
		if (gwrt != NULL)
			RT_LOCK_ASSERT_NOTHELD(gwrt);
		RT_LOCK(rt);

		/*
		 * Cloning loop avoidance:
		 *
		 * In the presence of protocol-cloning and bad configuration,
		 * it is possible to get stuck in bottomless mutual recursion
		 * (rtrequest rt_setgate rtalloc1).  We avoid this by not
		 * allowing protocol-cloning to operate for gateways (which
		 * is probably the correct choice anyway), and avoid the
		 * resulting reference loops by disallowing any route to run
		 * through itself as a gateway.  This is obviously mandatory
		 * when we get rt->rt_output().  It implies that a route to
		 * the gateway must already be present in the system in order
		 * for the gateway to be referred to by another route.
		 */
		if (gwrt == rt) {
			RT_REMREF_LOCKED(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EADDRINUSE); /* failure */
		}

		/*
		 * If scoped, the gateway route must use the same interface;
		 * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt
		 * should not change and are freely accessible.
		 */
		if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
		    gwrt != NULL && gwrt->rt_ifp != NULL &&
		    gwrt->rt_ifp->if_index != ifscope) {
			rtfree_locked(gwrt);	/* rt != gwrt, no deadlock */
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return ((rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH);
		}

		/* Check again since we dropped the lock above */
		if (rt->rt_flags & RTF_CONDEMNED) {
			if (gwrt != NULL)
				rtfree_locked(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EBUSY);
		}

		/* Set gateway route; callee adds ref to gwrt if non-NULL */
		rt_set_gwroute(rt, dst, gwrt);

		/*
		 * In case the (non-scoped) default route gets modified via
		 * an ICMP redirect, record the interface index used for the
		 * primary ifscope.  Also done in rt_setif() to take care
		 * of the non-redirect cases.
		 */
		if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) {
			set_primary_ifscope(dst->sa_family,
			    rt->rt_ifp->if_index);
		}

#if NECP
		/*
		 * If this is a change in a default route, update
		 * necp client watchers to re-evaluate
		 */
		if (SA_DEFAULT(dst)) {
			necp_update_all_clients();
		}
#endif /* NECP */

		/*
		 * Tell the kernel debugger about the new default gateway
		 * if the gateway route uses the primary interface, or
		 * if we are in a transient state before the non-scoped
		 * default gateway is installed (similar to how the system
		 * was behaving in the past).  In future, it would be good
		 * to do all this only when KDP is enabled.
		 */
		if ((dst->sa_family == AF_INET) &&
		    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
		    (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
		    get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) {
			kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)->
			    sdl_data);
		}

		/* Release extra ref from rtalloc1() */
		if (gwrt != NULL)
			RT_REMREF(gwrt);
	}

	/*
	 * Prepare to store the gateway in rt_gateway.  Both dst and gateway
	 * are stored one after the other in the same malloc'd chunk.  If we
	 * have room, reuse the old buffer since rt_gateway already points
	 * to the right place.  Otherwise, malloc a new block and update
	 * the 'dst' address and point rt_gateway to the right place.
	 */
	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) {
		caddr_t new;

		/* The underlying allocation is done with M_WAITOK set */
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL) {
			/* Clear gateway route */
			rt_set_gwroute(rt, dst, NULL);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (ENOBUFS);
		}

		/*
		 * Copy from 'dst' and not rt_key(rt) because we can get
		 * here to initialize a newly allocated route entry, in
		 * which case rt_key(rt) is NULL (and so does rt_gateway).
		 */
		bzero(new, dlen + glen);
		Bcopy(dst, new, dst->sa_len);
		R_Free(rt_key(rt));	/* free old block; NULL is okay */
		rt->rt_nodes->rn_key = new;
		/* The gateway lives dlen (rounded-up key size) bytes in. */
		rt->rt_gateway = (struct sockaddr *)(new + dlen);
	}

	/*
	 * Copy the new gateway value into the memory chunk.
	 */
	Bcopy(gate, rt->rt_gateway, gate->sa_len);

	/*
	 * For consistency between rt_gateway and rt_key(gwrt).
	 */
	if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) {
		if (rt->rt_gateway->sa_family == AF_INET &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
			sin_set_ifscope(rt->rt_gateway,
			    sin_get_ifscope(rt_key(rt->rt_gwroute)));
		} else if (rt->rt_gateway->sa_family == AF_INET6 &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET6) {
			sin6_set_ifscope(rt->rt_gateway,
			    sin6_get_ifscope(rt_key(rt->rt_gwroute)));
		}
	}

	/*
	 * This isn't going to do anything useful for host routes, so
	 * don't bother.  Also make sure we have a reasonable mask
	 * (we don't yet have one during adds).
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		RT_UNLOCK(rt);
		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
		    rt_fixchange, &arg);
		RT_LOCK(rt);
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
	return (0);
}
/*
 * Replace the gateway route (rt_gwroute) of rt with gwrt, which may be
 * NULL to simply clear it.  Any previous gateway route is released, and
 * a reference on the new one is taken on behalf of rt.  dst is the
 * destination of rt; it is consulted (via SA_DEFAULT) when deciding
 * whether gwrt should be flagged RTF_ROUTER.  Both rnh_lock and the
 * lock of rt must be held by the caller.
 */
void
rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
{
	boolean_t gwrt_isrouter;

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	if (gwrt != NULL)
		RT_ADDREF(gwrt);	/* for this routine */

	/*
	 * Get rid of existing gateway route; if rt_gwroute is already
	 * set to gwrt, this is slightly redundant (though safe since
	 * we held an extra ref above) but makes the code simpler.
	 */
	if (rt->rt_gwroute != NULL) {
		struct rtentry *ogwrt = rt->rt_gwroute;

		VERIFY(rt != ogwrt);	/* sanity check */
		rt->rt_gwroute = NULL;
		/* rt's lock is not held across the release of ogwrt. */
		RT_UNLOCK(rt);
		rtfree_locked(ogwrt);
		RT_LOCK(rt);
		VERIFY(rt->rt_gwroute == NULL);
	}

	/*
	 * And associate the new gateway route.
	 */
	if ((rt->rt_gwroute = gwrt) != NULL) {
		RT_ADDREF(gwrt);	/* for rt */

		if (rt->rt_flags & RTF_WASCLONED) {
			/* rt_parent might be NULL if rt is embryonic */
			gwrt_isrouter = (rt->rt_parent != NULL &&
			    SA_DEFAULT(rt_key(rt->rt_parent)) &&
			    !RT_HOST(rt->rt_parent));
		} else {
			gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt));
		}

		/* If gwrt points to a default router, mark it accordingly */
		if (gwrt_isrouter && RT_HOST(gwrt) &&
		    !(gwrt->rt_flags & RTF_ROUTER)) {
			RT_LOCK(gwrt);
			gwrt->rt_flags |= RTF_ROUTER;
			RT_UNLOCK(gwrt);
		}

		RT_REMREF(gwrt);	/* for this routine */
	}
}
2795 rt_maskedcopy(const struct sockaddr 
*src
, struct sockaddr 
*dst
, 
2796     const struct sockaddr 
*netmask
) 
2798         const char *netmaskp 
= &netmask
->sa_data
[0]; 
2799         const char *srcp 
= &src
->sa_data
[0]; 
2800         char *dstp 
= &dst
->sa_data
[0]; 
2801         const char *maskend 
= (char *)dst
 
2802                                     + MIN(netmask
->sa_len
, src
->sa_len
); 
2803         const char *srcend 
= (char *)dst 
+ src
->sa_len
; 
2805         dst
->sa_len 
= src
->sa_len
; 
2806         dst
->sa_family 
= src
->sa_family
; 
2808         while (dstp 
< maskend
) 
2809                 *dstp
++ = *srcp
++ & *netmaskp
++; 
2811                 memset(dstp
, 0, (size_t)(srcend 
- dstp
)); 
2815  * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the 
2816  * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped). 
2818 static struct radix_node 
* 
2819 node_lookup(struct sockaddr 
*dst
, struct sockaddr 
*netmask
, 
2820     unsigned int ifscope
) 
2822         struct radix_node_head 
*rnh
; 
2823         struct radix_node 
*rn
; 
2824         struct sockaddr_storage ss
, mask
; 
2825         int af 
= dst
->sa_family
; 
2826         struct matchleaf_arg ma 
= { ifscope 
}; 
2827         rn_matchf_t 
*f 
= rn_match_ifscope
; 
2830         if (af 
!= AF_INET 
&& af 
!= AF_INET6
) 
2833         rnh 
= rt_tables
[af
]; 
2836          * Transform dst into the internal routing table form, 
2837          * clearing out the scope ID field if ifscope isn't set. 
2839         dst 
= sa_copy(dst
, &ss
, (ifscope 
== IFSCOPE_NONE
) ? NULL 
: &ifscope
); 
2841         /* Transform netmask into the internal routing table form */ 
2842         if (netmask 
!= NULL
) 
2843                 netmask 
= ma_copy(af
, netmask
, &mask
, ifscope
); 
2845         if (ifscope 
== IFSCOPE_NONE
) 
2848         rn 
= rnh
->rnh_lookup_args(dst
, netmask
, rnh
, f
, w
); 
2849         if (rn 
!= NULL 
&& (rn
->rn_flags 
& RNF_ROOT
)) 
2856  * Lookup the AF_INET/AF_INET6 non-scoped default route. 
2858 static struct radix_node 
* 
2859 node_lookup_default(int af
) 
2861         struct radix_node_head 
*rnh
; 
2863         VERIFY(af 
== AF_INET 
|| af 
== AF_INET6
); 
2864         rnh 
= rt_tables
[af
]; 
2866         return (af 
== AF_INET 
? rnh
->rnh_lookup(&sin_def
, NULL
, rnh
) : 
2867             rnh
->rnh_lookup(&sin6_def
, NULL
, rnh
)); 
2871 rt_ifa_is_dst(struct sockaddr 
*dst
, struct ifaddr 
*ifa
) 
2873         boolean_t result 
= FALSE
; 
2875         if (ifa 
== NULL 
|| ifa
->ifa_addr 
== NULL
) 
2880         if (dst
->sa_family 
== ifa
->ifa_addr
->sa_family 
&& 
2881             ((dst
->sa_family 
== AF_INET 
&& 
2882             SIN(dst
)->sin_addr
.s_addr 
== 
2883             SIN(ifa
->ifa_addr
)->sin_addr
.s_addr
) || 
2884             (dst
->sa_family 
== AF_INET6 
&& 
2885             SA6_ARE_ADDR_EQUAL(SIN6(dst
), SIN6(ifa
->ifa_addr
))))) 
2894  * Common routine to lookup/match a route.  It invokes the lookup/matchaddr 
2895  * callback which could be address family-specific.  The main difference 
2896  * between the two (at least for AF_INET/AF_INET6) is that a lookup does 
2897  * not alter the expiring state of a route, whereas a match would unexpire 
2898  * or revalidate the route. 
2900  * The optional scope or interface index property of a route allows for a 
2901  * per-interface route instance.  This permits multiple route entries having 
2902  * the same destination (but not necessarily the same gateway) to exist in 
2903  * the routing table; each of these entries is specific to the corresponding 
2904  * interface.  This is made possible by storing the scope ID value into the 
2905  * radix key, thus making each route entry unique.  These scoped entries 
2906  * exist along with the regular, non-scoped entries in the same radix tree 
2907  * for a given address family (AF_INET/AF_INET6); the scope logically 
2908  * partitions it into multiple per-interface sub-trees. 
2910  * When a scoped route lookup is performed, the routing table is searched for 
2911  * the best match that would result in a route using the same interface as the 
2912  * one associated with the scope (the exception to this are routes that point 
2913  * to the loopback interface).  The search rule follows the longest matching 
2914  * prefix with the additional interface constraint. 
2916 static struct rtentry 
* 
2917 rt_lookup_common(boolean_t lookup_only
, boolean_t coarse
, struct sockaddr 
*dst
, 
2918     struct sockaddr 
*netmask
, struct radix_node_head 
*rnh
, unsigned int ifscope
) 
2920         struct radix_node 
*rn0
, *rn 
= NULL
; 
2921         int af 
= dst
->sa_family
; 
2922         struct sockaddr_storage dst_ss
; 
2923         struct sockaddr_storage mask_ss
; 
2925 #if (DEVELOPMENT || DEBUG) 
2926         char dbuf
[MAX_SCOPE_ADDR_STR_LEN
], gbuf
[MAX_IPv6_STR_LEN
]; 
2927         char s_dst
[MAX_IPv6_STR_LEN
], s_netmask
[MAX_IPv6_STR_LEN
]; 
2929         VERIFY(!coarse 
|| ifscope 
== IFSCOPE_NONE
); 
2931         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
2934          * While we have rnh_lock held, see if we need to schedule the timer. 
2936         if (nd6_sched_timeout_want
) 
2937                 nd6_sched_timeout(NULL
, NULL
); 
2944          * Non-scoped route lookup. 
2947         if (af 
!= AF_INET 
&& af 
!= AF_INET6
) { 
2949         if (af 
!= AF_INET
) { 
2951                 rn 
= rnh
->rnh_matchaddr(dst
, rnh
); 
2954                  * Don't return a root node; also, rnh_matchaddr callback 
2955                  * would have done the necessary work to clear RTPRF_OURS 
2956                  * for certain protocol families. 
2958                 if (rn 
!= NULL 
&& (rn
->rn_flags 
& RNF_ROOT
)) 
2961                         RT_LOCK_SPIN(RT(rn
)); 
2962                         if (!(RT(rn
)->rt_flags 
& RTF_CONDEMNED
)) { 
2963                                 RT_ADDREF_LOCKED(RT(rn
)); 
2973         /* Transform dst/netmask into the internal routing table form */ 
2974         dst 
= sa_copy(dst
, &dst_ss
, &ifscope
); 
2975         if (netmask 
!= NULL
) 
2976                 netmask 
= ma_copy(af
, netmask
, &mask_ss
, ifscope
); 
2977         dontcare 
= (ifscope 
== IFSCOPE_NONE
); 
2979 #if (DEVELOPMENT || DEBUG) 
2982                         (void) inet_ntop(af
, &SIN(dst
)->sin_addr
.s_addr
, 
2983                             s_dst
, sizeof (s_dst
)); 
2985                         (void) inet_ntop(af
, &SIN6(dst
)->sin6_addr
, 
2986                             s_dst
, sizeof (s_dst
)); 
2988                 if (netmask 
!= NULL 
&& af 
== AF_INET
) 
2989                         (void) inet_ntop(af
, &SIN(netmask
)->sin_addr
.s_addr
, 
2990                             s_netmask
, sizeof (s_netmask
)); 
2991                 if (netmask 
!= NULL 
&& af 
== AF_INET6
) 
2992                         (void) inet_ntop(af
, &SIN6(netmask
)->sin6_addr
, 
2993                             s_netmask
, sizeof (s_netmask
)); 
2996                 printf("%s (%d, %d, %s, %s, %u)\n", 
2997                     __func__
, lookup_only
, coarse
, s_dst
, s_netmask
, ifscope
); 
3002          * Scoped route lookup: 
3004          * We first perform a non-scoped lookup for the original result. 
3005          * Afterwards, depending on whether or not the caller has specified 
3006          * a scope, we perform a more specific scoped search and fallback 
3007          * to this original result upon failure. 
3009         rn0 
= rn 
= node_lookup(dst
, netmask
, IFSCOPE_NONE
); 
3012          * If the caller did not specify a scope, use the primary scope 
3013          * derived from the system's non-scoped default route.  If, for 
3014          * any reason, there is no primary interface, ifscope will be 
3015          * set to IFSCOPE_NONE; if the above lookup resulted in a route, 
3016          * we'll do a more-specific search below, scoped to the interface 
3020                 ifscope 
= get_primary_ifscope(af
); 
3023          * Keep the original result if either of the following is true: 
3025          *   1) The interface portion of the route has the same interface 
3026          *      index as the scope value and it is marked with RTF_IFSCOPE. 
3027          *   2) The route uses the loopback interface, in which case the 
3028          *      destination (host/net) is local/loopback. 
3030          * Otherwise, do a more specified search using the scope; 
3031          * we're holding rnh_lock now, so rt_ifp should not change. 
3034                 struct rtentry 
*rt 
= RT(rn
); 
3035 #if (DEVELOPMENT || DEBUG) 
3037                         rt_str(rt
, dbuf
, sizeof (dbuf
), gbuf
, sizeof (gbuf
)); 
3038                         printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n", 
3041                             (rt
->rt_ifp 
!= NULL
) ? rt
->rt_ifp
->if_xname 
: "", 
3042                             (rt
->rt_ifa
->ifa_ifp 
!= NULL
) ? 
3043                             rt
->rt_ifa
->ifa_ifp
->if_xname 
: ""); 
3046                 if (!(rt
->rt_ifp
->if_flags 
& IFF_LOOPBACK
) || 
3047                     (rt
->rt_flags 
& RTF_GATEWAY
)) { 
3048                         if (rt
->rt_ifp
->if_index 
!= ifscope
) { 
3050                                  * Wrong interface; keep the original result 
3051                                  * only if the caller did not specify a scope, 
3052                                  * and do a more specific scoped search using 
3053                                  * the scope of the found route.  Otherwise, 
3054                                  * start again from scratch. 
3056                                  * For loopback scope we keep the unscoped 
3057                                  * route for local addresses 
3061                                         ifscope 
= rt
->rt_ifp
->if_index
; 
3062                                 else if (ifscope 
!= lo_ifp
->if_index 
|| 
3063                                     rt_ifa_is_dst(dst
, rt
->rt_ifa
) == FALSE
) 
3065                         } else if (!(rt
->rt_flags 
& RTF_IFSCOPE
)) { 
3067                                  * Right interface, except that this route 
3068                                  * isn't marked with RTF_IFSCOPE.  Do a more 
3069                                  * specific scoped search.  Keep the original 
3070                                  * result and return it it in case the scoped 
3079          * Scoped search.  Find the most specific entry having the same 
3080          * interface scope as the one requested.  The following will result 
3081          * in searching for the longest prefix scoped match. 
3084                 rn 
= node_lookup(dst
, netmask
, ifscope
); 
3085 #if (DEVELOPMENT || DEBUG) 
3086                 if (rt_verbose 
&& rn 
!= NULL
) { 
3087                         struct rtentry 
*rt 
= RT(rn
); 
3089                         rt_str(rt
, dbuf
, sizeof (dbuf
), gbuf
, sizeof (gbuf
)); 
3090                         printf("%s scoped search %p to %s->%s->%s ifa %s\n", 
3093                             (rt
->rt_ifp 
!= NULL
) ? rt
->rt_ifp
->if_xname 
: "", 
3094                             (rt
->rt_ifa
->ifa_ifp 
!= NULL
) ? 
3095                             rt
->rt_ifa
->ifa_ifp
->if_xname 
: ""); 
3100          * Use the original result if either of the following is true: 
3102          *   1) The scoped search did not yield any result. 
3103          *   2) The caller insists on performing a coarse-grained lookup. 
3104          *   3) The result from the scoped search is a scoped default route, 
3105          *      and the original (non-scoped) result is not a default route, 
3106          *      i.e. the original result is a more specific host/net route. 
3107          *   4) The scoped search yielded a net route but the original 
3108          *      result is a host route, i.e. the original result is treated 
3109          *      as a more specific route. 
3111         if (rn 
== NULL 
|| coarse 
|| (rn0 
!= NULL 
&& 
3112             ((SA_DEFAULT(rt_key(RT(rn
))) && !SA_DEFAULT(rt_key(RT(rn0
)))) || 
3113             (!RT_HOST(rn
) && RT_HOST(rn0
))))) 
3117          * If we still don't have a route, use the non-scoped default 
3118          * route as long as the interface portion satistifes the scope. 
3120         if (rn 
== NULL 
&& (rn 
= node_lookup_default(af
)) != NULL 
&& 
3121             RT(rn
)->rt_ifp
->if_index 
!= ifscope
) { 
3127                  * Manually clear RTPRF_OURS using rt_validate() and 
3128                  * bump up the reference count after, and not before; 
3129                  * we only get here for AF_INET/AF_INET6.  node_lookup() 
3130                  * has done the check against RNF_ROOT, so we can be sure 
3131                  * that we're not returning a root node here. 
3133                 RT_LOCK_SPIN(RT(rn
)); 
3134                 if (rt_validate(RT(rn
))) { 
3135                         RT_ADDREF_LOCKED(RT(rn
)); 
3142 #if (DEVELOPMENT || DEBUG) 
3145                         printf("%s %u return NULL\n", __func__
, ifscope
); 
3147                         struct rtentry 
*rt 
= RT(rn
); 
3149                         rt_str(rt
, dbuf
, sizeof (dbuf
), gbuf
, sizeof (gbuf
)); 
3151                         printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n", 
3152                             __func__
, ifscope
, rt
, 
3154                             (rt
->rt_ifp 
!= NULL
) ? rt
->rt_ifp
->if_xname 
: "", 
3155                             (rt
->rt_ifa
->ifa_ifp 
!= NULL
) ? 
3156                             rt
->rt_ifa
->ifa_ifp
->if_xname 
: ""); 
3164 rt_lookup(boolean_t lookup_only
, struct sockaddr 
*dst
, struct sockaddr 
*netmask
, 
3165     struct radix_node_head 
*rnh
, unsigned int ifscope
) 
3167         return (rt_lookup_common(lookup_only
, FALSE
, dst
, netmask
, 
3172 rt_lookup_coarse(boolean_t lookup_only
, struct sockaddr 
*dst
, 
3173     struct sockaddr 
*netmask
, struct radix_node_head 
*rnh
) 
3175         return (rt_lookup_common(lookup_only
, TRUE
, dst
, netmask
, 
3176             rnh
, IFSCOPE_NONE
)); 
3180 rt_validate(struct rtentry 
*rt
) 
3182         RT_LOCK_ASSERT_HELD(rt
); 
3184         if ((rt
->rt_flags 
& (RTF_UP 
| RTF_CONDEMNED
)) == RTF_UP
) { 
3185                 int af 
= rt_key(rt
)->sa_family
; 
3188                         (void) in_validate(RN(rt
)); 
3189                 else if (af 
== AF_INET6
) 
3190                         (void) in6_validate(RN(rt
)); 
3195         return (rt 
!= NULL
); 
3199  * Set up a routing table entry, normally 
3203 rtinit(struct ifaddr 
*ifa
, int cmd
, int flags
) 
3207         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
3209         lck_mtx_lock(rnh_lock
); 
3210         error 
= rtinit_locked(ifa
, cmd
, flags
); 
3211         lck_mtx_unlock(rnh_lock
); 
3217 rtinit_locked(struct ifaddr 
*ifa
, int cmd
, int flags
) 
3219         struct radix_node_head 
*rnh
; 
3220         uint8_t nbuf
[128];      /* long enough for IPv6 */ 
3221 #if (DEVELOPMENT || DEBUG) 
3222         char dbuf
[MAX_IPv6_STR_LEN
], gbuf
[MAX_IPv6_STR_LEN
]; 
3223         char abuf
[MAX_IPv6_STR_LEN
]; 
3225         struct rtentry 
*rt 
= NULL
; 
3226         struct sockaddr 
*dst
; 
3227         struct sockaddr 
*netmask
; 
3231          * Holding rnh_lock here prevents the possibility of ifa from 
3232          * changing (e.g. in_ifinit), so it is safe to access its 
3233          * ifa_{dst}addr (here and down below) without locking. 
3235         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
3237         if (flags 
& RTF_HOST
) { 
3238                 dst 
= ifa
->ifa_dstaddr
; 
3241                 dst 
= ifa
->ifa_addr
; 
3242                 netmask 
= ifa
->ifa_netmask
; 
3245         if (dst
->sa_len 
== 0) { 
3246                 log(LOG_ERR
, "%s: %s failed, invalid dst sa_len %d\n", 
3247                     __func__
, rtm2str(cmd
), dst
->sa_len
); 
3251         if (netmask 
!= NULL 
&& netmask
->sa_len 
> sizeof (nbuf
)) { 
3252                 log(LOG_ERR
, "%s: %s failed, mask sa_len %d too large\n", 
3253                     __func__
, rtm2str(cmd
), dst
->sa_len
); 
3258 #if (DEVELOPMENT || DEBUG) 
3259         if (dst
->sa_family 
== AF_INET
) { 
3260                 (void) inet_ntop(AF_INET
, &SIN(dst
)->sin_addr
.s_addr
, 
3261                     abuf
, sizeof (abuf
)); 
3264         else if (dst
->sa_family 
== AF_INET6
) { 
3265                 (void) inet_ntop(AF_INET6
, &SIN6(dst
)->sin6_addr
, 
3266                     abuf
, sizeof (abuf
)); 
3269 #endif /* (DEVELOPMENT || DEBUG) */      
3271         if ((rnh 
= rt_tables
[dst
->sa_family
]) == NULL
) { 
3277          * If it's a delete, check that if it exists, it's on the correct 
3278          * interface or we might scrub a route to another ifa which would 
3279          * be confusing at best and possibly worse. 
3281         if (cmd 
== RTM_DELETE
) { 
3283                  * It's a delete, so it should already exist.. 
3284                  * If it's a net, mask off the host bits 
3285                  * (Assuming we have a mask) 
3287                 if (netmask 
!= NULL
) { 
3288                         rt_maskedcopy(dst
, SA(nbuf
), netmask
); 
3292                  * Get an rtentry that is in the routing tree and contains 
3293                  * the correct info.  Note that we perform a coarse-grained 
3294                  * lookup here, in case there is a scoped variant of the 
3295                  * subnet/prefix route which we should ignore, as we never 
3296                  * add a scoped subnet/prefix route as part of adding an 
3297                  * interface address. 
3299                 rt 
= rt_lookup_coarse(TRUE
, dst
, NULL
, rnh
); 
3301 #if (DEVELOPMENT || DEBUG) 
3302                         rt_str(rt
, dbuf
, sizeof (dbuf
), gbuf
, sizeof (gbuf
)); 
3305                          * Ok so we found the rtentry. it has an extra reference 
3306                          * for us at this stage. we won't need that so 
3310                         if (rt
->rt_ifa 
!= ifa
) { 
3312                                  * If the interface address in the rtentry 
3313                                  * doesn't match the interface we are using, 
3314                                  * then we don't want to delete it, so return 
3315                                  * an error.  This seems to be the only point 
3316                                  * of this whole RTM_DELETE clause. 
3318 #if (DEVELOPMENT || DEBUG) 
3320                                         log(LOG_DEBUG
, "%s: not removing " 
3321                                             "route to %s->%s->%s, flags %b, " 
3322                                             "ifaddr %s, rt_ifa 0x%llx != " 
3323                                             "ifa 0x%llx\n", __func__
, dbuf
, 
3324                                             gbuf
, ((rt
->rt_ifp 
!= NULL
) ? 
3325                                             rt
->rt_ifp
->if_xname 
: ""), 
3326                                             rt
->rt_flags
, RTF_BITS
, abuf
, 
3327                                             (uint64_t)VM_KERNEL_ADDRPERM( 
3329                                             (uint64_t)VM_KERNEL_ADDRPERM(ifa
)); 
3331 #endif /* (DEVELOPMENT || DEBUG) */ 
3332                                 RT_REMREF_LOCKED(rt
); 
3335                                 error 
= ((flags 
& RTF_HOST
) ? 
3336                                     EHOSTUNREACH 
: ENETUNREACH
); 
3338                         } else if (rt
->rt_flags 
& RTF_STATIC
) { 
3340                                  * Don't remove the subnet/prefix route if 
3341                                  * this was manually added from above. 
3343 #if (DEVELOPMENT || DEBUG) 
3345                                         log(LOG_DEBUG
, "%s: not removing " 
3346                                             "static route to %s->%s->%s, " 
3347                                             "flags %b, ifaddr %s\n", __func__
, 
3348                                             dbuf
, gbuf
, ((rt
->rt_ifp 
!= NULL
) ? 
3349                                             rt
->rt_ifp
->if_xname 
: ""), 
3350                                             rt
->rt_flags
, RTF_BITS
, abuf
); 
3352 #endif /* (DEVELOPMENT || DEBUG) */ 
3353                                 RT_REMREF_LOCKED(rt
); 
3359 #if (DEVELOPMENT || DEBUG) 
3361                                 log(LOG_DEBUG
, "%s: removing route to " 
3362                                     "%s->%s->%s, flags %b, ifaddr %s\n", 
3363                                     __func__
, dbuf
, gbuf
, 
3364                                     ((rt
->rt_ifp 
!= NULL
) ? 
3365                                     rt
->rt_ifp
->if_xname 
: ""), 
3366                                     rt
->rt_flags
, RTF_BITS
, abuf
); 
3368 #endif /* (DEVELOPMENT || DEBUG) */ 
3369                         RT_REMREF_LOCKED(rt
); 
3375          * Do the actual request 
3377         if ((error 
= rtrequest_locked(cmd
, dst
, ifa
->ifa_addr
, netmask
, 
3378             flags 
| ifa
->ifa_flags
, &rt
)) != 0) 
3382 #if (DEVELOPMENT || DEBUG) 
3383         rt_str(rt
, dbuf
, sizeof (dbuf
), gbuf
, sizeof (gbuf
)); 
3384 #endif /* (DEVELOPMENT || DEBUG) */ 
3388                  * If we are deleting, and we found an entry, then it's 
3389                  * been removed from the tree.   Notify any listening 
3390                  * routing agents of the change and throw it away. 
3393                 rt_newaddrmsg(cmd
, ifa
, error
, rt
); 
3395 #if (DEVELOPMENT || DEBUG) 
3397                         log(LOG_DEBUG
, "%s: removed route to %s->%s->%s, " 
3398                             "flags %b, ifaddr %s\n", __func__
, dbuf
, gbuf
, 
3399                             ((rt
->rt_ifp 
!= NULL
) ? rt
->rt_ifp
->if_xname 
: ""), 
3400                             rt
->rt_flags
, RTF_BITS
, abuf
); 
3402 #endif /* (DEVELOPMENT || DEBUG) */ 
3408                  * We are adding, and we have a returned routing entry. 
3409                  * We need to sanity check the result.  If it came back 
3410                  * with an unexpected interface, then it must have already 
3411                  * existed or something. 
3414                 if (rt
->rt_ifa 
!= ifa
) { 
3415                         void (*ifa_rtrequest
) 
3416                             (int, struct rtentry 
*, struct sockaddr 
*); 
3417 #if (DEVELOPMENT || DEBUG) 
3419                                 if (!(rt
->rt_ifa
->ifa_ifp
->if_flags 
& 
3420                                     (IFF_POINTOPOINT
|IFF_LOOPBACK
))) { 
3421                                         log(LOG_ERR
, "%s: %s route to %s->%s->%s, " 
3422                                             "flags %b, ifaddr %s, rt_ifa 0x%llx != " 
3423                                             "ifa 0x%llx\n", __func__
, rtm2str(cmd
), 
3424                                             dbuf
, gbuf
, ((rt
->rt_ifp 
!= NULL
) ? 
3425                                                 rt
->rt_ifp
->if_xname 
: ""), rt
->rt_flags
, 
3427                                             (uint64_t)VM_KERNEL_ADDRPERM(rt
->rt_ifa
), 
3428                                             (uint64_t)VM_KERNEL_ADDRPERM(ifa
)); 
3431                                 log(LOG_DEBUG
, "%s: %s route to %s->%s->%s, " 
3432                                     "flags %b, ifaddr %s, rt_ifa was 0x%llx " 
3433                                     "now 0x%llx\n", __func__
, rtm2str(cmd
), 
3434                                     dbuf
, gbuf
, ((rt
->rt_ifp 
!= NULL
) ? 
3435                                     rt
->rt_ifp
->if_xname 
: ""), rt
->rt_flags
, 
3437                                     (uint64_t)VM_KERNEL_ADDRPERM(rt
->rt_ifa
), 
3438                                     (uint64_t)VM_KERNEL_ADDRPERM(ifa
)); 
3440 #endif /* (DEVELOPMENT || DEBUG) */ 
3443                          * Ask that the protocol in question 
3444                          * remove anything it has associated with 
3445                          * this route and ifaddr. 
3447                         ifa_rtrequest 
= rt
->rt_ifa
->ifa_rtrequest
; 
3448                         if (ifa_rtrequest 
!= NULL
) 
3449                                 ifa_rtrequest(RTM_DELETE
, rt
, NULL
); 
3451                          * Set the route's ifa. 
3455                         if (rt
->rt_ifp 
!= ifa
->ifa_ifp
) { 
3457                                  * Purge any link-layer info caching. 
3459                                 if (rt
->rt_llinfo_purge 
!= NULL
) 
3460                                         rt
->rt_llinfo_purge(rt
); 
3462                                  * Adjust route ref count for the interfaces. 
3464                                 if (rt
->rt_if_ref_fn 
!= NULL
) { 
3465                                         rt
->rt_if_ref_fn(ifa
->ifa_ifp
, 1); 
3466                                         rt
->rt_if_ref_fn(rt
->rt_ifp
, -1); 
3471                          * And substitute in references to the ifaddr 
3474                         rt
->rt_ifp 
= ifa
->ifa_ifp
; 
3476                          * If rmx_mtu is not locked, update it 
3477                          * to the MTU used by the new interface. 
3479                         if (!(rt
->rt_rmx
.rmx_locks 
& RTV_MTU
)) { 
3480                                 rt
->rt_rmx
.rmx_mtu 
= rt
->rt_ifp
->if_mtu
; 
3481                                 if (dst
->sa_family 
== AF_INET 
&& 
3482                                     INTF_ADJUST_MTU_FOR_CLAT46(rt
->rt_ifp
)) { 
3483                                         rt
->rt_rmx
.rmx_mtu 
= IN6_LINKMTU(rt
->rt_ifp
); 
3484                                         /* Further adjust the size for CLAT46 expansion */ 
3485                                         rt
->rt_rmx
.rmx_mtu 
-= CLAT46_HDR_EXPANSION_OVERHD
; 
3490                          * Now ask the protocol to check if it needs 
3491                          * any special processing in its new form. 
3493                         ifa_rtrequest 
= ifa
->ifa_rtrequest
; 
3494                         if (ifa_rtrequest 
!= NULL
) 
3495                                 ifa_rtrequest(RTM_ADD
, rt
, NULL
); 
3497 #if (DEVELOPMENT || DEBUG) 
3499                                 log(LOG_DEBUG
, "%s: added route to %s->%s->%s, " 
3500                                     "flags %b, ifaddr %s\n", __func__
, dbuf
, 
3501                                     gbuf
, ((rt
->rt_ifp 
!= NULL
) ? 
3502                                     rt
->rt_ifp
->if_xname 
: ""), rt
->rt_flags
, 
3505 #endif /* (DEVELOPMENT || DEBUG) */ 
3508                  * notify any listenning routing agents of the change 
3510                 rt_newaddrmsg(cmd
, ifa
, error
, rt
); 
3512                  * We just wanted to add it; we don't actually need a 
3513                  * reference.  This will result in a route that's added 
3514                  * to the routing table without a reference count.  The 
3515                  * RTM_DELETE code will do the necessary step to adjust 
3516                  * the reference count at deletion time. 
3518                 RT_REMREF_LOCKED(rt
); 
3531 rt_set_idleref(struct rtentry 
*rt
) 
3533         RT_LOCK_ASSERT_HELD(rt
); 
3536          * We currently keep idle refcnt only on unicast cloned routes 
3537          * that aren't marked with RTF_NOIFREF. 
3539         if (rt
->rt_parent 
!= NULL 
&& !(rt
->rt_flags 
& 
3540             (RTF_NOIFREF
|RTF_BROADCAST 
| RTF_MULTICAST
)) && 
3541             (rt
->rt_flags 
& (RTF_UP
|RTF_WASCLONED
|RTF_IFREF
)) == 
3542             (RTF_UP
|RTF_WASCLONED
)) { 
3543                 rt_clear_idleref(rt
);   /* drop existing refcnt if any  */ 
3544                 rt
->rt_if_ref_fn 
= rte_if_ref
; 
3545                 /* Become a regular mutex, just in case */ 
3546                 RT_CONVERT_LOCK(rt
); 
3547                 rt
->rt_if_ref_fn(rt
->rt_ifp
, 1); 
3548                 rt
->rt_flags 
|= RTF_IFREF
; 
3553 rt_clear_idleref(struct rtentry 
*rt
) 
3555         RT_LOCK_ASSERT_HELD(rt
); 
3557         if (rt
->rt_if_ref_fn 
!= NULL
) { 
3558                 VERIFY((rt
->rt_flags 
& (RTF_NOIFREF 
| RTF_IFREF
)) == RTF_IFREF
); 
3559                 /* Become a regular mutex, just in case */ 
3560                 RT_CONVERT_LOCK(rt
); 
3561                 rt
->rt_if_ref_fn(rt
->rt_ifp
, -1); 
3562                 rt
->rt_flags 
&= ~RTF_IFREF
; 
3563                 rt
->rt_if_ref_fn 
= NULL
; 
3568 rt_set_proxy(struct rtentry 
*rt
, boolean_t set
) 
3570         lck_mtx_lock(rnh_lock
); 
3573          * Search for any cloned routes which might have 
3574          * been formed from this node, and delete them. 
3576         if (rt
->rt_flags 
& (RTF_CLONING 
| RTF_PRCLONING
)) { 
3577                 struct radix_node_head 
*rnh 
= rt_tables
[rt_key(rt
)->sa_family
]; 
3580                         rt
->rt_flags 
|= RTF_PROXY
; 
3582                         rt
->rt_flags 
&= ~RTF_PROXY
; 
3585                 if (rnh 
!= NULL 
&& rt_mask(rt
)) { 
3586                         rnh
->rnh_walktree_from(rnh
, rt_key(rt
), rt_mask(rt
), 
3592         lck_mtx_unlock(rnh_lock
); 
3596 rte_lock_init(struct rtentry 
*rt
) 
3598         lck_mtx_init(&rt
->rt_lock
, rte_mtx_grp
, rte_mtx_attr
); 
3602 rte_lock_destroy(struct rtentry 
*rt
) 
3604         RT_LOCK_ASSERT_NOTHELD(rt
); 
3605         lck_mtx_destroy(&rt
->rt_lock
, rte_mtx_grp
); 
3609 rt_lock(struct rtentry 
*rt
, boolean_t spin
) 
3611         RT_LOCK_ASSERT_NOTHELD(rt
); 
3613                 lck_mtx_lock_spin(&rt
->rt_lock
); 
3615                 lck_mtx_lock(&rt
->rt_lock
); 
3616         if (rte_debug 
& RTD_DEBUG
) 
3617                 rte_lock_debug((struct rtentry_dbg 
*)rt
); 
3621 rt_unlock(struct rtentry 
*rt
) 
3623         if (rte_debug 
& RTD_DEBUG
) 
3624                 rte_unlock_debug((struct rtentry_dbg 
*)rt
); 
3625         lck_mtx_unlock(&rt
->rt_lock
); 
3630 rte_lock_debug(struct rtentry_dbg 
*rte
) 
3634         RT_LOCK_ASSERT_HELD((struct rtentry 
*)rte
); 
3635         idx 
= atomic_add_32_ov(&rte
->rtd_lock_cnt
, 1) % CTRACE_HIST_SIZE
; 
3636         if (rte_debug 
& RTD_TRACE
) 
3637                 ctrace_record(&rte
->rtd_lock
[idx
]); 
3641 rte_unlock_debug(struct rtentry_dbg 
*rte
) 
3645         RT_LOCK_ASSERT_HELD((struct rtentry 
*)rte
); 
3646         idx 
= atomic_add_32_ov(&rte
->rtd_unlock_cnt
, 1) % CTRACE_HIST_SIZE
; 
3647         if (rte_debug 
& RTD_TRACE
) 
3648                 ctrace_record(&rte
->rtd_unlock
[idx
]); 
3651 static struct rtentry 
* 
3654         if (rte_debug 
& RTD_DEBUG
) 
3655                 return (rte_alloc_debug()); 
3657         return ((struct rtentry 
*)zalloc(rte_zone
)); 
3661 rte_free(struct rtentry 
*p
) 
3663         if (rte_debug 
& RTD_DEBUG
) { 
3668         if (p
->rt_refcnt 
!= 0) { 
3669                 panic("rte_free: rte=%p refcnt=%d non-zero\n", p
, p
->rt_refcnt
); 
/*
 * Adjust the per-interface route reference count (ifp->if_route_refcnt)
 * by cnt.
 *
 * cnt must be -1, 0 or 1; any other value panics.  The count is updated
 * with atomic_add_32_ov() and its result is kept in "old".  A decrement
 * that observes old == 0 panics (the count would go negative), which
 * implies "old" is the value before the addition.
 *
 * When IFRF_IDLE_NOTIFY is set in ifp->if_idle_flags and a decrement
 * observes old == 1, a KEV_DL_IF_IDLE_ROUTE_REFCNT kernel event
 * (KEV_VENDOR_APPLE / KEV_NETWORK_CLASS / KEV_DL_SUBCLASS) carrying the
 * interface name, family and unit is posted via dlil_post_complete_msg().
 * The flag is read without holding the ifnet lock; see the in-body note
 * on why a stale value is harmless.
 */
3677 rte_if_ref(struct ifnet 
*ifp
, int cnt
) 
3679         struct kev_msg ev_msg
; 
3680         struct net_event_data ev_data
; 
3683         /* Force cnt to 1 increment/decrement */ 
3684         if (cnt 
< -1 || cnt 
> 1) { 
3685                 panic("%s: invalid count argument (%d)", __func__
, cnt
); 
3688         old 
= atomic_add_32_ov(&ifp
->if_route_refcnt
, cnt
); 
3689         if (cnt 
< 0 && old 
== 0) { 
3690                 panic("%s: ifp=%p negative route refcnt!", __func__
, ifp
); 
3694          * The following is done without first holding the ifnet lock, 
3695          * for performance reasons.  The relevant ifnet fields, with 
3696          * the exception of the if_idle_flags, are never changed 
3697          * during the lifetime of the ifnet.  The if_idle_flags 
3698          * may possibly be modified, so in the event that the value 
3699          * is stale because IFRF_IDLE_NOTIFY was cleared, we'd end up 
3700          * sending the event anyway.  This is harmless as it is just 
3701          * a notification to the monitoring agent in user space, and 
3702          * it is expected to check via SIOCGIFGETRTREFCNT again anyway. 
3704         if ((ifp
->if_idle_flags 
& IFRF_IDLE_NOTIFY
) && cnt 
< 0 && old 
== 1) { 
3705                 bzero(&ev_msg
, sizeof (ev_msg
)); 
3706                 bzero(&ev_data
, sizeof (ev_data
)); 
3708                 ev_msg
.vendor_code      
= KEV_VENDOR_APPLE
; 
3709                 ev_msg
.kev_class        
= KEV_NETWORK_CLASS
; 
3710                 ev_msg
.kev_subclass     
= KEV_DL_SUBCLASS
; 
3711                 ev_msg
.event_code       
= KEV_DL_IF_IDLE_ROUTE_REFCNT
; 
3713                 strlcpy(&ev_data
.if_name
[0], ifp
->if_name
, IFNAMSIZ
); 
3715                 ev_data
.if_family       
= ifp
->if_family
; 
3716                 ev_data
.if_unit         
= ifp
->if_unit
; 
3717                 ev_msg
.dv
[0].data_length 
= sizeof (struct net_event_data
); 
3718                 ev_msg
.dv
[0].data_ptr   
= &ev_data
; 
3720                 dlil_post_complete_msg(NULL
, &ev_msg
); 
/*
 * Debug variant of the route entry allocator.
 *
 * Takes an element from rte_zone, treats it as a struct rtentry_dbg,
 * zeroes it, records the allocation trace in rtd_alloc when RTD_TRACE is
 * set in rte_debug, and marks it RTD_INUSE.  The entry is returned cast to
 * struct rtentry *.
 *
 * NOTE(review): the condition guarding the initialization (the line
 * between the zalloc() and the bzero()) is not visible in this listing;
 * presumably a NULL check -- confirm.
 */
3724 static inline struct rtentry 
* 
3725 rte_alloc_debug(void) 
3727         struct rtentry_dbg 
*rte
; 
3729         rte 
= ((struct rtentry_dbg 
*)zalloc(rte_zone
)); 
3731                 bzero(rte
, sizeof (*rte
)); 
3732                 if (rte_debug 
& RTD_TRACE
) 
3733                         ctrace_record(&rte
->rtd_alloc
); 
3734                 rte
->rtd_inuse 
= RTD_INUSE
; 
3736         return ((struct rtentry 
*)rte
); 
/*
 * Debug variant of the route entry release path.
 *
 * Sanity checks, each of which panics on failure:
 *   - rt_refcnt must be zero;
 *   - rtd_inuse == RTD_FREED is reported as a double free;
 *   - any rtd_inuse value other than RTD_INUSE is reported as corruption.
 *
 * The entry is then copied into rtd_entry_saved, zeroed from its start up
 * to (but not including) rt_lock, marked RTD_FREED, and the free trace is
 * recorded in rtd_free when RTD_TRACE is set in rte_debug.
 *
 * NOTE(review): the statement guarded by !(rte_debug & RTD_NO_FREE) is
 * not visible in this listing; presumably it hands the element back to
 * the zone -- confirm.
 */
3740 rte_free_debug(struct rtentry 
*p
) 
3742         struct rtentry_dbg 
*rte 
= (struct rtentry_dbg 
*)p
; 
3744         if (p
->rt_refcnt 
!= 0) { 
3745                 panic("rte_free: rte=%p refcnt=%d\n", p
, p
->rt_refcnt
); 
3748         if (rte
->rtd_inuse 
== RTD_FREED
) { 
3749                 panic("rte_free: double free rte=%p\n", rte
); 
3751         } else if (rte
->rtd_inuse 
!= RTD_INUSE
) { 
3752                 panic("rte_free: corrupted rte=%p\n", rte
); 
3755         bcopy((caddr_t
)p
, (caddr_t
)&rte
->rtd_entry_saved
, sizeof (*p
)); 
3756         /* Preserve rt_lock to help catch use-after-free cases */ 
3757         bzero((caddr_t
)p
, offsetof(struct rtentry
, rt_lock
)); 
3759         rte
->rtd_inuse 
= RTD_FREED
; 
3761         if (rte_debug 
& RTD_TRACE
) 
3762                 ctrace_record(&rte
->rtd_free
); 
3764         if (!(rte_debug 
& RTD_NO_FREE
)) 
3769 ctrace_record(ctrace_t 
*tr
) 
3771         tr
->th 
= current_thread(); 
3772         bzero(tr
->pc
, sizeof (tr
->pc
)); 
3773         (void) OSBacktrace(tr
->pc
, CTRACE_STACK_SIZE
); 
3777 route_copyout(struct route 
*dst
, const struct route 
*src
, size_t length
) 
3779         /* Copy everything (rt, srcif, flags, dst) from src */ 
3780         bcopy(src
, dst
, length
); 
3782         /* Hold one reference for the local copy of struct route */ 
3783         if (dst
->ro_rt 
!= NULL
) 
3784                 RT_ADDREF(dst
->ro_rt
); 
3786         /* Hold one reference for the local copy of struct lle */ 
3787         if (dst
->ro_lle 
!= NULL
) 
3788                 LLE_ADDREF(dst
->ro_lle
); 
3790         /* Hold one reference for the local copy of struct ifaddr */ 
3791         if (dst
->ro_srcia 
!= NULL
) 
3792                 IFA_ADDREF(dst
->ro_srcia
); 
/*
 * Fold a local copy of a route ("src") back into the cached copy ("dst").
 * Per the note near the end of the body, this function consumes the
 * references held by src.
 *
 * Cases handled, in order, as far as visible here:
 *   1. dst has no cached route: dst's ro_lle and ro_srcia references (if
 *      any) are released and "length" bytes of src are copied over dst;
 *      the references src held travel with the copy.
 *   2. dst and src cache the same route: ro_flags is taken from src.  For
 *      ro_lle and ro_srcia, if the pointers differ dst's old reference is
 *      released and dst adopts src's pointer; if they are identical and
 *      non-NULL, src's duplicate reference is released.
 *   3. The routes differ and src has one: dst's ro_lle and ro_srcia
 *      references are released and src is copied over dst.
 *   4. src has no route: src's ro_srcia and ro_lle references are
 *      released since nothing is being cached.
 *
 * NOTE(review): several statements between the visible lines (early
 * exits, release of the route entry references, and the clearing of the
 * other src fields at the end) are not visible in this listing -- confirm
 * against the full source before relying on this summary.
 */
3796 route_copyin(struct route 
*src
, struct route 
*dst
, size_t length
) 
3799          * No cached route at the destination? 
3800          * If none, then remove old references if present 
3801          * and copy entire src route. 
3803         if (dst
->ro_rt 
== NULL
) { 
3805                  * Ditch the cached link layer reference (dst) 
3806                  * since we're about to take everything there is in src 
3808                 if (dst
->ro_lle 
!= NULL
) 
3809                         LLE_REMREF(dst
->ro_lle
); 
3811                  * Ditch the address in the cached copy (dst) since 
3812                  * we're about to take everything there is in src. 
3814                 if (dst
->ro_srcia 
!= NULL
) 
3815                         IFA_REMREF(dst
->ro_srcia
); 
3817                  * Copy everything (rt, ro_lle, srcia, flags, dst) from src; the 
3818                  * references to rt and/or srcia were held at the time 
3819                  * of storage and are kept intact. 
3821                 bcopy(src
, dst
, length
); 
3826          * We know dst->ro_rt is not NULL here. 
3827          * If the src->ro_rt is the same, update ro_lle, srcia and flags 
3828          * and ditch the route in the local copy. 
3830         if (dst
->ro_rt 
== src
->ro_rt
) { 
3831                 dst
->ro_flags 
= src
->ro_flags
; 
3833                 if (dst
->ro_lle 
!= src
->ro_lle
) { 
3834                         if (dst
->ro_lle 
!= NULL
) 
3835                                 LLE_REMREF(dst
->ro_lle
); 
3836                         dst
->ro_lle 
= src
->ro_lle
; 
3837                 } else if (src
->ro_lle 
!= NULL
) { 
3838                         LLE_REMREF(src
->ro_lle
); 
3841                 if (dst
->ro_srcia 
!= src
->ro_srcia
) { 
3842                         if (dst
->ro_srcia 
!= NULL
) 
3843                                 IFA_REMREF(dst
->ro_srcia
); 
3844                         dst
->ro_srcia 
= src
->ro_srcia
; 
3845                 } else if (src
->ro_srcia 
!= NULL
) { 
3846                         IFA_REMREF(src
->ro_srcia
); 
3853          * If they are dst's ro_rt is not equal to src's, 
3854          * and src'd rt is not NULL, then remove old references 
3855          * if present and copy entire src route. 
3857         if (src
->ro_rt 
!= NULL
) { 
3860                 if (dst
->ro_lle 
!= NULL
) 
3861                         LLE_REMREF(dst
->ro_lle
); 
3862                 if (dst
->ro_srcia 
!= NULL
) 
3863                         IFA_REMREF(dst
->ro_srcia
); 
3864                 bcopy(src
, dst
, length
); 
3869          * Here, dst's cached route is not NULL but source's is. 
3870          * Just get rid of all the other cached reference in src. 
3872         if (src
->ro_srcia 
!= NULL
) { 
3874                  * Ditch src address in the local copy (src) since we're 
3875                  * not caching the route entry anyway (ro_rt is NULL). 
3877                 IFA_REMREF(src
->ro_srcia
); 
3879         if (src
->ro_lle 
!= NULL
) { 
3881                  * Ditch cache lle in the local copy (src) since we're 
3882                  * not caching the route anyway (ro_rt is NULL). 
3884                 LLE_REMREF(src
->ro_lle
); 
3887         /* This function consumes the references on src */ 
3890         src
->ro_srcia 
= NULL
; 
3894  * route_to_gwroute will find the gateway route for a given route. 
3896  * If the route is down, look the route up again. 
3897  * If the route goes through a gateway, get the route to the gateway. 
3898  * If the gateway route is down, look it up again. 
3899  * If the route is set to reject, verify it hasn't expired. 
3901  * If the returned route is non-NULL, the caller is responsible for 
3902  * releasing the reference and unlocking the route. 
/*
 * senderr(e) stores e in "error" and jumps to the "bad" label; it is the
 * single error exit used throughout route_to_gwroute() below.
 *
 * Walk-through of route_to_gwroute(net_dest, hint0, out_route), as far as
 * visible here:
 *
 *   - "rt" and "hint" both start out as hint0; the interface index of the
 *     route's ifp is remembered in "ifindex" and a reference is taken on
 *     rt.
 *   - If rt is not RTF_UP, that reference is dropped and a replacement is
 *     looked up with rtalloc1_scoped(net_dest, 1, 0, ifindex); the failure
 *     path is EHOSTUNREACH.
 *   - If rt is RTF_GATEWAY: an extra reference is taken on hint, and
 *     rt->rt_gateway is copied into a local sockaddr_storage (bounded by
 *     MIN(sizeof (ss), sa_len)).  When the cached gateway route is not
 *     RTF_UP, rt->rt_gwroute is cleared and a new one is looked up under
 *     rnh_lock via rtalloc1_scoped_locked().  The lookup result is rejected
 *     with EHOSTUNREACH if rt went down, nothing was found, the result is
 *     rt itself (circular), or rt's gateway no longer equals the saved
 *     copy; otherwise it is installed with rt_set_gwroute().  From then on
 *     rt == gwrt and rt != hint (VERIFY'd).  If hint is an up, cloned
 *     route, its parent's rt_gwroute is revalidated through
 *     rt_revalidate_gwroute().  A gateway route that is down at this point
 *     yields EHOSTUNREACH.
 *   - If the resulting route is RTF_REJECT and has no expiry or has not
 *     yet expired, the error is EHOSTDOWN for a non-gateway route and
 *     EHOSTUNREACH for a gateway route.
 *   - On success the lock is converted to a regular mutex and the route is
 *     left to the caller to clean up (see the in-body comment).
 *
 * NOTE(review): many lock/unlock and release statements, the "bad" label
 * itself and the return are not visible in this listing; the exact
 * lock/reference state at each exit must be confirmed against the full
 * source.
 */
3904 #define senderr(e) { error = (e); goto bad; } 
3906 route_to_gwroute(const struct sockaddr 
*net_dest
, struct rtentry 
*hint0
, 
3907     struct rtentry 
**out_route
) 
3910         struct rtentry 
*rt 
= hint0
, *hint 
= hint0
; 
3912         unsigned int ifindex
; 
3921          * Next hop determination.  Because we may involve the gateway route 
3922          * in addition to the original route, locking is rather complicated. 
3923          * The general concept is that regardless of whether the route points 
3924          * to the original route or to the gateway route, this routine takes 
3925          * an extra reference on such a route.  This extra reference will be 
3926          * released at the end. 
3928          * Care must be taken to ensure that the "hint0" route never gets freed 
3929          * via rtfree(), since the caller may have stored it inside a struct 
3930          * route with a reference held for that placeholder. 
3933         ifindex 
= rt
->rt_ifp
->if_index
; 
3934         RT_ADDREF_LOCKED(rt
); 
3935         if (!(rt
->rt_flags 
& RTF_UP
)) { 
3936                 RT_REMREF_LOCKED(rt
); 
3938                 /* route is down, find a new one */ 
3939                 hint 
= rt 
= rtalloc1_scoped((struct sockaddr 
*) 
3940                     (size_t)net_dest
, 1, 0, ifindex
); 
3943                         ifindex 
= rt
->rt_ifp
->if_index
; 
3945                         senderr(EHOSTUNREACH
); 
3950          * We have a reference to "rt" by now; it will either 
3951          * be released or freed at the end of this routine. 
3953         RT_LOCK_ASSERT_HELD(rt
); 
3954         if ((gwroute 
= (rt
->rt_flags 
& RTF_GATEWAY
))) { 
3955                 struct rtentry 
*gwrt 
= rt
->rt_gwroute
; 
3956                 struct sockaddr_storage ss
; 
3957                 struct sockaddr 
*gw 
= (struct sockaddr 
*)&ss
; 
3960                 RT_ADDREF_LOCKED(hint
); 
3962                 /* If there's no gateway rt, look it up */ 
3964                         bcopy(rt
->rt_gateway
, gw
, MIN(sizeof (ss
), 
3965                             rt
->rt_gateway
->sa_len
)); 
3969                 /* Become a regular mutex */ 
3970                 RT_CONVERT_LOCK(rt
); 
3973                  * Take gwrt's lock while holding route's lock; 
3974                  * this is okay since gwrt never points back 
3975                  * to "rt", so no lock ordering issues. 
3978                 if (!(gwrt
->rt_flags 
& RTF_UP
)) { 
3979                         rt
->rt_gwroute 
= NULL
; 
3981                         bcopy(rt
->rt_gateway
, gw
, MIN(sizeof (ss
), 
3982                             rt
->rt_gateway
->sa_len
)); 
3986                         lck_mtx_lock(rnh_lock
); 
3987                         gwrt 
= rtalloc1_scoped_locked(gw
, 1, 0, ifindex
); 
3991                          * Bail out if the route is down, no route 
3992                          * to gateway, circular route, or if the 
3993                          * gateway portion of "rt" has changed. 
3995                         if (!(rt
->rt_flags 
& RTF_UP
) || gwrt 
== NULL 
|| 
3996                             gwrt 
== rt 
|| !equal(gw
, rt
->rt_gateway
)) { 
3998                                         RT_REMREF_LOCKED(gwrt
); 
4002                                 RT_REMREF_LOCKED(hint
); 
4006                                         rtfree_locked(gwrt
); 
4007                                 lck_mtx_unlock(rnh_lock
); 
4008                                 senderr(EHOSTUNREACH
); 
4010                         VERIFY(gwrt 
!= NULL
); 
4012                          * Set gateway route; callee adds ref to gwrt; 
4013                          * gwrt has an extra ref from rtalloc1() for 
4016                         rt_set_gwroute(rt
, rt_key(rt
), gwrt
); 
4018                         RT_REMREF_LOCKED(rt
);   /* hint still holds a refcnt */ 
4020                         lck_mtx_unlock(rnh_lock
); 
4023                         RT_ADDREF_LOCKED(gwrt
); 
4026                         RT_REMREF_LOCKED(rt
);   /* hint still holds a refcnt */ 
4030                 VERIFY(rt 
== gwrt 
&& rt 
!= hint
); 
4033                  * This is an opportunity to revalidate the parent route's 
4034                  * rt_gwroute, in case it now points to a dead route entry. 
4035                  * Parent route won't go away since the clone (hint) holds 
4036                  * a reference to it.  rt == gwrt. 
4039                 if ((hint
->rt_flags 
& (RTF_WASCLONED 
| RTF_UP
)) == 
4040                     (RTF_WASCLONED 
| RTF_UP
)) { 
4041                         struct rtentry 
*prt 
= hint
->rt_parent
; 
4042                         VERIFY(prt 
!= NULL
); 
4044                         RT_CONVERT_LOCK(hint
); 
4047                         rt_revalidate_gwroute(prt
, rt
); 
4053                 /* Clean up "hint" now; see notes above regarding hint0 */ 
4060                 /* rt == gwrt; if it is now down, give up */ 
4062                 if (!(rt
->rt_flags 
& RTF_UP
)) { 
4064                         senderr(EHOSTUNREACH
); 
4068         if (rt
->rt_flags 
& RTF_REJECT
) { 
4069                 VERIFY(rt
->rt_expire 
== 0 || rt
->rt_rmx
.rmx_expire 
!= 0); 
4070                 VERIFY(rt
->rt_expire 
!= 0 || rt
->rt_rmx
.rmx_expire 
== 0); 
4071                 timenow 
= net_uptime(); 
4072                 if (rt
->rt_expire 
== 0 || timenow 
< rt
->rt_expire
) { 
4074                         senderr(!gwroute 
? EHOSTDOWN 
: EHOSTUNREACH
); 
4078         /* Become a regular mutex */ 
4079         RT_CONVERT_LOCK(rt
); 
4081         /* Caller is responsible for cleaning up "rt" */ 
4086         /* Clean up route (either it is "rt" or "gwrt") */ 
4090                         RT_REMREF_LOCKED(rt
); 
/*
 * Re-point rt->rt_gwroute at gwrt when rt's current gateway route is
 * missing or dead and gwrt is a suitable replacement.
 *
 * The update is only considered when all of the following hold:
 *   - rt has both RTF_GATEWAY and RTF_UP set;
 *   - rt and gwrt are on the same interface (rt_ifp);
 *   - rt's gateway address family equals the family of gwrt's key;
 *   - rt->rt_gwroute is NULL or is not RTF_UP.
 * In that case rt is VERIFY'd to be a cloning route (RTF_CLONING or
 * RTF_PRCLONING), and gwrt's key is compared with rt's gateway.  For
 * AF_INET/AF_INET6 the comparison is done on local copies produced by
 * sa_copy(), to get rid of any ifscope association; for other families
 * the addresses are compared directly.  If they are the same, gwrt is
 * installed with rt_set_gwroute() while rnh_lock is held.
 *
 * gwrt must not be NULL (VERIFY'd).
 *
 * NOTE(review): the route lock/unlock statements around these steps are
 * not visible in this listing -- confirm the locking protocol against the
 * full source.
 */
4102 rt_revalidate_gwroute(struct rtentry 
*rt
, struct rtentry 
*gwrt
) 
4104         VERIFY(gwrt 
!= NULL
); 
4107         if ((rt
->rt_flags 
& (RTF_GATEWAY 
| RTF_UP
)) == (RTF_GATEWAY 
| RTF_UP
) && 
4108             rt
->rt_ifp 
== gwrt
->rt_ifp 
&& rt
->rt_gateway
->sa_family 
== 
4109             rt_key(gwrt
)->sa_family 
&& (rt
->rt_gwroute 
== NULL 
|| 
4110             !(rt
->rt_gwroute
->rt_flags 
& RTF_UP
))) { 
4112                 VERIFY(rt
->rt_flags 
& (RTF_CLONING 
| RTF_PRCLONING
)); 
4114                 if (rt
->rt_gateway
->sa_family 
== AF_INET 
|| 
4115                     rt
->rt_gateway
->sa_family 
== AF_INET6
) { 
4116                         struct sockaddr_storage key_ss
, gw_ss
; 
4118                          * We need to compare rt_key and rt_gateway; create 
4119                          * local copies to get rid of any ifscope association. 
4121                         (void) sa_copy(rt_key(gwrt
), &key_ss
, NULL
); 
4122                         (void) sa_copy(rt
->rt_gateway
, &gw_ss
, NULL
); 
4124                         isequal 
= equal(SA(&key_ss
), SA(&gw_ss
)); 
4126                         isequal 
= equal(rt_key(gwrt
), rt
->rt_gateway
); 
4129                 /* If they are the same, update gwrt */ 
4132                         lck_mtx_lock(rnh_lock
); 
4134                         rt_set_gwroute(rt
, rt_key(rt
), gwrt
); 
4136                         lck_mtx_unlock(rnh_lock
); 
/*
 * Render an AF_INET route as text.  The route key must be AF_INET
 * (VERIFY'd).
 *
 * ds/dslen: receives the destination (route key) via inet_ntop().  If the
 *     buffer is at least MAX_SCOPE_ADDR_STR_LEN bytes and the key carries
 *     a scope id other than IFSCOPE_NONE, "@<scope id>" is appended with
 *     strlcat().
 * gs/gslen: receives the gateway: the gateway address when RTF_GATEWAY is
 *     set, otherwise "link#<if_unit>" when the route has an ifp, otherwise
 *     the literal "link".
 *
 * NOTE(review): the conditions guarding the ds and gs sections, and the
 * declaration of scpstr, are not visible in this listing -- confirm.
 */
4146 rt_str4(struct rtentry 
*rt
, char *ds
, uint32_t dslen
, char *gs
, uint32_t gslen
) 
4148         VERIFY(rt_key(rt
)->sa_family 
== AF_INET
); 
4151                 (void) inet_ntop(AF_INET
, 
4152                     &SIN(rt_key(rt
))->sin_addr
.s_addr
, ds
, dslen
); 
4153                 if (dslen 
>= MAX_SCOPE_ADDR_STR_LEN 
&& 
4154                     SINIFSCOPE(rt_key(rt
))->sin_scope_id 
!= IFSCOPE_NONE
) { 
4157                         snprintf(scpstr
, sizeof(scpstr
), "@%u", 
4158                             SINIFSCOPE(rt_key(rt
))->sin_scope_id
); 
4160                         strlcat(ds
, scpstr
, dslen
); 
4165                 if (rt
->rt_flags 
& RTF_GATEWAY
) { 
4166                         (void) inet_ntop(AF_INET
, 
4167                             &SIN(rt
->rt_gateway
)->sin_addr
.s_addr
, gs
, gslen
); 
4168                 } else if (rt
->rt_ifp 
!= NULL
) { 
4169                         snprintf(gs
, gslen
, "link#%u", rt
->rt_ifp
->if_unit
); 
4171                         snprintf(gs
, gslen
, "%s", "link"); 
/*
 * Render an AF_INET6 route as text.  The route key must be AF_INET6
 * (VERIFY'd).
 *
 * ds/dslen: receives the destination (route key) via inet_ntop().  If the
 *     buffer is at least MAX_SCOPE_ADDR_STR_LEN bytes and the key carries
 *     a scope id other than IFSCOPE_NONE, "@<scope id>" is appended with
 *     strlcat().
 * gs/gslen: receives the gateway: the gateway address when RTF_GATEWAY is
 *     set, otherwise "link#<if_unit>" when the route has an ifp, otherwise
 *     the literal "link".
 *
 * NOTE(review): the conditions guarding the ds and gs sections, and the
 * declaration of scpstr, are not visible in this listing -- confirm.
 */
4178 rt_str6(struct rtentry 
*rt
, char *ds
, uint32_t dslen
, char *gs
, uint32_t gslen
) 
4180         VERIFY(rt_key(rt
)->sa_family 
== AF_INET6
); 
4183                 (void) inet_ntop(AF_INET6
, 
4184                     &SIN6(rt_key(rt
))->sin6_addr
, ds
, dslen
); 
4185                 if (dslen 
>= MAX_SCOPE_ADDR_STR_LEN 
&& 
4186                     SIN6IFSCOPE(rt_key(rt
))->sin6_scope_id 
!= IFSCOPE_NONE
) { 
4189                         snprintf(scpstr
, sizeof(scpstr
), "@%u", 
4190                             SIN6IFSCOPE(rt_key(rt
))->sin6_scope_id
); 
4192                         strlcat(ds
, scpstr
, dslen
); 
4197                 if (rt
->rt_flags 
& RTF_GATEWAY
) { 
4198                         (void) inet_ntop(AF_INET6
, 
4199                             &SIN6(rt
->rt_gateway
)->sin6_addr
, gs
, gslen
); 
4200                 } else if (rt
->rt_ifp 
!= NULL
) { 
4201                         snprintf(gs
, gslen
, "link#%u", rt
->rt_ifp
->if_unit
); 
4203                         snprintf(gs
, gslen
, "%s", "link"); 
/*
 * Render a route's destination (ds/dslen) and gateway (gs/gslen) as text
 * by dispatching on the address family of the route key: rt_str4() and
 * rt_str6() do the formatting.
 *
 * NOTE(review): the case labels and the handling of other address
 * families are not visible in this listing -- confirm what ds/gs contain
 * for a family that is neither AF_INET nor AF_INET6.
 */
4211 rt_str(struct rtentry 
*rt
, char *ds
, uint32_t dslen
, char *gs
, uint32_t gslen
) 
4213         switch (rt_key(rt
)->sa_family
) { 
4215                 rt_str4(rt
, ds
, dslen
, gs
, gslen
); 
4219                 rt_str6(rt
, ds
, dslen
, gs
, gslen
); 
4231 void route_event_init(struct route_event 
*p_route_ev
, struct rtentry 
*rt
, 
4232     struct rtentry 
*gwrt
, int route_ev_code
) 
4234         VERIFY(p_route_ev 
!= NULL
); 
4235         bzero(p_route_ev
, sizeof(*p_route_ev
)); 
4237         p_route_ev
->rt 
= rt
; 
4238         p_route_ev
->gwrt 
= gwrt
; 
4239         p_route_ev
->route_event_code 
= route_ev_code
; 
/*
 * Callback that delivers a queued route event.  "arg" is a pointer to the
 * struct route_event describing the event.
 *
 * For ROUTE_EVHDLR_DEREGISTER the event tag must be non-NULL (VERIFY'd)
 * and the handler is removed from the route's event handler context with
 * EVENTHANDLER_DEREGISTER.  Otherwise the route_event handlers registered
 * on rt->rt_evhdlr_ctxt are invoked with the route key, the event code
 * and the address saved in the event (rt_addr).
 *
 * Per the in-body notes, the reference that the enqueuing code took on
 * the route is dropped here, and no reference is held on gwrt.
 *
 * NOTE(review): the trailing arguments of both macros, the early exit of
 * the deregister path and the release call itself are not visible in this
 * listing -- confirm.
 */
4243 route_event_callback(void *arg
) 
4245         struct route_event 
*p_rt_ev 
= (struct route_event 
*)arg
; 
4246         struct rtentry 
*rt 
= p_rt_ev
->rt
; 
4247         eventhandler_tag evtag 
= p_rt_ev
->evtag
; 
4248         int route_ev_code 
= p_rt_ev
->route_event_code
; 
4250         if (route_ev_code 
== ROUTE_EVHDLR_DEREGISTER
) { 
4251                 VERIFY(evtag 
!= NULL
); 
4252                 EVENTHANDLER_DEREGISTER(&rt
->rt_evhdlr_ctxt
, route_event
, 
4258         EVENTHANDLER_INVOKE(&rt
->rt_evhdlr_ctxt
, route_event
, rt_key(rt
), 
4259             route_ev_code
, (struct sockaddr 
*)&p_rt_ev
->rt_addr
, 
4262         /* The code enqueuing the route event held a reference */ 
4264         /* XXX No reference is taken on gwrt */ 
/*
 * Per-node callback for a routing tree walk that propagates a gateway
 * route's event to the routes using it.
 *
 * "rn" is treated as a struct rtentry and "arg" as the struct route_event
 * being propagated; the event's route (p_route_ev->rt) plays the role of
 * the gateway route here.  rnh_lock must be owned by the caller
 * (asserted).
 *
 * A node is passed over when it is pending cleanup (RTPRF_OURS), when it
 * is not an indirect route (no RTF_GATEWAY), or when its rt_gwroute is not
 * the gateway route in question.  For the remaining nodes the event is
 * queued with route_event_enqueue_nwk_wq_entry().
 *
 * NOTE(review): the per-route lock/unlock statements, the return values
 * and the trailing arguments of the enqueue call are not visible in this
 * listing -- confirm.
 */
4268 route_event_walktree(struct radix_node 
*rn
, void *arg
) 
4270         struct route_event 
*p_route_ev 
= (struct route_event 
*)arg
; 
4271         struct rtentry 
*rt 
= (struct rtentry 
*)rn
; 
4272         struct rtentry 
*gwrt 
= p_route_ev
->rt
; 
4274         LCK_MTX_ASSERT(rnh_lock
, LCK_MTX_ASSERT_OWNED
); 
4278         /* Return if the entry is pending cleanup */ 
4279         if (rt
->rt_flags 
& RTPRF_OURS
) { 
4284         /* Return if it is not an indirect route */ 
4285         if (!(rt
->rt_flags 
& RTF_GATEWAY
)) { 
4290         if (rt
->rt_gwroute 
!= gwrt
) { 
4295         route_event_enqueue_nwk_wq_entry(rt
, gwrt
, p_route_ev
->route_event_code
, 
/*
 * Work queue entry used to deliver a route event asynchronously:
 * nwk_wqe is the generic work queue header and rt_ev_arg the route event
 * payload that is handed to the callback.
 */
4302 struct route_event_nwk_wq_entry
 
4304         struct nwk_wq_entry nwk_wqe
; 
4305         struct route_event rt_ev_arg
; 
/*
 * Queue a route event for asynchronous delivery by route_event_callback().
 *
 * A zero-filled struct route_event_nwk_wq_entry is allocated with
 * MALLOC(..., M_NWKWQ, M_WAITOK | M_ZERO).  Unless the event is
 * ROUTE_EVHDLR_DEREGISTER, a reference is taken on rt (released later by
 * route_event_callback); for de-registration no reference is taken since
 * the registration already holds one.  The event payload records rt,
 * gwrt, evtag, the event code and a copy of a gateway address, taken from
 * either gwrt->rt_gateway or rt->rt_gateway; the address length is
 * VERIFY'd to fit in rt_addr before it is copied.  The entry is then
 * handed to nwk_wq_enqueue() with is_arg_managed set to TRUE and the
 * payload as the callback argument.
 *
 * NOTE(review): the conditions involving rt_locked and the choice between
 * the two gateway addresses are not visible in this listing -- confirm.
 */
4309 route_event_enqueue_nwk_wq_entry(struct rtentry 
*rt
, struct rtentry 
*gwrt
, 
4310     uint32_t route_event_code
, eventhandler_tag evtag
, boolean_t rt_locked
) 
4312         struct route_event_nwk_wq_entry 
*p_rt_ev 
= NULL
; 
4313         struct sockaddr 
*p_gw_saddr 
= NULL
; 
4315         MALLOC(p_rt_ev
, struct route_event_nwk_wq_entry 
*, 
4316             sizeof(struct route_event_nwk_wq_entry
), 
4317             M_NWKWQ
, M_WAITOK 
| M_ZERO
); 
4320          * If the intent is to de-register, don't take 
4321          * reference, route event registration already takes 
4322          * a reference on route. 
4324         if (route_event_code 
!= ROUTE_EVHDLR_DEREGISTER
) { 
4325                 /* The reference is released by route_event_callback */ 
4327                         RT_ADDREF_LOCKED(rt
); 
4332         p_rt_ev
->rt_ev_arg
.rt 
= rt
; 
4333         p_rt_ev
->rt_ev_arg
.gwrt 
= gwrt
; 
4334         p_rt_ev
->rt_ev_arg
.evtag 
= evtag
; 
4337                 p_gw_saddr 
= gwrt
->rt_gateway
; 
4339                 p_gw_saddr 
= rt
->rt_gateway
; 
4341         VERIFY(p_gw_saddr
->sa_len 
<= sizeof(p_rt_ev
->rt_ev_arg
.rt_addr
)); 
4342         bcopy(p_gw_saddr
, &(p_rt_ev
->rt_ev_arg
.rt_addr
), p_gw_saddr
->sa_len
); 
4344         p_rt_ev
->rt_ev_arg
.route_event_code 
= route_event_code
; 
4345         p_rt_ev
->nwk_wqe
.func 
= route_event_callback
; 
4346         p_rt_ev
->nwk_wqe
.is_arg_managed 
= TRUE
; 
4347         p_rt_ev
->nwk_wqe
.arg 
= &p_rt_ev
->rt_ev_arg
; 
4348         nwk_wq_enqueue((struct nwk_wq_entry
*)p_rt_ev
); 
4352 route_event2str(int route_event
) 
4354         const char *route_event_str 
= "ROUTE_EVENT_UNKNOWN"; 
4355         switch (route_event
) { 
4356                 case ROUTE_STATUS_UPDATE
: 
4357                         route_event_str 
= "ROUTE_STATUS_UPDATE"; 
4359                 case ROUTE_ENTRY_REFRESH
: 
4360                         route_event_str 
= "ROUTE_ENTRY_REFRESH"; 
4362                 case ROUTE_ENTRY_DELETED
: 
4363                         route_event_str 
= "ROUTE_ENTRY_DELETED"; 
4365                 case ROUTE_LLENTRY_RESOLVED
: 
4366                         route_event_str 
= "ROUTE_LLENTRY_RESOLVED"; 
4368                 case ROUTE_LLENTRY_UNREACH
: 
4369                         route_event_str 
= "ROUTE_LLENTRY_UNREACH"; 
4371                 case ROUTE_LLENTRY_CHANGED
: 
4372                         route_event_str 
= "ROUTE_LLENTRY_CHANGED"; 
4374                 case ROUTE_LLENTRY_STALE
: 
4375                         route_event_str 
= "ROUTE_LLENTRY_STALE"; 
4377                 case ROUTE_LLENTRY_TIMEDOUT
: 
4378                         route_event_str 
= "ROUTE_LLENTRY_TIMEDOUT"; 
4380                 case ROUTE_LLENTRY_DELETED
: 
4381                         route_event_str 
= "ROUTE_LLENTRY_DELETED"; 
4383                 case ROUTE_LLENTRY_EXPIRED
: 
4384                         route_event_str 
= "ROUTE_LLENTRY_EXPIRED"; 
4386                 case ROUTE_LLENTRY_PROBED
: 
4387                         route_event_str 
= "ROUTE_LLENTRY_PROBED"; 
4389                 case ROUTE_EVHDLR_DEREGISTER
: 
4390                         route_event_str 
= "ROUTE_EVHDLR_DEREGISTER"; 
4393                         /* Init'd to ROUTE_EVENT_UNKNOWN */ 
4396         return  route_event_str
; 
4400 route_op_entitlement_check(struct socket 
*so
, 
4403     boolean_t allow_root
) 
4406                 if (route_op_type 
== ROUTE_OP_READ
) { 
4408                          * If needed we can later extend this for more 
4409                          * granular entitlements and return a bit set of 
4412                         if (soopt_cred_check(so
, PRIV_NET_RESTRICTED_ROUTE_NC_READ
, 
4418         } else if (cred 
!= NULL
) { 
4419                 uid_t uid 
= kauth_cred_getuid(cred
); 
4421                 /* uid is 0 for root */ 
4422                 if (uid 
!= 0 || !allow_root
) { 
4423                         if (route_op_type 
== ROUTE_OP_READ
) { 
4424                                 if (priv_check_cred(cred
, 
4425                                     PRIV_NET_RESTRICTED_ROUTE_NC_READ
, 0) == 0)