+lck_mtx_t *rnh_lock; /* global routing tables mutex */
+static lck_attr_t *rnh_lock_attr; /* lock attribute, presumably for rnh_lock */
+static lck_grp_t *rnh_lock_grp; /* lock group, presumably for rnh_lock */
+static lck_grp_attr_t *rnh_lock_grp_attr; /* lock group attribute, presumably for rnh_lock */
+
+/*
+ * Lock group and attribute for routing entry locks
+ * (NOTE(review): presumably consumed by rte_lock_init() -- confirm).
+ */
+static lck_attr_t *rte_mtx_attr;
+static lck_grp_t *rte_mtx_grp;
+static lck_grp_attr_t *rte_mtx_grp_attr;
+
+lck_mtx_t *route_domain_mtx; /* XXX: global routing tables mutex for now */
+int rttrash = 0; /* routes not in table but not freed */
+
+unsigned int rte_debug; /* bitmask of the RTD_* flags defined below */
+
+/* Possible flags for rte_debug */
+#define RTD_DEBUG 0x1 /* enable or disable rtentry debug facility */
+#define RTD_TRACE 0x2 /* trace alloc, free, refcnt and lock */
+#define RTD_NO_FREE 0x4 /* don't free (good to catch corruptions) */
+
+#define RTE_NAME "rtentry" /* name for zone and rt_lock */
+
+static struct zone *rte_zone; /* special zone for rtentry */
+#define RTE_ZONE_MAX 65536 /* maximum elements in zone */
+#define RTE_ZONE_NAME RTE_NAME /* name of rtentry zone (same string as RTE_NAME) */
+
+#define RTD_INUSE 0xFEEDFACE /* pattern: entry is in use (cf. rtd_inuse) */
+#define RTD_FREED 0xDEADBEEF /* pattern: entry is freed (cf. rtd_inuse) */
+
+/*
+ * For gdb: the ctrace sizing constants are mirrored into variables,
+ * presumably so that a debugger can read them at run time.
+ */
+__private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE;
+__private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE;
+
+/*
+ * Debug variant of rtentry structure: the rtentry itself, a saved copy
+ * of it, an in-use pattern, reference and lock counters, ctrace records
+ * of the callers involved, and linkage for the trash list.
+ *
+ * NOTE(review): rtd_entry is the first member, presumably so that a
+ * struct rtentry pointer can be converted to a struct rtentry_dbg
+ * pointer and back -- confirm before reordering members.
+ */
+struct rtentry_dbg {
+ struct rtentry rtd_entry; /* rtentry */
+ struct rtentry rtd_entry_saved; /* saved rtentry */
+ uint32_t rtd_inuse; /* in use pattern (presumably RTD_INUSE or RTD_FREED) */
+ uint16_t rtd_refhold_cnt; /* # of rtref (16 bits wide) */
+ uint16_t rtd_refrele_cnt; /* # of rtunref (16 bits wide) */
+ uint32_t rtd_lock_cnt; /* # of locks */
+ uint32_t rtd_unlock_cnt; /* # of unlocks */
+ /*
+ * Alloc and free callers.
+ */
+ ctrace_t rtd_alloc;
+ ctrace_t rtd_free;
+ /*
+ * Circular lists of rtref and rtunref callers, CTRACE_HIST_SIZE
+ * slots each.
+ */
+ ctrace_t rtd_refhold[CTRACE_HIST_SIZE];
+ ctrace_t rtd_refrele[CTRACE_HIST_SIZE];
+ /*
+ * Circular lists of locks and unlocks, CTRACE_HIST_SIZE slots
+ * each.
+ */
+ ctrace_t rtd_lock[CTRACE_HIST_SIZE];
+ ctrace_t rtd_unlock[CTRACE_HIST_SIZE];
+ /*
+ * Trash list linkage
+ */
+ TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
+};
+
+/*
+ * Atomically add n to the 16-bit (resp. 32-bit) counter at address a and
+ * yield what OSAddAtomic16() (resp. OSAddAtomic()) returns, converted to
+ * the matching unsigned type.  NOTE(review): libkern documents that
+ * return value as the counter's value before the addition, which is
+ * presumably what the "_ov" suffix stands for -- confirm.
+ *
+ * Both parameters are parenthesized so that expression arguments such
+ * as "base + 1" are cast and passed as a whole.
+ */
+#define atomic_add_16_ov(a, n) \
+	((uint16_t) OSAddAtomic16((n), (volatile SInt16 *)(a)))
+#define atomic_add_32_ov(a, n) \
+	((uint32_t) OSAddAtomic((n), (a)))
+
+/*
+ * List of trash route entries protected by rnh_lock; the elements are
+ * rtentry_dbg structures (NOTE(review): presumably chained through
+ * rtd_trash_link -- confirm at the insertion site).
+ */
+static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
+
+static void rte_lock_init(struct rtentry *); /* forward declarations of file-local helpers follow */
+static void rte_lock_destroy(struct rtentry *);
+static inline struct rtentry *rte_alloc_debug(void);
+static inline void rte_free_debug(struct rtentry *);
+static inline void rte_lock_debug(struct rtentry_dbg *);
+static inline void rte_unlock_debug(struct rtentry_dbg *);
+static void rt_maskedcopy(struct sockaddr *,
+ struct sockaddr *, struct sockaddr *);
+static void rtable_init(void **);
+static inline void rtref_audit(struct rtentry_dbg *);
+static inline void rtunref_audit(struct rtentry_dbg *);
+static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t,
+ unsigned int);
+static int rtrequest_common_locked(int, struct sockaddr *,
+ struct sockaddr *, struct sockaddr *, int, struct rtentry **,
+ unsigned int);
+static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
+static inline void sa_set_ifscope(struct sockaddr *, unsigned int);
+static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *,
+ unsigned int);
+static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *,
+ unsigned int);
+static struct sockaddr *sa_trim(struct sockaddr *, int);
+static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
+ unsigned int);
+static struct radix_node *node_lookup_default(void);
+static int rn_match_ifscope(struct radix_node *, void *);
+static struct ifaddr *ifa_ifwithroute_common_locked(int,
+ const struct sockaddr *, const struct sockaddr *, unsigned int);
+static struct rtentry *rte_alloc(void);
+static void rte_free(struct rtentry *);
+static void rtfree_common(struct rtentry *, boolean_t);
+#if IFNET_ROUTE_REFCNT
+static void rte_if_ref(struct ifnet *, int); /* only built with IFNET_ROUTE_REFCNT */
+#endif /* IFNET_ROUTE_REFCNT */
+
+uint32_t route_generation = 0; /* NOTE(review): presumably a change counter for the routing table -- confirm */
+
+/*
+ * sockaddr_in with embedded interface scope; this is used internally
+ * to keep track of scoped route entries in the routing table. The
+ * fact that such a scope is embedded in the structure is an artifact
+ * of the current implementation which could change in future.
+ *
+ * The leading members mirror those of sockaddr_in by name; the scope
+ * lives in a union with the 8-byte sin_zero padding.
+ */
+struct sockaddr_inifscope {
+ __uint8_t sin_len;
+ sa_family_t sin_family;
+ in_port_t sin_port;
+ struct in_addr sin_addr;
+ /*
+ * To avoid possible conflict with an overlaid sockaddr_inarp
+ * having sin_other set to SIN_PROXY, we use the first 4-bytes
+ * of sin_zero since sin_srcaddr is one of the unused fields
+ * in sockaddr_inarp.
+ */
+ union {
+ char sin_zero[8];
+ struct {
+ __uint32_t ifscope; /* shares storage with sin_zero[0..3] */
+ } _in_index;
+ } un;
+#define sin_ifscope un._in_index.ifscope /* shorthand for the embedded scope */
+};
+
+#define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
+#define SINIFSCOPE(sa) ((struct sockaddr_inifscope *)(size_t)(sa))
+
+#define ASSERT_SINIFSCOPE(sa) { \
+ if ((sa)->sa_family != AF_INET || \
+ (sa)->sa_len < sizeof (struct sockaddr_in)) \
+ panic("%s: bad sockaddr_in %p\n", __func__, sa); \
+}
+
+/*
+ * Argument to leaf-matching routine; at present it is scoped routing
+ * specific but can be expanded in future to include other search filters.
+ * NOTE(review): presumably handed to rn_match_ifscope() through its
+ * void * parameter -- confirm.
+ */
+struct matchleaf_arg {
+ unsigned int ifscope; /* interface scope */
+};
+
+/*
+ * For looking up the non-scoped default route (sockaddr instead
+ * of sockaddr_in for convenience).  The initializer supplies the
+ * size of a sockaddr_in and AF_INET for the first two members and
+ * zeroes for the remaining one.
+ */
+static struct sockaddr sin_def = {
+ sizeof (struct sockaddr_in), AF_INET, { 0, }
+};
+
+/*
+ * Interface index (scope) of the primary interface; determined at
+ * the time when the default, non-scoped route gets added, changed
+ * or deleted. Protected by rnh_lock.  Starts out as IFSCOPE_NONE and
+ * is accessed through set_primary_ifscope()/get_primary_ifscope().
+ */
+static unsigned int primary_ifscope = IFSCOPE_NONE;
+
+/* True when dst is an AF_INET sockaddr whose address is all zeroes */
+#define INET_DEFAULT(dst) \
+	((dst)->sa_family == AF_INET && SIN(dst)->sin_addr.s_addr == 0)
+
+/*
+ * RT() views r as a struct rtentry pointer; RT_HOST() yields the
+ * RTF_HOST bit of that entry's rt_flags (non-zero when set).  The
+ * parameter of RT() is parenthesized so that an expression argument is
+ * cast as a whole rather than only its first operand.
+ */
+#define RT(r) ((struct rtentry *)(r))
+#define RT_HOST(r) (RT(r)->rt_flags & RTF_HOST)
+
+#if IFNET_ROUTE_REFCNT /* the idle-route tunable exists only in this configuration */
+SYSCTL_DECL(_net_idle_route);
+
+static int rt_if_idle_expire_timeout = RT_IF_IDLE_EXPIRE_TIMEOUT; /* read-write tunable, see SYSCTL_INT below */
+SYSCTL_INT(_net_idle_route, OID_AUTO, expire_timeout, CTLFLAG_RW,
+ &rt_if_idle_expire_timeout, 0, "Default expiration time on routes for "
+ "interface idle reference counting");
+#endif /* IFNET_ROUTE_REFCNT */
+
+/*
+ * Tell whether a route is the non-scoped default route: dst must be the
+ * AF_INET all-zeroes address and rt must not carry RTF_IFSCOPE.  dst is
+ * normally rt_key(rt), but a caller may supply it from elsewhere while
+ * rt is still being created.
+ */
+boolean_t
+rt_inet_default(struct rtentry *rt, struct sockaddr *dst)
+{
+	/* Anything but the AF_INET zero address cannot be the default */
+	if (!INET_DEFAULT(dst))
+		return (0);
+
+	/* A scoped default route does not qualify */
+	return (!(rt->rt_flags & RTF_IFSCOPE));
+}
+
+/*
+ * Set the ifscope of the primary interface; caller holds rnh_lock.
+ * The value is stored as-is in the file-local primary_ifscope; no
+ * locking or validation is done here.
+ */
+void
+set_primary_ifscope(unsigned int ifscope)
+{
+ primary_ifscope = ifscope;
+}
+
+/*
+ * Return the ifscope of the primary interface; caller holds rnh_lock.
+ * This is the value last stored by set_primary_ifscope(), or the
+ * initial IFSCOPE_NONE if none has been stored.
+ */
+unsigned int
+get_primary_ifscope(void)
+{
+ return (primary_ifscope);
+}
+
+/*
+ * Embed ifscope into the given sockaddr_in.  The sockaddr is checked
+ * with ASSERT_SINIFSCOPE() first, so passing anything other than an
+ * AF_INET sockaddr of at least sockaddr_in length panics.
+ */
+static inline void
+sa_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
+{
+	struct sockaddr_inifscope *scoped;
+
+	/* Only a sockaddr_in may carry an embedded scope */
+	ASSERT_SINIFSCOPE(sa);
+
+	scoped = SINIFSCOPE(sa);
+	scoped->sin_ifscope = ifscope;
+}
+
+/*
+ * Return the interface scope embedded in the given sockaddr_in.  The
+ * sockaddr is checked with ASSERT_SINIFSCOPE() first, so passing
+ * anything other than an AF_INET sockaddr of at least sockaddr_in
+ * length panics.
+ */
+unsigned int
+sa_get_ifscope(struct sockaddr *sa)
+{
+	struct sockaddr_inifscope *scoped;
+
+	/* Only a sockaddr_in may carry an embedded scope */
+	ASSERT_SINIFSCOPE(sa);
+
+	scoped = SINIFSCOPE(sa);
+	return (scoped->sin_ifscope);
+}
+
+/*
+ * Duplicate the sockaddr_in at src into dst, then stamp ifscope into the
+ * copy.  src itself is left untouched; the return value is dst viewed
+ * as a generic sockaddr.
+ */
+static struct sockaddr *
+sin_copy(struct sockaddr_in *src, struct sockaddr_in *dst, unsigned int ifscope)
+{
+	struct sockaddr *scoped = SA(dst);
+
+	/* Whole-structure copy first, so the scope lands on top of it */
+	*dst = *src;
+	sa_set_ifscope(scoped, ifscope);
+
+	return (scoped);
+}
+
+/*
+ * Build a scoped mask in dst: dst is zeroed, the mask at src is copied
+ * into it by way of rt_maskedcopy() (with src acting as its own mask),
+ * and ifscope is then embedded.  Returns dst viewed as a generic
+ * sockaddr.
+ *
+ * NOTE(review): rt_maskedcopy() is defined elsewhere; if it copies
+ * according to src->sa_len, a source mask longer than a sockaddr_in
+ * would overrun dst -- confirm against the callers.
+ */
+static struct sockaddr *
+mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope)
+{
+	struct sockaddr_inifscope *scoped = SINIFSCOPE(dst);
+	struct sockaddr *mask = SA(dst);
+
+	/* dst is known to be at least sockaddr{_in} sized */
+	bzero(dst, sizeof (struct sockaddr_in));
+	rt_maskedcopy(src, mask, src);
+
+	/*
+	 * Stretch the mask length so that it covers the sin_ifscope
+	 * field.  With IFSCOPE_NONE this merely lengthens the sockaddr
+	 * and leaves the embedded scope cleared, which is harmless:
+	 * rn_addmask skips trailing zeroes before the mask is inserted
+	 * into, or looked up in, the mask tree.
+	 */
+	scoped->sin_len = offsetof(struct sockaddr_inifscope, sin_ifscope) +
+	    sizeof (scoped->sin_ifscope);
+	scoped->sin_ifscope = ifscope;
+
+	return (mask);
+}
+
+/*
+ * Strip trailing zero bytes from a sockaddr and shrink sa_len to match.
+ * The first skip bytes are never examined.  A sockaddr no longer than
+ * skip is returned untouched; one whose bytes beyond skip are all zero
+ * ends up with sa_len set to 0, meaning there is no mask.  Always
+ * returns sa.
+ */
+static struct sockaddr *
+sa_trim(struct sockaddr *sa, int skip)
+{
+	caddr_t start = (caddr_t)sa + skip;
+	caddr_t end;
+
+	/* Nothing beyond the skipped prefix, hence nothing to trim */
+	if (sa->sa_len <= skip)
+		return (sa);
+
+	/* Walk back from the end while the preceding byte is zero */
+	end = start + (sa->sa_len - skip);
+	while (end > start && *(end - 1) == 0)
+		end--;
+
+	sa->sa_len = skip + (end - start);
+	if (sa->sa_len < skip) {
+		/* Must not happen, and if so, panic */
+		panic("%s: broken logic (sa_len %d < skip %d )", __func__,
+		    sa->sa_len, skip);
+		/* NOTREACHED */
+	}
+
+	/* Only the skipped prefix is left: all zeroes, so no mask */
+	if (sa->sa_len == skip)
+		sa->sa_len = 0;
+
+	return (sa);
+}
+
+/*
+ * Called by rtm_msg{1,2} routines to "scrub" the embedded interface scope
+ * away from the socket address structure, so that clients of the routing
+ * socket will not be confused by the presence of the embedded scope, or the
+ * side effect of the increased length due to that. The source sockaddr is
+ * not modified; instead, the scrubbing happens on the destination sockaddr
+ * storage that is passed in by the caller.
+ */
+struct sockaddr *
+rtm_scrub_ifscope(int idx, struct sockaddr *hint, struct sockaddr *sa,
+ struct sockaddr_storage *ss)
+{
+ struct sockaddr *ret = sa;
+
+ switch (idx) {
+ case RTAX_DST:
+ /*
+ * If this is for an AF_INET destination address, call
+ * sin_copy() with IFSCOPE_NONE as it does what we need.
+ */
+ if (sa->sa_family == AF_INET &&
+ SINIFSCOPE(sa)->sin_ifscope != IFSCOPE_NONE) {
+ bzero(ss, sizeof (*ss));
+ ret = sin_copy(SIN(sa), SIN(ss), IFSCOPE_NONE);
+ }
+ break;