+
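+/*
+ * Mark a route as holding an idle reference on its interface.  The
+ * caller must hold the route's lock; the lock may be converted to a
+ * full mutex here since the interface callback can block.
+ */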
+static void
+rt_set_idleref(struct rtentry *rt)
+{
+ RT_LOCK_ASSERT_HELD(rt);
+
+ /*
+ * We currently keep idle refcnt only on unicast cloned routes
+ * that aren't marked with RTF_NOIFREF.
+ */
+ if (rt->rt_parent != NULL && !(rt->rt_flags &
+ (RTF_NOIFREF|RTF_BROADCAST|RTF_MULTICAST)) &&
+ (rt->rt_flags & (RTF_UP|RTF_WASCLONED|RTF_IFREF)) ==
+ (RTF_UP|RTF_WASCLONED)) {
+ rt_clear_idleref(rt); /* drop existing refcnt if any */
+ rt->rt_if_ref_fn = rte_if_ref;
+ /* Become a regular mutex, just in case */
+ RT_CONVERT_LOCK(rt);
+ rt->rt_if_ref_fn(rt->rt_ifp, 1);
+ rt->rt_flags |= RTF_IFREF;
+ }
+}
+
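+/*
+ * Release the idle interface reference, if any, held by this route.
+ * Called with the route's lock held; pairs with rt_set_idleref().
+ */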
+void
+rt_clear_idleref(struct rtentry *rt)
+{
+ RT_LOCK_ASSERT_HELD(rt);
+
+ if (rt->rt_if_ref_fn != NULL) {
+ VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF);
+ /* Become a regular mutex, just in case */
+ RT_CONVERT_LOCK(rt);
+ rt->rt_if_ref_fn(rt->rt_ifp, -1);
+ rt->rt_flags &= ~RTF_IFREF;
+ rt->rt_if_ref_fn = NULL;
+ }
+}
+
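+/*
+ * Set or clear RTF_PROXY on a cloning/pr-cloning route, then flush
+ * any routes already cloned from it so that they get recreated with
+ * the updated setting.
+ */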
+void
+rt_set_proxy(struct rtentry *rt, boolean_t set)
+{
+ lck_mtx_lock(rnh_lock);
+ RT_LOCK(rt);
+ /*
+ * Search for any cloned routes which might have
+ * been formed from this node, and delete them.
+ */
+ if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
+ struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
+
+ if (set)
+ rt->rt_flags |= RTF_PROXY;
+ else
+ rt->rt_flags &= ~RTF_PROXY;
+
+ RT_UNLOCK(rt);
+ if (rnh != NULL && rt_mask(rt)) {
+ rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
+ rt_fixdelete, rt);
+ }
+ } else {
+ RT_UNLOCK(rt);
+ }
+ lck_mtx_unlock(rnh_lock);
+}
+
+static void
+rte_lock_init(struct rtentry *rt)
+{
+ lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr);
+}
+
+static void
+rte_lock_destroy(struct rtentry *rt)
+{
+ RT_LOCK_ASSERT_NOTHELD(rt);
+ lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp);
+}
+
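+/*
+ * Take a route entry's lock, optionally as a spin lock for short,
+ * non-blocking hold times.  The RT_LOCK()/RT_LOCK_SPIN() macros used
+ * throughout this file presumably wrap these routines; an illustrative
+ * pattern (sketch only):
+ *
+ *	RT_LOCK_SPIN(rt);		// short examination only
+ *	if (rt->rt_flags & RTF_UP) {
+ *		RT_CONVERT_LOCK(rt);	// upgrade before blocking work
+ *	}
+ *	RT_UNLOCK(rt);
+ */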
+void
+rt_lock(struct rtentry *rt, boolean_t spin)
+{
+ RT_LOCK_ASSERT_NOTHELD(rt);
+ if (spin)
+ lck_mtx_lock_spin(&rt->rt_lock);
+ else
+ lck_mtx_lock(&rt->rt_lock);
+ if (rte_debug & RTD_DEBUG)
+ rte_lock_debug((struct rtentry_dbg *)rt);
+}
+
+void
+rt_unlock(struct rtentry *rt)
+{
+ if (rte_debug & RTD_DEBUG)
+ rte_unlock_debug((struct rtentry_dbg *)rt);
+ lck_mtx_unlock(&rt->rt_lock);
+}
+
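+/*
+ * The debug variants below record each lock and unlock event in a
+ * small ring buffer: an atomically incremented counter, taken modulo
+ * CTRACE_HIST_SIZE, selects the slot that receives the backtrace.
+ */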
+static inline void
+rte_lock_debug(struct rtentry_dbg *rte)
+{
+ uint32_t idx;
+
+ RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
+ idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE;
+ if (rte_debug & RTD_TRACE)
+ ctrace_record(&rte->rtd_lock[idx]);
+}
+
+static inline void
+rte_unlock_debug(struct rtentry_dbg *rte)
+{
+ uint32_t idx;
+
+ RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
+ idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE;
+ if (rte_debug & RTD_TRACE)
+ ctrace_record(&rte->rtd_unlock[idx]);
+}
+
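+/*
+ * Route entry allocation and release; both go through the debug
+ * variants when RTD_DEBUG is set.  Freeing an entry that still has
+ * outstanding references is a fatal error.
+ */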
+static struct rtentry *
+rte_alloc(void)
+{
+ if (rte_debug & RTD_DEBUG)
+ return (rte_alloc_debug());
+
+ return ((struct rtentry *)zalloc(rte_zone));
+}
+
+static void
+rte_free(struct rtentry *p)
+{
+ if (rte_debug & RTD_DEBUG) {
+ rte_free_debug(p);
+ return;
+ }
+
+ if (p->rt_refcnt != 0) {
+ panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
+ /* NOTREACHED */
+ }
+
+ zfree(rte_zone, p);
+}
+
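+/*
+ * Maintain the count of routes holding a reference on an interface.
+ * When the last route reference goes away and the interface has
+ * requested idle notifications (IFRF_IDLE_NOTIFY), post a
+ * KEV_DL_IF_IDLE_ROUTE_REFCNT kernel event.
+ */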
+static void
+rte_if_ref(struct ifnet *ifp, int cnt)
+{
+ struct kev_msg ev_msg;
+ struct net_event_data ev_data;
+ uint32_t old;
+
+ /* Only a single increment or decrement at a time is allowed */
+ if (cnt < -1 || cnt > 1) {
+ panic("%s: invalid count argument (%d)", __func__, cnt);
+ /* NOTREACHED */
+ }
+ old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt);
+ if (cnt < 0 && old == 0) {
+ panic("%s: ifp=%p negative route refcnt!", __func__, ifp);
+ /* NOTREACHED */
+ }
+ /*
+ * The following is done without first holding the ifnet lock,
+ * for performance reasons. The relevant ifnet fields, with
+ * the exception of the if_idle_flags, are never changed
+ * during the lifetime of the ifnet. The if_idle_flags
+ * may be modified, so in the event that the value
+ * is stale because IFRF_IDLE_NOTIFY was cleared, we'd end up
+ * sending the event anyway. This is harmless as it is just
+ * a notification to the monitoring agent in user space, and
+ * it is expected to check via SIOCGIFGETRTREFCNT again anyway.
+ */
+ if ((ifp->if_idle_flags & IFRF_IDLE_NOTIFY) && cnt < 0 && old == 1) {
+ bzero(&ev_msg, sizeof (ev_msg));
+ bzero(&ev_data, sizeof (ev_data));
+
+ ev_msg.vendor_code = KEV_VENDOR_APPLE;
+ ev_msg.kev_class = KEV_NETWORK_CLASS;
+ ev_msg.kev_subclass = KEV_DL_SUBCLASS;
+ ev_msg.event_code = KEV_DL_IF_IDLE_ROUTE_REFCNT;
+
+ strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
+
+ ev_data.if_family = ifp->if_family;
+ ev_data.if_unit = ifp->if_unit;
+ ev_msg.dv[0].data_length = sizeof (struct net_event_data);
+ ev_msg.dv[0].data_ptr = &ev_data;
+
+ dlil_post_complete_msg(NULL, &ev_msg);
+ }
+}
+
+static inline struct rtentry *
+rte_alloc_debug(void)
+{
+ struct rtentry_dbg *rte;
+
+ rte = ((struct rtentry_dbg *)zalloc(rte_zone));
+ if (rte != NULL) {
+ bzero(rte, sizeof (*rte));
+ if (rte_debug & RTD_TRACE)
+ ctrace_record(&rte->rtd_alloc);
+ rte->rtd_inuse = RTD_INUSE;
+ }
+ return ((struct rtentry *)rte);
+}
+
+static inline void
+rte_free_debug(struct rtentry *p)
+{
+ struct rtentry_dbg *rte = (struct rtentry_dbg *)p;
+
+ if (p->rt_refcnt != 0) {
+ panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);
+ /* NOTREACHED */
+ }
+ if (rte->rtd_inuse == RTD_FREED) {
+ panic("rte_free: double free rte=%p\n", rte);
+ /* NOTREACHED */
+ } else if (rte->rtd_inuse != RTD_INUSE) {
+ panic("rte_free: corrupted rte=%p\n", rte);
+ /* NOTREACHED */
+ }
+ bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
+ /* Preserve rt_lock to help catch use-after-free cases */
+ bzero((caddr_t)p, offsetof(struct rtentry, rt_lock));
+
+ rte->rtd_inuse = RTD_FREED;
+
+ if (rte_debug & RTD_TRACE)
+ ctrace_record(&rte->rtd_free);
+
+ if (!(rte_debug & RTD_NO_FREE))
+ zfree(rte_zone, p);
+}
+
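+/*
+ * Record the current thread and a stack backtrace into the given
+ * trace slot.
+ */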
+void
+ctrace_record(ctrace_t *tr)
+{
+ tr->th = current_thread();
+ bzero(tr->pc, sizeof (tr->pc));
+ (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE);
+}
+
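+/*
+ * route_copyout() and route_copyin() move a cached route (and source
+ * address) between a shared struct route and a local copy, taking or
+ * consuming references as appropriate.  An illustrative pattern
+ * (sketch only; serialization of the shared copy is up to the caller):
+ *
+ *	struct route ro;
+ *	route_copyout(&ro, &shared_ro, sizeof (ro));	// takes extra refs
+ *	... use ro.ro_rt and ro.ro_srcia locally ...
+ *	route_copyin(&ro, &shared_ro, sizeof (ro));	// refs handed back
+ */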
+void
+route_copyout(struct route *dst, const struct route *src, size_t length)
+{
+ /* Copy everything (rt, srcia, flags, dst) from src */
+ bcopy(src, dst, length);
+
+ /* Hold one reference for the local copy of struct route */
+ if (dst->ro_rt != NULL)
+ RT_ADDREF(dst->ro_rt);
+
+ /* Hold one reference for the local copy of struct ifaddr */
+ if (dst->ro_srcia != NULL)
+ IFA_ADDREF(dst->ro_srcia);
+}
+
+void
+route_copyin(struct route *src, struct route *dst, size_t length)
+{
+ /* No cached route at the destination? */
+ if (dst->ro_rt == NULL) {
+ /*
+ * Ditch the address in the cached copy (dst) since
+ * we're about to take everything there is in src.
+ */
+ if (dst->ro_srcia != NULL)
+ IFA_REMREF(dst->ro_srcia);
+ /*
+ * Copy everything (rt, srcia, flags, dst) from src; the
+ * references to rt and/or srcia were held at the time
+ * of storage and are kept intact.
+ */
+ bcopy(src, dst, length);
+ } else if (src->ro_rt != NULL) {
+ /*
+ * If the same, update srcia and flags, and ditch the route
+ * in the local copy. Else ditch the one that is currently
+ * cached, and cache the new route.
+ */
+ if (dst->ro_rt == src->ro_rt) {
+ dst->ro_flags = src->ro_flags;
+ if (dst->ro_srcia != src->ro_srcia) {
+ if (dst->ro_srcia != NULL)
+ IFA_REMREF(dst->ro_srcia);
+ dst->ro_srcia = src->ro_srcia;
+ } else if (src->ro_srcia != NULL) {
+ IFA_REMREF(src->ro_srcia);
+ }
+ rtfree(src->ro_rt);
+ } else {
+ rtfree(dst->ro_rt);
+ if (dst->ro_srcia != NULL)
+ IFA_REMREF(dst->ro_srcia);
+ bcopy(src, dst, length);
+ }
+ } else if (src->ro_srcia != NULL) {
+ /*
+ * Ditch src address in the local copy (src) since we're
+ * not caching the route entry anyway (ro_rt is NULL).
+ */
+ IFA_REMREF(src->ro_srcia);
+ }
+
+ /* This function consumes the references on src */
+ src->ro_rt = NULL;
+ src->ro_srcia = NULL;
+}
+
+/*
+ * route_to_gwroute will find the gateway route for a given route.
+ *
+ * If the route is down, look the route up again.
+ * If the route goes through a gateway, get the route to the gateway.
+ * If the gateway route is down, look it up again.
+ * If the route is set to reject, verify it hasn't expired.
+ *
+ * If the returned route is non-NULL, the caller is responsible for
+ * releasing the reference and unlocking the route.
+ */
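+/*
+ * An illustrative call pattern (sketch only; "dst_sa" and "ro" are
+ * hypothetical caller state):
+ *
+ *	struct rtentry *route = NULL;
+ *	errno_t err = route_to_gwroute(dst_sa, ro->ro_rt, &route);
+ *	if (err == 0 && route != NULL) {
+ *		... transmit using route->rt_ifp ...
+ *		RT_REMREF_LOCKED(route);	// drop the reference we got
+ *		RT_UNLOCK(route);		// route is returned locked
+ *	}
+ */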
+#define senderr(e) { error = (e); goto bad; }
+errno_t
+route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0,
+ struct rtentry **out_route)
+{
+ uint64_t timenow;
+ struct rtentry *rt = hint0, *hint = hint0;
+ errno_t error = 0;
+ unsigned int ifindex;
+ boolean_t gwroute;
+
+ *out_route = NULL;
+
+ if (rt == NULL)
+ return (0);
+
+ /*
+ * Next hop determination. Because we may involve the gateway route
+ * in addition to the original route, locking is rather complicated.
+ * The general concept is that regardless of whether the route points
+ * to the original route or to the gateway route, this routine takes
+ * an extra reference on such a route. This extra reference will be
+ * released at the end.
+ *
+ * Care must be taken to ensure that the "hint0" route never gets freed
+ * via rtfree(), since the caller may have stored it inside a struct
+ * route with a reference held for that placeholder.
+ */
+ RT_LOCK_SPIN(rt);
+ ifindex = rt->rt_ifp->if_index;
+ RT_ADDREF_LOCKED(rt);
+ if (!(rt->rt_flags & RTF_UP)) {
+ RT_REMREF_LOCKED(rt);
+ RT_UNLOCK(rt);
+ /* route is down, find a new one */
+ hint = rt = rtalloc1_scoped((struct sockaddr *)
+ (size_t)net_dest, 1, 0, ifindex);
+ if (hint != NULL) {
+ RT_LOCK_SPIN(rt);
+ ifindex = rt->rt_ifp->if_index;
+ } else {
+ senderr(EHOSTUNREACH);
+ }
+ }
+
+ /*
+ * We have a reference to "rt" by now; it will either
+ * be released or freed at the end of this routine.
+ */
+ RT_LOCK_ASSERT_HELD(rt);
+ if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) {
+ struct rtentry *gwrt = rt->rt_gwroute;
+ struct sockaddr_storage ss;
+ struct sockaddr *gw = (struct sockaddr *)&ss;
+
+ VERIFY(rt == hint);
+ RT_ADDREF_LOCKED(hint);
+
+ /* If there's no gateway rt, look it up */
+ if (gwrt == NULL) {
+ bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
+ rt->rt_gateway->sa_len));
+ RT_UNLOCK(rt);
+ goto lookup;
+ }
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(rt);
+
+ /*
+ * Take gwrt's lock while holding route's lock;
+ * this is okay since gwrt never points back
+ * to "rt", so no lock ordering issues.
+ */
+ RT_LOCK_SPIN(gwrt);
+ if (!(gwrt->rt_flags & RTF_UP)) {
+ rt->rt_gwroute = NULL;
+ RT_UNLOCK(gwrt);
+ bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
+ rt->rt_gateway->sa_len));
+ RT_UNLOCK(rt);
+ rtfree(gwrt);
+lookup:
+ lck_mtx_lock(rnh_lock);
+ gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex);
+
+ RT_LOCK(rt);
+ /*
+ * Bail out if the route is down, no route
+ * to gateway, circular route, or if the
+ * gateway portion of "rt" has changed.
+ */
+ if (!(rt->rt_flags & RTF_UP) || gwrt == NULL ||
+ gwrt == rt || !equal(gw, rt->rt_gateway)) {
+ if (gwrt == rt) {
+ RT_REMREF_LOCKED(gwrt);
+ gwrt = NULL;
+ }
+ VERIFY(rt == hint);
+ RT_REMREF_LOCKED(hint);
+ hint = NULL;
+ RT_UNLOCK(rt);
+ if (gwrt != NULL)
+ rtfree_locked(gwrt);
+ lck_mtx_unlock(rnh_lock);
+ senderr(EHOSTUNREACH);
+ }
+ VERIFY(gwrt != NULL);
+ /*
+ * Set gateway route; callee adds ref to gwrt;
+ * gwrt has an extra ref from rtalloc1() for
+ * this routine.
+ */
+ rt_set_gwroute(rt, rt_key(rt), gwrt);
+ VERIFY(rt == hint);
+ RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
+ RT_UNLOCK(rt);
+ lck_mtx_unlock(rnh_lock);
+ rt = gwrt;
+ } else {
+ RT_ADDREF_LOCKED(gwrt);
+ RT_UNLOCK(gwrt);
+ VERIFY(rt == hint);
+ RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
+ RT_UNLOCK(rt);
+ rt = gwrt;
+ }
+ VERIFY(rt == gwrt && rt != hint);
+
+ /*
+ * This is an opportunity to revalidate the parent route's
+ * rt_gwroute, in case it now points to a dead route entry.
+ * Parent route won't go away since the clone (hint) holds
+ * a reference to it. rt == gwrt.
+ */
+ RT_LOCK_SPIN(hint);
+ if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
+ (RTF_WASCLONED | RTF_UP)) {
+ struct rtentry *prt = hint->rt_parent;
+ VERIFY(prt != NULL);
+
+ RT_CONVERT_LOCK(hint);
+ RT_ADDREF(prt);
+ RT_UNLOCK(hint);
+ rt_revalidate_gwroute(prt, rt);
+ RT_REMREF(prt);
+ } else {
+ RT_UNLOCK(hint);
+ }
+
+ /* Clean up "hint" now; see notes above regarding hint0 */
+ if (hint == hint0)
+ RT_REMREF(hint);
+ else
+ rtfree(hint);
+ hint = NULL;
+
+ /* rt == gwrt; if it is now down, give up */
+ RT_LOCK_SPIN(rt);
+ if (!(rt->rt_flags & RTF_UP)) {
+ RT_UNLOCK(rt);
+ senderr(EHOSTUNREACH);
+ }
+ }
+
+ if (rt->rt_flags & RTF_REJECT) {
+ VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+ VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+ timenow = net_uptime();
+ if (rt->rt_expire == 0 || timenow < rt->rt_expire) {
+ RT_UNLOCK(rt);
+ senderr(!gwroute ? EHOSTDOWN : EHOSTUNREACH);
+ }
+ }
+
+ /* Become a regular mutex */
+ RT_CONVERT_LOCK(rt);
+
+ /* Caller is responsible for cleaning up "rt" */
+ *out_route = rt;
+ return (0);
+
+bad:
+ /* Clean up route (either it is "rt" or "gwrt") */
+ if (rt != NULL) {
+ RT_LOCK_SPIN(rt);
+ if (rt == hint0) {
+ RT_REMREF_LOCKED(rt);
+ RT_UNLOCK(rt);
+ } else {
+ RT_UNLOCK(rt);
+ rtfree(rt);
+ }
+ }
+ return (error);
+}
+#undef senderr
+
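+/*
+ * Revalidate the gateway route of an up gateway route entry: if its
+ * cached rt_gwroute is missing or down, and the candidate gwrt's key
+ * matches the entry's gateway address (ignoring any interface scope),
+ * reinstall gwrt via rt_set_gwroute().
+ */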
+void
+rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt)
+{
+ VERIFY(gwrt != NULL);
+
+ RT_LOCK_SPIN(rt);
+ if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) &&
+ rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family ==
+ rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL ||
+ !(rt->rt_gwroute->rt_flags & RTF_UP))) {
+ boolean_t isequal;
+ VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING));
+
+ if (rt->rt_gateway->sa_family == AF_INET ||
+ rt->rt_gateway->sa_family == AF_INET6) {
+ struct sockaddr_storage key_ss, gw_ss;
+ /*
+ * We need to compare rt_key and rt_gateway; create
+ * local copies to get rid of any ifscope association.
+ */
+ (void) sa_copy(rt_key(gwrt), &key_ss, NULL);
+ (void) sa_copy(rt->rt_gateway, &gw_ss, NULL);
+
+ isequal = equal(SA(&key_ss), SA(&gw_ss));
+ } else {
+ isequal = equal(rt_key(gwrt), rt->rt_gateway);
+ }
+
+ /* If they are the same, update gwrt */
+ if (isequal) {
+ RT_UNLOCK(rt);
+ lck_mtx_lock(rnh_lock);
+ RT_LOCK(rt);
+ rt_set_gwroute(rt, rt_key(rt), gwrt);
+ RT_UNLOCK(rt);
+ lck_mtx_unlock(rnh_lock);
+ } else {
+ RT_UNLOCK(rt);
+ }
+ } else {
+ RT_UNLOCK(rt);
+ }
+}
+
+static void
+rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
+{
+ VERIFY(rt_key(rt)->sa_family == AF_INET);
+
+ if (ds != NULL) {
+ (void) inet_ntop(AF_INET,
+ &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen);
+ if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
+ SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) {
+ char scpstr[16];
+
+ snprintf(scpstr, sizeof (scpstr), "@%u",
+ SINIFSCOPE(rt_key(rt))->sin_scope_id);
+
+ strlcat(ds, scpstr, dslen);
+ }
+ }
+
+ if (gs != NULL) {
+ if (rt->rt_flags & RTF_GATEWAY) {
+ (void) inet_ntop(AF_INET,
+ &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen);
+ } else if (rt->rt_ifp != NULL) {
+ snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
+ } else {
+ snprintf(gs, gslen, "%s", "link");
+ }
+ }
+}
+
+#if INET6
+static void
+rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
+{
+ VERIFY(rt_key(rt)->sa_family == AF_INET6);
+
+ if (ds != NULL) {
+ (void) inet_ntop(AF_INET6,
+ &SIN6(rt_key(rt))->sin6_addr, ds, dslen);
+ if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
+ SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) {
+ char scpstr[16];
+
+ snprintf(scpstr, sizeof (scpstr), "@%u",
+ SIN6IFSCOPE(rt_key(rt))->sin6_scope_id);
+
+ strlcat(ds, scpstr, dslen);
+ }
+ }
+
+ if (gs != NULL) {
+ if (rt->rt_flags & RTF_GATEWAY) {
+ (void) inet_ntop(AF_INET6,
+ &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen);
+ } else if (rt->rt_ifp != NULL) {
+ snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
+ } else {
+ snprintf(gs, gslen, "%s", "link");
+ }
+ }
+}
+#endif /* INET6 */
+
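+/*
+ * Format a route's destination (ds) and gateway (gs) as strings for
+ * logging.  Either buffer may be NULL to skip it; unsupported address
+ * families yield zeroed buffers.
+ */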
+void
+rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
+{
+ switch (rt_key(rt)->sa_family) {
+ case AF_INET:
+ rt_str4(rt, ds, dslen, gs, gslen);
+ break;
+#if INET6
+ case AF_INET6:
+ rt_str6(rt, ds, dslen, gs, gslen);
+ break;
+#endif /* INET6 */
+ default:
+ if (ds != NULL)
+ bzero(ds, dslen);
+ if (gs != NULL)
+ bzero(gs, gslen);
+ break;
+ }
+}