X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..7ee9d059c4eecf68ae4f8b0fb99ae2471eda79af:/bsd/netinet/in_rmx.c

diff --git a/bsd/netinet/in_rmx.c b/bsd/netinet/in_rmx.c
index 419befad8..2d0c2735d 100644
--- a/bsd/netinet/in_rmx.c
+++ b/bsd/netinet/in_rmx.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -75,17 +75,19 @@
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/mbuf.h>
+#include <sys/protosw.h>
 #include <sys/syslog.h>
+#include <sys/mcache.h>
 #include <kern/locks.h>
 #include <net/if.h>
 #include <net/route.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/in_arp.h>
 
 extern int tvtohz(struct timeval *);
 extern int in_inithead(void **head, int off);
 
-extern u_long route_generation;
 
 #ifdef __APPLE__
 static void in_rtqtimo(void *rock);
@@ -107,13 +109,16 @@ in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
 	struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt);
 	struct radix_node *ret;
 
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+	RT_LOCK_ASSERT_HELD(rt);
+
 	/*
 	 * For IP, all unicast non-host routes are automatically cloning.
 	 */
-	if(IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
+	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		rt->rt_flags |= RTF_MULTICAST;
 
-	if(!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
+	if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
 		rt->rt_flags |= RTF_PRCLONING;
 	}
 
@@ -137,11 +142,15 @@
 		if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
 			rt->rt_flags |= RTF_BROADCAST;
 		} else {
+			/* Become a regular mutex */
+			RT_CONVERT_LOCK(rt);
+			IFA_LOCK_SPIN(rt->rt_ifa);
 #define satosin(sa) ((struct sockaddr_in *)sa)
 			if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
 			    sin->sin_addr.s_addr)
 				rt->rt_flags |= RTF_LOCAL;
 #undef satosin
+			IFA_UNLOCK(rt->rt_ifa);
 		}
 	}
 
@@ -158,18 +167,27 @@
 		 * ARP entry and delete it if so.
 		 */
 		rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
-		    RTF_CLONING | RTF_PRCLONING, sa_get_ifscope(rt_key(rt)));
+		    RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt)));
 		if (rt2) {
-			if (rt2->rt_flags & RTF_LLINFO &&
-			    rt2->rt_flags & RTF_HOST &&
-			    rt2->rt_gateway &&
-			    rt2->rt_gateway->sa_family == AF_LINK) {
-				rtrequest_locked(RTM_DELETE,
-				    (struct sockaddr *)rt_key(rt2),
-				    rt2->rt_gateway,
-				    rt_mask(rt2), rt2->rt_flags, 0);
+			RT_LOCK(rt2);
+			if ((rt2->rt_flags & RTF_LLINFO) &&
+			    (rt2->rt_flags & RTF_HOST) &&
+			    rt2->rt_gateway != NULL &&
+			    rt2->rt_gateway->sa_family == AF_LINK) {
+				/*
+				 * Safe to drop rt_lock and use rt_key,
+				 * rt_gateway, since holding rnh_lock here
+				 * prevents another thread from calling
+				 * rt_setgate() on this route.
+				 */
+				RT_UNLOCK(rt2);
+				rtrequest_locked(RTM_DELETE, rt_key(rt2),
+				    rt2->rt_gateway, rt_mask(rt2),
+				    rt2->rt_flags, 0);
 				ret = rn_addroute(v_arg, n_arg, head,
 				    treenodes);
+			} else {
+				RT_UNLOCK(rt2);
 			}
 			rtfree_locked(rt2);
 		}
@@ -185,10 +203,20 @@ in_validate(struct radix_node *rn)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
+	RT_LOCK_ASSERT_HELD(rt);
+
 	/* This is first reference? */
-	if (rt != NULL && rt->rt_refcnt == 0 && (rt->rt_flags & RTPRF_OURS)) {
-		rt->rt_flags &= ~RTPRF_OURS;
-		rt->rt_rmx.rmx_expire = 0;
+	if (rt->rt_refcnt == 0) {
+		if (rt->rt_flags & RTPRF_OURS) {
+			/* It's one of ours; unexpire it */
+			rt->rt_flags &= ~RTPRF_OURS;
+			rt_setexpire(rt, 0);
+		} else if ((rt->rt_flags & RTF_LLINFO) &&
+		    (rt->rt_flags & RTF_HOST) && rt->rt_gateway != NULL &&
+		    rt->rt_gateway->sa_family == AF_LINK) {
+			/* It's ARP; let it be handled there */
+			arp_validate(rt);
+		}
 	}
 	return (rn);
 }
@@ -213,24 +241,29 @@ in_matroute_args(void *v_arg, struct radix_node_head *head,
 {
 	struct radix_node *rn = rn_match_args(v_arg, head, f, w);
 
-	return (in_validate(rn));
+	if (rn != NULL) {
+		RT_LOCK_SPIN((struct rtentry *)rn);
+		in_validate(rn);
+		RT_UNLOCK((struct rtentry *)rn);
+	}
+	return (rn);
 }
 
 static int rtq_reallyold = 60*60;
 	/* one hour is ``really old'' */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire, CTLFLAG_RW | CTLFLAG_LOCKED,
     &rtq_reallyold , 0,
     "Default expiration time on dynamically learned routes");
 
 static int rtq_minreallyold = 10;
 	/* never automatically crank down to less */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW | CTLFLAG_LOCKED,
     &rtq_minreallyold , 0,
     "Minimum time to attempt to hold onto dynamically learned routes");
 
 static int rtq_toomany = 128;
 	/* 128 cached routes is ``too many'' */
-SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW | CTLFLAG_LOCKED,
     &rtq_toomany , 0, "Upper limit on dynamically learned routes");
 
 #ifdef __APPLE__
@@ -247,12 +280,12 @@ SYSCTL_INT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache, CTLFLAG_RW,
  * If for some reason a circular route is needed, turn this sysctl (net.inet.ip.check_route_selfref) to zero.
  */
 int check_routeselfref = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_ip, OID_AUTO, check_route_selfref, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&check_routeselfref , 0, "");
 #endif
 
-__private_extern__ int use_routegenid = 1;
-SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW,
+int use_routegenid = 1;
+SYSCTL_INT(_net_inet_ip, OID_AUTO, use_route_genid, CTLFLAG_RW | CTLFLAG_LOCKED,
 	&use_routegenid , 0, "");
 
 /*
@@ -264,6 +297,9 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
 {
 	struct rtentry *rt = (struct rtentry *)rn;
 
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+	RT_LOCK_ASSERT_HELD(rt);
+
 	if (!(rt->rt_flags & RTF_UP))
 		return;		/* prophylactic measures */
 
@@ -284,18 +320,26 @@ in_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
 		 * called when the route's reference count is 0, don't
 		 * deallocate it until we return from this routine by
 		 * telling rtrequest that we're interested in it.
+		 * Safe to drop rt_lock and use rt_key, rt_gateway since
+		 * holding rnh_lock here prevents another thread from
+		 * calling rt_setgate() on this route.
 		 */
+		RT_UNLOCK(rt);
 		if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt),
 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) {
 			/* Now let the caller free it */
-			rtunref(rt);
+			RT_LOCK(rt);
+			RT_REMREF_LOCKED(rt);
+		} else {
+			RT_LOCK(rt);
 		}
 	} else {
-		struct timeval timenow;
+		uint64_t timenow;
 
-		getmicrotime(&timenow);
+		timenow = net_uptime();
 		rt->rt_flags |= RTPRF_OURS;
-		rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold;
+		rt_setexpire(rt,
+		    rt_expiry(rt, timenow, rtq_reallyold));
 	}
 }
 
@@ -305,7 +349,7 @@ struct rtqk_arg {
 	int killed;
 	int found;
 	int updating;
-	time_t nextstop;
+	uint64_t nextstop;
 };
 
 /*
@@ -319,37 +363,51 @@ in_rtqkill(struct radix_node *rn, void *rock)
 	struct rtqk_arg *ap = rock;
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
-	struct timeval timenow;
+	uint64_t timenow;
 
-	getmicrotime(&timenow);
-	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+	timenow = net_uptime();
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
 
+	RT_LOCK(rt);
 	if (rt->rt_flags & RTPRF_OURS) {
 		ap->found++;
 
-		if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) {
+		VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
+		VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
+		if (ap->draining || rt->rt_expire <= timenow) {
 			if (rt->rt_refcnt > 0)
 				panic("rtqkill route really not free");
 
-			err = rtrequest_locked(RTM_DELETE,
-			    (struct sockaddr *)rt_key(rt),
-			    rt->rt_gateway, rt_mask(rt),
-			    rt->rt_flags, 0);
+			/*
+			 * Delete this route since we're done with it;
+			 * the route may be freed afterwards, so we
+			 * can no longer refer to 'rt' upon returning
+			 * from rtrequest().  Safe to drop rt_lock and
+			 * use rt_key, rt_gateway since holding rnh_lock
+			 * here prevents another thread from calling
+			 * rt_setgate() on this route.
+			 */
+			RT_UNLOCK(rt);
+			err = rtrequest_locked(RTM_DELETE, rt_key(rt),
+			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
 			if (err) {
 				log(LOG_WARNING, "in_rtqkill: error %d\n", err);
 			} else {
 				ap->killed++;
 			}
 		} else {
-			if (ap->updating
-			    && (rt->rt_rmx.rmx_expire - timenow.tv_sec
-				> rtq_reallyold)) {
-				rt->rt_rmx.rmx_expire = timenow.tv_sec
-				    + rtq_reallyold;
+			if (ap->updating &&
+			    (rt->rt_expire - timenow) >
+			    rt_expiry(rt, 0, rtq_reallyold)) {
+				rt_setexpire(rt, rt_expiry(rt,
+				    timenow, rtq_reallyold));
 			}
 			ap->nextstop = lmin(ap->nextstop,
-			    rt->rt_rmx.rmx_expire);
+			    rt->rt_expire);
+			RT_UNLOCK(rt);
 		}
+	} else {
+		RT_UNLOCK(rt);
 	}
 
 	return 0;
@@ -370,16 +428,16 @@ in_rtqtimo(void *rock)
 	struct radix_node_head *rnh = rock;
 	struct rtqk_arg arg;
 	struct timeval atv;
-	static time_t last_adjusted_timeout = 0;
-	struct timeval timenow;
+	static uint64_t last_adjusted_timeout = 0;
+	uint64_t timenow;
 
-	lck_mtx_lock(rt_mtx);
+	lck_mtx_lock(rnh_lock);
 	/* Get the timestamp after we acquire the lock for better accuracy */
-	getmicrotime(&timenow);
+	timenow = net_uptime();
 
 	arg.found = arg.killed = 0;
 	arg.rnh = rnh;
-	arg.nextstop = timenow.tv_sec + rtq_timeout;
+	arg.nextstop = timenow + rtq_timeout;
 	arg.draining = arg.updating = 0;
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
 
@@ -392,14 +450,14 @@ in_rtqtimo(void *rock)
 	 * hard.
 	 */
 	if((arg.found - arg.killed > rtq_toomany)
-	   && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout)
+	   && ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout)
 	   && rtq_reallyold > rtq_minreallyold) {
 		rtq_reallyold = 2*rtq_reallyold / 3;
 		if(rtq_reallyold < rtq_minreallyold) {
 			rtq_reallyold = rtq_minreallyold;
 		}
 
-		last_adjusted_timeout = timenow.tv_sec;
+		last_adjusted_timeout = timenow;
 #if DIAGNOSTIC
 		log(LOG_DEBUG, "in_rtqtimo: adjusted rtq_reallyold to %d\n",
 		    rtq_reallyold);
@@ -410,8 +468,8 @@ in_rtqtimo(void *rock)
 	}
 
 	atv.tv_usec = 0;
-	atv.tv_sec = arg.nextstop - timenow.tv_sec;
-	lck_mtx_unlock(rt_mtx);
+	atv.tv_sec = arg.nextstop - timenow;
+	lck_mtx_unlock(rnh_lock);
 	timeout(in_rtqtimo_funnel, rock, tvtohz(&atv));
 }
 
@@ -425,9 +483,9 @@ in_rtqdrain(void)
 	arg.nextstop = 0;
 	arg.draining = 1;
 	arg.updating = 0;
-	lck_mtx_lock(rt_mtx);
+	lck_mtx_lock(rnh_lock);
 	rnh->rnh_walktree(rnh, in_rtqkill, &arg);
-	lck_mtx_unlock(rt_mtx);
+	lck_mtx_unlock(rnh_lock);
 }
 
 /*
@@ -481,6 +539,7 @@ in_ifadownkill(struct radix_node *rn, void *xap)
 	struct rtentry *rt = (struct rtentry *)rn;
 	int err;
 
+	RT_LOCK(rt);
 	if (rt->rt_ifa == ap->ifa &&
 	    (ap->del || !(rt->rt_flags & RTF_STATIC))) {
 		/*
@@ -489,14 +548,20 @@ in_ifadownkill(struct radix_node *rn, void *xap)
 		 * away the pointers that rn_walktree() needs in order
 		 * continue our descent.  We will end up deleting all
 		 * the routes that rtrequest() would have in any case,
-		 * so that behavior is not needed there.
+		 * so that behavior is not needed there.  Safe to drop
+		 * rt_lock and use rt_key, rt_gateway, since holding
+		 * rnh_lock here prevents another thread from calling
+		 * rt_setgate() on this route.
 		 */
 		rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
-		err = rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt),
-		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
+		RT_UNLOCK(rt);
+		err = rtrequest_locked(RTM_DELETE, rt_key(rt),
+		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
 		if (err) {
 			log(LOG_WARNING, "in_ifadownkill: error %d\n", err);
 		}
+	} else {
+		RT_UNLOCK(rt);
 	}
 	return 0;
 }
@@ -507,19 +572,26 @@ in_ifadown(struct ifaddr *ifa, int delete)
 {
 	struct in_ifadown_arg arg;
 	struct radix_node_head *rnh;
 
-	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
+	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);
+	/*
+	 * Holding rnh_lock here prevents the possibility of
+	 * ifa from changing (e.g. in_ifinit), so it is safe
+	 * to access its ifa_addr without locking.
+	 */
 	if (ifa->ifa_addr->sa_family != AF_INET)
-		return 1;
+		return (1);
 
 	/* trigger route cache reevaluation */
-	if (use_routegenid)
-		route_generation++;
+	if (use_routegenid)
+		routegenid_update();
 
 	arg.rnh = rnh = rt_tables[AF_INET];
 	arg.ifa = ifa;
 	arg.del = delete;
 	rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
+	IFA_LOCK_SPIN(ifa);
 	ifa->ifa_flags &= ~IFA_ROUTE;
-	return 0;
+	IFA_UNLOCK(ifa);
+	return (0);
 }
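
Note: a condensed sketch of the locking discipline this patch applies throughout in_rmx.c (the same pattern appears above in in_addroute, in_clsroute, in_rtqkill and in_ifadownkill). It reuses the XNU-internal primitives exactly as they appear in the diff; example_walker is a hypothetical name, and the snippet is illustrative rather than a standalone-buildable unit.

/*
 * Hypothetical tree-walk callback modeled on in_rtqkill() above.
 * Lock order: rnh_lock (the radix-tree lock) is held across the
 * whole walk; each rtentry is inspected under its own rt_lock; and
 * rt_lock is dropped again before rtrequest_locked().  Holding
 * rnh_lock alone keeps rt_key()/rt_gateway stable, because no other
 * thread can call rt_setgate() on the route in the meantime.
 */
static int
example_walker(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	int err;

	lck_mtx_assert(rnh_lock, LCK_MTX_ASSERT_OWNED);

	RT_LOCK(rt);
	if (!(rt->rt_flags & RTPRF_OURS)) {
		/* Not one of our protocol-cloned routes; skip it */
		RT_UNLOCK(rt);
		return (0);
	}
	/*
	 * Drop rt_lock before deleting: 'rt' may be freed inside
	 * rtrequest_locked(), so it must not be touched afterwards.
	 */
	RT_UNLOCK(rt);
	err = rtrequest_locked(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	    rt_mask(rt), rt->rt_flags, NULL);
	if (err)
		log(LOG_WARNING, "example_walker: error %d\n", err);
	return (0);
}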
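
The other recurring change is the time base: wall-clock getmicrotime() timestamps are replaced by the monotonic net_uptime() counter, and rt_rmx.rmx_expire is no longer written directly; expirations go through rt_setexpire(), with rt_expiry() bounding the requested lifetime. A minimal sketch of the new idiom, assuming the same kernel-internal helpers used above (example_expire is a hypothetical name):

/*
 * Hypothetical helper showing the post-patch expiration idiom from
 * in_clsroute(): take the timestamp from the monotonic net_uptime()
 * clock and set the expiration only via rt_setexpire()/rt_expiry(),
 * never by assigning to rt_rmx.rmx_expire directly.
 */
static void
example_expire(struct rtentry *rt)
{
	uint64_t timenow = net_uptime();	/* seconds, monotonic */

	RT_LOCK_ASSERT_HELD(rt);
	rt->rt_flags |= RTPRF_OURS;	/* mark as protocol-owned */
	/* rt_expiry() caps the rtq_reallyold lifetime for this route */
	rt_setexpire(rt, rt_expiry(rt, timenow, rtq_reallyold));
}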