]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/in_rmx.c
xnu-4903.241.1.tar.gz
[apple/xnu.git] / bsd / netinet / in_rmx.c
CommitLineData
1c79356b 1/*
d9a64523 2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39236c6e 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39236c6e 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39236c6e 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39236c6e 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright 1994, 1995 Massachusetts Institute of Technology
30 *
31 * Permission to use, copy, modify, and distribute this software and
32 * its documentation for any purpose and without fee is hereby
33 * granted, provided that both the above copyright notice and this
34 * permission notice appear in all copies, that both the above
35 * copyright notice and this permission notice appear in all
36 * supporting documentation, and that the name of M.I.T. not be used
37 * in advertising or publicity pertaining to distribution of the
38 * software without specific, written prior permission. M.I.T. makes
39 * no representations about the suitability of this software for any
40 * purpose. It is provided "as is" without express or implied
41 * warranty.
42 *
43 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
44 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
45 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
46 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
47 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
48 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
49 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
50 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
51 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
52 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
53 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * SUCH DAMAGE.
55 *
56 */
57
58/*
59 * This code does two things necessary for the enhanced TCP metrics to
60 * function in a useful manner:
61 * 1) It marks all non-host routes as `cloning', thus ensuring that
62 * every actual reference to such a route actually gets turned
63 * into a reference to a host route to the specific destination
64 * requested.
65 * 2) When such routes lose all their references, it arranges for them
66 * to be deleted in some random collection of circumstances, so that
67 * a large quantity of stale routing data is not kept in kernel memory
68 * indefinitely. See in_rtqtimo() below for the exact mechanism.
69 */
70
71#include <sys/param.h>
72#include <sys/systm.h>
73#include <sys/kernel.h>
74#include <sys/sysctl.h>
75#include <sys/socket.h>
76#include <sys/mbuf.h>
6d2010ae 77#include <sys/protosw.h>
1c79356b 78#include <sys/syslog.h>
6d2010ae 79#include <sys/mcache.h>
fe8ab488 80#include <kern/locks.h>
1c79356b
A
81
82#include <net/if.h>
83#include <net/route.h>
84#include <netinet/in.h>
85#include <netinet/in_var.h>
6d2010ae 86#include <netinet/in_arp.h>
d9a64523
A
87#include <netinet/ip.h>
88#include <netinet/ip6.h>
89#include <netinet6/nd6.h>
1c79356b 90
2d21ac55 91extern int tvtohz(struct timeval *);
1c79356b 92
39236c6e
A
93static int in_rtqtimo_run; /* in_rtqtimo is scheduled to run */
94static void in_rtqtimo(void *);
95static void in_sched_rtqtimo(struct timeval *);
9bccf70c 96
39236c6e
A
97static struct radix_node *in_addroute(void *, void *, struct radix_node_head *,
98 struct radix_node *);
99static struct radix_node *in_deleteroute(void *, void *,
100 struct radix_node_head *);
101static struct radix_node *in_matroute(void *, struct radix_node_head *);
c910b4d9
A
102static struct radix_node *in_matroute_args(void *, struct radix_node_head *,
103 rn_matchf_t *f, void *);
39236c6e
A
104static void in_clsroute(struct radix_node *, struct radix_node_head *);
105static int in_rtqkill(struct radix_node *, void *);
106
107static int in_ifadownkill(struct radix_node *, void *);
c910b4d9 108
1c79356b
A
109/*
110 * Do what we need to do when inserting a route.
111 */
112static struct radix_node *
113in_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
39236c6e 114 struct radix_node *treenodes)
1c79356b
A
115{
116 struct rtentry *rt = (struct rtentry *)treenodes;
316670eb 117 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)rt_key(rt);
1c79356b 118 struct radix_node *ret;
39236c6e
A
119 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
120 uint32_t flags = rt->rt_flags;
121 boolean_t verbose = (rt_verbose > 1);
1c79356b 122
5ba3f43e 123 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
124 RT_LOCK_ASSERT_HELD(rt);
125
39236c6e
A
126 if (verbose)
127 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
128
1c79356b
A
129 /*
130 * For IP, all unicast non-host routes are automatically cloning.
131 */
b0d623f7 132 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
1c79356b
A
133 rt->rt_flags |= RTF_MULTICAST;
134
39236c6e 135 if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST)))
1c79356b 136 rt->rt_flags |= RTF_PRCLONING;
1c79356b
A
137
138 /*
139 * A little bit of help for both IP output and input:
140 * For host routes, we make sure that RTF_BROADCAST
141 * is set for anything that looks like a broadcast address.
142 * This way, we can avoid an expensive call to in_broadcast()
143 * in ip_output() most of the time (because the route passed
144 * to ip_output() is almost always a host route).
145 *
146 * We also do the same for local addresses, with the thought
147 * that this might one day be used to speed up ip_input().
148 *
149 * We also mark routes to multicast addresses as such, because
150 * it's easy to do and might be useful (but this is much more
151 * dubious since it's so easy to inspect the address). (This
152 * is done above.)
153 */
154 if (rt->rt_flags & RTF_HOST) {
155 if (in_broadcast(sin->sin_addr, rt->rt_ifp)) {
156 rt->rt_flags |= RTF_BROADCAST;
157 } else {
6d2010ae
A
158 /* Become a regular mutex */
159 RT_CONVERT_LOCK(rt);
160 IFA_LOCK_SPIN(rt->rt_ifa);
39236c6e
A
161 if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr ==
162 sin->sin_addr.s_addr)
1c79356b 163 rt->rt_flags |= RTF_LOCAL;
6d2010ae 164 IFA_UNLOCK(rt->rt_ifa);
1c79356b
A
165 }
166 }
167
39236c6e 168 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
d9a64523 169 rt->rt_ifp) {
1c79356b 170 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
d9a64523
A
171 if (INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
172 rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
173 /* Further adjust the size for CLAT46 expansion */
174 rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
175 }
176 }
1c79356b
A
177
178 ret = rn_addroute(v_arg, n_arg, head, treenodes);
39236c6e 179 if (ret == NULL && (rt->rt_flags & RTF_HOST)) {
1c79356b
A
180 struct rtentry *rt2;
181 /*
182 * We are trying to add a host route, but can't.
183 * Find out if it is because of an
184 * ARP entry and delete it if so.
185 */
c910b4d9 186 rt2 = rtalloc1_scoped_locked(rt_key(rt), 0,
6d2010ae 187 RTF_CLONING | RTF_PRCLONING, sin_get_ifscope(rt_key(rt)));
39236c6e
A
188 if (rt2 != NULL) {
189 char dbufc[MAX_IPv4_STR_LEN];
190
b0d623f7 191 RT_LOCK(rt2);
39236c6e
A
192 if (verbose)
193 rt_str(rt2, dbufc, sizeof (dbufc), NULL, 0);
194
b0d623f7
A
195 if ((rt2->rt_flags & RTF_LLINFO) &&
196 (rt2->rt_flags & RTF_HOST) &&
197 rt2->rt_gateway != NULL &&
198 rt2->rt_gateway->sa_family == AF_LINK) {
39236c6e
A
199 if (verbose) {
200 log(LOG_DEBUG, "%s: unable to insert "
201 "route to %s;%s, flags=%b, due to "
202 "existing ARP route %s->%s "
203 "flags=%b, attempting to delete\n",
204 __func__, dbuf,
205 (rt->rt_ifp != NULL) ?
206 rt->rt_ifp->if_xname : "",
207 rt->rt_flags, RTF_BITS, dbufc,
208 (rt2->rt_ifp != NULL) ?
209 rt2->rt_ifp->if_xname : "",
210 rt2->rt_flags, RTF_BITS);
211 }
b0d623f7
A
212 /*
213 * Safe to drop rt_lock and use rt_key,
214 * rt_gateway, since holding rnh_lock here
215 * prevents another thread from calling
216 * rt_setgate() on this route.
217 */
218 RT_UNLOCK(rt2);
39236c6e 219 (void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
b0d623f7 220 rt2->rt_gateway, rt_mask(rt2),
39236c6e 221 rt2->rt_flags, NULL);
1c79356b 222 ret = rn_addroute(v_arg, n_arg, head,
39236c6e 223 treenodes);
b0d623f7
A
224 } else {
225 RT_UNLOCK(rt2);
1c79356b 226 }
91447636 227 rtfree_locked(rt2);
1c79356b
A
228 }
229 }
39236c6e
A
230
231 if (!verbose)
232 goto done;
233
234 if (ret != NULL) {
235 if (flags != rt->rt_flags) {
236 log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
237 "oflags=%b, flags=%b\n", __func__,
238 dbuf, gbuf, (rt->rt_ifp != NULL) ?
239 rt->rt_ifp->if_xname : "", flags, RTF_BITS,
240 rt->rt_flags, RTF_BITS);
241 } else {
242 log(LOG_DEBUG, "%s: route to %s->%s->%s inserted, "
243 "flags=%b\n", __func__, dbuf, gbuf,
244 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
245 rt->rt_flags, RTF_BITS);
246 }
247 } else {
248 log(LOG_DEBUG, "%s: unable to insert route to %s->%s->%s, "
249 "flags=%b, already exists\n", __func__, dbuf, gbuf,
250 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
251 rt->rt_flags, RTF_BITS);
252 }
253done:
254 return (ret);
255}
256
257static struct radix_node *
258in_deleteroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
259{
260 struct radix_node *rn;
261
5ba3f43e 262 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
39236c6e
A
263
264 rn = rn_delete(v_arg, netmask_arg, head);
265 if (rt_verbose > 1 && rn != NULL) {
266 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
267 struct rtentry *rt = (struct rtentry *)rn;
268
269 RT_LOCK(rt);
270 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
271 log(LOG_DEBUG, "%s: route to %s->%s->%s deleted, "
272 "flags=%b\n", __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
273 rt->rt_ifp->if_xname : "", rt->rt_flags, RTF_BITS);
274 RT_UNLOCK(rt);
275 }
276 return (rn);
1c79356b
A
277}
278
c910b4d9
A
279/*
280 * Validate (unexpire) an expiring AF_INET route.
281 */
282struct radix_node *
283in_validate(struct radix_node *rn)
284{
285 struct rtentry *rt = (struct rtentry *)rn;
286
b0d623f7
A
287 RT_LOCK_ASSERT_HELD(rt);
288
c910b4d9 289 /* This is first reference? */
6d2010ae 290 if (rt->rt_refcnt == 0) {
39236c6e
A
291 if (rt_verbose > 2) {
292 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
293
294 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
295 log(LOG_DEBUG, "%s: route to %s->%s->%s validated, "
296 "flags=%b\n", __func__, dbuf, gbuf,
297 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
298 rt->rt_flags, RTF_BITS);
299 }
300
301 /*
302 * It's one of ours; unexpire it. If the timer is already
303 * scheduled, let it run later as it won't re-arm itself
304 * if there's nothing to do.
305 */
6d2010ae 306 if (rt->rt_flags & RTPRF_OURS) {
6d2010ae
A
307 rt->rt_flags &= ~RTPRF_OURS;
308 rt_setexpire(rt, 0);
6d2010ae 309 }
c910b4d9
A
310 }
311 return (rn);
312}
313
314/*
315 * Similar to in_matroute_args except without the leaf-matching parameters.
316 */
317static struct radix_node *
318in_matroute(void *v_arg, struct radix_node_head *head)
319{
320 return (in_matroute_args(v_arg, head, NULL, NULL));
321}
322
1c79356b
A
323/*
324 * This code is the inverse of in_clsroute: on first reference, if we
325 * were managing the route, stop doing so and set the expiration timer
326 * back off again.
327 */
328static struct radix_node *
c910b4d9
A
329in_matroute_args(void *v_arg, struct radix_node_head *head,
330 rn_matchf_t *f, void *w)
1c79356b 331{
c910b4d9 332 struct radix_node *rn = rn_match_args(v_arg, head, f, w);
1c79356b 333
b0d623f7
A
334 if (rn != NULL) {
335 RT_LOCK_SPIN((struct rtentry *)rn);
336 in_validate(rn);
337 RT_UNLOCK((struct rtentry *)rn);
338 }
339 return (rn);
1c79356b
A
340}
341
39236c6e
A
342/* one hour is ``really old'' */
343static uint32_t rtq_reallyold = 60*60;
344SYSCTL_UINT(_net_inet_ip, IPCTL_RTEXPIRE, rtexpire,
345 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_reallyold, 0,
346 "Default expiration time on dynamically learned routes");
9bccf70c 347
39236c6e
A
348/* never automatically crank down to less */
349static uint32_t rtq_minreallyold = 10;
350SYSCTL_UINT(_net_inet_ip, IPCTL_RTMINEXPIRE, rtminexpire,
351 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_minreallyold, 0,
352 "Minimum time to attempt to hold onto dynamically learned routes");
353
354/* 128 cached routes is ``too many'' */
355static uint32_t rtq_toomany = 128;
356SYSCTL_UINT(_net_inet_ip, IPCTL_RTMAXCACHE, rtmaxcache,
357 CTLFLAG_RW | CTLFLAG_LOCKED, &rtq_toomany, 0,
358 "Upper limit on dynamically learned routes");
1c79356b
A
359
360/*
361 * On last reference drop, mark the route as belong to us so that it can be
362 * timed out.
363 */
364static void
39236c6e 365in_clsroute(struct radix_node *rn, struct radix_node_head *head)
1c79356b 366{
39236c6e
A
367#pragma unused(head)
368 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
1c79356b 369 struct rtentry *rt = (struct rtentry *)rn;
39236c6e 370 boolean_t verbose = (rt_verbose > 1);
1c79356b 371
5ba3f43e 372 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
373 RT_LOCK_ASSERT_HELD(rt);
374
2d21ac55 375 if (!(rt->rt_flags & RTF_UP))
5ba3f43e 376 return; /* prophylactic measures */
1c79356b 377
2d21ac55 378 if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
1c79356b
A
379 return;
380
39236c6e
A
381 if (rt->rt_flags & RTPRF_OURS)
382 return;
383
384 if (!(rt->rt_flags & (RTF_WASCLONED | RTF_DYNAMIC)))
1c79356b
A
385 return;
386
39236c6e
A
387 if (verbose)
388 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
389
1c79356b 390 /*
2d21ac55
A
391 * Delete the route immediately if RTF_DELCLONE is set or
392 * if route caching is disabled (rtq_reallyold set to 0).
393 * Otherwise, let it expire and be deleted by in_rtqkill().
1c79356b 394 */
2d21ac55 395 if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
39236c6e
A
396 int err;
397
398 if (verbose) {
399 log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
400 "flags=%b\n", __func__, dbuf, gbuf,
401 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
402 rt->rt_flags, RTF_BITS);
403 }
2d21ac55
A
404 /*
405 * Delete the route from the radix tree but since we are
406 * called when the route's reference count is 0, don't
407 * deallocate it until we return from this routine by
408 * telling rtrequest that we're interested in it.
b0d623f7
A
409 * Safe to drop rt_lock and use rt_key, rt_gateway since
410 * holding rnh_lock here prevents another thread from
411 * calling rt_setgate() on this route.
2d21ac55 412 */
b0d623f7 413 RT_UNLOCK(rt);
39236c6e
A
414 err = rtrequest_locked(RTM_DELETE, rt_key(rt),
415 rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt);
416 if (err == 0) {
2d21ac55 417 /* Now let the caller free it */
b0d623f7
A
418 RT_LOCK(rt);
419 RT_REMREF_LOCKED(rt);
420 } else {
421 RT_LOCK(rt);
39236c6e
A
422 if (!verbose)
423 rt_str(rt, dbuf, sizeof (dbuf),
424 gbuf, sizeof (gbuf));
425 log(LOG_ERR, "%s: error deleting route to "
426 "%s->%s->%s, flags=%b, err=%d\n", __func__,
427 dbuf, gbuf, (rt->rt_ifp != NULL) ?
428 rt->rt_ifp->if_xname : "", rt->rt_flags,
429 RTF_BITS, err);
2d21ac55
A
430 }
431 } else {
6d2010ae 432 uint64_t timenow;
2d21ac55 433
6d2010ae 434 timenow = net_uptime();
1c79356b 435 rt->rt_flags |= RTPRF_OURS;
39236c6e
A
436 rt_setexpire(rt, timenow + rtq_reallyold);
437
438 if (verbose) {
439 log(LOG_DEBUG, "%s: route to %s->%s->%s invalidated, "
440 "flags=%b, expire=T+%u\n", __func__, dbuf, gbuf,
441 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
442 rt->rt_flags, RTF_BITS, rt->rt_expire - timenow);
443 }
444
445 /* We have at least one entry; arm the timer if not already */
446 in_sched_rtqtimo(NULL);
1c79356b
A
447 }
448}
449
450struct rtqk_arg {
451 struct radix_node_head *rnh;
1c79356b 452 int updating;
39236c6e
A
453 int draining;
454 uint32_t killed;
455 uint32_t found;
6d2010ae 456 uint64_t nextstop;
1c79356b
A
457};
458
459/*
460 * Get rid of old routes. When draining, this deletes everything, even when
461 * the timeout is not expired yet. When updating, this makes sure that
462 * nothing has a timeout longer than the current value of rtq_reallyold.
463 */
464static int
465in_rtqkill(struct radix_node *rn, void *rock)
466{
467 struct rtqk_arg *ap = rock;
468 struct rtentry *rt = (struct rtentry *)rn;
39236c6e 469 boolean_t verbose = (rt_verbose > 1);
6d2010ae 470 uint64_t timenow;
39236c6e 471 int err;
1c79356b 472
6d2010ae 473 timenow = net_uptime();
5ba3f43e 474 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 475
b0d623f7 476 RT_LOCK(rt);
2d21ac55 477 if (rt->rt_flags & RTPRF_OURS) {
39236c6e
A
478 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
479
480 if (verbose)
481 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
1c79356b 482
39236c6e 483 ap->found++;
6d2010ae
A
484 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
485 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
486 if (ap->draining || rt->rt_expire <= timenow) {
39236c6e
A
487 if (rt->rt_refcnt > 0) {
488 panic("%s: route %p marked with RTPRF_OURS "
489 "with non-zero refcnt (%u)", __func__,
490 rt, rt->rt_refcnt);
491 /* NOTREACHED */
492 }
5ba3f43e 493
39236c6e
A
494 if (verbose) {
495 log(LOG_DEBUG, "%s: deleting route to "
496 "%s->%s->%s, flags=%b, draining=%d\n",
497 __func__, dbuf, gbuf, (rt->rt_ifp != NULL) ?
498 rt->rt_ifp->if_xname : "", rt->rt_flags,
499 RTF_BITS, ap->draining);
500 }
501 RT_ADDREF_LOCKED(rt); /* for us to free below */
b0d623f7
A
502 /*
503 * Delete this route since we're done with it;
504 * the route may be freed afterwards, so we
505 * can no longer refer to 'rt' upon returning
506 * from rtrequest(). Safe to drop rt_lock and
507 * use rt_key, rt_gateway since holding rnh_lock
508 * here prevents another thread from calling
509 * rt_setgate() on this route.
510 */
511 RT_UNLOCK(rt);
512 err = rtrequest_locked(RTM_DELETE, rt_key(rt),
39236c6e
A
513 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
514 if (err != 0) {
515 RT_LOCK(rt);
516 if (!verbose)
517 rt_str(rt, dbuf, sizeof (dbuf),
518 gbuf, sizeof (gbuf));
519 log(LOG_ERR, "%s: error deleting route to "
520 "%s->%s->%s, flags=%b, err=%d\n", __func__,
521 dbuf, gbuf, (rt->rt_ifp != NULL) ?
522 rt->rt_ifp->if_xname : "", rt->rt_flags,
523 RTF_BITS, err);
524 RT_UNLOCK(rt);
1c79356b
A
525 } else {
526 ap->killed++;
527 }
39236c6e 528 rtfree_locked(rt);
1c79356b 529 } else {
39236c6e
A
530 uint64_t expire = (rt->rt_expire - timenow);
531
532 if (ap->updating && expire > rtq_reallyold) {
533 rt_setexpire(rt, timenow + rtq_reallyold);
534 if (verbose) {
535 log(LOG_DEBUG, "%s: route to "
536 "%s->%s->%s, flags=%b, adjusted "
537 "expire=T+%u (was T+%u)\n",
538 __func__, dbuf, gbuf,
539 (rt->rt_ifp != NULL) ?
540 rt->rt_ifp->if_xname : "",
541 rt->rt_flags, RTF_BITS,
542 (rt->rt_expire - timenow), expire);
543 }
1c79356b 544 }
39236c6e 545 ap->nextstop = lmin(ap->nextstop, rt->rt_expire);
b0d623f7 546 RT_UNLOCK(rt);
1c79356b 547 }
b0d623f7
A
548 } else {
549 RT_UNLOCK(rt);
1c79356b
A
550 }
551
39236c6e 552 return (0);
1c79356b
A
553}
554
/*
 * Longest interval, in seconds, between two runs of the route-expiry
 * timer.  The replacement list is parenthesized so the macro expands
 * safely inside larger expressions (e.g. `x / RTQ_TIMEOUT').
 */
#define	RTQ_TIMEOUT	(60 * 10)	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;
557
558static void
39236c6e 559in_rtqtimo(void *targ)
1c79356b 560{
39236c6e
A
561#pragma unused(targ)
562 struct radix_node_head *rnh;
1c79356b
A
563 struct rtqk_arg arg;
564 struct timeval atv;
6d2010ae 565 static uint64_t last_adjusted_timeout = 0;
39236c6e 566 boolean_t verbose = (rt_verbose > 1);
6d2010ae 567 uint64_t timenow;
39236c6e 568 uint32_t ours;
9bccf70c 569
b0d623f7 570 lck_mtx_lock(rnh_lock);
39236c6e
A
571 rnh = rt_tables[AF_INET];
572 VERIFY(rnh != NULL);
2d21ac55 573
39236c6e
A
574 /* Get the timestamp after we acquire the lock for better accuracy */
575 timenow = net_uptime();
576 if (verbose) {
577 log(LOG_DEBUG, "%s: initial nextstop is T+%u seconds\n",
578 __func__, rtq_timeout);
579 }
580 bzero(&arg, sizeof (arg));
1c79356b 581 arg.rnh = rnh;
6d2010ae 582 arg.nextstop = timenow + rtq_timeout;
1c79356b 583 rnh->rnh_walktree(rnh, in_rtqkill, &arg);
39236c6e
A
584 if (verbose) {
585 log(LOG_DEBUG, "%s: found %u, killed %u\n", __func__,
586 arg.found, arg.killed);
587 }
1c79356b
A
588 /*
589 * Attempt to be somewhat dynamic about this:
590 * If there are ``too many'' routes sitting around taking up space,
591 * then crank down the timeout, and see if we can't make some more
592 * go away. However, we make sure that we will never adjust more
593 * than once in rtq_timeout seconds, to keep from cranking down too
594 * hard.
595 */
39236c6e
A
596 ours = (arg.found - arg.killed);
597 if (ours > rtq_toomany &&
598 ((timenow - last_adjusted_timeout) >= (uint64_t)rtq_timeout) &&
599 rtq_reallyold > rtq_minreallyold) {
600 rtq_reallyold = 2 * rtq_reallyold / 3;
601 if (rtq_reallyold < rtq_minreallyold)
1c79356b 602 rtq_reallyold = rtq_minreallyold;
1c79356b 603
6d2010ae 604 last_adjusted_timeout = timenow;
39236c6e
A
605 if (verbose) {
606 log(LOG_DEBUG, "%s: adjusted rtq_reallyold to %d "
607 "seconds\n", __func__, rtq_reallyold);
608 }
1c79356b
A
609 arg.found = arg.killed = 0;
610 arg.updating = 1;
1c79356b 611 rnh->rnh_walktree(rnh, in_rtqkill, &arg);
1c79356b
A
612 }
613
614 atv.tv_usec = 0;
6d2010ae 615 atv.tv_sec = arg.nextstop - timenow;
39236c6e
A
616 /* re-arm the timer only if there's work to do */
617 in_rtqtimo_run = 0;
618 if (ours > 0)
619 in_sched_rtqtimo(&atv);
620 else if (verbose)
621 log(LOG_DEBUG, "%s: not rescheduling timer\n", __func__);
b0d623f7 622 lck_mtx_unlock(rnh_lock);
39236c6e
A
623}
624
625static void
626in_sched_rtqtimo(struct timeval *atv)
627{
5ba3f43e 628 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
39236c6e
A
629
630 if (!in_rtqtimo_run) {
631 struct timeval tv;
632
633 if (atv == NULL) {
634 tv.tv_usec = 0;
635 tv.tv_sec = MAX(rtq_timeout / 10, 1);
636 atv = &tv;
637 }
638 if (rt_verbose > 1) {
639 log(LOG_DEBUG, "%s: timer scheduled in "
640 "T+%llus.%lluu\n", __func__,
641 (uint64_t)atv->tv_sec, (uint64_t)atv->tv_usec);
642 }
643 in_rtqtimo_run = 1;
644 timeout(in_rtqtimo, NULL, tvtohz(atv));
645 }
1c79356b
A
646}
647
648void
649in_rtqdrain(void)
650{
39236c6e 651 struct radix_node_head *rnh;
1c79356b 652 struct rtqk_arg arg;
39236c6e
A
653
654 if (rt_verbose > 1)
655 log(LOG_DEBUG, "%s: draining routes\n", __func__);
656
657 lck_mtx_lock(rnh_lock);
658 rnh = rt_tables[AF_INET];
659 VERIFY(rnh != NULL);
660 bzero(&arg, sizeof (arg));
1c79356b 661 arg.rnh = rnh;
1c79356b 662 arg.draining = 1;
1c79356b 663 rnh->rnh_walktree(rnh, in_rtqkill, &arg);
b0d623f7 664 lck_mtx_unlock(rnh_lock);
1c79356b
A
665}
666
667/*
668 * Initialize our routing tree.
669 */
670int
671in_inithead(void **head, int off)
672{
673 struct radix_node_head *rnh;
9bccf70c 674
39236c6e
A
675 /* If called from route_init(), make sure it is exactly once */
676 VERIFY(head != (void **)&rt_tables[AF_INET] || *head == NULL);
1c79356b 677
39236c6e
A
678 if (!rn_inithead(head, off))
679 return (0);
1c79356b 680
39236c6e
A
681 /*
682 * We can get here from nfs_subs.c as well, in which case this
683 * won't be for the real routing table and thus we're done;
684 * this also takes care of the case when we're called more than
685 * once from anywhere but route_init().
686 */
687 if (head != (void **)&rt_tables[AF_INET])
688 return (1); /* only do this for the real routing table */
1c79356b
A
689
690 rnh = *head;
691 rnh->rnh_addaddr = in_addroute;
39236c6e 692 rnh->rnh_deladdr = in_deleteroute;
1c79356b 693 rnh->rnh_matchaddr = in_matroute;
c910b4d9 694 rnh->rnh_matchaddr_args = in_matroute_args;
1c79356b 695 rnh->rnh_close = in_clsroute;
39236c6e 696 return (1);
1c79356b
A
697}
698
1c79356b 699/*
9bccf70c
A
700 * This zaps old routes when the interface goes down or interface
701 * address is deleted. In the latter case, it deletes static routes
702 * that point to this address. If we don't do this, we may end up
703 * using the old address in the future. The ones we always want to
704 * get rid of are things like ARP entries, since the user might down
705 * the interface, walk over to a completely different network, and
706 * plug back in.
1c79356b
A
707 */
708struct in_ifadown_arg {
709 struct radix_node_head *rnh;
710 struct ifaddr *ifa;
9bccf70c 711 int del;
1c79356b
A
712};
713
714static int
715in_ifadownkill(struct radix_node *rn, void *xap)
716{
39236c6e 717 char dbuf[MAX_IPv4_STR_LEN], gbuf[MAX_IPv4_STR_LEN];
1c79356b
A
718 struct in_ifadown_arg *ap = xap;
719 struct rtentry *rt = (struct rtentry *)rn;
39236c6e 720 boolean_t verbose = (rt_verbose != 0);
1c79356b
A
721 int err;
722
5ba3f43e 723 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
39236c6e 724
b0d623f7 725 RT_LOCK(rt);
9bccf70c
A
726 if (rt->rt_ifa == ap->ifa &&
727 (ap->del || !(rt->rt_flags & RTF_STATIC))) {
39236c6e
A
728 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
729 if (verbose) {
730 log(LOG_DEBUG, "%s: deleting route to %s->%s->%s, "
731 "flags=%b\n", __func__, dbuf, gbuf,
732 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
733 rt->rt_flags, RTF_BITS);
734 }
735 RT_ADDREF_LOCKED(rt); /* for us to free below */
1c79356b
A
736 /*
737 * We need to disable the automatic prune that happens
738 * in this case in rtrequest() because it will blow
739 * away the pointers that rn_walktree() needs in order
740 * continue our descent. We will end up deleting all
741 * the routes that rtrequest() would have in any case,
b0d623f7
A
742 * so that behavior is not needed there. Safe to drop
743 * rt_lock and use rt_key, rt_gateway, since holding
744 * rnh_lock here prevents another thread from calling
745 * rt_setgate() on this route.
1c79356b 746 */
9bccf70c 747 rt->rt_flags &= ~(RTF_CLONING | RTF_PRCLONING);
b0d623f7
A
748 RT_UNLOCK(rt);
749 err = rtrequest_locked(RTM_DELETE, rt_key(rt),
39236c6e
A
750 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
751 if (err != 0) {
752 RT_LOCK(rt);
753 if (!verbose)
754 rt_str(rt, dbuf, sizeof (dbuf),
755 gbuf, sizeof (gbuf));
756 log(LOG_ERR, "%s: error deleting route to "
757 "%s->%s->%s, flags=%b, err=%d\n", __func__,
758 dbuf, gbuf, (rt->rt_ifp != NULL) ?
759 rt->rt_ifp->if_xname : "", rt->rt_flags,
760 RTF_BITS, err);
761 RT_UNLOCK(rt);
1c79356b 762 }
39236c6e 763 rtfree_locked(rt);
b0d623f7
A
764 } else {
765 RT_UNLOCK(rt);
1c79356b 766 }
39236c6e 767 return (0);
1c79356b
A
768}
769
770int
9bccf70c 771in_ifadown(struct ifaddr *ifa, int delete)
1c79356b
A
772{
773 struct in_ifadown_arg arg;
774 struct radix_node_head *rnh;
775
5ba3f43e 776 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
91447636 777
6d2010ae
A
778 /*
779 * Holding rnh_lock here prevents the possibility of
780 * ifa from changing (e.g. in_ifinit), so it is safe
781 * to access its ifa_addr without locking.
782 */
1c79356b 783 if (ifa->ifa_addr->sa_family != AF_INET)
6d2010ae 784 return (1);
1c79356b 785
2d21ac55 786 /* trigger route cache reevaluation */
39236c6e 787 routegenid_inet_update();
2d21ac55 788
1c79356b
A
789 arg.rnh = rnh = rt_tables[AF_INET];
790 arg.ifa = ifa;
9bccf70c 791 arg.del = delete;
1c79356b 792 rnh->rnh_walktree(rnh, in_ifadownkill, &arg);
6d2010ae 793 IFA_LOCK_SPIN(ifa);
1c79356b 794 ifa->ifa_flags &= ~IFA_ROUTE;
6d2010ae
A
795 IFA_UNLOCK(ifa);
796 return (0);
1c79356b 797}