]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet6/in6_rmx.c
xnu-1228.12.14.tar.gz
[apple/xnu.git] / bsd / netinet6 / in6_rmx.c
CommitLineData
2d21ac55 1/*
c910b4d9 2 * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
2d21ac55
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
9bccf70c
A
29/* $FreeBSD: src/sys/netinet6/in6_rmx.c,v 1.1.2.2 2001/07/03 11:01:52 ume Exp $ */
30/* $KAME: in6_rmx.c,v 1.10 2001/05/24 05:44:58 itojun Exp $ */
1c79356b
A
31
32/*
33 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the project nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 */
60
61/*
62 * Copyright 1994, 1995 Massachusetts Institute of Technology
63 *
64 * Permission to use, copy, modify, and distribute this software and
65 * its documentation for any purpose and without fee is hereby
66 * granted, provided that both the above copyright notice and this
67 * permission notice appear in all copies, that both the above
68 * copyright notice and this permission notice appear in all
69 * supporting documentation, and that the name of M.I.T. not be used
70 * in advertising or publicity pertaining to distribution of the
71 * software without specific, written prior permission. M.I.T. makes
72 * no representations about the suitability of this software for any
73 * purpose. It is provided "as is" without express or implied
74 * warranty.
75 *
76 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
77 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
78 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
79 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
80 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
81 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
82 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
83 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
84 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
85 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
86 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87 * SUCH DAMAGE.
88 *
89 */
90
91/*
92 * This code does two things necessary for the enhanced TCP metrics to
93 * function in a useful manner:
94 * 1) It marks all non-host routes as `cloning', thus ensuring that
95 * every actual reference to such a route actually gets turned
96 * into a reference to a host route to the specific destination
97 * requested.
98 * 2) When such routes lose all their references, it arranges for them
99 * to be deleted in some random collection of circumstances, so that
100 * a large quantity of stale routing data is not kept in kernel memory
101 * indefinitely. See in6_rtqtimo() below for the exact mechanism.
102 */
103
104#include <sys/param.h>
105#include <sys/systm.h>
106#include <sys/kernel.h>
107#include <sys/sysctl.h>
108#include <kern/queue.h>
109#include <sys/socket.h>
110#include <sys/socketvar.h>
111#include <sys/mbuf.h>
112#include <sys/syslog.h>
91447636 113#include <kern/lock.h>
1c79356b
A
114
115#include <net/if.h>
116#include <net/route.h>
117#include <netinet/in.h>
1c79356b 118#include <netinet/ip_var.h>
1c79356b
A
119#include <netinet/in_var.h>
120
121#include <netinet/ip6.h>
122#include <netinet6/ip6_var.h>
123
124#include <netinet/icmp6.h>
125
1c79356b
A
126#include <netinet/tcp.h>
127#include <netinet/tcp_seq.h>
128#include <netinet/tcp_timer.h>
129#include <netinet/tcp_var.h>
1c79356b 130
91447636
A
131extern int in6_inithead(void **head, int off);
132static void in6_rtqtimo(void *rock);
133static void in6_mtutimo(void *rock);
2d21ac55 134extern int tvtohz(struct timeval *);
1c79356b 135
c910b4d9
A
136static struct radix_node *in6_matroute_args(void *, struct radix_node_head *,
137 rn_matchf_t *, void *);
138
1c79356b
A
139#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
140
141/*
142 * Do what we need to do when inserting a route.
143 */
144static struct radix_node *
145in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
146 struct radix_node *treenodes)
147{
148 struct rtentry *rt = (struct rtentry *)treenodes;
149 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
150 struct radix_node *ret;
151
152 /*
153 * For IPv6, all unicast non-host routes are automatically cloning.
154 */
155 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
156 rt->rt_flags |= RTF_MULTICAST;
157
158 if (!(rt->rt_flags & (RTF_HOST | RTF_CLONING | RTF_MULTICAST))) {
159 rt->rt_flags |= RTF_PRCLONING;
160 }
161
162 /*
163 * A little bit of help for both IPv6 output and input:
164 * For local addresses, we make sure that RTF_LOCAL is set,
165 * with the thought that this might one day be used to speed up
166 * ip_input().
167 *
168 * We also mark routes to multicast addresses as such, because
169 * it's easy to do and might be useful (but this is much more
170 * dubious since it's so easy to inspect the address). (This
171 * is done above.)
172 *
173 * XXX
174 * should elaborate the code.
175 */
176 if (rt->rt_flags & RTF_HOST) {
177 if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
178 ->sin6_addr,
179 &sin6->sin6_addr)) {
180 rt->rt_flags |= RTF_LOCAL;
181 }
182 }
183
1c79356b
A
184 if (!rt->rt_rmx.rmx_mtu && !(rt->rt_rmx.rmx_locks & RTV_MTU)
185 && rt->rt_ifp)
186 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
187
188 ret = rn_addroute(v_arg, n_arg, head, treenodes);
189 if (ret == NULL && rt->rt_flags & RTF_HOST) {
190 struct rtentry *rt2;
191 /*
192 * We are trying to add a host route, but can't.
193 * Find out if it is because of an
194 * ARP entry and delete it if so.
195 */
91447636 196 rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
1c79356b
A
197 RTF_CLONING | RTF_PRCLONING);
198 if (rt2) {
199 if (rt2->rt_flags & RTF_LLINFO &&
200 rt2->rt_flags & RTF_HOST &&
201 rt2->rt_gateway &&
202 rt2->rt_gateway->sa_family == AF_LINK) {
91447636 203 rtrequest_locked(RTM_DELETE,
1c79356b
A
204 (struct sockaddr *)rt_key(rt2),
205 rt2->rt_gateway,
206 rt_mask(rt2), rt2->rt_flags, 0);
207 ret = rn_addroute(v_arg, n_arg, head,
208 treenodes);
209 }
91447636 210 rtfree_locked(rt2);
1c79356b
A
211 }
212 } else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
213 struct rtentry *rt2;
214 /*
215 * We are trying to add a net route, but can't.
216 * The following case should be allowed, so we'll make a
217 * special check for this:
218 * Two IPv6 addresses with the same prefix is assigned
219 * to a single interrface.
220 * # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
221 * # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
222 * In this case, (*1) and (*2) want to add the same
223 * net route entry, 3ffe:0501:: -> if0.
224 * This case should not raise an error.
225 */
91447636 226 rt2 = rtalloc1_locked((struct sockaddr *)sin6, 0,
1c79356b
A
227 RTF_CLONING | RTF_PRCLONING);
228 if (rt2) {
229 if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
230 == RTF_CLONING
231 && rt2->rt_gateway
232 && rt2->rt_gateway->sa_family == AF_LINK
233 && rt2->rt_ifp == rt->rt_ifp) {
234 ret = rt2->rt_nodes;
235 }
91447636 236 rtfree_locked(rt2);
1c79356b
A
237 }
238 }
239 return ret;
240}
241
c910b4d9
A
/*
 * Lookup hook without leaf-matching parameters: forwards to
 * in6_matroute_args() with a NULL match function and NULL argument.
 */
static struct radix_node *
in6_matroute(void *v_arg, struct radix_node_head *head)
{
	struct radix_node *match;

	match = in6_matroute_args(v_arg, head, NULL, NULL);
	return (match);
}
250
1c79356b
A
251/*
252 * This code is the inverse of in6_clsroute: on first reference, if we
253 * were managing the route, stop doing so and set the expiration timer
254 * back off again.
255 */
256static struct radix_node *
c910b4d9
A
257in6_matroute_args(void *v_arg, struct radix_node_head *head,
258 rn_matchf_t *f, void *w)
1c79356b 259{
c910b4d9 260 struct radix_node *rn = rn_match_args(v_arg, head, f, w);
1c79356b
A
261 struct rtentry *rt = (struct rtentry *)rn;
262
263 if (rt && rt->rt_refcnt == 0) { /* this is first reference */
264 if (rt->rt_flags & RTPRF_OURS) {
265 rt->rt_flags &= ~RTPRF_OURS;
266 rt->rt_rmx.rmx_expire = 0;
267 }
268 }
c910b4d9 269 return (rn);
1c79356b
A
270}
271
9bccf70c
A
272SYSCTL_DECL(_net_inet6_ip6);
273
1c79356b
A
274static int rtq_reallyold = 60*60;
275 /* one hour is ``really old'' */
9bccf70c
A
276SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire,
277 CTLFLAG_RW, &rtq_reallyold , 0, "");
278
1c79356b
A
279static int rtq_minreallyold = 10;
280 /* never automatically crank down to less */
9bccf70c
A
281SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire,
282 CTLFLAG_RW, &rtq_minreallyold , 0, "");
283
1c79356b
A
284static int rtq_toomany = 128;
285 /* 128 cached routes is ``too many'' */
9bccf70c
A
286SYSCTL_INT(_net_inet6_ip6, IPV6CTL_RTMAXCACHE, rtmaxcache,
287 CTLFLAG_RW, &rtq_toomany , 0, "");
288
1c79356b
A
289
290/*
291 * On last reference drop, mark the route as belong to us so that it can be
292 * timed out.
293 */
294static void
2d21ac55 295in6_clsroute(struct radix_node *rn, __unused struct radix_node_head *head)
1c79356b
A
296{
297 struct rtentry *rt = (struct rtentry *)rn;
298
299 if (!(rt->rt_flags & RTF_UP))
300 return; /* prophylactic measures */
301
302 if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
303 return;
304
2d21ac55 305 if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
1c79356b
A
306 return;
307
308 /*
2d21ac55
A
309 * Delete the route immediately if RTF_DELCLONE is set or
310 * if route caching is disabled (rtq_reallyold set to 0).
311 * Otherwise, let it expire and be deleted by in6_rtqkill().
1c79356b 312 */
2d21ac55
A
313 if ((rt->rt_flags & RTF_DELCLONE) || rtq_reallyold == 0) {
314 /*
315 * Delete the route from the radix tree but since we are
316 * called when the route's reference count is 0, don't
317 * deallocate it until we return from this routine by
318 * telling rtrequest that we're interested in it.
319 */
320 if (rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt),
321 rt->rt_gateway, rt_mask(rt), rt->rt_flags, &rt) == 0) {
322 /* Now let the caller free it */
323 rtunref(rt);
324 }
325 } else {
326 struct timeval timenow;
327
328 getmicrotime(&timenow);
1c79356b 329 rt->rt_flags |= RTPRF_OURS;
91447636 330 rt->rt_rmx.rmx_expire = timenow.tv_sec + rtq_reallyold;
1c79356b
A
331 }
332}
333
334struct rtqk_arg {
335 struct radix_node_head *rnh;
336 int mode;
337 int updating;
338 int draining;
339 int killed;
340 int found;
341 time_t nextstop;
342};
343
344/*
345 * Get rid of old routes. When draining, this deletes everything, even when
346 * the timeout is not expired yet. When updating, this makes sure that
347 * nothing has a timeout longer than the current value of rtq_reallyold.
348 */
349static int
350in6_rtqkill(struct radix_node *rn, void *rock)
351{
352 struct rtqk_arg *ap = rock;
353 struct rtentry *rt = (struct rtentry *)rn;
354 int err;
91447636
A
355 struct timeval timenow;
356
357 getmicrotime(&timenow);
91447636 358 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
2d21ac55 359
1c79356b
A
360 if (rt->rt_flags & RTPRF_OURS) {
361 ap->found++;
362
91447636 363 if (ap->draining || rt->rt_rmx.rmx_expire <= timenow.tv_sec) {
1c79356b
A
364 if (rt->rt_refcnt > 0)
365 panic("rtqkill route really not free");
366
91447636 367 err = rtrequest_locked(RTM_DELETE,
1c79356b
A
368 (struct sockaddr *)rt_key(rt),
369 rt->rt_gateway, rt_mask(rt),
370 rt->rt_flags, 0);
371 if (err) {
372 log(LOG_WARNING, "in6_rtqkill: error %d", err);
373 } else {
374 ap->killed++;
375 }
376 } else {
377 if (ap->updating
91447636 378 && (rt->rt_rmx.rmx_expire - timenow.tv_sec
1c79356b 379 > rtq_reallyold)) {
91447636 380 rt->rt_rmx.rmx_expire = timenow.tv_sec
1c79356b
A
381 + rtq_reallyold;
382 }
383 ap->nextstop = lmin(ap->nextstop,
384 rt->rt_rmx.rmx_expire);
385 }
386 }
387
388 return 0;
389}
390
/*
 * Parenthesized so the macro keeps its value when used inside a larger
 * expression (e.g. as a divisor).
 */
#define RTQ_TIMEOUT	(60*10)	/* run no less than once every ten minutes */
static int rtq_timeout = RTQ_TIMEOUT;
393
394static void
395in6_rtqtimo(void *rock)
396{
397 struct radix_node_head *rnh = rock;
398 struct rtqk_arg arg;
399 struct timeval atv;
400 static time_t last_adjusted_timeout = 0;
91447636
A
401 struct timeval timenow;
402
2d21ac55
A
403 lck_mtx_lock(rt_mtx);
404 /* Get the timestamp after we acquire the lock for better accuracy */
91447636 405 getmicrotime(&timenow);
1c79356b
A
406
407 arg.found = arg.killed = 0;
408 arg.rnh = rnh;
91447636 409 arg.nextstop = timenow.tv_sec + rtq_timeout;
1c79356b 410 arg.draining = arg.updating = 0;
1c79356b 411 rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
1c79356b
A
412
413 /*
414 * Attempt to be somewhat dynamic about this:
415 * If there are ``too many'' routes sitting around taking up space,
416 * then crank down the timeout, and see if we can't make some more
417 * go away. However, we make sure that we will never adjust more
418 * than once in rtq_timeout seconds, to keep from cranking down too
419 * hard.
420 */
421 if ((arg.found - arg.killed > rtq_toomany)
91447636 422 && (timenow.tv_sec - last_adjusted_timeout >= rtq_timeout)
1c79356b
A
423 && rtq_reallyold > rtq_minreallyold) {
424 rtq_reallyold = 2*rtq_reallyold / 3;
425 if (rtq_reallyold < rtq_minreallyold) {
426 rtq_reallyold = rtq_minreallyold;
427 }
428
91447636 429 last_adjusted_timeout = timenow.tv_sec;
1c79356b
A
430#if DIAGNOSTIC
431 log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold to %d",
432 rtq_reallyold);
433#endif
434 arg.found = arg.killed = 0;
435 arg.updating = 1;
1c79356b 436 rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
1c79356b
A
437 }
438
439 atv.tv_usec = 0;
91447636
A
440 atv.tv_sec = arg.nextstop - timenow.tv_sec;
441 lck_mtx_unlock(rt_mtx);
442 timeout(in6_rtqtimo, rock, tvtohz(&atv));
1c79356b
A
443}
444
445/*
446 * Age old PMTUs.
447 */
448struct mtuex_arg {
449 struct radix_node_head *rnh;
450 time_t nextstop;
451};
452
453static int
454in6_mtuexpire(struct radix_node *rn, void *rock)
455{
456 struct rtentry *rt = (struct rtentry *)rn;
457 struct mtuex_arg *ap = rock;
91447636
A
458 struct timeval timenow;
459
460 getmicrotime(&timenow);
1c79356b
A
461
462 /* sanity */
463 if (!rt)
464 panic("rt == NULL in in6_mtuexpire");
465
466 if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
91447636 467 if (rt->rt_rmx.rmx_expire <= timenow.tv_sec) {
1c79356b
A
468 rt->rt_flags |= RTF_PROBEMTU;
469 } else {
470 ap->nextstop = lmin(ap->nextstop,
471 rt->rt_rmx.rmx_expire);
472 }
473 }
474
475 return 0;
476}
477
478#define MTUTIMO_DEFAULT (60*1)
479
480static void
481in6_mtutimo(void *rock)
482{
483 struct radix_node_head *rnh = rock;
484 struct mtuex_arg arg;
485 struct timeval atv;
91447636
A
486 struct timeval timenow;
487
488 getmicrotime(&timenow);
1c79356b
A
489
490 arg.rnh = rnh;
91447636
A
491 arg.nextstop = timenow.tv_sec + MTUTIMO_DEFAULT;
492 lck_mtx_lock(rt_mtx);
1c79356b 493 rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
1c79356b
A
494
495 atv.tv_usec = 0;
496 atv.tv_sec = arg.nextstop;
91447636 497 if (atv.tv_sec < timenow.tv_sec) {
9bccf70c
A
498#if DIAGNOSTIC
499 log(LOG_DEBUG, "IPv6: invalid mtu expiration time on routing table\n");
500#endif
91447636 501 arg.nextstop = timenow.tv_sec + 30; /*last resort*/
1c79356b 502 }
91447636
A
503 atv.tv_sec -= timenow.tv_sec;
504 lck_mtx_unlock(rt_mtx);
505 timeout(in6_mtutimo, rock, tvtohz(&atv));
1c79356b
A
506}
507
#if 0
/*
 * Compiled out.  Walks the AF_INET6 tree with in6_rtqkill() in draining
 * mode, which deletes every route we manage regardless of its expiry.
 */
void
in6_rtqdrain()
{
	struct radix_node_head *rnh = rt_tables[AF_INET6];
	struct rtqk_arg arg;
	int spl;

	arg.rnh = rnh;
	arg.found = 0;
	arg.killed = 0;
	arg.updating = 0;
	arg.draining = 1;
	arg.nextstop = 0;

	spl = splnet();
	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
	splx(spl);
}
#endif
525
526/*
527 * Initialize our routing tree.
528 */
529int
530in6_inithead(void **head, int off)
531{
532 struct radix_node_head *rnh;
533
534 if (!rn_inithead(head, off))
535 return 0;
536
537 if (head != (void **)&rt_tables[AF_INET6]) /* BOGUS! */
538 return 1; /* only do this for the real routing table */
539
540 rnh = *head;
541 rnh->rnh_addaddr = in6_addroute;
542 rnh->rnh_matchaddr = in6_matroute;
c910b4d9 543 rnh->rnh_matchaddr_args = in6_matroute_args;
1c79356b
A
544 rnh->rnh_close = in6_clsroute;
545 in6_rtqtimo(rnh); /* kick off timeout first time */
546 in6_mtutimo(rnh); /* kick off timeout first time */
547 return 1;
548}