]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/route.c
xnu-1228.12.14.tar.gz
[apple/xnu.git] / bsd / net / route.c
CommitLineData
1c79356b 1/*
c910b4d9 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1980, 1986, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)route.c 8.2 (Berkeley) 11/15/93
9bccf70c 61 * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $
1c79356b 62 */
9bccf70c 63
1c79356b
A
64#include <sys/param.h>
65#include <sys/systm.h>
66#include <sys/malloc.h>
67#include <sys/mbuf.h>
68#include <sys/socket.h>
69#include <sys/domain.h>
9bccf70c 70#include <sys/syslog.h>
2d21ac55 71#include <sys/queue.h>
91447636 72#include <kern/lock.h>
6601e61a 73#include <kern/zalloc.h>
1c79356b
A
74
75#include <net/if.h>
76#include <net/route.h>
77
78#include <netinet/in.h>
c910b4d9 79#include <netinet/in_var.h>
1c79356b 80#include <netinet/ip_mroute.h>
c910b4d9 81#include <netinet/ip_var.h>
1c79356b 82
55e303ae
A
83#include <net/if_dl.h>
84
2d21ac55
A
85#include <libkern/OSAtomic.h>
86#include <libkern/OSDebug.h>
87
88#include <pexpert/pexpert.h>
89
90#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
1c79356b
A
91#define SA(p) ((struct sockaddr *)(p))
92
2d21ac55
A
93extern void kdp_set_gateway_mac (void *gatewaymac);
94
91447636 95extern struct domain routedomain;
1c79356b 96struct route_cb route_cb;
91447636 97__private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 };
1c79356b
A
98struct radix_node_head *rt_tables[AF_MAX+1];
99
91447636
A
100lck_mtx_t *rt_mtx; /*### global routing tables mutex for now */
101lck_attr_t *rt_mtx_attr;
102lck_grp_t *rt_mtx_grp;
103lck_grp_attr_t *rt_mtx_grp_attr;
1c79356b 104
91447636 105lck_mtx_t *route_domain_mtx; /*### global routing tables mutex for now */
2d21ac55
A
106int rttrash = 0; /* routes not in table but not freed */
107
108static unsigned int rte_debug;
109
110/* Possible flags for rte_debug */
111#define RTD_DEBUG 0x1 /* enable or disable rtentry debug facility */
112#define RTD_TRACE 0x2 /* trace alloc, free and refcnt */
113#define RTD_NO_FREE 0x4 /* don't free (good to catch corruptions) */
91447636 114
6601e61a
A
115static struct zone *rte_zone; /* special zone for rtentry */
116#define RTE_ZONE_MAX 65536 /* maximum elements in zone */
117#define RTE_ZONE_NAME "rtentry" /* name of rtentry zone */
118
2d21ac55
A
119#define RTD_INUSE 0xFEEDFACE /* entry is in use */
120#define RTD_FREED 0xDEADBEEF /* entry is freed */
121
122#define RTD_TRSTACK_SIZE 8 /* depth of stack trace */
123#define RTD_REFHIST_SIZE 4 /* refcnt history size */
124
/*
 * Debug variant of rtentry structure.  The regular rtentry is the first
 * member; when RTD_DEBUG is set this file casts rtentry pointers to
 * rtentry_dbg pointers to reach the bookkeeping that follows it.
 */
struct rtentry_dbg {
	struct rtentry	rtd_entry;		/* rtentry */
	struct rtentry	rtd_entry_saved;	/* saved rtentry */
	u_int32_t	rtd_inuse;		/* in use pattern */
	u_int16_t	rtd_refhold_cnt;	/* # of rtref */
	u_int16_t	rtd_refrele_cnt;	/* # of rtunref */

	/*
	 * Thread and PC stack trace up to RTD_TRSTACK_SIZE
	 * deep during alloc and free.
	 */
	struct thread	*rtd_alloc_thread;
	void		*rtd_alloc_stk_pc[RTD_TRSTACK_SIZE];
	struct thread	*rtd_free_thread;
	void		*rtd_free_stk_pc[RTD_TRSTACK_SIZE];

	/*
	 * Circular lists of rtref and rtunref callers; the *_next
	 * members are the index of the slot to be written next.
	 */
	u_int16_t	rtd_refhold_next;
	u_int16_t	rtd_refrele_next;
	struct {
		struct thread	*th;
		void		*pc[RTD_TRSTACK_SIZE];
	} rtd_refhold[RTD_REFHIST_SIZE];
	struct {
		struct thread	*th;
		void		*pc[RTD_TRSTACK_SIZE];
	} rtd_refrele[RTD_REFHIST_SIZE];

	/*
	 * Trash list linkage
	 */
	TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
};
160
161/* List of trash route entries protected by rt_mtx */
162static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
163
164static inline struct rtentry *rte_alloc_debug(void);
165static inline void rte_free_debug(struct rtentry *);
91447636
A
166static void rt_maskedcopy(struct sockaddr *,
167 struct sockaddr *, struct sockaddr *);
168static void rtable_init(void **);
2d21ac55
A
169static inline void rtref_audit(struct rtentry_dbg *);
170static inline void rtunref_audit(struct rtentry_dbg *);
c910b4d9
A
171static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, u_long,
172 unsigned int);
173static int rtrequest_common_locked(int, struct sockaddr *,
174 struct sockaddr *, struct sockaddr *, int, struct rtentry **,
175 unsigned int);
176static void rtalloc_ign_common_locked(struct route *, u_long, unsigned int);
177static inline void sa_set_ifscope(struct sockaddr *, unsigned int);
178static struct sockaddr *sin_copy(struct sockaddr_in *, struct sockaddr_in *,
179 unsigned int);
180static struct sockaddr *mask_copy(struct sockaddr *, struct sockaddr_in *,
181 unsigned int);
182static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
183 unsigned int);
184static struct radix_node *node_lookup_default(void);
185static int rn_match_ifscope(struct radix_node *, void *);
186static struct ifaddr *ifa_ifwithroute_common_locked(int,
187 const struct sockaddr *, const struct sockaddr *, unsigned int);
1c79356b 188
55e303ae
A
189__private_extern__ u_long route_generation = 0;
190extern int use_routegenid;
191
c910b4d9
A
192/*
193 * sockaddr_in with embedded interface scope; this is used internally
194 * to keep track of scoped route entries in the routing table. The
195 * fact that such a scope is embedded in the structure is an artifact
196 * of the current implementation which could change in future.
197 */
198struct sockaddr_inifscope {
199 __uint8_t sin_len;
200 sa_family_t sin_family;
201 in_port_t sin_port;
202 struct in_addr sin_addr;
203 /*
204 * To avoid possible conflict with an overlaid sockaddr_inarp
205 * having sin_other set to SIN_PROXY, we use the first 4-bytes
206 * of sin_zero since sin_srcaddr is one of the unused fields
207 * in sockaddr_inarp.
208 */
209 union {
210 char sin_zero[8];
211 struct {
212 __uint32_t ifscope;
213 } _in_index;
214 } un;
215#define sin_ifscope un._in_index.ifscope
216};
217
/*
 * Cast helpers.  The intermediate (size_t) cast lets these be applied to
 * const-qualified pointers without a compiler warning.
 */
#define	SIN(sa)		((struct sockaddr_in *)(size_t)(sa))
#define	SINIFSCOPE(sa)	((struct sockaddr_inifscope *)(size_t)(sa))

/*
 * Panic unless sa is an AF_INET sockaddr at least as large as a
 * sockaddr_in.  Wrapped in do/while(0) so that the macro expands to a
 * single statement and stays safe inside unbraced if/else bodies.
 */
#define	ASSERT_SINIFSCOPE(sa) do {					\
	if ((sa)->sa_family != AF_INET ||				\
	    (sa)->sa_len < sizeof (struct sockaddr_in))			\
		panic("%s: bad sockaddr_in %p\n", __func__, (sa));	\
} while (0)
226
/*
 * Argument handed to the leaf-matching callback (rn_match_ifscope).
 * At present it is scoped routing specific but can be expanded in
 * future to include other search filters.
 */
struct matchleaf_arg {
	unsigned int	ifscope;	/* interface scope */
};
234
235/*
236 * For looking up the non-scoped default route (sockaddr instead
237 * of sockaddr_in for convenience).
238 */
239static struct sockaddr sin_def = {
240 sizeof (struct sockaddr_in), AF_INET, { 0, }
241};
242
243/*
244 * Interface index (scope) of the primary interface; determined at
245 * the time when the default, non-scoped route gets added, changed
246 * or deleted. Protected by rt_mtx.
247 */
248static unsigned int primary_ifscope = IFSCOPE_NONE;
249
250#define INET_DEFAULT(dst) \
251 ((dst)->sa_family == AF_INET && SIN(dst)->sin_addr.s_addr == 0)
252
/*
 * RT() views a pointer as an rtentry; RT_HOST() tests its RTF_HOST flag.
 * The argument is parenthesized so that an expression such as `p + n'
 * is cast as a whole instead of having the cast bind to `p' alone.
 */
#define	RT(r)		((struct rtentry *)(r))
#define	RT_HOST(r)	(RT(r)->rt_flags & RTF_HOST)
255
256/*
257 * Given a route, determine whether or not it is the non-scoped default
258 * route; dst typically comes from rt_key(rt) but may be coming from
259 * a separate place when rt is in the process of being created.
260 */
261boolean_t
262rt_inet_default(struct rtentry *rt, struct sockaddr *dst)
263{
264 return (INET_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
265}
266
267/*
268 * Set the ifscope of the primary interface; caller holds rt_mtx.
269 */
270void
271set_primary_ifscope(unsigned int ifscope)
272{
273 primary_ifscope = ifscope;
274}
275
276/*
277 * Return the ifscope of the primary interface; caller holds rt_mtx.
278 */
279unsigned int
280get_primary_ifscope(void)
281{
282 return (primary_ifscope);
283}
284
285/*
286 * Embed ifscope into a given a sockaddr_in.
287 */
288static inline void
289sa_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
290{
291 /* Caller must pass in sockaddr_in */
292 ASSERT_SINIFSCOPE(sa);
293
294 SINIFSCOPE(sa)->sin_ifscope = ifscope;
295}
296
297/*
298 * Given a sockaddr_in, return the embedded ifscope to the caller.
299 */
300unsigned int
301sa_get_ifscope(struct sockaddr *sa)
302{
303 /* Caller must pass in sockaddr_in */
304 ASSERT_SINIFSCOPE(sa);
305
306 return (SINIFSCOPE(sa)->sin_ifscope);
307}
308
309/*
310 * Copy a sockaddr_in src to dst and embed ifscope into dst.
311 */
312static struct sockaddr *
313sin_copy(struct sockaddr_in *src, struct sockaddr_in *dst, unsigned int ifscope)
314{
315 *dst = *src;
316 sa_set_ifscope(SA(dst), ifscope);
317
318 return (SA(dst));
319}
320
321/*
322 * Copy a mask from src to a sockaddr_in dst and embed ifscope into dst.
323 */
324static struct sockaddr *
325mask_copy(struct sockaddr *src, struct sockaddr_in *dst, unsigned int ifscope)
326{
327 /* We know dst is at least the size of sockaddr{_in} */
328 bzero(dst, sizeof (*dst));
329 rt_maskedcopy(src, SA(dst), src);
330
331 /*
332 * The length of the mask sockaddr would need to be adjusted
333 * to cover the additional sin_ifscope field; when ifscope is
334 * IFSCOPE_NONE, we'd end up clearing the embedded ifscope on
335 * the destination mask in addition to extending the length
336 * of the sockaddr, as a side effect. This is okay, as any
337 * trailing zeroes would be skipped by rn_addmask prior to
338 * inserting or looking up the mask in the mask tree.
339 */
340 SINIFSCOPE(dst)->sin_ifscope = ifscope;
341 SINIFSCOPE(dst)->sin_len =
342 offsetof(struct sockaddr_inifscope, sin_ifscope) +
343 sizeof (SINIFSCOPE(dst)->sin_ifscope);
344
345 return (SA(dst));
346}
347
348/*
349 * Callback leaf-matching routine for rn_matchaddr_args used
350 * for looking up an exact match for a scoped route entry.
351 */
352static int
353rn_match_ifscope(struct radix_node *rn, void *arg)
354{
355 struct rtentry *rt = (struct rtentry *)rn;
356 struct matchleaf_arg *ma = arg;
357
358 if (!(rt->rt_flags & RTF_IFSCOPE) || rt_key(rt)->sa_family != AF_INET)
359 return (0);
360
361 return (SINIFSCOPE(rt_key(rt))->sin_ifscope == ma->ifscope);
362}
55e303ae 363
1c79356b 364static void
2d21ac55 365rtable_init(void **table)
1c79356b
A
366{
367 struct domain *dom;
368 for (dom = domains; dom; dom = dom->dom_next)
369 if (dom->dom_rtattach)
370 dom->dom_rtattach(&table[dom->dom_family],
371 dom->dom_rtoffset);
372}
373
374void
2d21ac55 375route_init(void)
1c79356b 376{
2d21ac55
A
377 int size;
378
593a1d5f 379 PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
2d21ac55
A
380 if (rte_debug != 0)
381 rte_debug |= RTD_DEBUG;
382
91447636
A
383 rt_mtx_grp_attr = lck_grp_attr_alloc_init();
384
91447636
A
385 rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr);
386
387 rt_mtx_attr = lck_attr_alloc_init();
388
91447636
A
389 if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) {
390 printf("route_init: can't alloc rt_mtx\n");
391 return;
392 }
393
394 lck_mtx_lock(rt_mtx);
1c79356b 395 rn_init(); /* initialize all zeroes, all ones, mask table */
91447636 396 lck_mtx_unlock(rt_mtx);
1c79356b 397 rtable_init((void **)rt_tables);
91447636 398 route_domain_mtx = routedomain.dom_mtx;
6601e61a 399
2d21ac55
A
400 if (rte_debug & RTD_DEBUG)
401 size = sizeof (struct rtentry_dbg);
402 else
403 size = sizeof (struct rtentry);
404
405 rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
6601e61a
A
406 if (rte_zone == NULL)
407 panic("route_init: failed allocating rte_zone");
408
409 zone_change(rte_zone, Z_EXPAND, TRUE);
2d21ac55
A
410
411 TAILQ_INIT(&rttrash_head);
1c79356b
A
412}
413
414/*
415 * Packet routing routines.
416 */
/*
 * Fill in ro->ro_rt for the destination in ro; equivalent to
 * rtalloc_ign() with no flags ignored.
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign(ro, 0UL);
}
422
423void
2d21ac55 424rtalloc_ign_locked(struct route *ro, u_long ignore)
c910b4d9
A
425{
426 return (rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE));
427}
428
429void
430rtalloc_scoped_ign_locked(struct route *ro, u_long ignore, unsigned int ifscope)
431{
432 return (rtalloc_ign_common_locked(ro, ignore, ifscope));
433}
434
435static void
436rtalloc_ign_common_locked(struct route *ro, u_long ignore,
437 unsigned int ifscope)
1c79356b 438{
9bccf70c 439 struct rtentry *rt;
9bccf70c
A
440
441 if ((rt = ro->ro_rt) != NULL) {
442 if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
443 return;
91447636 444 rtfree_locked(rt);
9bccf70c 445 ro->ro_rt = NULL;
9bccf70c 446 }
c910b4d9 447 ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
55e303ae
A
448 if (ro->ro_rt)
449 ro->ro_rt->generation_id = route_generation;
1c79356b 450}
91447636 451void
2d21ac55 452rtalloc_ign(struct route *ro, u_long ignore)
91447636
A
453{
454 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
455 lck_mtx_lock(rt_mtx);
456 rtalloc_ign_locked(ro, ignore);
457 lck_mtx_unlock(rt_mtx);
458}
1c79356b 459
c910b4d9
A
460struct rtentry *
461rtalloc1_locked(struct sockaddr *dst, int report, u_long ignflags)
462{
463 return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
464}
465
466struct rtentry *
467rtalloc1_scoped_locked(struct sockaddr *dst, int report, u_long ignflags,
468 unsigned int ifscope)
469{
470 return (rtalloc1_common_locked(dst, report, ignflags, ifscope));
471}
472
1c79356b
A
473/*
474 * Look up the route that matches the address given
475 * Or, at least try.. Create a cloned route if needed.
476 */
c910b4d9
A
477static struct rtentry *
478rtalloc1_common_locked(struct sockaddr *dst, int report, u_long ignflags,
479 unsigned int ifscope)
1c79356b 480{
2d21ac55 481 struct radix_node_head *rnh = rt_tables[dst->sa_family];
c910b4d9 482 struct rtentry *rt, *newrt = NULL;
1c79356b
A
483 struct rt_addrinfo info;
484 u_long nflags;
91447636 485 int err = 0, msgtype = RTM_MISS;
c910b4d9
A
486
487 if (rnh == NULL)
488 goto unreachable;
489
9bccf70c 490 /*
c910b4d9
A
491 * Find the longest prefix or exact (in the scoped case) address match;
492 * callee adds a reference to entry and checks for root node as well
1c79356b 493 */
c910b4d9
A
494 rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
495 if (rt == NULL)
496 goto unreachable;
497
498 newrt = rt;
499 nflags = rt->rt_flags & ~ignflags;
500 if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
1c79356b 501 /*
c910b4d9
A
502 * We are apparently adding (report = 0 in delete).
503 * If it requires that it be cloned, do so.
504 * (This implies it wasn't a HOST route.)
1c79356b 505 */
c910b4d9
A
506 err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
507 if (err) {
1c79356b 508 /*
c910b4d9
A
509 * If the cloning didn't succeed, maybe what we
510 * have from lookup above will do. Return that;
511 * no need to hold another reference since it's
512 * already done.
1c79356b 513 */
c910b4d9
A
514 newrt = rt;
515 goto miss;
516 }
517
1c79356b 518 /*
c910b4d9
A
519 * We cloned it; drop the original route found during lookup.
520 * The resulted cloned route (newrt) would now have an extra
521 * reference held during rtrequest.
1c79356b 522 */
c910b4d9
A
523 rtfree_locked(rt);
524 if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
1c79356b 525 /*
c910b4d9
A
526 * If the new route specifies it be
527 * externally resolved, then go do that.
1c79356b 528 */
c910b4d9
A
529 msgtype = RTM_RESOLVE;
530 goto miss;
1c79356b
A
531 }
532 }
c910b4d9
A
533 goto done;
534
535unreachable:
536 /*
537 * Either we hit the root or couldn't find any match,
538 * Which basically means "cant get there from here"
539 */
540 rtstat.rts_unreach++;
541miss:
542 if (report) {
543 /*
544 * If required, report the failure to the supervising
545 * Authorities.
546 * For a delete, this is not an error. (report == 0)
547 */
548 bzero((caddr_t)&info, sizeof(info));
549 info.rti_info[RTAX_DST] = dst;
550 rt_missmsg(msgtype, &info, 0, err);
551 }
552done:
1c79356b
A
553 return (newrt);
554}
555
91447636 556struct rtentry *
2d21ac55 557rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
91447636
A
558{
559 struct rtentry * entry;
560 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
561 lck_mtx_lock(rt_mtx);
562 entry = rtalloc1_locked(dst, report, ignflags);
563 lck_mtx_unlock(rt_mtx);
564 return (entry);
565}
566
1c79356b
A
567/*
568 * Remove a reference count from an rtentry.
569 * If the count gets low enough, take it out of the routing table
570 */
571void
2d21ac55 572rtfree_locked(struct rtentry *rt)
1c79356b
A
573{
574 /*
575 * find the tree for that address family
55e303ae 576 * Note: in the case of igmp packets, there might not be an rnh
1c79356b 577 */
2d21ac55 578 struct radix_node_head *rnh;
1c79356b 579
91447636
A
580 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
581
582 /* See 3582620 - We hit this during the transition from funnels to locks */
583 if (rt == 0) {
584 printf("rtfree - rt is NULL\n");
585 return;
586 }
587
588 rnh = rt_tables[rt_key(rt)->sa_family];
1c79356b
A
589
590 /*
591 * decrement the reference count by one and if it reaches 0,
592 * and there is a close function defined, call the close function
593 */
2d21ac55
A
594 rtunref(rt);
595 if (rt->rt_refcnt > 0)
596 return;
597
2d21ac55
A
598 /*
599 * On last reference give the "close method" a chance to cleanup
600 * private state. This also permits (for IPv4 and IPv6) a chance
601 * to decide if the routing table entry should be purged immediately
602 * or at a later time. When an immediate purge is to happen the
603 * close routine typically issues RTM_DELETE which clears the RTF_UP
604 * flag on the entry so that the code below reclaims the storage.
605 */
4a3eedf9 606 if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
1c79356b 607 rnh->rnh_close((struct radix_node *)rt, rnh);
1c79356b
A
608
609 /*
610 * If we are no longer "up" (and ref == 0)
611 * then we can free the resources associated
612 * with the route.
613 */
2d21ac55 614 if (!(rt->rt_flags & RTF_UP)) {
1c79356b
A
615 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
616 panic ("rtfree 2");
9bccf70c 617 /*
1c79356b
A
618 * the rtentry must have been removed from the routing table
619 * so it is represented in rttrash.. remove that now.
620 */
2d21ac55
A
621 (void) OSDecrementAtomic((SInt32 *)&rttrash);
622 if (rte_debug & RTD_DEBUG) {
623 TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
624 rtd_trash_link);
625 }
1c79356b
A
626
627#ifdef DIAGNOSTIC
628 if (rt->rt_refcnt < 0) {
2d21ac55
A
629 printf("rtfree: %p not freed (neg refs) cnt=%d\n",
630 rt, rt->rt_refcnt);
1c79356b
A
631 return;
632 }
633#endif
634
9bccf70c 635 /*
1c79356b
A
636 * release references on items we hold them on..
637 * e.g other routes and ifaddrs.
638 */
9bccf70c 639 if (rt->rt_parent)
91447636 640 rtfree_locked(rt->rt_parent);
9bccf70c 641
91447636 642 if(rt->rt_ifa) {
9bccf70c 643 ifafree(rt->rt_ifa);
91447636 644 rt->rt_ifa = NULL;
1c79356b
A
645 }
646
647 /*
648 * The key is separatly alloc'd so free it (see rt_setgate()).
649 * This also frees the gateway, as they are always malloc'd
650 * together.
651 */
91447636 652 R_Free(rt_key(rt));
1c79356b
A
653
654 /*
655 * and the rtentry itself of course
656 */
6601e61a 657 rte_free(rt);
1c79356b
A
658 }
659}
660
91447636 661void
2d21ac55 662rtfree(struct rtentry *rt)
91447636
A
663{
664 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
665 lck_mtx_lock(rt_mtx);
666 rtfree_locked(rt);
667 lck_mtx_unlock(rt_mtx);
668}
669
9bccf70c
A
670/*
671 * Decrements the refcount but does not free the route when
672 * the refcount reaches zero. Unless you have really good reason,
673 * use rtfree not rtunref.
674 */
675void
2d21ac55 676rtunref(struct rtentry *p)
9bccf70c 677{
91447636
A
678 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
679
2d21ac55
A
680 if (p->rt_refcnt <= 0)
681 panic("rtunref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);
682
683 if (rte_debug & RTD_DEBUG)
684 rtunref_audit((struct rtentry_dbg *)p);
685
686 p->rt_refcnt--;
687}
688
689static inline void
690rtunref_audit(struct rtentry_dbg *rte)
691{
692 if (rte->rtd_inuse != RTD_INUSE)
693 panic("rtunref: on freed rte=%p\n", rte);
694
695 rte->rtd_refrele_cnt++;
696
697 if (rte_debug & RTD_TRACE) {
698 rte->rtd_refrele[rte->rtd_refrele_next].th = current_thread();
699 bzero(rte->rtd_refrele[rte->rtd_refrele_next].pc,
700 sizeof (rte->rtd_refrele[rte->rtd_refrele_next].pc));
701 (void) OSBacktrace(rte->rtd_refrele[rte->rtd_refrele_next].pc,
702 RTD_TRSTACK_SIZE);
703
704 rte->rtd_refrele_next =
705 (rte->rtd_refrele_next + 1) % RTD_REFHIST_SIZE;
706 }
9bccf70c
A
707}
708
709/*
710 * Add a reference count from an rtentry.
711 */
712void
2d21ac55 713rtref(struct rtentry *p)
9bccf70c 714{
91447636
A
715 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
716
2d21ac55
A
717 if (p->rt_refcnt < 0)
718 panic("rtref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);
719
720 if (rte_debug & RTD_DEBUG)
721 rtref_audit((struct rtentry_dbg *)p);
722
723 p->rt_refcnt++;
2d21ac55
A
724}
725
726static inline void
727rtref_audit(struct rtentry_dbg *rte)
728{
729 if (rte->rtd_inuse != RTD_INUSE)
730 panic("rtref_audit: on freed rte=%p\n", rte);
9bccf70c 731
2d21ac55
A
732 rte->rtd_refhold_cnt++;
733
734 if (rte_debug & RTD_TRACE) {
735 rte->rtd_refhold[rte->rtd_refhold_next].th = current_thread();
736 bzero(rte->rtd_refhold[rte->rtd_refhold_next].pc,
737 sizeof (rte->rtd_refhold[rte->rtd_refhold_next].pc));
738 (void) OSBacktrace(rte->rtd_refhold[rte->rtd_refhold_next].pc,
739 RTD_TRSTACK_SIZE);
740
741 rte->rtd_refhold_next =
742 (rte->rtd_refhold_next + 1) % RTD_REFHIST_SIZE;
743 }
9bccf70c
A
744}
745
746void
747rtsetifa(struct rtentry *rt, struct ifaddr* ifa)
748{
749 if (rt == NULL)
750 panic("rtsetifa");
751
752 if (rt->rt_ifa == ifa)
753 return;
754
91447636
A
755 /* Release the old ifa */
756 if (rt->rt_ifa)
9bccf70c
A
757 ifafree(rt->rt_ifa);
758
759 /* Set rt_ifa */
760 rt->rt_ifa = ifa;
761
91447636
A
762 /* Take a reference to the ifa */
763 if (rt->rt_ifa)
9bccf70c
A
764 ifaref(rt->rt_ifa);
765}
766
1c79356b 767void
2d21ac55 768ifafree(struct ifaddr *ifa)
1c79356b 769{
2d21ac55
A
770 int oldval;
771
1c79356b
A
772 if (ifa == NULL)
773 panic("ifafree");
2d21ac55
A
774
775 oldval = OSAddAtomic(-1, (SInt32 *)&ifa->ifa_refcnt);
91447636
A
776
777 if (oldval == 0) {
2d21ac55 778 if ((ifa->ifa_debug & IFA_ATTACHED) != 0) {
91447636 779 panic("ifa attached to ifp is being freed\n");
9bccf70c 780 }
1c79356b 781 FREE(ifa, M_IFADDR);
9bccf70c 782 }
1c79356b
A
783}
784
9bccf70c
A
785void
786ifaref(struct ifaddr *ifa)
787{
788 if (ifa == NULL)
789 panic("ifaref");
91447636 790
2d21ac55 791 if (OSAddAtomic(1, (SInt32 *)&ifa->ifa_refcnt) == 0xffffffff)
91447636 792 panic("ifaref - reference count rolled over!");
9bccf70c 793}
9bccf70c 794
1c79356b
A
795/*
796 * Force a routing table entry to the specified
797 * destination to go through the given gateway.
798 * Normally called as a result of a routing redirect
799 * message from the network layer.
1c79356b
A
800 */
801void
c910b4d9
A
802rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
803 struct sockaddr *netmask, int flags, struct sockaddr *src,
804 struct rtentry **rtp)
1c79356b 805{
c910b4d9 806 struct rtentry *rt = NULL;
1c79356b
A
807 int error = 0;
808 short *stat = 0;
809 struct rt_addrinfo info;
91447636 810 struct ifaddr *ifa = NULL;
c910b4d9
A
811 unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
812 struct sockaddr_in sin;
91447636
A
813
814 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
815 lck_mtx_lock(rt_mtx);
1c79356b 816
c910b4d9
A
817 /*
818 * Verify the gateway is directly reachable; if scoped routing
819 * is enabled, verify that it is reachable from the interface
820 * where the ICMP redirect arrived on.
821 */
822 if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
1c79356b
A
823 error = ENETUNREACH;
824 goto out;
825 }
91447636 826
c910b4d9
A
827 /* Lookup route to the destination (from the original IP header) */
828 rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);
829
830 /* Embed scope in src for comparison against rt_gateway below */
831 if (ip_doscopedroute && src->sa_family == AF_INET)
832 src = sin_copy(SIN(src), &sin, ifscope);
833
1c79356b
A
834 /*
835 * If the redirect isn't from our current router for this dst,
836 * it's either old or wrong. If it redirects us to ourselves,
837 * we have a routing loop, perhaps as a result of an interface
838 * going down recently.
839 */
1c79356b 840 if (!(flags & RTF_DONE) && rt &&
2d21ac55
A
841 (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
842 ifa->ifa_addr))) {
1c79356b 843 error = EINVAL;
2d21ac55 844 } else {
91447636
A
845 ifafree(ifa);
846 if ((ifa = ifa_ifwithaddr(gateway))) {
847 ifafree(ifa);
848 ifa = NULL;
849 error = EHOSTUNREACH;
850 }
851 }
852
853 if (ifa) {
854 ifafree(ifa);
855 ifa = NULL;
856 }
857
1c79356b
A
858 if (error)
859 goto done;
860 /*
861 * Create a new entry if we just got back a wildcard entry
862 * or the the lookup failed. This is necessary for hosts
863 * which use routing redirects generated by smart gateways
864 * to dynamically build the routing tables.
865 */
866 if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
867 goto create;
868 /*
869 * Don't listen to the redirect if it's
870 * for a route to an interface.
871 */
872 if (rt->rt_flags & RTF_GATEWAY) {
873 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
874 /*
875 * Changing from route to net => route to host.
c910b4d9
A
876 * Create new route, rather than smashing route
877 * to net; similar to cloned routes, the newly
878 * created host route is scoped as well.
1c79356b
A
879 */
880 create:
881 flags |= RTF_GATEWAY | RTF_DYNAMIC;
c910b4d9
A
882 error = rtrequest_scoped_locked(RTM_ADD, dst,
883 gateway, netmask, flags, NULL, ifscope);
1c79356b
A
884 stat = &rtstat.rts_dynamic;
885 } else {
886 /*
887 * Smash the current notion of the gateway to
888 * this destination. Should check about netmask!!!
889 */
890 rt->rt_flags |= RTF_MODIFIED;
891 flags |= RTF_MODIFIED;
892 stat = &rtstat.rts_newgateway;
893 /*
894 * add the key and gateway (in one malloc'd chunk).
895 */
c910b4d9 896 error = rt_setgate(rt, rt_key(rt), gateway);
1c79356b 897 }
c910b4d9 898 } else {
1c79356b 899 error = EHOSTUNREACH;
c910b4d9 900 }
1c79356b
A
901done:
902 if (rt) {
903 if (rtp && !error)
904 *rtp = rt;
905 else
91447636 906 rtfree_locked(rt);
1c79356b
A
907 }
908out:
c910b4d9 909 if (error) {
1c79356b 910 rtstat.rts_badredirect++;
c910b4d9
A
911 } else {
912 if (stat != NULL)
913 (*stat)++;
914 if (use_routegenid)
915 route_generation++;
916 }
1c79356b
A
917 bzero((caddr_t)&info, sizeof(info));
918 info.rti_info[RTAX_DST] = dst;
919 info.rti_info[RTAX_GATEWAY] = gateway;
920 info.rti_info[RTAX_NETMASK] = netmask;
921 info.rti_info[RTAX_AUTHOR] = src;
922 rt_missmsg(RTM_REDIRECT, &info, flags, error);
91447636 923 lck_mtx_unlock(rt_mtx);
1c79356b
A
924}
925
926/*
927* Routing table ioctl interface.
928*/
929int
2d21ac55 930rtioctl(int req, caddr_t data, struct proc *p)
1c79356b 931{
2d21ac55
A
932#pragma unused(p)
933#if INET && MROUTING
1c79356b
A
934 return mrt_ioctl(req, data);
935#else
1c79356b 936 return ENXIO;
2d21ac55 937#endif
1c79356b
A
938}
939
940struct ifaddr *
91447636
A
941ifa_ifwithroute(
942 int flags,
943 const struct sockaddr *dst,
944 const struct sockaddr *gateway)
1c79356b 945{
2d21ac55
A
946 struct ifaddr *ifa;
947
948 lck_mtx_lock(rt_mtx);
949 ifa = ifa_ifwithroute_locked(flags, dst, gateway);
950 lck_mtx_unlock(rt_mtx);
951
952 return (ifa);
953}
954
955struct ifaddr *
c910b4d9
A
956ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
957 const struct sockaddr *gateway)
958{
959 return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
960 gateway, IFSCOPE_NONE));
961}
962
963struct ifaddr *
964ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
965 const struct sockaddr *gateway, unsigned int ifscope)
966{
967 if (ifscope != IFSCOPE_NONE)
968 flags |= RTF_IFSCOPE;
969 else
970 flags &= ~RTF_IFSCOPE;
971
972 return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
973}
974
975static struct ifaddr *
976ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
977 const struct sockaddr *gateway, unsigned int ifscope)
2d21ac55
A
978{
979 struct ifaddr *ifa = NULL;
980 struct rtentry *rt = NULL;
c910b4d9 981 struct sockaddr_in dst_in, gw_in;
91447636
A
982
983 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
984
c910b4d9
A
985 if (ip_doscopedroute) {
986 /*
987 * Just in case the sockaddr passed in by the caller
988 * contains embedded scope, make sure to clear it since
989 * IPv4 interface addresses aren't scoped.
990 */
991 if (dst != NULL && dst->sa_family == AF_INET)
992 dst = sin_copy(SIN(dst), &dst_in, IFSCOPE_NONE);
993 if (gateway != NULL && gateway->sa_family == AF_INET)
994 gateway = sin_copy(SIN(gateway), &gw_in, IFSCOPE_NONE);
995 }
996
2d21ac55 997 if (!(flags & RTF_GATEWAY)) {
1c79356b
A
998 /*
999 * If we are adding a route to an interface,
1000 * and the interface is a pt to pt link
1001 * we should search for the destination
1002 * as our clue to the interface. Otherwise
1003 * we can use the local address.
1004 */
1c79356b
A
1005 if (flags & RTF_HOST) {
1006 ifa = ifa_ifwithdstaddr(dst);
1007 }
2d21ac55 1008 if (ifa == NULL)
c910b4d9 1009 ifa = ifa_ifwithaddr_scoped(gateway, ifscope);
1c79356b
A
1010 } else {
1011 /*
1012 * If we are adding a route to a remote net
1013 * or host, the gateway may still be on the
1014 * other end of a pt to pt link.
1015 */
1016 ifa = ifa_ifwithdstaddr(gateway);
1017 }
2d21ac55 1018 if (ifa == NULL)
c910b4d9 1019 ifa = ifa_ifwithnet_scoped(gateway, ifscope);
2d21ac55
A
1020 if (ifa == NULL) {
1021 /* Workaround to avoid gcc warning regarding const variable */
c910b4d9
A
1022 rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
1023 0, 0UL, ifscope);
2d21ac55
A
1024 if (rt != NULL) {
1025 ifa = rt->rt_ifa;
1026 if (ifa != NULL)
1027 ifaref(ifa);
1028 rtunref(rt);
1029 rt = NULL;
1030 }
1c79356b 1031 }
2d21ac55 1032 if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
91447636 1033 struct ifaddr *newifa;
2d21ac55 1034 /* Callee adds reference to newifa upon success */
91447636 1035 newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
2d21ac55 1036 if (newifa != NULL) {
91447636
A
1037 ifafree(ifa);
1038 ifa = newifa;
1039 }
1c79356b 1040 }
2d21ac55
A
1041 /*
1042 * If we are adding a gateway, it is quite possible that the
1043 * routing table has a static entry in place for the gateway,
1044 * that may not agree with info garnered from the interfaces.
1045 * The routing table should carry more precedence than the
1046 * interfaces in this matter. Must be careful not to stomp
1047 * on new entries from rtinit, hence (ifa->ifa_addr != gateway).
1048 */
1049 if ((ifa == NULL ||
1050 !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) &&
c910b4d9
A
1051 (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gateway,
1052 0, 0UL, ifscope)) != NULL) {
2d21ac55
A
1053 if (ifa != NULL)
1054 ifafree(ifa);
1055 ifa = rt->rt_ifa;
1056 if (ifa != NULL)
1057 ifaref(ifa);
1058 rtunref(rt);
1059 }
c910b4d9
A
1060 /*
1061 * If an interface scope was specified, the interface index of
1062 * the found ifaddr must be equivalent to that of the scope;
1063 * otherwise there is no match.
1064 */
1065 if ((flags & RTF_IFSCOPE) &&
1066 ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
1067 ifafree(ifa);
1068 ifa = NULL;
1069 }
1070
1c79356b
A
1071 return (ifa);
1072}
1073
/*
 * Round a length up to the next multiple of sizeof(long); a length of
 * zero (or less) yields sizeof(long).  Every use of the argument is
 * parenthesized so that expressions with low-precedence operators
 * (e.g. a conditional expression) expand correctly.  The argument is
 * evaluated more than once, so it must be free of side effects.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
1075
/* Tree-walk callbacks used when routes are deleted, added or changed. */
static int rt_fixdelete(struct radix_node *, void *);
static int rt_fixchange(struct radix_node *, void *);

/*
 * Argument bundle handed to rt_fixchange() through the radix tree
 * walker's opaque pointer.
 */
struct rtfc_arg {
	struct rtentry *rt0;		/* route being added or changed */
	struct radix_node_head *rnh;	/* radix tree being walked */
};
1083
c910b4d9
A
1084int
1085rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
1086 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1087{
1088 return (rtrequest_common_locked(req, dst, gateway, netmask,
1089 (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
1090}
1091
1092int
1093rtrequest_scoped_locked(int req, struct sockaddr *dst,
1094 struct sockaddr *gateway, struct sockaddr *netmask, int flags,
1095 struct rtentry **ret_nrt, unsigned int ifscope)
1096{
1097 if (ifscope != IFSCOPE_NONE)
1098 flags |= RTF_IFSCOPE;
1099 else
1100 flags &= ~RTF_IFSCOPE;
1101
1102 return (rtrequest_common_locked(req, dst, gateway, netmask,
1103 flags, ret_nrt, ifscope));
1104}
1105
1c79356b 1106/*
c910b4d9
A
1107 * Do appropriate manipulations of a routing tree given all the bits of
1108 * info needed.
1109 *
1110 * Embedding the scope in the radix key is an internal job that should be
1111 * left to routines in this module. Callers should specify the scope value
1112 * to the "scoped" variants of route routines instead of manipulating the
1113 * key itself. This is typically done when creating a scoped route, e.g.
1114 * rtrequest(RTM_ADD). Once such a route is created and marked with the
1115 * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it
1116 * (RTM_RESOLVE) or to remove it (RTM_DELETE). An exception to this is
1117 * during certain routing socket operations where the search key might be
1118 * derived from the routing message itself, in which case the caller must
1119 * specify the destination address and scope value for RTM_ADD/RTM_DELETE.
1c79356b 1120 */
c910b4d9
A
1121static int
1122rtrequest_common_locked(int req, struct sockaddr *dst0,
1123 struct sockaddr *gateway, struct sockaddr *netmask, int flags,
1124 struct rtentry **ret_nrt, unsigned int ifscope)
1c79356b 1125{
91447636 1126 int error = 0;
2d21ac55
A
1127 struct rtentry *rt;
1128 struct radix_node *rn;
1129 struct radix_node_head *rnh;
91447636 1130 struct ifaddr *ifa = NULL;
c910b4d9
A
1131 struct sockaddr *ndst, *dst = dst0;
1132 struct sockaddr_in sin, mask;
1c79356b
A
1133#define senderr(x) { error = x ; goto bad; }
1134
91447636 1135 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
1c79356b
A
1136 /*
1137 * Find the correct routing tree to use for this Address Family
1138 */
1139 if ((rnh = rt_tables[dst->sa_family]) == 0)
1140 senderr(ESRCH);
1141 /*
1142 * If we are adding a host route then we don't want to put
1143 * a netmask in the tree
1144 */
1145 if (flags & RTF_HOST)
1146 netmask = 0;
c910b4d9
A
1147
1148 /*
1149 * If RTF_IFSCOPE is specified, use a local copy of the destination
1150 * address to embed the scope into. This logic is repeated below
1151 * in the RTM_RESOLVE handler since the caller does not normally
1152 * specify such a flag during a resolve; instead it passes in the
1153 * route used for cloning for which the scope info is derived from.
1154 * Note also that in the case of RTM_DELETE, the address passed in
1155 * by the caller might already contain the embedded scope info when
1156 * it is the key itself, thus making RTF_IFSCOPE unnecessary; one
1157 * instance where it is explicitly set is inside route_output()
1158 * as part of handling a routing socket request.
1159 */
1160 if (req != RTM_RESOLVE && (flags & RTF_IFSCOPE)) {
1161 /* Scoped routing is for AF_INET only */
1162 if (dst->sa_family != AF_INET ||
1163 (req == RTM_ADD && !ip_doscopedroute))
1164 senderr(EINVAL);
1165
1166 if (ifscope == IFSCOPE_NONE) {
1167 flags &= ~RTF_IFSCOPE;
1168 } else {
1169 /* Embed ifscope into the key (local copy) */
1170 dst = sin_copy(SIN(dst), &sin, ifscope);
1171
1172 /* Embed ifscope into netmask (local copy) */
1173 if (netmask != NULL)
1174 netmask = mask_copy(netmask, &mask, ifscope);
1175 }
1176 }
1177
1c79356b
A
1178 switch (req) {
1179 case RTM_DELETE:
1180 /*
1181 * Remove the item from the tree and return it.
1182 * Complain if it is not there and do no more processing.
1183 */
1184 if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
1185 senderr(ESRCH);
1186 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
1187 panic ("rtrequest delete");
1188 rt = (struct rtentry *)rn;
1189
2d21ac55
A
1190 /*
1191 * Take an extra reference to handle the deletion of a route
1192 * entry whose reference count is already 0; e.g. an expiring
1193 * cloned route entry or an entry that was added to the table
1194 * with 0 reference. If the caller is interested in this route,
1195 * we will return it with the reference intact. Otherwise we
1196 * will decrement the reference via rtfree_locked() and then
1197 * possibly deallocate it.
1198 */
1199 rtref(rt);
1200 rt->rt_flags &= ~RTF_UP;
1201
1c79356b
A
1202 /*
1203 * Now search what's left of the subtree for any cloned
1204 * routes which might have been formed from this node.
1205 */
9bccf70c
A
1206 if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
1207 rt_mask(rt)) {
1208 rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
1c79356b
A
1209 rt_fixdelete, rt);
1210 }
1211
1212 /*
1213 * Remove any external references we may have.
1214 * This might result in another rtentry being freed if
1215 * we held its last reference.
1216 */
1217 if (rt->rt_gwroute) {
1218 rt = rt->rt_gwroute;
91447636 1219 rtfree_locked(rt);
1c79356b
A
1220 (rt = (struct rtentry *)rn)->rt_gwroute = 0;
1221 }
1222
9bccf70c 1223 /*
1c79356b
A
1224 * give the protocol a chance to keep things in sync.
1225 */
1226 if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
1227 ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
91447636 1228 ifa = NULL;
1c79356b
A
1229
1230 /*
1231 * one more rtentry floating around that is not
1232 * linked to the routing table.
1233 */
2d21ac55
A
1234 (void) OSIncrementAtomic((SInt32 *)&rttrash);
1235 if (rte_debug & RTD_DEBUG) {
1236 TAILQ_INSERT_TAIL(&rttrash_head,
1237 (struct rtentry_dbg *)rt, rtd_trash_link);
1238 }
1c79356b 1239
c910b4d9
A
1240 /*
1241 * If this is the (non-scoped) default route, clear
1242 * the interface index used for the primary ifscope.
1243 */
1244 if (rt_inet_default(rt, rt_key(rt)))
1245 set_primary_ifscope(IFSCOPE_NONE);
1246
1c79356b
A
1247 /*
1248 * If the caller wants it, then it can have it,
1249 * but it's up to it to free the rtentry as we won't be
1250 * doing it.
1251 */
2d21ac55
A
1252 if (ret_nrt != NULL) {
1253 /* Return the route to caller with reference intact */
1c79356b 1254 *ret_nrt = rt;
2d21ac55
A
1255 } else {
1256 /* Dereference or deallocate the route */
91447636 1257 rtfree_locked(rt);
1c79356b
A
1258 }
1259 break;
1260
1261 case RTM_RESOLVE:
1262 if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
1263 senderr(EINVAL);
1264 ifa = rt->rt_ifa;
91447636 1265 ifaref(ifa);
1c79356b
A
1266 flags = rt->rt_flags &
1267 ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
1268 flags |= RTF_WASCLONED;
1269 gateway = rt->rt_gateway;
1270 if ((netmask = rt->rt_genmask) == 0)
1271 flags |= RTF_HOST;
c910b4d9
A
1272
1273 if (!ip_doscopedroute || dst->sa_family != AF_INET)
1274 goto makeroute;
1275 /*
1276 * When scoped routing is enabled, cloned entries are
1277 * always scoped according to the interface portion of
1278 * the parent route. The exception to this are IPv4
1279 * link local addresses.
1280 */
1281 if (!IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) {
1282 if (flags & RTF_IFSCOPE) {
1283 ifscope = sa_get_ifscope(rt_key(rt));
1284 } else {
1285 ifscope = rt->rt_ifp->if_index;
1286 flags |= RTF_IFSCOPE;
1287 }
1288 } else {
1289 ifscope = IFSCOPE_NONE;
1290 flags &= ~RTF_IFSCOPE;
1291 }
1292
1293 /* Embed or clear ifscope into/from the key (local copy) */
1294 dst = sin_copy(SIN(dst), &sin, ifscope);
1295
1296 /* Embed or clear ifscope into/from netmask (local copy) */
1297 if (netmask != NULL)
1298 netmask = mask_copy(netmask, &mask, ifscope);
1299
1c79356b
A
1300 goto makeroute;
1301
1302 case RTM_ADD:
1303 if ((flags & RTF_GATEWAY) && !gateway)
c910b4d9 1304 panic("rtrequest: RTF_GATEWAY but no gateway");
1c79356b 1305
c910b4d9
A
1306 if (flags & RTF_IFSCOPE) {
1307 ifa = ifa_ifwithroute_scoped_locked(flags, dst0,
1308 gateway, ifscope);
1309 } else {
1310 ifa = ifa_ifwithroute_locked(flags, dst0, gateway);
1311 }
1312 if (ifa == NULL)
1c79356b 1313 senderr(ENETUNREACH);
c910b4d9 1314makeroute:
6601e61a 1315 if ((rt = rte_alloc()) == NULL)
1c79356b
A
1316 senderr(ENOBUFS);
1317 Bzero(rt, sizeof(*rt));
1318 rt->rt_flags = RTF_UP | flags;
c910b4d9 1319
1c79356b
A
1320 /*
1321 * Add the gateway. Possibly re-malloc-ing the storage for it
1322 * also add the rt_gwroute if possible.
1323 */
9bccf70c 1324 if ((error = rt_setgate(rt, dst, gateway)) != 0) {
6601e61a 1325 rte_free(rt);
1c79356b
A
1326 senderr(error);
1327 }
1328
1329 /*
1330 * point to the (possibly newly malloc'd) dest address.
1331 */
1332 ndst = rt_key(rt);
1333
1334 /*
1335 * make sure it contains the value we want (masked if needed).
1336 */
c910b4d9 1337 if (netmask)
1c79356b 1338 rt_maskedcopy(dst, ndst, netmask);
c910b4d9 1339 else
1c79356b
A
1340 Bcopy(dst, ndst, dst->sa_len);
1341
1342 /*
1343 * Note that we now have a reference to the ifa.
1344 * This moved from below so that rnh->rnh_addaddr() can
1345 * examine the ifa and ifa->ifa_ifp if it so desires.
1346 */
91447636
A
1347 rtsetifa(rt, ifa);
1348 rt->rt_ifp = rt->rt_ifa->ifa_ifp;
55e303ae 1349
9bccf70c
A
1350 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
1351
1c79356b
A
1352 rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
1353 rnh, rt->rt_nodes);
1354 if (rn == 0) {
1355 struct rtentry *rt2;
1356 /*
1357 * Uh-oh, we already have one of these in the tree.
1358 * We do a special hack: if the route that's already
1359 * there was generated by the protocol-cloning
1360 * mechanism, then we just blow it away and retry
1361 * the insertion of the new one.
1362 */
c910b4d9
A
1363 if (flags & RTF_IFSCOPE) {
1364 rt2 = rtalloc1_scoped_locked(dst0, 0,
1365 RTF_CLONING | RTF_PRCLONING, ifscope);
1366 } else {
1367 rt2 = rtalloc1_locked(dst, 0,
1368 RTF_CLONING | RTF_PRCLONING);
1369 }
1c79356b 1370 if (rt2 && rt2->rt_parent) {
91447636 1371 rtrequest_locked(RTM_DELETE,
1c79356b
A
1372 (struct sockaddr *)rt_key(rt2),
1373 rt2->rt_gateway,
1374 rt_mask(rt2), rt2->rt_flags, 0);
91447636 1375 rtfree_locked(rt2);
1c79356b
A
1376 rn = rnh->rnh_addaddr((caddr_t)ndst,
1377 (caddr_t)netmask,
1378 rnh, rt->rt_nodes);
1379 } else if (rt2) {
1380 /* undo the extra ref we got */
91447636 1381 rtfree_locked(rt2);
1c79356b
A
1382 }
1383 }
1384
1385 /*
1386 * If it still failed to go into the tree,
1387 * then un-make it (this should be a function)
1388 */
1389 if (rn == 0) {
1390 if (rt->rt_gwroute)
91447636 1391 rtfree_locked(rt->rt_gwroute);
1c79356b 1392 if (rt->rt_ifa) {
9bccf70c 1393 ifafree(rt->rt_ifa);
1c79356b 1394 }
91447636 1395 R_Free(rt_key(rt));
6601e61a 1396 rte_free(rt);
1c79356b
A
1397 senderr(EEXIST);
1398 }
1399
1400 rt->rt_parent = 0;
1401
9bccf70c 1402 /*
1c79356b 1403 * If we got here from RESOLVE, then we are cloning
9bccf70c 1404 * so clone the rest, and note that we
1c79356b
A
1405 * are a clone (and increment the parent's references)
1406 */
1407 if (req == RTM_RESOLVE) {
1408 rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
9bccf70c 1409 if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
1c79356b 1410 rt->rt_parent = (*ret_nrt);
9bccf70c 1411 rtref(*ret_nrt);
1c79356b
A
1412 }
1413 }
1414
1415 /*
1416 * if this protocol has something to add to this then
1417 * allow it to do that as well.
1418 */
1419 if (ifa->ifa_rtrequest)
1420 ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
91447636
A
1421 ifafree(ifa);
1422 ifa = 0;
1c79356b
A
1423
1424 /*
1425 * We repeat the same procedure from rt_setgate() here because
1426 * it doesn't fire when we call it there because the node
1427 * hasn't been added to the tree yet.
1428 */
1429 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
1430 struct rtfc_arg arg;
1431 arg.rnh = rnh;
1432 arg.rt0 = rt;
1433 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
1434 rt_fixchange, &arg);
1435 }
1436
c910b4d9
A
1437 /*
1438 * If this is the (non-scoped) default route, record
1439 * the interface index used for the primary ifscope.
1440 */
1441 if (rt_inet_default(rt, rt_key(rt)))
1442 set_primary_ifscope(rt->rt_ifp->if_index);
1443
1c79356b
A
1444 /*
1445 * actually return a resultant rtentry and
1446 * give the caller a single reference.
1447 */
1448 if (ret_nrt) {
1449 *ret_nrt = rt;
9bccf70c 1450 rtref(rt);
1c79356b
A
1451 }
1452 break;
1453 }
1454bad:
91447636
A
1455 if (ifa)
1456 ifafree(ifa);
1c79356b
A
1457 return (error);
1458}
1459
91447636
A
1460int
1461rtrequest(
1462 int req,
1463 struct sockaddr *dst,
1464 struct sockaddr *gateway,
1465 struct sockaddr *netmask,
1466 int flags,
1467 struct rtentry **ret_nrt)
1468{
1469 int error;
1470 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
1471 lck_mtx_lock(rt_mtx);
1472 error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt);
1473 lck_mtx_unlock(rt_mtx);
1474 return (error);
1475}
1c79356b
A
1476/*
1477 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
1478 * (i.e., the routes related to it by the operation of cloning). This
1479 * routine is iterated over all potential former-child-routes by way of
1480 * rnh->rnh_walktree_from() above, and those that actually are children of
1481 * the late parent (passed in as VP here) are themselves deleted.
1482 */
1483static int
2d21ac55 1484rt_fixdelete(struct radix_node *rn, void *vp)
1c79356b
A
1485{
1486 struct rtentry *rt = (struct rtentry *)rn;
1487 struct rtentry *rt0 = vp;
1488
91447636
A
1489 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
1490
2d21ac55
A
1491 if (rt->rt_parent == rt0 &&
1492 !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
91447636 1493 return rtrequest_locked(RTM_DELETE, rt_key(rt),
1c79356b
A
1494 (struct sockaddr *)0, rt_mask(rt),
1495 rt->rt_flags, (struct rtentry **)0);
1496 }
1497 return 0;
1498}
1499
1500/*
1501 * This routine is called from rt_setgate() to do the analogous thing for
1502 * adds and changes. There is the added complication in this case of a
1503 * middle insert; i.e., insertion of a new network route between an older
1504 * network route and (cloned) host routes. For this reason, a simple check
1505 * of rt->rt_parent is insufficient; each candidate route must be tested
1506 * against the (mask, value) of the new route (passed as before in vp)
9bccf70c 1507 * to see if the new route matches it.
1c79356b
A
1508 *
1509 * XXX - it may be possible to do fixdelete() for changes and reserve this
1510 * routine just for adds. I'm not sure why I thought it was necessary to do
1511 * changes this way.
1512 */
1c79356b 1513static int
2d21ac55 1514rt_fixchange(struct radix_node *rn, void *vp)
1c79356b
A
1515{
1516 struct rtentry *rt = (struct rtentry *)rn;
1517 struct rtfc_arg *ap = vp;
1518 struct rtentry *rt0 = ap->rt0;
1519 struct radix_node_head *rnh = ap->rnh;
9bccf70c
A
1520 u_char *xk1, *xm1, *xk2, *xmp;
1521 int i, len, mlen;
1c79356b 1522
91447636
A
1523 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
1524
2d21ac55 1525 if (!rt->rt_parent ||
c910b4d9
A
1526 (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING)))
1527 return (0);
1c79356b 1528
c910b4d9
A
1529 if (rt->rt_parent == rt0)
1530 goto delete_rt;
1c79356b
A
1531
1532 /*
1533 * There probably is a function somewhere which does this...
1534 * if not, there should be.
1535 */
c910b4d9 1536 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
1c79356b
A
1537
1538 xk1 = (u_char *)rt_key(rt0);
1539 xm1 = (u_char *)rt_mask(rt0);
1540 xk2 = (u_char *)rt_key(rt);
1541
9bccf70c
A
1542 /* avoid applying a less specific route */
1543 xmp = (u_char *)rt_mask(rt->rt_parent);
c910b4d9
A
1544 mlen = rt_key(rt->rt_parent)->sa_len;
1545 if (mlen > rt_key(rt0)->sa_len)
1546 return (0);
1547
9bccf70c 1548 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
c910b4d9
A
1549 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
1550 return (0);
9bccf70c
A
1551 }
1552
1553 for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
c910b4d9
A
1554 if ((xk2[i] & xm1[i]) != xk1[i])
1555 return (0);
1c79356b
A
1556 }
1557
1558 /*
1559 * OK, this node is a clone, and matches the node currently being
1560 * changed/added under the node's mask. So, get rid of it.
1561 */
c910b4d9
A
1562delete_rt:
1563 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
1564 rt_mask(rt), rt->rt_flags, NULL));
1c79356b
A
1565}
1566
1567int
c910b4d9 1568rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
1c79356b 1569{
1c79356b 1570 int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
1c79356b 1571 struct radix_node_head *rnh = rt_tables[dst->sa_family];
c910b4d9
A
1572
1573 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
1574
1c79356b
A
1575 /*
1576 * A host route with the destination equal to the gateway
1577 * will interfere with keeping LLINFO in the routing
1578 * table, so disallow it.
1579 */
c910b4d9
A
1580 if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
1581 (RTF_HOST|RTF_GATEWAY)) && (dst->sa_len == gate->sa_len) &&
1c79356b
A
1582 (bcmp(dst, gate, dst->sa_len) == 0)) {
1583 /*
1584 * The route might already exist if this is an RTM_CHANGE
1585 * or a routing redirect, so try to delete it.
1586 */
c910b4d9
A
1587 if (rt_key(rt))
1588 rtrequest_locked(RTM_DELETE, rt_key(rt),
1589 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
1590 return (EADDRNOTAVAIL);
1c79356b
A
1591 }
1592
1593 /*
c910b4d9
A
1594 * The destination is not directly reachable. Get a route
1595 * to the next-hop gateway and store it in rt_gwroute.
1c79356b 1596 */
c910b4d9
A
1597 if (rt->rt_flags & RTF_GATEWAY) {
1598 struct rtentry *gwrt;
1599 unsigned int ifscope;
1600
1601 ifscope = (dst->sa_family == AF_INET) ?
1602 sa_get_ifscope(dst) : IFSCOPE_NONE;
1603
1604 gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
1605
1606 /*
1607 * Cloning loop avoidance:
1608 *
1609 * In the presence of protocol-cloning and bad configuration,
1610 * it is possible to get stuck in bottomless mutual recursion
1611 * (rtrequest rt_setgate rtalloc1). We avoid this by not
1612 * allowing protocol-cloning to operate for gateways (which
1613 * is probably the correct choice anyway), and avoid the
1614 * resulting reference loops by disallowing any route to run
1615 * through itself as a gateway. This is obviously mandatory
1616 * when we get rt->rt_output(). It implies that a route to
1617 * the gateway must already be present in the system in order
1618 * for the gateway to be referred to by another route.
1619 */
1620 if (gwrt == rt) {
1621 rtunref(gwrt);
1622 return (EADDRINUSE); /* failure */
1623 }
1624
1625 /* If scoped, the gateway route must use the same interface */
1626 if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
1627 gwrt != NULL && gwrt->rt_ifp != NULL &&
1628 gwrt->rt_ifp->if_index != ifscope) {
1629 rtfree_locked(gwrt);
1630 return ((rt->rt_flags & RTF_HOST) ?
1631 EHOSTUNREACH : ENETUNREACH);
1632 }
1633
1634 if (rt->rt_gwroute != NULL)
1635 rtfree_locked(rt->rt_gwroute);
1636 rt->rt_gwroute = gwrt;
1637
1638 /*
1639 * In case the (non-scoped) default route gets modified via
1640 * an ICMP redirect, record the interface index used for the
1641 * primary ifscope. Also done in rt_setif() to take care
1642 * of the non-redirect cases.
1643 */
1644 if (rt_inet_default(rt, dst) && rt->rt_ifp != NULL)
1645 set_primary_ifscope(rt->rt_ifp->if_index);
1646
1c79356b 1647 /*
c910b4d9
A
1648 * Tell the kernel debugger about the new default gateway
1649 * if the gateway route uses the primary interface, or
1650 * if we are in a transient state before the non-scoped
1651 * default gateway is installed (similar to how the system
1652 * was behaving in the past). In future, it would be good
1653 * to do all this only when KDP is enabled.
1c79356b 1654 */
c910b4d9
A
1655 if ((dst->sa_family == AF_INET) &&
1656 gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
1657 (gwrt->rt_ifp->if_index == get_primary_ifscope() ||
1658 get_primary_ifscope() == IFSCOPE_NONE))
1659 kdp_set_gateway_mac(SDL(gwrt->rt_gateway)->sdl_data);
1c79356b
A
1660 }
1661
1662 /*
c910b4d9
A
1663 * Prepare to store the gateway in rt_gateway. Both dst and gateway
1664 * are stored one after the other in the same malloc'd chunk. If we
1665 * have room, reuse the old buffer since rt_gateway already points
1666 * to the right place. Otherwise, malloc a new block and update
1667 * the 'dst' address and point rt_gateway to the right place.
1c79356b 1668 */
c910b4d9
A
1669 if (rt->rt_gateway == NULL || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
1670 caddr_t new;
1c79356b 1671
c910b4d9
A
1672 /* The underlying allocation is done with M_WAITOK set */
1673 R_Malloc(new, caddr_t, dlen + glen);
1674 if (new == NULL) {
1675 if (rt->rt_gwroute != NULL)
1676 rtfree_locked(rt->rt_gwroute);
1677 rt->rt_gwroute = NULL;
1678 return (ENOBUFS);
1679 }
1680
1681 /*
1682 * Copy from 'dst' and not rt_key(rt) because we can get
1683 * here to initialize a newly allocated route entry, in
1684 * which case rt_key(rt) is NULL (and so does rt_gateway).
1685 */
1c79356b 1686 Bcopy(dst, new, dlen);
c910b4d9
A
1687 R_Free(rt_key(rt)); /* free old block; NULL is okay */
1688 rt->rt_nodes->rn_key = new;
1689 rt->rt_gateway = (struct sockaddr *)(new + dlen);
1c79356b
A
1690 }
1691
1692 /*
c910b4d9 1693 * Copy the new gateway value into the memory chunk.
1c79356b 1694 */
c910b4d9
A
1695 Bcopy(gate, rt->rt_gateway, glen);
1696
1c79356b 1697 /*
c910b4d9 1698 * For consistency between rt_gateway and rt_key(gwrt).
1c79356b 1699 */
c910b4d9
A
1700 if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
1701 (rt->rt_gwroute->rt_flags & RTF_IFSCOPE) &&
1702 rt->rt_gateway->sa_family == AF_INET &&
1703 rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
1704 sa_set_ifscope(rt->rt_gateway,
1705 sa_get_ifscope(rt_key(rt->rt_gwroute)));
1c79356b
A
1706 }
1707
1708 /*
1709 * This isn't going to do anything useful for host routes, so
1710 * don't bother. Also make sure we have a reasonable mask
1711 * (we don't yet have one during adds).
1712 */
1713 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
1714 struct rtfc_arg arg;
1715 arg.rnh = rnh;
1716 arg.rt0 = rt;
1717 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
c910b4d9 1718 rt_fixchange, &arg);
1c79356b
A
1719 }
1720
c910b4d9 1721 return (0);
1c79356b
A
1722}
1723
/*
 * Copy sockaddr 'src' into 'dst' with 'netmask' applied.
 *
 * All three arguments are treated as raw byte strings whose first byte
 * is the length.  The first two bytes (sa_len and sa_family) are copied
 * verbatim.  Bytes from offset 2 up to the shorter of the source and
 * mask lengths are ANDed with the mask; any remaining bytes up to the
 * source length are zeroed.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
    struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *bits = (const unsigned char *)netmask;
	unsigned char *to = (unsigned char *)dst;
	unsigned int srclen = from[0];
	unsigned int masklen = bits[0];
	unsigned int stop = (masklen < srclen) ? masklen : srclen;
	unsigned int i;

	/* sa_len and sa_family are never masked */
	to[0] = from[0];
	to[1] = from[1];

	for (i = 2; i < stop; i++)
		to[i] = from[i] & bits[i];

	/* Bytes not covered by the mask become zero. */
	if (i < srclen)
		bzero(to + i, srclen - i);
}
1743
c910b4d9
A
1744/*
1745 * Lookup an AF_INET scoped or non-scoped route depending on the ifscope
1746 * value passed in by the caller (IFSCOPE_NONE implies non-scoped).
1747 */
1748static struct radix_node *
1749node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
1750 unsigned int ifscope)
1751{
1752 struct radix_node_head *rnh = rt_tables[AF_INET];
1753 struct radix_node *rn;
1754 struct sockaddr_in sin, mask;
1755 struct matchleaf_arg ma = { ifscope };
1756 rn_matchf_t *f = rn_match_ifscope;
1757 void *w = &ma;
1758
1759 if (dst->sa_family != AF_INET)
1760 return (NULL);
1761
1762 /*
1763 * Embed ifscope into the search key; for a non-scoped
1764 * search this will clear out any embedded scope value.
1765 */
1766 dst = sin_copy(SIN(dst), &sin, ifscope);
1767
1768 /* Embed (or clear) ifscope into netmask */
1769 if (netmask != NULL)
1770 netmask = mask_copy(netmask, &mask, ifscope);
1771
1772 if (ifscope == IFSCOPE_NONE)
1773 f = w = NULL;
1774
1775 rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w);
1776 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
1777 rn = NULL;
1778
1779 return (rn);
1780}
1781
1782/*
1783 * Lookup the AF_INET non-scoped default route.
1784 */
1785static struct radix_node *
1786node_lookup_default(void)
1787{
1788 struct radix_node_head *rnh = rt_tables[AF_INET];
1789 return (rnh->rnh_lookup(&sin_def, NULL, rnh));
1790}
1791
1792/*
1793 * Common routine to lookup/match a route. It invokes the lookup/matchaddr
1794 * callback which could be address family-specific. The main difference
1795 * between the two (at least for AF_INET/AF_INET6) is that a lookup does
1796 * not alter the expiring state of a route, whereas a match would unexpire
1797 * or revalidate the route.
1798 *
1799 * The optional scope or interface index property of a route allows for a
1800 * per-interface route instance. This permits multiple route entries having
1801 * the same destination (but not necessarily the same gateway) to exist in
1802 * the routing table; each of these entries is specific to the corresponding
1803 * interface. This is made possible by embedding the scope value into the
1804 * radix key, thus making each route entry unique. These scoped entries
1805 * exist along with the regular, non-scoped entries in the same radix tree
1806 * for a given address family (currently AF_INET only); the scope logically
1807 * partitions it into multiple per-interface sub-trees.
1808 *
1809 * When a scoped route lookup is performed, the routing table is searched for
1810 * the best match that would result in a route using the same interface as the
1811 * one associated with the scope (the exception to this are routes that point
1812 * to the loopback interface). The search rule follows the longest matching
1813 * prefix with the additional interface constraint.
1814 */
1815struct rtentry *
1816rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
1817 struct radix_node_head *rnh, unsigned int ifscope)
1818{
1819 struct radix_node *rn0, *rn;
1820 boolean_t dontcare = (ifscope == IFSCOPE_NONE);
1821
1822 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
1823
1824 if (!lookup_only)
1825 netmask = NULL;
1826
1827 /*
1828 * Non-scoped route lookup.
1829 */
1830 if (!ip_doscopedroute || dst->sa_family != AF_INET) {
1831 if (lookup_only)
1832 rn = rnh->rnh_lookup(dst, netmask, rnh);
1833 else
1834 rn = rnh->rnh_matchaddr(dst, rnh);
1835 goto done;
1836 }
1837
1838 /*
1839 * Scoped route lookup:
1840 *
1841 * We first perform a non-scoped lookup for the original result.
1842 * Afterwards, depending on whether or not the caller has specified
1843 * a scope, we perform a more specific scoped search and fallback
1844 * to this original result upon failure.
1845 */
1846 rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE);
1847
1848 /*
1849 * If the caller did not specify a scope, use the primary scope
1850 * derived from the system's non-scoped default route. If, for
1851 * any reason, there is no primary interface, return what we have.
1852 */
1853 if (dontcare && (ifscope = get_primary_ifscope()) == IFSCOPE_NONE)
1854 goto validate;
1855
1856 /*
1857 * Keep the original result if either of the following is true:
1858 *
1859 * 1) The interface portion of the route has the same interface
1860 * index as the scope value and it is marked with RTF_IFSCOPE.
1861 * 2) The route uses the loopback interface, in which case the
1862 * destination (host/net) is local/loopback.
1863 *
1864 * Otherwise, do a more specified search using the scope.
1865 */
1866 if (rn != NULL) {
1867 struct rtentry *rt = RT(rn);
1868 if (rt->rt_ifp != lo_ifp) {
1869 if (rt->rt_ifp->if_index != ifscope) {
1870 /*
1871 * Wrong interface; keep the original result
1872 * only if the caller did not specify a scope,
1873 * and do a more specific scoped search using
1874 * the scope of the found route. Otherwise,
1875 * start again from scratch.
1876 */
1877 rn = NULL;
1878 if (dontcare)
1879 ifscope = rt->rt_ifp->if_index;
1880 else
1881 rn0 = NULL;
1882 } else if (!(rt->rt_flags & RTF_IFSCOPE)) {
1883 /*
1884 * Right interface, except that this route
1885 * isn't marked with RTF_IFSCOPE. Do a more
1886 * specific scoped search. Keep the original
1887 * result and return it it in case the scoped
1888 * search fails.
1889 */
1890 rn = NULL;
1891 }
1892 }
1893 }
1894
1895 /*
1896 * Scoped search. Find the most specific entry having the same
1897 * interface scope as the one requested. The following will result
1898 * in searching for the longest prefix scoped match.
1899 */
1900 if (rn == NULL)
1901 rn = node_lookup(dst, netmask, ifscope);
1902
1903 /*
1904 * Use the original result if either of the following is true:
1905 *
1906 * 1) The scoped search did not yield any result.
1907 * 2) The result from the scoped search is a scoped default route,
1908 * and the original (non-scoped) result is not a default route,
1909 * i.e. the original result is a more specific host/net route.
1910 * 3) The scoped search yielded a net route but the original
1911 * result is a host route, i.e. the original result is treated
1912 * as a more specific route.
1913 */
1914 if (rn == NULL || (rn0 != NULL &&
1915 ((INET_DEFAULT(rt_key(RT(rn))) && !INET_DEFAULT(rt_key(RT(rn0)))) ||
1916 (!RT_HOST(rn) && RT_HOST(rn0)))))
1917 rn = rn0;
1918
1919 /*
1920 * If we still don't have a route, use the non-scoped default
1921 * route as long as the interface portion satisfies the scope.
1922 */
1923 if (rn == NULL && (rn = node_lookup_default()) != NULL &&
1924 RT(rn)->rt_ifp->if_index != ifscope)
1925 rn = NULL;
1926
1927validate:
1928 if (rn != NULL && !lookup_only)
1929 (void) in_validate(rn);
1930
1931done:
1932 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
1933 rn = NULL;
1934 else if (rn != NULL)
1935 rtref(RT(rn));
1936
1937 return (RT(rn));
1938}
1939
1c79356b
A
1940/*
1941 * Set up a routing table entry, normally
1942 * for an interface.
1943 */
1944int
2d21ac55 1945rtinit(struct ifaddr *ifa, int cmd, int flags)
91447636
A
1946{
1947 int error;
1948 lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
1949 lck_mtx_lock(rt_mtx);
1950 error = rtinit_locked(ifa, cmd, flags);
1951 lck_mtx_unlock(rt_mtx);
1952 return (error);
1953}
1954
1955int
2d21ac55 1956rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
1c79356b 1957{
2d21ac55
A
1958 struct rtentry *rt;
1959 struct sockaddr *dst;
1960 struct sockaddr *deldst;
1c79356b
A
1961 struct mbuf *m = 0;
1962 struct rtentry *nrt = 0;
1963 int error;
1964
1965 dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
1966 /*
1967 * If it's a delete, check that if it exists, it's on the correct
1968 * interface or we might scrub a route to another ifa which would
1969 * be confusing at best and possibly worse.
1970 */
1971 if (cmd == RTM_DELETE) {
9bccf70c 1972 /*
1c79356b
A
1973 * It's a delete, so it should already exist..
1974 * If it's a net, mask off the host bits
1975 * (Assuming we have a mask)
1976 */
1977 if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
9bccf70c 1978 m = m_get(M_DONTWAIT, MT_SONAME);
91447636 1979 if (m == NULL) {
9bccf70c 1980 return(ENOBUFS);
91447636 1981 }
1c79356b
A
1982 deldst = mtod(m, struct sockaddr *);
1983 rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
1984 dst = deldst;
1985 }
1986 /*
1987 * Get an rtentry that is in the routing tree and
1988 * contains the correct info. (if this fails, can't get there).
1989 * We set "report" to FALSE so that if it doesn't exist,
1990 * it doesn't report an error or clone a route, etc. etc.
1991 */
91447636 1992 rt = rtalloc1_locked(dst, 0, 0UL);
1c79356b
A
1993 if (rt) {
1994 /*
1995 * Ok so we found the rtentry. it has an extra reference
1996 * for us at this stage. we won't need that so
1997 * lop that off now.
1998 */
9bccf70c 1999 rtunref(rt);
1c79356b
A
2000 if (rt->rt_ifa != ifa) {
2001 /*
2002 * If the interface in the rtentry doesn't match
2003 * the interface we are using, then we don't
2004 * want to delete it, so return an error.
9bccf70c 2005 * This seems to be the only point of
1c79356b
A
2006 * this whole RTM_DELETE clause.
2007 */
2008 if (m)
2009 (void) m_free(m);
2010 return (flags & RTF_HOST ? EHOSTUNREACH
2011 : ENETUNREACH);
2012 }
2013 }
2014 /* XXX */
2015#if 0
2016 else {
9bccf70c 2017 /*
1c79356b
A
2018 * One would think that as we are deleting, and we know
2019 * it doesn't exist, we could just return at this point
2020 * with an "ELSE" clause, but apparently not..
2021 */
91447636 2022 lck_mtx_unlock(rt_mtx);
1c79356b
A
2023 return (flags & RTF_HOST ? EHOSTUNREACH
2024 : ENETUNREACH);
2025 }
2026#endif
2027 }
2028 /*
2029 * Do the actual request
2030 */
91447636 2031 error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
1c79356b
A
2032 flags | ifa->ifa_flags, &nrt);
2033 if (m)
2034 (void) m_free(m);
2035 /*
2036 * If we are deleting, and we found an entry, then
2037 * it's been removed from the tree.. now throw it away.
2038 */
2039 if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
2040 /*
2041 * notify any listenning routing agents of the change
2042 */
2043 rt_newaddrmsg(cmd, ifa, error, nrt);
55e303ae
A
2044 if (use_routegenid)
2045 route_generation++;
2d21ac55 2046 rtfree_locked(rt);
1c79356b
A
2047 }
2048
2049 /*
2050 * We are adding, and we have a returned routing entry.
2051 * We need to sanity check the result.
2052 */
2053 if (cmd == RTM_ADD && error == 0 && (rt = nrt)) {
1c79356b 2054 /*
9bccf70c 2055 * If it came back with an unexpected interface, then it must
1c79356b
A
2056 * have already existed or something. (XXX)
2057 */
2058 if (rt->rt_ifa != ifa) {
9bccf70c
A
2059 if (!(rt->rt_ifa->ifa_ifp->if_flags &
2060 (IFF_POINTOPOINT|IFF_LOOPBACK)))
2061 printf("rtinit: wrong ifa (%p) was (%p)\n",
2062 ifa, rt->rt_ifa);
1c79356b
A
2063 /*
2064 * Ask that the protocol in question
2065 * remove anything it has associated with
2066 * this route and ifaddr.
2067 */
2068 if (rt->rt_ifa->ifa_rtrequest)
2069 rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
9bccf70c
A
2070 /*
2071 * Set the route's ifa.
1c79356b 2072 */
9bccf70c 2073 rtsetifa(rt, ifa);
1c79356b
A
2074 /*
2075 * And substitute in references to the ifaddr
2076 * we are adding.
2077 */
1c79356b 2078 rt->rt_ifp = ifa->ifa_ifp;
9bccf70c 2079 rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu; /*XXX*/
1c79356b
A
2080 /*
2081 * Now ask the protocol to check if it needs
2082 * any special processing in its new form.
2083 */
2084 if (ifa->ifa_rtrequest)
2085 ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
2086 }
2087 /*
2088 * notify any listenning routing agents of the change
2089 */
2090 rt_newaddrmsg(cmd, ifa, error, nrt);
55e303ae
A
2091 if (use_routegenid)
2092 route_generation++;
2d21ac55
A
2093 /*
2094 * We just wanted to add it; we don't actually need a
2095 * reference. This will result in a route that's added
2096 * to the routing table without a reference count. The
2097 * RTM_DELETE code will do the necessary step to adjust
2098 * the reference count at deletion time.
2099 */
2100 rtunref(rt);
2101 }
1c79356b
A
2102 return (error);
2103}
6601e61a 2104
2d21ac55 2105struct rtentry *
6601e61a
A
2106rte_alloc(void)
2107{
2d21ac55
A
2108 if (rte_debug & RTD_DEBUG)
2109 return (rte_alloc_debug());
2110
6601e61a
A
2111 return ((struct rtentry *)zalloc(rte_zone));
2112}
2113
2d21ac55 2114void
6601e61a
A
2115rte_free(struct rtentry *p)
2116{
2d21ac55
A
2117 if (rte_debug & RTD_DEBUG) {
2118 rte_free_debug(p);
2119 return;
2120 }
2121
6601e61a
A
2122 if (p->rt_refcnt != 0)
2123 panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
2124
6601e61a
A
2125 zfree(rte_zone, p);
2126}
0c530ab8 2127
2d21ac55
A
2128static inline struct rtentry *
2129rte_alloc_debug(void)
2130{
2131 struct rtentry_dbg *rte;
2132
2133 rte = ((struct rtentry_dbg *)zalloc(rte_zone));
2134 if (rte != NULL) {
2135 bzero(rte, sizeof (*rte));
2136 if (rte_debug & RTD_TRACE) {
2137 rte->rtd_alloc_thread = current_thread();
2138 (void) OSBacktrace(rte->rtd_alloc_stk_pc,
2139 RTD_TRSTACK_SIZE);
2140 }
2141 rte->rtd_inuse = RTD_INUSE;
2142 }
2143 return ((struct rtentry *)rte);
2144}
2145
2146static inline void
2147rte_free_debug(struct rtentry *p)
2148{
2149 struct rtentry_dbg *rte = (struct rtentry_dbg *)p;
2150
2151 if (p->rt_refcnt != 0)
2152 panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);
2153
2154 if (rte->rtd_inuse == RTD_FREED)
2155 panic("rte_free: double free rte=%p\n", rte);
2156 else if (rte->rtd_inuse != RTD_INUSE)
2157 panic("rte_free: corrupted rte=%p\n", rte);
2158
2159 bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
2160 bzero((caddr_t)p, sizeof (*p));
2161
2162 rte->rtd_inuse = RTD_FREED;
2163
2164 if (rte_debug & RTD_TRACE) {
2165 rte->rtd_free_thread = current_thread();
2166 (void) OSBacktrace(rte->rtd_free_stk_pc, RTD_TRSTACK_SIZE);
2167 }
2168
2169 if (!(rte_debug & RTD_NO_FREE))
2170 zfree(rte_zone, p);
2171}