]> git.saurik.com Git - apple/xnu.git/blame - bsd/net/route.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / bsd / net / route.c
CommitLineData
1c79356b 1/*
cb323159 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
316670eb 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
316670eb 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
316670eb 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
316670eb 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1980, 1986, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)route.c 8.2 (Berkeley) 11/15/93
9bccf70c 61 * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $
1c79356b 62 */
316670eb 63
1c79356b 64#include <sys/param.h>
d1ecb069 65#include <sys/sysctl.h>
1c79356b
A
66#include <sys/systm.h>
67#include <sys/malloc.h>
68#include <sys/mbuf.h>
69#include <sys/socket.h>
70#include <sys/domain.h>
39037602
A
71#include <sys/stat.h>
72#include <sys/ubc.h>
73#include <sys/vnode.h>
9bccf70c 74#include <sys/syslog.h>
2d21ac55 75#include <sys/queue.h>
6d2010ae 76#include <sys/mcache.h>
a39ff7e2 77#include <sys/priv.h>
6d2010ae 78#include <sys/protosw.h>
cb323159 79#include <sys/sdt.h>
39236c6e 80#include <sys/kernel.h>
fe8ab488 81#include <kern/locks.h>
6601e61a 82#include <kern/zalloc.h>
1c79356b 83
39236c6e 84#include <net/dlil.h>
1c79356b
A
85#include <net/if.h>
86#include <net/route.h>
6d2010ae 87#include <net/ntstat.h>
5ba3f43e
A
88#include <net/nwk_wq.h>
89#if NECP
90#include <net/necp.h>
91#endif /* NECP */
1c79356b
A
92
93#include <netinet/in.h>
c910b4d9 94#include <netinet/in_var.h>
c910b4d9 95#include <netinet/ip_var.h>
d9a64523 96#include <netinet/ip.h>
6d2010ae 97#include <netinet/ip6.h>
5ba3f43e 98#include <netinet/in_arp.h>
6d2010ae
A
99
100#if INET6
101#include <netinet6/ip6_var.h>
102#include <netinet6/in6_var.h>
39236c6e 103#include <netinet6/nd6.h>
6d2010ae 104#endif /* INET6 */
1c79356b 105
55e303ae
A
106#include <net/if_dl.h>
107
2d21ac55
A
108#include <libkern/OSAtomic.h>
109#include <libkern/OSDebug.h>
110
111#include <pexpert/pexpert.h>
112
39236c6e
A
113#if CONFIG_MACF
114#include <sys/kauth.h>
115#endif
116
b0d623f7
A
117/*
118 * Synchronization notes:
119 *
120 * Routing entries fall under two locking domains: the global routing table
121 * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that
122 * resides (statically defined) in the rtentry structure.
123 *
124 * The locking domains for routing are defined as follows:
125 *
126 * The global routing lock is used to serialize all accesses to the radix
127 * trees defined by rt_tables[], as well as the tree of masks. This includes
128 * lookups, insertions and removals of nodes to/from the respective tree.
129 * It is also used to protect certain fields in the route entry that aren't
130 * often modified and/or require global serialization (more details below.)
131 *
132 * The per-route entry lock is used to serialize accesses to several routing
133 * entry fields (more details below.) Acquiring and releasing this lock is
134 * done via RT_LOCK() and RT_UNLOCK() routines.
135 *
136 * In cases where both rnh_lock and rt_lock must be held, the former must be
137 * acquired first in order to maintain lock ordering. It is not a requirement
138 * that rnh_lock be acquired first before rt_lock, but in case both must be
139 * acquired in succession, the correct lock ordering must be followed.
140 *
141 * The fields of the rtentry structure are protected in the following way:
142 *
143 * rt_nodes[]
144 *
145 * - Routing table lock (rnh_lock).
146 *
39236c6e 147 * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid
b0d623f7
A
148 *
149 * - Set once during creation and never changes; no locks to read.
150 *
151 * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute
152 *
153 * - Routing entry lock (rt_lock) for read/write access.
154 *
155 * - Some values of rt_flags are either set once at creation time,
156 * or aren't currently used, and thus checking against them can
157 * be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC,
158 * RTF_DONE, RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE,
159 * RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL,
d1ecb069 160 * RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE, RTF_IFREF.
b0d623f7
A
161 *
162 * rt_key, rt_gateway, rt_ifp, rt_ifa
163 *
164 * - Always written/modified with both rnh_lock and rt_lock held.
165 *
166 * - May be read freely with rnh_lock held, else must hold rt_lock
167 * for read access; holding both locks for read is also okay.
168 *
169 * - In the event rnh_lock is not acquired, or is not possible to be
170 * acquired across the operation, setting RTF_CONDEMNED on a route
171 * entry will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa
172 * from being modified. This is typically done on a route that
173 * has been chosen for a removal (from the tree) prior to dropping
174 * the rt_lock, so that those values will remain the same until
175 * the route is freed.
176 *
177 * When rnh_lock is held rt_setgate(), rt_setif(), and rtsetifa() are
178 * single-threaded, thus exclusive. This flag will also prevent the
179 * route from being looked up via rt_lookup().
180 *
39236c6e 181 * rt_genid
b0d623f7
A
182 *
183 * - Assumes that 32-bit writes are atomic; no locks.
184 *
185 * rt_dlt, rt_output
186 *
187 * - Currently unused; no locks.
188 *
189 * Operations on a route entry can be described as follows:
190 *
191 * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE.
192 *
193 * INSERTION of an entry into the radix tree holds the rnh_lock, checks
194 * for duplicates and then adds the entry. rtrequest returns the entry
195 * after bumping up the reference count to 1 (for the caller).
196 *
197 * LOOKUP of an entry holds the rnh_lock and bumps up the reference count
198 * before returning; it is valid to also bump up the reference count using
199 * RT_ADDREF after the lookup has returned an entry.
200 *
201 * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the
202 * entry but does not decrement the reference count. Removal happens when
203 * the route is explicitly deleted (RTM_DELETE) or when it is in the cached
204 * state and it expires. The route is said to be "down" when it is no
205 * longer present in the tree. Freeing the entry will happen on the last
206 * reference release of such a "down" route.
207 *
208 * RT_ADDREF/RT_REMREF operates on the routing entry which increments/
209 * decrements the reference count, rt_refcnt, atomically on the rtentry.
210 * rt_refcnt is modified only using this routine. The general rule is to
211 * do RT_ADDREF in the function that is passing the entry as an argument,
212 * in order to prevent the entry from being freed by the callee.
213 */
214
2d21ac55 215#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
1c79356b 216
39236c6e 217extern void kdp_set_gateway_mac(void *gatewaymac);
2d21ac55 218
cb323159
A
219__private_extern__ struct rtstat rtstat = {
220 .rts_badredirect = 0,
221 .rts_dynamic = 0,
222 .rts_newgateway = 0,
223 .rts_unreach = 0,
224 .rts_wildcard = 0,
225 .rts_badrtgwroute = 0
226};
1c79356b
A
227struct radix_node_head *rt_tables[AF_MAX+1];
228
39236c6e 229decl_lck_mtx_data(, rnh_lock_data); /* global routing tables mutex */
316670eb 230lck_mtx_t *rnh_lock = &rnh_lock_data;
b0d623f7
A
231static lck_attr_t *rnh_lock_attr;
232static lck_grp_t *rnh_lock_grp;
233static lck_grp_attr_t *rnh_lock_grp_attr;
234
235/* Lock group and attribute for routing entry locks */
236static lck_attr_t *rte_mtx_attr;
237static lck_grp_t *rte_mtx_grp;
238static lck_grp_attr_t *rte_mtx_grp_attr;
1c79356b 239
2d21ac55
A
240int rttrash = 0; /* routes not in table but not freed */
241
cb323159 242boolean_t trigger_v6_defrtr_select = FALSE;
5ba3f43e 243unsigned int rte_debug = 0;
2d21ac55
A
244
245/* Possible flags for rte_debug */
246#define RTD_DEBUG 0x1 /* enable or disable rtentry debug facility */
b0d623f7 247#define RTD_TRACE 0x2 /* trace alloc, free, refcnt and lock */
2d21ac55 248#define RTD_NO_FREE 0x4 /* don't free (good to catch corruptions) */
91447636 249
b0d623f7
A
250#define RTE_NAME "rtentry" /* name for zone and rt_lock */
251
6601e61a
A
252static struct zone *rte_zone; /* special zone for rtentry */
253#define RTE_ZONE_MAX 65536 /* maximum elements in zone */
b0d623f7 254#define RTE_ZONE_NAME RTE_NAME /* name of rtentry zone */
6601e61a 255
2d21ac55
A
256#define RTD_INUSE 0xFEEDFACE /* entry is in use */
257#define RTD_FREED 0xDEADBEEF /* entry is freed */
258
3e170ce0
A
259#define MAX_SCOPE_ADDR_STR_LEN (MAX_IPv6_STR_LEN + 6)
260
b0d623f7
A
261/* For gdb */
262__private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE;
263__private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE;
2d21ac55
A
264
265/*
266 * Debug variant of rtentry structure.
267 */
268struct rtentry_dbg {
269 struct rtentry rtd_entry; /* rtentry */
270 struct rtentry rtd_entry_saved; /* saved rtentry */
b0d623f7
A
271 uint32_t rtd_inuse; /* in use pattern */
272 uint16_t rtd_refhold_cnt; /* # of rtref */
273 uint16_t rtd_refrele_cnt; /* # of rtunref */
274 uint32_t rtd_lock_cnt; /* # of locks */
275 uint32_t rtd_unlock_cnt; /* # of unlocks */
2d21ac55 276 /*
b0d623f7 277 * Alloc and free callers.
2d21ac55 278 */
b0d623f7
A
279 ctrace_t rtd_alloc;
280 ctrace_t rtd_free;
2d21ac55
A
281 /*
282 * Circular lists of rtref and rtunref callers.
283 */
b0d623f7
A
284 ctrace_t rtd_refhold[CTRACE_HIST_SIZE];
285 ctrace_t rtd_refrele[CTRACE_HIST_SIZE];
286 /*
287 * Circular lists of locks and unlocks.
288 */
289 ctrace_t rtd_lock[CTRACE_HIST_SIZE];
290 ctrace_t rtd_unlock[CTRACE_HIST_SIZE];
2d21ac55
A
291 /*
292 * Trash list linkage
293 */
294 TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
295};
296
b0d623f7 297/* List of trash route entries protected by rnh_lock */
2d21ac55
A
298static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
299
b0d623f7
A
300static void rte_lock_init(struct rtentry *);
301static void rte_lock_destroy(struct rtentry *);
2d21ac55
A
302static inline struct rtentry *rte_alloc_debug(void);
303static inline void rte_free_debug(struct rtentry *);
b0d623f7
A
304static inline void rte_lock_debug(struct rtentry_dbg *);
305static inline void rte_unlock_debug(struct rtentry_dbg *);
3e170ce0
A
306static void rt_maskedcopy(const struct sockaddr *,
307 struct sockaddr *, const struct sockaddr *);
91447636 308static void rtable_init(void **);
2d21ac55
A
309static inline void rtref_audit(struct rtentry_dbg *);
310static inline void rtunref_audit(struct rtentry_dbg *);
b0d623f7 311static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t,
c910b4d9
A
312 unsigned int);
313static int rtrequest_common_locked(int, struct sockaddr *,
314 struct sockaddr *, struct sockaddr *, int, struct rtentry **,
315 unsigned int);
6d2010ae 316static struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t);
b0d623f7 317static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
6d2010ae
A
318static inline void sin6_set_ifscope(struct sockaddr *, unsigned int);
319static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int);
320static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *);
6d2010ae
A
321static struct sockaddr *ma_copy(int, struct sockaddr *,
322 struct sockaddr_storage *, unsigned int);
b0d623f7 323static struct sockaddr *sa_trim(struct sockaddr *, int);
c910b4d9
A
324static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
325 unsigned int);
6d2010ae 326static struct radix_node *node_lookup_default(int);
39236c6e
A
327static struct rtentry *rt_lookup_common(boolean_t, boolean_t, struct sockaddr *,
328 struct sockaddr *, struct radix_node_head *, unsigned int);
c910b4d9
A
329static int rn_match_ifscope(struct radix_node *, void *);
330static struct ifaddr *ifa_ifwithroute_common_locked(int,
331 const struct sockaddr *, const struct sockaddr *, unsigned int);
b0d623f7
A
332static struct rtentry *rte_alloc(void);
333static void rte_free(struct rtentry *);
334static void rtfree_common(struct rtentry *, boolean_t);
d1ecb069 335static void rte_if_ref(struct ifnet *, int);
39236c6e
A
336static void rt_set_idleref(struct rtentry *);
337static void rt_clear_idleref(struct rtentry *);
5ba3f43e 338static void route_event_callback(void *);
39236c6e
A
339static void rt_str4(struct rtentry *, char *, uint32_t, char *, uint32_t);
340#if INET6
341static void rt_str6(struct rtentry *, char *, uint32_t, char *, uint32_t);
342#endif /* INET6 */
1c79356b 343
39236c6e
A
/* Route generation counters, one per address family; both start at zero. */
uint32_t route_genid_inet = 0;
#if INET6
uint32_t route_genid_inet6 = 0;
#endif /* INET6 */
c910b4d9
A
348
/*
 * Panic unless sa is an AF_INET sockaddr whose sa_len is at least
 * sizeof (struct sockaddr_in).
 *
 * The body is wrapped in do { } while (0) so that the macro followed by a
 * semicolon forms exactly one statement; a bare { } block would break an
 * unbraced if/else.  The argument is parenthesized everywhere it is used.
 */
#define ASSERT_SINIFSCOPE(sa) do {					\
	if ((sa)->sa_family != AF_INET ||				\
	    (sa)->sa_len < sizeof (struct sockaddr_in))			\
		panic("%s: bad sockaddr_in %p\n", __func__, (sa));	\
} while (0)

/*
 * Panic unless sa is an AF_INET6 sockaddr whose sa_len is at least
 * sizeof (struct sockaddr_in6).  Same statement-safe form as above.
 */
#define ASSERT_SIN6IFSCOPE(sa) do {					\
	if ((sa)->sa_family != AF_INET6 ||				\
	    (sa)->sa_len < sizeof (struct sockaddr_in6))		\
		panic("%s: bad sockaddr_in6 %p\n", __func__, (sa));	\
} while (0)
360
c910b4d9
A
/*
 * Search filter handed to the leaf-matching callback.  Only the interface
 * scope is carried today (scoped routing), but further filters can be
 * added here in future.
 */
struct matchleaf_arg {
	unsigned int	ifscope;	/* interface scope */
};
368
369/*
370 * For looking up the non-scoped default route (sockaddr instead
371 * of sockaddr_in for convenience).
372 */
373static struct sockaddr sin_def = {
cb323159
A
374 .sa_len = sizeof (struct sockaddr_in),
375 .sa_family = AF_INET,
376 .sa_data = { 0, }
c910b4d9
A
377};
378
6d2010ae 379static struct sockaddr_in6 sin6_def = {
cb323159
A
380 .sin6_len = sizeof (struct sockaddr_in6),
381 .sin6_family = AF_INET6,
382 .sin6_port = 0,
383 .sin6_flowinfo = 0,
384 .sin6_addr = IN6ADDR_ANY_INIT,
385 .sin6_scope_id = 0
6d2010ae
A
386};
387
c910b4d9
A
388/*
389 * Interface index (scope) of the primary interface; determined at
390 * the time when the default, non-scoped route gets added, changed
b0d623f7 391 * or deleted. Protected by rnh_lock.
c910b4d9
A
392 */
393static unsigned int primary_ifscope = IFSCOPE_NONE;
6d2010ae
A
394static unsigned int primary6_ifscope = IFSCOPE_NONE;
395
/* True if sa is an AF_INET sockaddr holding the all-zero address. */
#define INET_DEFAULT(sa) \
	((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0)

/* True if sa is an AF_INET6 sockaddr holding the unspecified address. */
#define INET6_DEFAULT(sa) \
	((sa)->sa_family == AF_INET6 && \
	IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))

/* True if sa is the default destination of either family. */
#define SA_DEFAULT(sa)	(INET_DEFAULT(sa) || INET6_DEFAULT(sa))

/*
 * Pointer casts.  The argument is parenthesized so that an expression
 * argument such as RT(p + n) is cast as a whole, rather than having the
 * cast bind to its first operand only.
 */
#define RT(r)		((struct rtentry *)(r))
#define RN(r)		((struct radix_node *)(r))

/* Non-zero if the route entry has RTF_HOST set. */
#define RT_HOST(r)	(RT(r)->rt_flags & RTF_HOST)
407
39037602
A
/* Verbosity knob; writable via sysctl on DEVELOPMENT and DEBUG kernels only. */
unsigned int rt_verbose = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_DECL(_net_route);
SYSCTL_UINT(_net_route, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rt_verbose, 0, "");
#endif /* (DEVELOPMENT || DEBUG) */
39236c6e
A
414
415static void
416rtable_init(void **table)
417{
418 struct domain *dom;
419
420 domain_proto_mtx_lock_assert_held();
421
422 TAILQ_FOREACH(dom, &domains, dom_entry) {
423 if (dom->dom_rtattach != NULL)
424 dom->dom_rtattach(&table[dom->dom_family],
425 dom->dom_rtoffset);
426 }
427}
428
429/*
430 * Called by route_dinit().
431 */
432void
433route_init(void)
434{
435 int size;
436
437#if INET6
438 _CASSERT(offsetof(struct route, ro_rt) ==
439 offsetof(struct route_in6, ro_rt));
5ba3f43e
A
440 _CASSERT(offsetof(struct route, ro_lle) ==
441 offsetof(struct route_in6, ro_lle));
39236c6e
A
442 _CASSERT(offsetof(struct route, ro_srcia) ==
443 offsetof(struct route_in6, ro_srcia));
444 _CASSERT(offsetof(struct route, ro_flags) ==
445 offsetof(struct route_in6, ro_flags));
446 _CASSERT(offsetof(struct route, ro_dst) ==
447 offsetof(struct route_in6, ro_dst));
448#endif /* INET6 */
449
450 PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
451 if (rte_debug != 0)
452 rte_debug |= RTD_DEBUG;
453
454 rnh_lock_grp_attr = lck_grp_attr_alloc_init();
455 rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr);
456 rnh_lock_attr = lck_attr_alloc_init();
457 lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr);
458
459 rte_mtx_grp_attr = lck_grp_attr_alloc_init();
460 rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr);
461 rte_mtx_attr = lck_attr_alloc_init();
462
463 lck_mtx_lock(rnh_lock);
464 rn_init(); /* initialize all zeroes, all ones, mask table */
465 lck_mtx_unlock(rnh_lock);
466 rtable_init((void **)rt_tables);
467
468 if (rte_debug & RTD_DEBUG)
469 size = sizeof (struct rtentry_dbg);
470 else
471 size = sizeof (struct rtentry);
472
473 rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
474 if (rte_zone == NULL) {
475 panic("%s: failed allocating rte_zone", __func__);
476 /* NOTREACHED */
477 }
478 zone_change(rte_zone, Z_EXPAND, TRUE);
479 zone_change(rte_zone, Z_CALLERACCT, FALSE);
480 zone_change(rte_zone, Z_NOENCRYPT, TRUE);
d1ecb069 481
39236c6e
A
482 TAILQ_INIT(&rttrash_head);
483}
d1ecb069 484
c910b4d9
A
485/*
486 * Given a route, determine whether or not it is the non-scoped default
487 * route; dst typically comes from rt_key(rt) but may be coming from
488 * a separate place when rt is in the process of being created.
489 */
490boolean_t
6d2010ae 491rt_primary_default(struct rtentry *rt, struct sockaddr *dst)
c910b4d9 492{
6d2010ae 493 return (SA_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
c910b4d9
A
494}
495
496/*
b0d623f7 497 * Set the ifscope of the primary interface; caller holds rnh_lock.
c910b4d9
A
498 */
499void
6d2010ae 500set_primary_ifscope(int af, unsigned int ifscope)
c910b4d9 501{
6d2010ae
A
502 if (af == AF_INET)
503 primary_ifscope = ifscope;
504 else
505 primary6_ifscope = ifscope;
c910b4d9
A
506}
507
508/*
b0d623f7 509 * Return the ifscope of the primary interface; caller holds rnh_lock.
c910b4d9
A
510 */
511unsigned int
6d2010ae 512get_primary_ifscope(int af)
c910b4d9 513{
6d2010ae 514 return (af == AF_INET ? primary_ifscope : primary6_ifscope);
c910b4d9
A
515}
516
517/*
6d2010ae 518 * Set the scope ID of a given a sockaddr_in.
c910b4d9 519 */
6d2010ae
A
520void
521sin_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
c910b4d9
A
522{
523 /* Caller must pass in sockaddr_in */
524 ASSERT_SINIFSCOPE(sa);
525
6d2010ae 526 SINIFSCOPE(sa)->sin_scope_id = ifscope;
c910b4d9
A
527}
528
529/*
6d2010ae
A
530 * Set the scope ID of given a sockaddr_in6.
531 */
532static inline void
533sin6_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
534{
535 /* Caller must pass in sockaddr_in6 */
536 ASSERT_SIN6IFSCOPE(sa);
537
538 SIN6IFSCOPE(sa)->sin6_scope_id = ifscope;
539}
540
541/*
542 * Given a sockaddr_in, return the scope ID to the caller.
c910b4d9
A
543 */
544unsigned int
6d2010ae 545sin_get_ifscope(struct sockaddr *sa)
c910b4d9
A
546{
547 /* Caller must pass in sockaddr_in */
548 ASSERT_SINIFSCOPE(sa);
549
6d2010ae 550 return (SINIFSCOPE(sa)->sin_scope_id);
c910b4d9
A
551}
552
553/*
6d2010ae
A
554 * Given a sockaddr_in6, return the scope ID to the caller.
555 */
556unsigned int
557sin6_get_ifscope(struct sockaddr *sa)
558{
559 /* Caller must pass in sockaddr_in6 */
560 ASSERT_SIN6IFSCOPE(sa);
561
562 return (SIN6IFSCOPE(sa)->sin6_scope_id);
563}
564
565static inline void
566sin6_set_embedded_ifscope(struct sockaddr *sa, unsigned int ifscope)
567{
568 /* Caller must pass in sockaddr_in6 */
569 ASSERT_SIN6IFSCOPE(sa);
570 VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa)->sin6_addr)));
571
572 SIN6(sa)->sin6_addr.s6_addr16[1] = htons(ifscope);
573}
574
575static inline unsigned int
576sin6_get_embedded_ifscope(struct sockaddr *sa)
577{
578 /* Caller must pass in sockaddr_in6 */
579 ASSERT_SIN6IFSCOPE(sa);
580
581 return (ntohs(SIN6(sa)->sin6_addr.s6_addr16[1]));
582}
583
584/*
585 * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst.
586 *
587 * To clear the scope ID, pass is a NULL pifscope. To set the scope ID, pass
588 * in a non-NULL pifscope with non-zero ifscope. Otherwise if pifscope is
589 * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact.
590 * In any case, the effective scope ID value is returned to the caller via
591 * pifscope, if it is non-NULL.
c910b4d9 592 */
3e170ce0 593struct sockaddr *
6d2010ae
A
594sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
595 unsigned int *pifscope)
c910b4d9 596{
6d2010ae
A
597 int af = src->sa_family;
598 unsigned int ifscope = (pifscope != NULL) ? *pifscope : IFSCOPE_NONE;
599
600 VERIFY(af == AF_INET || af == AF_INET6);
601
602 bzero(dst, sizeof (*dst));
603
604 if (af == AF_INET) {
605 bcopy(src, dst, sizeof (struct sockaddr_in));
4ba76501 606 dst->ss_len = sizeof(struct sockaddr_in);
6d2010ae
A
607 if (pifscope == NULL || ifscope != IFSCOPE_NONE)
608 sin_set_ifscope(SA(dst), ifscope);
609 } else {
610 bcopy(src, dst, sizeof (struct sockaddr_in6));
4ba76501 611 dst->ss_len = sizeof(struct sockaddr_in6);
6d2010ae
A
612 if (pifscope != NULL &&
613 IN6_IS_SCOPE_EMBED(&SIN6(dst)->sin6_addr)) {
614 unsigned int eifscope;
615 /*
616 * If the address contains the embedded scope ID,
617 * use that as the value for sin6_scope_id as long
618 * the caller doesn't insist on clearing it (by
619 * passing NULL) or setting it.
620 */
621 eifscope = sin6_get_embedded_ifscope(SA(dst));
622 if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE)
623 ifscope = eifscope;
3e170ce0
A
624 if (ifscope != IFSCOPE_NONE) {
625 /* Set ifscope from pifscope or eifscope */
626 sin6_set_ifscope(SA(dst), ifscope);
627 } else {
628 /* If sin6_scope_id has a value, use that one */
629 ifscope = sin6_get_ifscope(SA(dst));
630 }
6d2010ae
A
631 /*
632 * If sin6_scope_id is set but the address doesn't
633 * contain the equivalent embedded value, set it.
634 */
635 if (ifscope != IFSCOPE_NONE && eifscope != ifscope)
636 sin6_set_embedded_ifscope(SA(dst), ifscope);
637 } else if (pifscope == NULL || ifscope != IFSCOPE_NONE) {
638 sin6_set_ifscope(SA(dst), ifscope);
639 }
640 }
641
642 if (pifscope != NULL) {
643 *pifscope = (af == AF_INET) ? sin_get_ifscope(SA(dst)) :
644 sin6_get_ifscope(SA(dst));
645 }
c910b4d9
A
646
647 return (SA(dst));
648}
649
650/*
6d2010ae 651 * Copy a mask from src to a dst storage and set scope ID into dst.
c910b4d9
A
652 */
653static struct sockaddr *
6d2010ae
A
654ma_copy(int af, struct sockaddr *src, struct sockaddr_storage *dst,
655 unsigned int ifscope)
c910b4d9 656{
6d2010ae
A
657 VERIFY(af == AF_INET || af == AF_INET6);
658
c910b4d9
A
659 bzero(dst, sizeof (*dst));
660 rt_maskedcopy(src, SA(dst), src);
661
662 /*
663 * The length of the mask sockaddr would need to be adjusted
6d2010ae
A
664 * to cover the additional {sin,sin6}_ifscope field; when ifscope
665 * is IFSCOPE_NONE, we'd end up clearing the scope ID field on
c910b4d9
A
666 * the destination mask in addition to extending the length
667 * of the sockaddr, as a side effect. This is okay, as any
668 * trailing zeroes would be skipped by rn_addmask prior to
669 * inserting or looking up the mask in the mask tree.
670 */
6d2010ae
A
671 if (af == AF_INET) {
672 SINIFSCOPE(dst)->sin_scope_id = ifscope;
673 SINIFSCOPE(dst)->sin_len =
674 offsetof(struct sockaddr_inifscope, sin_scope_id) +
675 sizeof (SINIFSCOPE(dst)->sin_scope_id);
676 } else {
677 SIN6IFSCOPE(dst)->sin6_scope_id = ifscope;
678 SIN6IFSCOPE(dst)->sin6_len =
679 offsetof(struct sockaddr_in6, sin6_scope_id) +
680 sizeof (SIN6IFSCOPE(dst)->sin6_scope_id);
681 }
c910b4d9
A
682
683 return (SA(dst));
684}
685
b0d623f7
A
686/*
687 * Trim trailing zeroes on a sockaddr and update its length.
688 */
689static struct sockaddr *
690sa_trim(struct sockaddr *sa, int skip)
691{
692 caddr_t cp, base = (caddr_t)sa + skip;
693
694 if (sa->sa_len <= skip)
695 return (sa);
696
39236c6e 697 for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0; )
b0d623f7
A
698 cp--;
699
700 sa->sa_len = (cp - base) + skip;
701 if (sa->sa_len < skip) {
702 /* Must not happen, and if so, panic */
703 panic("%s: broken logic (sa_len %d < skip %d )", __func__,
704 sa->sa_len, skip);
705 /* NOTREACHED */
706 } else if (sa->sa_len == skip) {
707 /* If we end up with all zeroes, then there's no mask */
708 sa->sa_len = 0;
709 }
710
711 return (sa);
712}
713
714/*
39236c6e
A
715 * Called by rtm_msg{1,2} routines to "scrub" socket address structures of
716 * kernel private information, so that clients of the routing socket will
6d2010ae
A
717 * not be confused by the presence of the information, or the side effect of
718 * the increased length due to that. The source sockaddr is not modified;
719 * instead, the scrubbing happens on the destination sockaddr storage that
720 * is passed in by the caller.
39236c6e
A
721 *
722 * Scrubbing entails:
723 * - removing embedded scope identifiers from network mask and destination
724 * IPv4 and IPv6 socket addresses
725 * - optionally removing global scope interface hardware addresses from
726 * link-layer interface addresses when the MAC framework check fails.
b0d623f7
A
727 */
728struct sockaddr *
39236c6e 729rtm_scrub(int type, int idx, struct sockaddr *hint, struct sockaddr *sa,
5ba3f43e 730 void *buf, uint32_t buflen, kauth_cred_t *credp)
b0d623f7 731{
39236c6e 732 struct sockaddr_storage *ss = (struct sockaddr_storage *)buf;
b0d623f7
A
733 struct sockaddr *ret = sa;
734
39236c6e
A
735 VERIFY(buf != NULL && buflen >= sizeof (*ss));
736 bzero(buf, buflen);
737
b0d623f7
A
738 switch (idx) {
739 case RTAX_DST:
740 /*
6d2010ae
A
741 * If this is for an AF_INET/AF_INET6 destination address,
742 * call sa_copy() to clear the scope ID field.
b0d623f7
A
743 */
744 if (sa->sa_family == AF_INET &&
6d2010ae
A
745 SINIFSCOPE(sa)->sin_scope_id != IFSCOPE_NONE) {
746 ret = sa_copy(sa, ss, NULL);
747 } else if (sa->sa_family == AF_INET6 &&
748 SIN6IFSCOPE(sa)->sin6_scope_id != IFSCOPE_NONE) {
749 ret = sa_copy(sa, ss, NULL);
b0d623f7
A
750 }
751 break;
752
753 case RTAX_NETMASK: {
6d2010ae 754 int skip, af;
b0d623f7 755 /*
6d2010ae
A
756 * If this is for a mask, we can't tell whether or not there
757 * is an valid scope ID value, as the span of bytes between
758 * sa_len and the beginning of the mask (offset of sin_addr in
759 * the case of AF_INET, or sin6_addr for AF_INET6) may be
760 * filled with all-ones by rn_addmask(), and hence we cannot
761 * rely on sa_family. Because of this, we use the sa_family
762 * of the hint sockaddr (RTAX_{DST,IFA}) as indicator as to
763 * whether or not the mask is to be treated as one for AF_INET
764 * or AF_INET6. Clearing the scope ID field involves setting
765 * it to IFSCOPE_NONE followed by calling sa_trim() to trim
766 * trailing zeroes from the storage sockaddr, which reverses
767 * what was done earlier by ma_copy() on the source sockaddr.
b0d623f7 768 */
6d2010ae
A
769 if (hint == NULL ||
770 ((af = hint->sa_family) != AF_INET && af != AF_INET6))
771 break; /* nothing to do */
772
773 skip = (af == AF_INET) ?
774 offsetof(struct sockaddr_in, sin_addr) :
775 offsetof(struct sockaddr_in6, sin6_addr);
776
777 if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss)) {
b0d623f7 778 bcopy(sa, ss, sa->sa_len);
6d2010ae
A
779 /*
780 * Don't use {sin,sin6}_set_ifscope() as sa_family
781 * and sa_len for the netmask might not be set to
782 * the corresponding expected values of the hint.
783 */
784 if (hint->sa_family == AF_INET)
785 SINIFSCOPE(ss)->sin_scope_id = IFSCOPE_NONE;
786 else
787 SIN6IFSCOPE(ss)->sin6_scope_id = IFSCOPE_NONE;
b0d623f7 788 ret = sa_trim(SA(ss), skip);
6d2010ae
A
789
790 /*
791 * For AF_INET6 mask, set sa_len appropriately unless
792 * this is requested via systl_dumpentry(), in which
793 * case we return the raw value.
794 */
795 if (hint->sa_family == AF_INET6 &&
796 type != RTM_GET && type != RTM_GET2)
797 SA(ret)->sa_len = sizeof (struct sockaddr_in6);
b0d623f7
A
798 }
799 break;
800 }
d190cdc3
A
801 case RTAX_GATEWAY: {
802 /*
803 * Break if the gateway is not AF_LINK type (indirect routes)
804 *
805 * Else, if is, check if it is resolved. If not yet resolved
806 * simply break else scrub the link layer address.
807 */
808 if ((sa->sa_family != AF_LINK) || (SDL(sa)->sdl_alen == 0))
809 break;
810 /* fallthrough */
811 }
39236c6e 812 case RTAX_IFP: {
5ba3f43e 813 if (sa->sa_family == AF_LINK && credp) {
39236c6e
A
814 struct sockaddr_dl *sdl = SDL(buf);
815 const void *bytes;
816 size_t size;
817
818 /* caller should handle worst case: SOCK_MAXADDRLEN */
819 VERIFY(buflen >= sa->sa_len);
820
821 bcopy(sa, sdl, sa->sa_len);
822 bytes = dlil_ifaddr_bytes(sdl, &size, credp);
823 if (bytes != CONST_LLADDR(sdl)) {
824 VERIFY(sdl->sdl_alen == size);
825 bcopy(bytes, LLADDR(sdl), size);
826 }
827 ret = (struct sockaddr *)sdl;
828 }
829 break;
830 }
b0d623f7
A
831 default:
832 break;
833 }
834
835 return (ret);
836}
837
c910b4d9
A
838/*
839 * Callback leaf-matching routine for rn_matchaddr_args used
840 * for looking up an exact match for a scoped route entry.
841 */
842static int
843rn_match_ifscope(struct radix_node *rn, void *arg)
844{
845 struct rtentry *rt = (struct rtentry *)rn;
846 struct matchleaf_arg *ma = arg;
6d2010ae 847 int af = rt_key(rt)->sa_family;
c910b4d9 848
6d2010ae 849 if (!(rt->rt_flags & RTF_IFSCOPE) || (af != AF_INET && af != AF_INET6))
c910b4d9
A
850 return (0);
851
6d2010ae
A
852 return (af == AF_INET ?
853 (SINIFSCOPE(rt_key(rt))->sin_scope_id == ma->ifscope) :
854 (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope));
c910b4d9 855}
55e303ae 856
39236c6e
A
/*
 * Advance the route generation counters: always the IPv4 one, and the
 * IPv6 one as well when the kernel is built with INET6.
 */
void
routegenid_update(void)
{
	routegenid_inet_update();
#if INET6
	routegenid_inet6_update();
#endif /* INET6 */
}
868
869void
39236c6e 870routegenid_inet_update(void)
1c79356b 871{
39236c6e 872 atomic_add_32(&route_genid_inet, 1);
1c79356b
A
873}
874
#if INET6
/*
 * Atomically add one to the IPv6 route generation counter.
 */
void
routegenid_inet6_update(void)
{
	atomic_add_32(&route_genid_inet6, 1);
}
#endif /* INET6 */
b0d623f7 882
1c79356b
A
883/*
884 * Packet routing routines.
885 */
/*
 * Resolve the route for ro via rtalloc_ign() with an empty "ignore"
 * flag mask.
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign(ro, 0);
}
891
/*
 * Scoped counterpart of rtalloc(): resolve the route for ro within
 * the interface scope ifscope, with an empty "ignore" flag mask.
 */
void
rtalloc_scoped(struct route *ro, unsigned int ifscope)
{
	rtalloc_scoped_ign(ro, 0, ifscope);
}
897
898static void
b0d623f7 899rtalloc_ign_common_locked(struct route *ro, uint32_t ignore,
c910b4d9 900 unsigned int ifscope)
1c79356b 901{
9bccf70c 902 struct rtentry *rt;
9bccf70c
A
903
904 if ((rt = ro->ro_rt) != NULL) {
b0d623f7 905 RT_LOCK_SPIN(rt);
39236c6e 906 if (rt->rt_ifp != NULL && !ROUTE_UNUSABLE(ro)) {
b0d623f7 907 RT_UNLOCK(rt);
9bccf70c 908 return;
b0d623f7
A
909 }
910 RT_UNLOCK(rt);
39236c6e 911 ROUTE_RELEASE_LOCKED(ro); /* rnh_lock already held */
9bccf70c 912 }
c910b4d9 913 ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
b0d623f7 914 if (ro->ro_rt != NULL) {
39236c6e 915 RT_GENID_SYNC(ro->ro_rt);
b0d623f7
A
916 RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
917 }
1c79356b 918}
b0d623f7 919
91447636 920void
b0d623f7 921rtalloc_ign(struct route *ro, uint32_t ignore)
91447636 922{
5ba3f43e 923 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7 924 lck_mtx_lock(rnh_lock);
6d2010ae 925 rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE);
b0d623f7
A
926 lck_mtx_unlock(rnh_lock);
927}
928
929void
930rtalloc_scoped_ign(struct route *ro, uint32_t ignore, unsigned int ifscope)
931{
5ba3f43e 932 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7 933 lck_mtx_lock(rnh_lock);
6d2010ae 934 rtalloc_ign_common_locked(ro, ignore, ifscope);
b0d623f7 935 lck_mtx_unlock(rnh_lock);
91447636 936}
1c79356b 937
6d2010ae 938static struct rtentry *
b0d623f7 939rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags)
c910b4d9
A
940{
941 return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
942}
943
/*
 * Scoped route lookup: forwards to rtalloc1_common_locked() with the
 * caller-supplied interface scope.
 */
struct rtentry *
rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
	struct rtentry *rt;

	rt = rtalloc1_common_locked(dst, report, ignflags, ifscope);
	return (rt);
}
950
39037602 951struct rtentry *
b0d623f7 952rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags,
c910b4d9 953 unsigned int ifscope)
1c79356b 954{
2d21ac55 955 struct radix_node_head *rnh = rt_tables[dst->sa_family];
c910b4d9 956 struct rtentry *rt, *newrt = NULL;
1c79356b 957 struct rt_addrinfo info;
b0d623f7 958 uint32_t nflags;
91447636 959 int err = 0, msgtype = RTM_MISS;
c910b4d9
A
960
961 if (rnh == NULL)
962 goto unreachable;
963
9bccf70c 964 /*
c910b4d9
A
965 * Find the longest prefix or exact (in the scoped case) address match;
966 * callee adds a reference to entry and checks for root node as well
1c79356b 967 */
c910b4d9
A
968 rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
969 if (rt == NULL)
970 goto unreachable;
971
b0d623f7 972 RT_LOCK_SPIN(rt);
c910b4d9
A
973 newrt = rt;
974 nflags = rt->rt_flags & ~ignflags;
b0d623f7 975 RT_UNLOCK(rt);
c910b4d9 976 if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
1c79356b 977 /*
c910b4d9
A
978 * We are apparently adding (report = 0 in delete).
979 * If it requires that it be cloned, do so.
980 * (This implies it wasn't a HOST route.)
1c79356b 981 */
c910b4d9
A
982 err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
983 if (err) {
1c79356b 984 /*
c910b4d9
A
985 * If the cloning didn't succeed, maybe what we
986 * have from lookup above will do. Return that;
987 * no need to hold another reference since it's
988 * already done.
1c79356b 989 */
c910b4d9
A
990 newrt = rt;
991 goto miss;
992 }
993
1c79356b 994 /*
c910b4d9
A
995 * We cloned it; drop the original route found during lookup.
996 * The resulted cloned route (newrt) would now have an extra
997 * reference held during rtrequest.
1c79356b 998 */
c910b4d9 999 rtfree_locked(rt);
3e170ce0
A
1000
1001 /*
1002 * If the newly created cloned route is a direct host route
1003 * then also check if it is to a router or not.
1004 * If it is, then set the RTF_ROUTER flag on the host route
1005 * for the gateway.
1006 *
1007 * XXX It is possible for the default route to be created post
1008 * cloned route creation of router's IP.
1009 * We can handle that corner case by special handing for RTM_ADD
1010 * of default route.
1011 */
1012 if ((newrt->rt_flags & (RTF_HOST | RTF_LLINFO)) ==
1013 (RTF_HOST | RTF_LLINFO)) {
1014 struct rtentry *defrt = NULL;
1015 struct sockaddr_storage def_key;
1016
1017 bzero(&def_key, sizeof(def_key));
1018 def_key.ss_len = rt_key(newrt)->sa_len;
1019 def_key.ss_family = rt_key(newrt)->sa_family;
1020
1021 defrt = rtalloc1_scoped_locked((struct sockaddr *)&def_key,
1022 0, 0, newrt->rt_ifp->if_index);
1023
1024 if (defrt) {
1025 if (equal(rt_key(newrt), defrt->rt_gateway)) {
1026 newrt->rt_flags |= RTF_ROUTER;
1027 }
1028 rtfree_locked(defrt);
1029 }
1030 }
1031
c910b4d9 1032 if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
1c79356b 1033 /*
c910b4d9
A
1034 * If the new route specifies it be
1035 * externally resolved, then go do that.
1c79356b 1036 */
c910b4d9
A
1037 msgtype = RTM_RESOLVE;
1038 goto miss;
1c79356b
A
1039 }
1040 }
c910b4d9
A
1041 goto done;
1042
1043unreachable:
1044 /*
1045 * Either we hit the root or couldn't find any match,
1046 * Which basically means "cant get there from here"
1047 */
1048 rtstat.rts_unreach++;
39037602 1049
c910b4d9
A
1050miss:
1051 if (report) {
1052 /*
1053 * If required, report the failure to the supervising
1054 * Authorities.
1055 * For a delete, this is not an error. (report == 0)
1056 */
1057 bzero((caddr_t)&info, sizeof(info));
1058 info.rti_info[RTAX_DST] = dst;
1059 rt_missmsg(msgtype, &info, 0, err);
1060 }
1061done:
1c79356b
A
1062 return (newrt);
1063}
1064
91447636 1065struct rtentry *
b0d623f7 1066rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags)
91447636 1067{
39236c6e 1068 struct rtentry *entry;
5ba3f43e 1069 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7 1070 lck_mtx_lock(rnh_lock);
91447636 1071 entry = rtalloc1_locked(dst, report, ignflags);
b0d623f7
A
1072 lck_mtx_unlock(rnh_lock);
1073 return (entry);
1074}
1075
1076struct rtentry *
1077rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags,
1078 unsigned int ifscope)
1079{
39236c6e 1080 struct rtentry *entry;
5ba3f43e 1081 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7
A
1082 lck_mtx_lock(rnh_lock);
1083 entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope);
1084 lck_mtx_unlock(rnh_lock);
91447636
A
1085 return (entry);
1086}
1087
1c79356b
A
1088/*
1089 * Remove a reference count from an rtentry.
1090 * If the count gets low enough, take it out of the routing table
1091 */
1092void
2d21ac55 1093rtfree_locked(struct rtentry *rt)
1c79356b 1094{
b0d623f7
A
1095 rtfree_common(rt, TRUE);
1096}
1c79356b 1097
b0d623f7
A
1098static void
1099rtfree_common(struct rtentry *rt, boolean_t locked)
1100{
1101 struct radix_node_head *rnh;
91447636 1102
5ba3f43e 1103 LCK_MTX_ASSERT(rnh_lock, locked ?
39236c6e
A
1104 LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);
1105
b0d623f7
A
1106 /*
1107 * Atomically decrement the reference count and if it reaches 0,
1108 * and there is a close function defined, call the close function.
1109 */
1110 RT_LOCK_SPIN(rt);
1111 if (rtunref(rt) > 0) {
1112 RT_UNLOCK(rt);
91447636
A
1113 return;
1114 }
1c79356b
A
1115
1116 /*
b0d623f7
A
1117 * To avoid violating lock ordering, we must drop rt_lock before
1118 * trying to acquire the global rnh_lock. If we are called with
1119 * rnh_lock held, then we already have exclusive access; otherwise
1120 * we do the lock dance.
1c79356b 1121 */
b0d623f7
A
1122 if (!locked) {
1123 /*
39236c6e
A
1124 * Note that we check it again below after grabbing rnh_lock,
1125 * since it is possible that another thread doing a lookup wins
1126 * the race, grabs the rnh_lock first, and bumps up reference
1127 * count in which case the route should be left alone as it is
1128 * still in use. It's also possible that another thread frees
1129 * the route after we drop rt_lock; to prevent the route from
1130 * being freed, we hold an extra reference.
1131 */
b0d623f7
A
1132 RT_ADDREF_LOCKED(rt);
1133 RT_UNLOCK(rt);
1134 lck_mtx_lock(rnh_lock);
1135 RT_LOCK_SPIN(rt);
39236c6e 1136 if (rtunref(rt) > 0) {
b0d623f7
A
1137 /* We've lost the race, so abort */
1138 RT_UNLOCK(rt);
1139 goto done;
1140 }
1141 }
1142
1143 /*
1144 * We may be blocked on other lock(s) as part of freeing
1145 * the entry below, so convert from spin to full mutex.
1146 */
1147 RT_CONVERT_LOCK(rt);
1148
5ba3f43e 1149 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
1150
1151 /* Negative refcnt must never happen */
39236c6e 1152 if (rt->rt_refcnt != 0) {
b0d623f7 1153 panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt);
39236c6e
A
1154 /* NOTREACHED */
1155 }
1156 /* Idle refcnt must have been dropped during rtunref() */
1157 VERIFY(!(rt->rt_flags & RTF_IFREF));
b0d623f7
A
1158
1159 /*
1160 * find the tree for that address family
1161 * Note: in the case of igmp packets, there might not be an rnh
1162 */
1163 rnh = rt_tables[rt_key(rt)->sa_family];
2d21ac55 1164
2d21ac55
A
1165 /*
1166 * On last reference give the "close method" a chance to cleanup
1167 * private state. This also permits (for IPv4 and IPv6) a chance
1168 * to decide if the routing table entry should be purged immediately
1169 * or at a later time. When an immediate purge is to happen the
1170 * close routine typically issues RTM_DELETE which clears the RTF_UP
1171 * flag on the entry so that the code below reclaims the storage.
1172 */
b0d623f7 1173 if (rnh != NULL && rnh->rnh_close != NULL)
1c79356b 1174 rnh->rnh_close((struct radix_node *)rt, rnh);
1c79356b
A
1175
1176 /*
b0d623f7
A
1177 * If we are no longer "up" (and ref == 0) then we can free the
1178 * resources associated with the route.
1c79356b 1179 */
2d21ac55 1180 if (!(rt->rt_flags & RTF_UP)) {
6d2010ae
A
1181 struct rtentry *rt_parent;
1182 struct ifaddr *rt_ifa;
1183
5ba3f43e 1184 rt->rt_flags |= RTF_DEAD;
39236c6e 1185 if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) {
b0d623f7 1186 panic("rt %p freed while in radix tree\n", rt);
39236c6e
A
1187 /* NOTREACHED */
1188 }
9bccf70c 1189 /*
1c79356b 1190 * the rtentry must have been removed from the routing table
b0d623f7 1191 * so it is represented in rttrash; remove that now.
1c79356b 1192 */
b0d623f7 1193 (void) OSDecrementAtomic(&rttrash);
2d21ac55
A
1194 if (rte_debug & RTD_DEBUG) {
1195 TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
1196 rtd_trash_link);
1197 }
1c79356b 1198
9bccf70c 1199 /*
1c79356b
A
1200 * release references on items we hold them on..
1201 * e.g other routes and ifaddrs.
1202 */
6d2010ae 1203 if ((rt_parent = rt->rt_parent) != NULL)
b0d623f7 1204 rt->rt_parent = NULL;
9bccf70c 1205
6d2010ae 1206 if ((rt_ifa = rt->rt_ifa) != NULL)
91447636 1207 rt->rt_ifa = NULL;
1c79356b
A
1208
1209 /*
b0d623f7
A
1210 * Now free any attached link-layer info.
1211 */
1212 if (rt->rt_llinfo != NULL) {
1213 if (rt->rt_llinfo_free != NULL)
1214 (*rt->rt_llinfo_free)(rt->rt_llinfo);
1215 else
1216 R_Free(rt->rt_llinfo);
1217 rt->rt_llinfo = NULL;
1218 }
1219
5ba3f43e
A
1220 /* Destroy eventhandler lists context */
1221 eventhandler_lists_ctxt_destroy(&rt->rt_evhdlr_ctxt);
1222
6d2010ae
A
1223 /*
1224 * Route is no longer in the tree and refcnt is 0;
1225 * we have exclusive access, so destroy it.
1226 */
1227 RT_UNLOCK(rt);
5ba3f43e 1228 rte_lock_destroy(rt);
6d2010ae
A
1229
1230 if (rt_parent != NULL)
1231 rtfree_locked(rt_parent);
1232
1233 if (rt_ifa != NULL)
1234 IFA_REMREF(rt_ifa);
1235
b0d623f7
A
1236 /*
1237 * The key is separately alloc'd so free it (see rt_setgate()).
1c79356b
A
1238 * This also frees the gateway, as they are always malloc'd
1239 * together.
1240 */
91447636 1241 R_Free(rt_key(rt));
1c79356b 1242
6d2010ae
A
1243 /*
1244 * Free any statistics that may have been allocated
1245 */
1246 nstat_route_detach(rt);
1247
1c79356b
A
1248 /*
1249 * and the rtentry itself of course
1250 */
6601e61a 1251 rte_free(rt);
b0d623f7
A
1252 } else {
1253 /*
1254 * The "close method" has been called, but the route is
1255 * still in the radix tree with zero refcnt, i.e. "up"
1256 * and in the cached state.
1257 */
1258 RT_UNLOCK(rt);
1c79356b 1259 }
b0d623f7
A
1260done:
1261 if (!locked)
1262 lck_mtx_unlock(rnh_lock);
1c79356b
A
1263}
1264
91447636 1265void
2d21ac55 1266rtfree(struct rtentry *rt)
91447636 1267{
b0d623f7 1268 rtfree_common(rt, FALSE);
91447636
A
1269}
1270
9bccf70c
A
1271/*
1272 * Decrements the refcount but does not free the route when
1273 * the refcount reaches zero. Unless you have really good reason,
1274 * use rtfree not rtunref.
1275 */
b0d623f7 1276int
2d21ac55 1277rtunref(struct rtentry *p)
9bccf70c 1278{
b0d623f7 1279 RT_LOCK_ASSERT_HELD(p);
91447636 1280
39236c6e 1281 if (p->rt_refcnt == 0) {
b0d623f7 1282 panic("%s(%p) bad refcnt\n", __func__, p);
39236c6e
A
1283 /* NOTREACHED */
1284 } else if (--p->rt_refcnt == 0) {
1285 /*
1286 * Release any idle reference count held on the interface;
1287 * if the route is eligible, still UP and the refcnt becomes
1288 * non-zero at some point in future before it is purged from
1289 * the routing table, rt_set_idleref() will undo this.
1290 */
1291 rt_clear_idleref(p);
1292 }
2d21ac55
A
1293
1294 if (rte_debug & RTD_DEBUG)
1295 rtunref_audit((struct rtentry_dbg *)p);
1296
b0d623f7
A
1297 /* Return new value */
1298 return (p->rt_refcnt);
2d21ac55
A
1299}
1300
1301static inline void
1302rtunref_audit(struct rtentry_dbg *rte)
1303{
b0d623f7
A
1304 uint16_t idx;
1305
39236c6e 1306 if (rte->rtd_inuse != RTD_INUSE) {
2d21ac55 1307 panic("rtunref: on freed rte=%p\n", rte);
39236c6e
A
1308 /* NOTREACHED */
1309 }
b0d623f7
A
1310 idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE;
1311 if (rte_debug & RTD_TRACE)
1312 ctrace_record(&rte->rtd_refrele[idx]);
9bccf70c
A
1313}
1314
1315/*
1316 * Add a reference count from an rtentry.
1317 */
1318void
2d21ac55 1319rtref(struct rtentry *p)
9bccf70c 1320{
b0d623f7 1321 RT_LOCK_ASSERT_HELD(p);
91447636 1322
5ba3f43e 1323 VERIFY((p->rt_flags & RTF_DEAD) == 0);
39236c6e 1324 if (++p->rt_refcnt == 0) {
b0d623f7 1325 panic("%s(%p) bad refcnt\n", __func__, p);
39236c6e
A
1326 /* NOTREACHED */
1327 } else if (p->rt_refcnt == 1) {
1328 /*
1329 * Hold an idle reference count on the interface,
1330 * if the route is eligible for it.
1331 */
1332 rt_set_idleref(p);
1333 }
2d21ac55
A
1334
1335 if (rte_debug & RTD_DEBUG)
1336 rtref_audit((struct rtentry_dbg *)p);
2d21ac55
A
1337}
1338
1339static inline void
1340rtref_audit(struct rtentry_dbg *rte)
1341{
b0d623f7
A
1342 uint16_t idx;
1343
39236c6e 1344 if (rte->rtd_inuse != RTD_INUSE) {
2d21ac55 1345 panic("rtref_audit: on freed rte=%p\n", rte);
39236c6e
A
1346 /* NOTREACHED */
1347 }
b0d623f7
A
1348 idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE;
1349 if (rte_debug & RTD_TRACE)
1350 ctrace_record(&rte->rtd_refhold[idx]);
9bccf70c
A
1351}
1352
1353void
39236c6e 1354rtsetifa(struct rtentry *rt, struct ifaddr *ifa)
9bccf70c 1355{
5ba3f43e 1356 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
1357
1358 RT_LOCK_ASSERT_HELD(rt);
9bccf70c
A
1359
1360 if (rt->rt_ifa == ifa)
1361 return;
1362
6d2010ae
A
1363 /* Become a regular mutex, just in case */
1364 RT_CONVERT_LOCK(rt);
1365
91447636
A
1366 /* Release the old ifa */
1367 if (rt->rt_ifa)
6d2010ae 1368 IFA_REMREF(rt->rt_ifa);
9bccf70c
A
1369
1370 /* Set rt_ifa */
1371 rt->rt_ifa = ifa;
1372
91447636
A
1373 /* Take a reference to the ifa */
1374 if (rt->rt_ifa)
6d2010ae 1375 IFA_ADDREF(rt->rt_ifa);
9bccf70c
A
1376}
1377
1c79356b
A
1378/*
1379 * Force a routing table entry to the specified
1380 * destination to go through the given gateway.
1381 * Normally called as a result of a routing redirect
1382 * message from the network layer.
1c79356b
A
1383 */
1384void
c910b4d9 1385rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
39236c6e
A
1386 struct sockaddr *netmask, int flags, struct sockaddr *src,
1387 struct rtentry **rtp)
1c79356b 1388{
c910b4d9 1389 struct rtentry *rt = NULL;
1c79356b
A
1390 int error = 0;
1391 short *stat = 0;
1392 struct rt_addrinfo info;
91447636 1393 struct ifaddr *ifa = NULL;
c910b4d9 1394 unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
6d2010ae 1395 struct sockaddr_storage ss;
39236c6e 1396 int af = src->sa_family;
91447636 1397
5ba3f43e 1398 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7 1399 lck_mtx_lock(rnh_lock);
1c79356b 1400
6d2010ae
A
1401 /*
1402 * Transform src into the internal routing table form for
1403 * comparison against rt_gateway below.
1404 */
1405#if INET6
0a7de745 1406 if ((af == AF_INET) || (af == AF_INET6)) {
6d2010ae 1407#else
0a7de745 1408 if (af == AF_INET) {
6d2010ae
A
1409#endif /* !INET6 */
1410 src = sa_copy(src, &ss, &ifscope);
0a7de745 1411 }
6d2010ae 1412
c910b4d9
A
1413 /*
1414 * Verify the gateway is directly reachable; if scoped routing
1415 * is enabled, verify that it is reachable from the interface
1416 * where the ICMP redirect arrived on.
1417 */
1418 if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
1c79356b
A
1419 error = ENETUNREACH;
1420 goto out;
1421 }
91447636 1422
c910b4d9
A
1423 /* Lookup route to the destination (from the original IP header) */
1424 rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);
b0d623f7
A
1425 if (rt != NULL)
1426 RT_LOCK(rt);
c910b4d9 1427
1c79356b
A
1428 /*
1429 * If the redirect isn't from our current router for this dst,
1430 * it's either old or wrong. If it redirects us to ourselves,
1431 * we have a routing loop, perhaps as a result of an interface
6d2010ae
A
1432 * going down recently. Holding rnh_lock here prevents the
1433 * possibility of rt_ifa/ifa's ifa_addr from changing (e.g.
1434 * in_ifinit), so okay to access ifa_addr without locking.
1c79356b 1435 */
b0d623f7 1436 if (!(flags & RTF_DONE) && rt != NULL &&
39236c6e
A
1437 (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
1438 ifa->ifa_addr))) {
1c79356b 1439 error = EINVAL;
2d21ac55 1440 } else {
6d2010ae 1441 IFA_REMREF(ifa);
91447636 1442 if ((ifa = ifa_ifwithaddr(gateway))) {
6d2010ae 1443 IFA_REMREF(ifa);
91447636
A
1444 ifa = NULL;
1445 error = EHOSTUNREACH;
1446 }
1447 }
b0d623f7 1448
91447636 1449 if (ifa) {
6d2010ae 1450 IFA_REMREF(ifa);
91447636
A
1451 ifa = NULL;
1452 }
b0d623f7
A
1453
1454 if (error) {
1455 if (rt != NULL)
1456 RT_UNLOCK(rt);
1c79356b 1457 goto done;
b0d623f7
A
1458 }
1459
1c79356b
A
1460 /*
1461 * Create a new entry if we just got back a wildcard entry
1462 * or the the lookup failed. This is necessary for hosts
1463 * which use routing redirects generated by smart gateways
1464 * to dynamically build the routing tables.
1465 */
b0d623f7 1466 if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2))
1c79356b
A
1467 goto create;
1468 /*
1469 * Don't listen to the redirect if it's
1470 * for a route to an interface.
1471 */
b0d623f7 1472 RT_LOCK_ASSERT_HELD(rt);
1c79356b
A
1473 if (rt->rt_flags & RTF_GATEWAY) {
1474 if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
1475 /*
1476 * Changing from route to net => route to host.
c910b4d9
A
1477 * Create new route, rather than smashing route
1478 * to net; similar to cloned routes, the newly
1479 * created host route is scoped as well.
1c79356b 1480 */
b0d623f7
A
1481create:
1482 if (rt != NULL)
1483 RT_UNLOCK(rt);
1c79356b 1484 flags |= RTF_GATEWAY | RTF_DYNAMIC;
c910b4d9
A
1485 error = rtrequest_scoped_locked(RTM_ADD, dst,
1486 gateway, netmask, flags, NULL, ifscope);
1c79356b
A
1487 stat = &rtstat.rts_dynamic;
1488 } else {
1489 /*
1490 * Smash the current notion of the gateway to
1491 * this destination. Should check about netmask!!!
1492 */
1493 rt->rt_flags |= RTF_MODIFIED;
1494 flags |= RTF_MODIFIED;
1495 stat = &rtstat.rts_newgateway;
1496 /*
1497 * add the key and gateway (in one malloc'd chunk).
1498 */
c910b4d9 1499 error = rt_setgate(rt, rt_key(rt), gateway);
b0d623f7 1500 RT_UNLOCK(rt);
1c79356b 1501 }
c910b4d9 1502 } else {
b0d623f7 1503 RT_UNLOCK(rt);
1c79356b 1504 error = EHOSTUNREACH;
c910b4d9 1505 }
1c79356b 1506done:
b0d623f7
A
1507 if (rt != NULL) {
1508 RT_LOCK_ASSERT_NOTHELD(rt);
a39ff7e2
A
1509 if (!error) {
1510 /* Enqueue event to refresh flow route entries */
1511 route_event_enqueue_nwk_wq_entry(rt, NULL, ROUTE_ENTRY_REFRESH, NULL, FALSE);
1512 if (rtp)
1513 *rtp = rt;
1514 else
1515 rtfree_locked(rt);
1516 }
1c79356b 1517 else
91447636 1518 rtfree_locked(rt);
1c79356b
A
1519 }
1520out:
c910b4d9 1521 if (error) {
1c79356b 1522 rtstat.rts_badredirect++;
c910b4d9
A
1523 } else {
1524 if (stat != NULL)
1525 (*stat)++;
39236c6e
A
1526
1527 if (af == AF_INET)
1528 routegenid_inet_update();
1529#if INET6
1530 else if (af == AF_INET6)
1531 routegenid_inet6_update();
1532#endif /* INET6 */
c910b4d9 1533 }
b0d623f7 1534 lck_mtx_unlock(rnh_lock);
1c79356b
A
1535 bzero((caddr_t)&info, sizeof(info));
1536 info.rti_info[RTAX_DST] = dst;
1537 info.rti_info[RTAX_GATEWAY] = gateway;
1538 info.rti_info[RTAX_NETMASK] = netmask;
1539 info.rti_info[RTAX_AUTHOR] = src;
1540 rt_missmsg(RTM_REDIRECT, &info, flags, error);
1541}
1542
1543/*
1544* Routing table ioctl interface.
1545*/
1546int
b0d623f7 1547rtioctl(unsigned long req, caddr_t data, struct proc *p)
1c79356b 1548{
fe8ab488 1549#pragma unused(p, req, data)
39236c6e 1550 return (ENXIO);
1c79356b
A
1551}
1552
1553struct ifaddr *
91447636
A
1554ifa_ifwithroute(
1555 int flags,
1556 const struct sockaddr *dst,
1557 const struct sockaddr *gateway)
1c79356b 1558{
2d21ac55
A
1559 struct ifaddr *ifa;
1560
b0d623f7 1561 lck_mtx_lock(rnh_lock);
2d21ac55 1562 ifa = ifa_ifwithroute_locked(flags, dst, gateway);
b0d623f7 1563 lck_mtx_unlock(rnh_lock);
2d21ac55
A
1564
1565 return (ifa);
1566}
1567
1568struct ifaddr *
c910b4d9
A
1569ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
1570 const struct sockaddr *gateway)
1571{
1572 return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
1573 gateway, IFSCOPE_NONE));
1574}
1575
1576struct ifaddr *
1577ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
1578 const struct sockaddr *gateway, unsigned int ifscope)
1579{
1580 if (ifscope != IFSCOPE_NONE)
1581 flags |= RTF_IFSCOPE;
1582 else
1583 flags &= ~RTF_IFSCOPE;
1584
1585 return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
1586}
1587
1588static struct ifaddr *
1589ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
6d2010ae 1590 const struct sockaddr *gw, unsigned int ifscope)
2d21ac55
A
1591{
1592 struct ifaddr *ifa = NULL;
1593 struct rtentry *rt = NULL;
6d2010ae 1594 struct sockaddr_storage dst_ss, gw_ss;
91447636 1595
5ba3f43e 1596 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
91447636 1597
6d2010ae
A
1598 /*
1599 * Just in case the sockaddr passed in by the caller
1600 * contains a scope ID, make sure to clear it since
1601 * interface addresses aren't scoped.
1602 */
1603#if INET6
1604 if (dst != NULL &&
39037602
A
1605 ((dst->sa_family == AF_INET) ||
1606 (dst->sa_family == AF_INET6)))
6d2010ae 1607#else
39037602 1608 if (dst != NULL && dst->sa_family == AF_INET)
6d2010ae 1609#endif /* !INET6 */
316670eb 1610 dst = sa_copy(SA((uintptr_t)dst), &dst_ss, NULL);
6d2010ae
A
1611
1612#if INET6
1613 if (gw != NULL &&
39037602
A
1614 ((gw->sa_family == AF_INET) ||
1615 (gw->sa_family == AF_INET6)))
6d2010ae 1616#else
39037602 1617 if (gw != NULL && gw->sa_family == AF_INET)
6d2010ae 1618#endif /* !INET6 */
316670eb 1619 gw = sa_copy(SA((uintptr_t)gw), &gw_ss, NULL);
c910b4d9 1620
2d21ac55 1621 if (!(flags & RTF_GATEWAY)) {
1c79356b
A
1622 /*
1623 * If we are adding a route to an interface,
1624 * and the interface is a pt to pt link
1625 * we should search for the destination
1626 * as our clue to the interface. Otherwise
1627 * we can use the local address.
1628 */
1c79356b
A
1629 if (flags & RTF_HOST) {
1630 ifa = ifa_ifwithdstaddr(dst);
1631 }
2d21ac55 1632 if (ifa == NULL)
6d2010ae 1633 ifa = ifa_ifwithaddr_scoped(gw, ifscope);
1c79356b
A
1634 } else {
1635 /*
1636 * If we are adding a route to a remote net
1637 * or host, the gateway may still be on the
1638 * other end of a pt to pt link.
1639 */
6d2010ae 1640 ifa = ifa_ifwithdstaddr(gw);
1c79356b 1641 }
2d21ac55 1642 if (ifa == NULL)
6d2010ae 1643 ifa = ifa_ifwithnet_scoped(gw, ifscope);
2d21ac55
A
1644 if (ifa == NULL) {
1645 /* Workaround to avoid gcc warning regarding const variable */
c910b4d9 1646 rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
b0d623f7 1647 0, 0, ifscope);
2d21ac55 1648 if (rt != NULL) {
b0d623f7 1649 RT_LOCK_SPIN(rt);
2d21ac55 1650 ifa = rt->rt_ifa;
6d2010ae
A
1651 if (ifa != NULL) {
1652 /* Become a regular mutex */
1653 RT_CONVERT_LOCK(rt);
1654 IFA_ADDREF(ifa);
1655 }
b0d623f7
A
1656 RT_REMREF_LOCKED(rt);
1657 RT_UNLOCK(rt);
2d21ac55
A
1658 rt = NULL;
1659 }
1c79356b 1660 }
6d2010ae
A
1661 /*
1662 * Holding rnh_lock here prevents the possibility of ifa from
1663 * changing (e.g. in_ifinit), so it is safe to access its
1664 * ifa_addr (here and down below) without locking.
1665 */
2d21ac55 1666 if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
91447636 1667 struct ifaddr *newifa;
2d21ac55 1668 /* Callee adds reference to newifa upon success */
91447636 1669 newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
2d21ac55 1670 if (newifa != NULL) {
6d2010ae 1671 IFA_REMREF(ifa);
91447636
A
1672 ifa = newifa;
1673 }
1c79356b 1674 }
2d21ac55
A
1675 /*
1676 * If we are adding a gateway, it is quite possible that the
1677 * routing table has a static entry in place for the gateway,
1678 * that may not agree with info garnered from the interfaces.
1679 * The routing table should carry more precedence than the
1680 * interfaces in this matter. Must be careful not to stomp
6d2010ae 1681 * on new entries from rtinit, hence (ifa->ifa_addr != gw).
2d21ac55
A
1682 */
1683 if ((ifa == NULL ||
6d2010ae
A
1684 !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gw)) &&
1685 (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gw,
b0d623f7 1686 0, 0, ifscope)) != NULL) {
2d21ac55 1687 if (ifa != NULL)
6d2010ae 1688 IFA_REMREF(ifa);
b0d623f7 1689 RT_LOCK_SPIN(rt);
2d21ac55 1690 ifa = rt->rt_ifa;
6d2010ae
A
1691 if (ifa != NULL) {
1692 /* Become a regular mutex */
1693 RT_CONVERT_LOCK(rt);
1694 IFA_ADDREF(ifa);
1695 }
b0d623f7
A
1696 RT_REMREF_LOCKED(rt);
1697 RT_UNLOCK(rt);
2d21ac55 1698 }
c910b4d9
A
1699 /*
1700 * If an interface scope was specified, the interface index of
1701 * the found ifaddr must be equivalent to that of the scope;
1702 * otherwise there is no match.
1703 */
1704 if ((flags & RTF_IFSCOPE) &&
1705 ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
6d2010ae 1706 IFA_REMREF(ifa);
c910b4d9
A
1707 ifa = NULL;
1708 }
1709
d9a64523
A
1710 /*
1711 * ifa's address family must match destination's address family
1712 * after all is said and done.
1713 */
1714 if (ifa != NULL &&
1715 ifa->ifa_addr->sa_family != dst->sa_family) {
1716 IFA_REMREF(ifa);
1717 ifa = NULL;
1718 }
1719
1c79356b
A
1720 return (ifa);
1721}
1722
b0d623f7
A
/*
 * Radix-node callbacks defined elsewhere in this file.
 */
static int rt_fixdelete(struct radix_node *rn, void *arg);
static int rt_fixchange(struct radix_node *rn, void *arg);

/*
 * Pairs a route entry with a radix tree head.
 * NOTE(review): presumably the argument bundle handed to the callbacks
 * declared above -- confirm against their definitions.
 */
struct rtfc_arg {
	struct rtentry *rt0;
	struct radix_node_head *rnh;
};
1730
c910b4d9
A
1731int
1732rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
1733 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
1734{
1735 return (rtrequest_common_locked(req, dst, gateway, netmask,
1736 (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
1737}
1738
1739int
1740rtrequest_scoped_locked(int req, struct sockaddr *dst,
1741 struct sockaddr *gateway, struct sockaddr *netmask, int flags,
1742 struct rtentry **ret_nrt, unsigned int ifscope)
1743{
1744 if (ifscope != IFSCOPE_NONE)
1745 flags |= RTF_IFSCOPE;
1746 else
1747 flags &= ~RTF_IFSCOPE;
1748
1749 return (rtrequest_common_locked(req, dst, gateway, netmask,
1750 flags, ret_nrt, ifscope));
1751}
1752
1c79356b 1753/*
c910b4d9
A
1754 * Do appropriate manipulations of a routing tree given all the bits of
1755 * info needed.
1756 *
6d2010ae 1757 * Storing the scope ID in the radix key is an internal job that should be
c910b4d9
A
1758 * left to routines in this module. Callers should specify the scope value
1759 * to the "scoped" variants of route routines instead of manipulating the
1760 * key itself. This is typically done when creating a scoped route, e.g.
1761 * rtrequest(RTM_ADD). Once such a route is created and marked with the
1762 * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it
1763 * (RTM_RESOLVE) or to remove it (RTM_DELETE). An exception to this is
1764 * during certain routing socket operations where the search key might be
1765 * derived from the routing message itself, in which case the caller must
1766 * specify the destination address and scope value for RTM_ADD/RTM_DELETE.
1c79356b 1767 */
c910b4d9
A
/*
 * Summary of the implementation below:
 *
 *	req	RTM_DELETE, RTM_RESOLVE or RTM_ADD.
 *	dst0	destination as supplied by the caller; a scope-embedded
 *		copy is made locally for AF_INET/AF_INET6.
 *	gateway	gateway address (replaced by the parent's for RTM_RESOLVE).
 *	netmask	ignored (set to NULL) when RTF_HOST is in flags.
 *	flags	RTF_* flags; RTF_IFSCOPE is added/removed to match ifscope.
 *	ret_nrt	RTM_DELETE: if non-NULL, receives the removed entry with a
 *		reference held; otherwise the entry is released here.
 *		RTM_RESOLVE: must point to the parent route on entry and is
 *		overwritten with the new clone on success.
 *		RTM_ADD: if non-NULL, receives the new entry with one
 *		reference for the caller.
 *	ifscope	interface scope, or IFSCOPE_NONE.
 *
 * Must be called with rnh_lock held (asserted).  Returns 0 on success or
 * an errno value; every error exit goes through "bad", which drops the
 * ifa reference if one is still held.
 */
static int
rtrequest_common_locked(int req, struct sockaddr *dst0,
    struct sockaddr *gateway, struct sockaddr *netmask, int flags,
    struct rtentry **ret_nrt, unsigned int ifscope)
{
	int error = 0;
	struct rtentry *rt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa = NULL;
	struct sockaddr *ndst, *dst = dst0;
	struct sockaddr_storage ss, mask;
	struct timeval caltime;
	int af = dst->sa_family;
	void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);

	/* Record the error code and jump to the common exit path. */
#define senderr(x) { error = x; goto bad; }

	DTRACE_ROUTE6(rtrequest, int, req, struct sockaddr *, dst0,
	    struct sockaddr *, gateway, struct sockaddr *, netmask,
	    int, flags, unsigned int, ifscope);

	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * Find the correct routing tree to use for this Address Family
	 */
	if ((rnh = rt_tables[af]) == NULL)
		senderr(ESRCH);
	/*
	 * If we are adding a host route then we don't want to put
	 * a netmask in the tree
	 */
	if (flags & RTF_HOST)
		netmask = NULL;

	/*
	 * If Scoped Routing is enabled, use a local copy of the destination
	 * address to store the scope ID into.  This logic is repeated below
	 * in the RTM_RESOLVE handler since the caller does not normally
	 * specify such a flag during a resolve, as well as for the handling
	 * of IPv4 link-local address; instead, it passes in the route used for
	 * cloning for which the scope info is derived from.  Note also that
	 * in the case of RTM_DELETE, the address passed in by the caller
	 * might already contain the scope ID info when it is the key itself,
	 * thus making RTF_IFSCOPE unnecessary; one instance where it is
	 * explicitly set is inside route_output() as part of handling a
	 * routing socket request.
	 */
#if INET6
	if (req != RTM_RESOLVE && ((af == AF_INET) || (af == AF_INET6))) {
#else
	if (req != RTM_RESOLVE && af == AF_INET) {
#endif /* !INET6 */
		/* Transform dst into the internal routing table form */
		dst = sa_copy(dst, &ss, &ifscope);

		/* Transform netmask into the internal routing table form */
		if (netmask != NULL)
			netmask = ma_copy(af, netmask, &mask, ifscope);

		if (ifscope != IFSCOPE_NONE)
			flags |= RTF_IFSCOPE;
	} else if ((flags & RTF_IFSCOPE) &&
	    (af != AF_INET && af != AF_INET6)) {
		/* Scoped requests are only accepted for IPv4/IPv6 */
		senderr(EINVAL);
	}

	/* Keep RTF_IFSCOPE consistent with the (possibly updated) ifscope */
	if (ifscope == IFSCOPE_NONE)
		flags &= ~RTF_IFSCOPE;

	switch (req) {
	case RTM_DELETE: {
		struct rtentry *gwrt = NULL;
		boolean_t was_router = FALSE;
		uint32_t old_rt_refcnt = 0;
		/*
		 * Remove the item from the tree and return it.
		 * Complain if it is not there and do no more processing.
		 */
		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
			senderr(ESRCH);
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) {
			panic("rtrequest delete");
			/* NOTREACHED */
		}
		rt = (struct rtentry *)rn;

		RT_LOCK(rt);
		/* Snapshot the count before our own extra reference below */
		old_rt_refcnt = rt->rt_refcnt;
		rt->rt_flags &= ~RTF_UP;
		/*
		 * Release any idle reference count held on the interface
		 * as this route is no longer externally visible.
		 */
		rt_clear_idleref(rt);
		/*
		 * Take an extra reference to handle the deletion of a route
		 * entry whose reference count is already 0; e.g. an expiring
		 * cloned route entry or an entry that was added to the table
		 * with 0 reference. If the caller is interested in this route,
		 * we will return it with the reference intact. Otherwise we
		 * will decrement the reference via rtfree_locked() and then
		 * possibly deallocate it.
		 */
		RT_ADDREF_LOCKED(rt);

		/*
		 * For consistency, in case the caller didn't set the flag.
		 */
		rt->rt_flags |= RTF_CONDEMNED;

		/*
		 * Clear RTF_ROUTER if it's set.
		 */
		if (rt->rt_flags & RTF_ROUTER) {
			was_router = TRUE;
			VERIFY(rt->rt_flags & RTF_HOST);
			rt->rt_flags &= ~RTF_ROUTER;
		}

		/*
		 * Enqueue work item to invoke callback for this route entry
		 *
		 * If the old count is 0, it implies that last reference is being
		 * removed and there's no one listening for this route event.
		 */
		if (old_rt_refcnt != 0)
			route_event_enqueue_nwk_wq_entry(rt, NULL,
			    ROUTE_ENTRY_DELETED, NULL, TRUE);

		/*
		 * Now search what's left of the subtree for any cloned
		 * routes which might have been formed from this node.
		 */
		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
		    rt_mask(rt)) {
			/* rt_fixdelete() locks each visited route itself */
			RT_UNLOCK(rt);
			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
			    rt_fixdelete, rt);
			RT_LOCK(rt);
		}

		/*
		 * The deleted entry was a router: walk the whole tree with
		 * a ROUTE_LLENTRY_DELETED event, rt_lock dropped meanwhile.
		 */
		if (was_router) {
			struct route_event rt_ev;
			route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_DELETED);
			RT_UNLOCK(rt);
			(void) rnh->rnh_walktree(rnh,
			    route_event_walktree, (void *)&rt_ev);
			RT_LOCK(rt);
		}

		/*
		 * Remove any external references we may have.
		 */
		if ((gwrt = rt->rt_gwroute) != NULL)
			rt->rt_gwroute = NULL;

		/*
		 * give the protocol a chance to keep things in sync.
		 */
		if ((ifa = rt->rt_ifa) != NULL) {
			IFA_LOCK_SPIN(ifa);
			ifa_rtrequest = ifa->ifa_rtrequest;
			IFA_UNLOCK(ifa);
			if (ifa_rtrequest != NULL)
				ifa_rtrequest(RTM_DELETE, rt, NULL);
			/* keep reference on rt_ifa */
			/* (clearing ifa keeps the exit path from releasing it) */
			ifa = NULL;
		}

		/*
		 * one more rtentry floating around that is not
		 * linked to the routing table.
		 */
		(void) OSIncrementAtomic(&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_INSERT_TAIL(&rttrash_head,
			    (struct rtentry_dbg *)rt, rtd_trash_link);
		}

		/*
		 * If this is the (non-scoped) default route, clear
		 * the interface index used for the primary ifscope.
		 */
		if (rt_primary_default(rt, rt_key(rt))) {
			set_primary_ifscope(rt_key(rt)->sa_family,
			    IFSCOPE_NONE);
			if ((rt->rt_flags & RTF_STATIC) &&
			    rt_key(rt)->sa_family == PF_INET6) {
				trigger_v6_defrtr_select = TRUE;
			}
		}

#if NECP
		/*
		 * If this is a change in a default route, update
		 * necp client watchers to re-evaluate
		 */
		if (SA_DEFAULT(rt_key(rt))) {
			if (rt->rt_ifp != NULL) {
				ifnet_touch_lastupdown(rt->rt_ifp);
			}
			necp_update_all_clients();
		}
#endif /* NECP */

		RT_UNLOCK(rt);

		/*
		 * This might result in another rtentry being freed if
		 * we held its last reference.  Do this after the rtentry
		 * lock is dropped above, as it could lead to the same
		 * lock being acquired if gwrt is a clone of rt.
		 */
		if (gwrt != NULL)
			rtfree_locked(gwrt);

		/*
		 * If the caller wants it, then it can have it,
		 * but it's up to it to free the rtentry as we won't be
		 * doing it.
		 */
		if (ret_nrt != NULL) {
			/* Return the route to caller with reference intact */
			*ret_nrt = rt;
		} else {
			/* Dereference or deallocate the route */
			rtfree_locked(rt);
		}
		/* Update the route generation ID of the affected family */
		if (af == AF_INET)
			routegenid_inet_update();
#if INET6
		else if (af == AF_INET6)
			routegenid_inet6_update();
#endif /* INET6 */
		break;
	}
	case RTM_RESOLVE:
		/* *ret_nrt is the parent route to clone from; it is required */
		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
			senderr(EINVAL);
		/*
		 * According to the UNIX conformance tests, we need to return
		 * ENETUNREACH when the parent route is RTF_REJECT.
		 * However, there isn't any point in cloning RTF_REJECT
		 * routes, so we immediately return an error.
		 */
		if (rt->rt_flags & RTF_REJECT) {
			if (rt->rt_flags & RTF_HOST) {
				senderr(EHOSTUNREACH);
			} else {
				senderr(ENETUNREACH);
			}
		}
		/*
		 * If cloning, we have the parent route given by the caller
		 * and will use its rt_gateway, rt_rmx as part of the cloning
		 * process below.  Since rnh_lock is held at this point, the
		 * parent's rt_ifa and rt_gateway will not change, and its
		 * relevant rt_flags will not change as well.  The only thing
		 * that could change are the metrics, and thus we hold the
		 * parent route's rt_lock later on during the actual copying
		 * of rt_rmx.
		 */
		ifa = rt->rt_ifa;
		IFA_ADDREF(ifa);
		flags = rt->rt_flags &
		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
		flags |= RTF_WASCLONED;
		gateway = rt->rt_gateway;
		if ((netmask = rt->rt_genmask) == NULL)
			flags |= RTF_HOST;

		/* Scope handling below applies to IPv4/IPv6 only */
#if INET6
		if (af != AF_INET && af != AF_INET6)
#else
		if (af != AF_INET)
#endif /* !INET6 */
			goto makeroute;

		/*
		 * When scoped routing is enabled, cloned entries are
		 * always scoped according to the interface portion of
		 * the parent route.  The exception to this are IPv4
		 * link local addresses, or those routes that are cloned
		 * from a RTF_PROXY route.  For the latter, the clone
		 * gets to keep the RTF_PROXY flag.
		 */
		if ((af == AF_INET &&
		    IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) ||
		    (rt->rt_flags & RTF_PROXY)) {
			ifscope = IFSCOPE_NONE;
			flags &= ~RTF_IFSCOPE;
			/*
			 * These types of cloned routes aren't currently
			 * eligible for idle interface reference counting.
			 */
			flags |= RTF_NOIFREF;
		} else {
			if (flags & RTF_IFSCOPE) {
				ifscope = (af == AF_INET) ?
				    sin_get_ifscope(rt_key(rt)) :
				    sin6_get_ifscope(rt_key(rt));
			} else {
				ifscope = rt->rt_ifp->if_index;
				flags |= RTF_IFSCOPE;
			}
			VERIFY(ifscope != IFSCOPE_NONE);
		}

		/*
		 * Transform dst into the internal routing table form,
		 * clearing out the scope ID field if ifscope isn't set.
		 */
		dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ?
		    NULL : &ifscope);

		/* Transform netmask into the internal routing table form */
		if (netmask != NULL)
			netmask = ma_copy(af, netmask, &mask, ifscope);

		goto makeroute;

	case RTM_ADD:
		if ((flags & RTF_GATEWAY) && !gateway) {
			panic("rtrequest: RTF_GATEWAY but no gateway");
			/* NOTREACHED */
		}
		/* Look up the interface address for the new route */
		if (flags & RTF_IFSCOPE) {
			ifa = ifa_ifwithroute_scoped_locked(flags, dst0,
			    gateway, ifscope);
		} else {
			ifa = ifa_ifwithroute_locked(flags, dst0, gateway);
		}
		if (ifa == NULL)
			senderr(ENETUNREACH);
makeroute:
		/*
		 * We land up here for both RTM_RESOLVE and RTM_ADD
		 * when we decide to create a route.
		 */
		if ((rt = rte_alloc()) == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rte_lock_init(rt);
		eventhandler_lists_ctxt_init(&rt->rt_evhdlr_ctxt);
		getmicrotime(&caltime);
		rt->base_calendartime = caltime.tv_sec;
		rt->base_uptime = net_uptime();
		RT_LOCK(rt);
		rt->rt_flags = RTF_UP | flags;

		/*
		 * Point the generation ID to the tree's.
		 */
		switch (af) {
		case AF_INET:
			rt->rt_tree_genid = &route_genid_inet;
			break;
#if INET6
		case AF_INET6:
			rt->rt_tree_genid = &route_genid_inet6;
			break;
#endif /* INET6 */
		default:
			break;
		}

		/*
		 * Add the gateway. Possibly re-malloc-ing the storage for it
		 * also add the rt_gwroute if possible.
		 */
		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
			/* Tear down the never-inserted entry, keep the error */
			int tmp = error;
			RT_UNLOCK(rt);
			nstat_route_detach(rt);
			rte_lock_destroy(rt);
			rte_free(rt);
			senderr(tmp);
		}

		/*
		 * point to the (possibly newly malloc'd) dest address.
		 */
		ndst = rt_key(rt);

		/*
		 * make sure it contains the value we want (masked if needed).
		 */
		if (netmask)
			rt_maskedcopy(dst, ndst, netmask);
		else
			Bcopy(dst, ndst, dst->sa_len);

		/*
		 * Note that we now have a reference to the ifa.
		 * This moved from below so that rnh->rnh_addaddr() can
		 * examine the ifa and ifa->ifa_ifp if it so desires.
		 */
		rtsetifa(rt, ifa);
		rt->rt_ifp = rt->rt_ifa->ifa_ifp;

		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */

		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
		    rnh, rt->rt_nodes);
		if (rn == 0) {
			struct rtentry *rt2;
			/*
			 * Uh-oh, we already have one of these in the tree.
			 * We do a special hack: if the route that's already
			 * there was generated by the protocol-cloning
			 * mechanism, then we just blow it away and retry
			 * the insertion of the new one.
			 */
			if (flags & RTF_IFSCOPE) {
				rt2 = rtalloc1_scoped_locked(dst0, 0,
				    RTF_CLONING | RTF_PRCLONING, ifscope);
			} else {
				rt2 = rtalloc1_locked(dst, 0,
				    RTF_CLONING | RTF_PRCLONING);
			}
			if (rt2 && rt2->rt_parent) {
				/*
				 * rnh_lock is held here, so rt_key and
				 * rt_gateway of rt2 will not change.
				 */
				(void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
				    rt2->rt_gateway, rt_mask(rt2),
				    rt2->rt_flags, 0);
				rtfree_locked(rt2);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)netmask, rnh, rt->rt_nodes);
			} else if (rt2) {
				/* undo the extra ref we got */
				rtfree_locked(rt2);
			}
		}

		/*
		 * If it still failed to go into the tree,
		 * then un-make it (this should be a function)
		 */
		if (rn == NULL) {
			/* Clear gateway route */
			rt_set_gwroute(rt, rt_key(rt), NULL);
			if (rt->rt_ifa) {
				IFA_REMREF(rt->rt_ifa);
				rt->rt_ifa = NULL;
			}
			R_Free(rt_key(rt));
			RT_UNLOCK(rt);
			nstat_route_detach(rt);
			rte_lock_destroy(rt);
			rte_free(rt);
			senderr(EEXIST);
		}

		rt->rt_parent = NULL;

		/*
		 * If we got here from RESOLVE, then we are cloning so clone
		 * the rest, and note that we are a clone (and increment the
		 * parent's references).  rnh_lock is still held, which prevents
		 * a lookup from returning the newly-created route.  Hence
		 * holding and releasing the parent's rt_lock while still
		 * holding the route's rt_lock is safe since the new route
		 * is not yet externally visible.
		 */
		if (req == RTM_RESOLVE) {
			RT_LOCK_SPIN(*ret_nrt);
			/* rt_expire and rmx_expire must be both zero or both set */
			VERIFY((*ret_nrt)->rt_expire == 0 ||
			    (*ret_nrt)->rt_rmx.rmx_expire != 0);
			VERIFY((*ret_nrt)->rt_expire != 0 ||
			    (*ret_nrt)->rt_rmx.rmx_expire == 0);
			rt->rt_rmx = (*ret_nrt)->rt_rmx;
			rt_setexpire(rt, (*ret_nrt)->rt_expire);
			if ((*ret_nrt)->rt_flags &
			    (RTF_CLONING | RTF_PRCLONING)) {
				rt->rt_parent = (*ret_nrt);
				RT_ADDREF_LOCKED(*ret_nrt);
			}
			RT_UNLOCK(*ret_nrt);
		}

		/*
		 * if this protocol has something to add to this then
		 * allow it to do that as well.
		 */
		IFA_LOCK_SPIN(ifa);
		ifa_rtrequest = ifa->ifa_rtrequest;
		IFA_UNLOCK(ifa);
		if (ifa_rtrequest != NULL)
			ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL));
		/* Drop our reference; clearing ifa keeps "bad" from doing so again */
		IFA_REMREF(ifa);
		ifa = NULL;

		/*
		 * If this is the (non-scoped) default route, record
		 * the interface index used for the primary ifscope.
		 */
		if (rt_primary_default(rt, rt_key(rt))) {
			set_primary_ifscope(rt_key(rt)->sa_family,
			    rt->rt_ifp->if_index);
		}

#if NECP
		/*
		 * If this is a change in a default route, update
		 * necp client watchers to re-evaluate
		 */
		if (SA_DEFAULT(rt_key(rt))) {
			if (rt->rt_ifp != NULL) {
				ifnet_touch_lastupdown(rt->rt_ifp);
			}
			necp_update_all_clients();
		}
#endif /* NECP */

		/*
		 * actually return a resultant rtentry and
		 * give the caller a single reference.
		 */
		if (ret_nrt) {
			*ret_nrt = rt;
			RT_ADDREF_LOCKED(rt);
		}

		/* Update the route generation ID of the affected family */
		if (af == AF_INET)
			routegenid_inet_update();
#if INET6
		else if (af == AF_INET6)
			routegenid_inet6_update();
#endif /* INET6 */

		RT_GENID_SYNC(rt);

		/*
		 * We repeat the same procedures from rt_setgate() here
		 * because they weren't completed when we called it earlier,
		 * since the node was embryonic.
		 */
		if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL)
			rt_set_gwroute(rt, rt_key(rt), rt->rt_gwroute);

		if (req == RTM_ADD &&
		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
			struct rtfc_arg arg;
			arg.rnh = rnh;
			arg.rt0 = rt;
			/* rt_fixchange() locks each visited route itself */
			RT_UNLOCK(rt);
			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
			    rt_fixchange, &arg);
		} else {
			RT_UNLOCK(rt);
		}

		nstat_route_new_entry(rt);
		break;
	}
bad:
	/* Common exit: release the ifa reference if one is still held */
	if (ifa)
		IFA_REMREF(ifa);
	return (error);
}
#undef senderr
1c79356b 2333
91447636 2334int
6d2010ae
A
2335rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
2336 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
91447636
A
2337{
2338 int error;
5ba3f43e 2339 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
b0d623f7 2340 lck_mtx_lock(rnh_lock);
91447636 2341 error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt);
b0d623f7 2342 lck_mtx_unlock(rnh_lock);
91447636
A
2343 return (error);
2344}
6d2010ae
A
2345
2346int
2347rtrequest_scoped(int req, struct sockaddr *dst, struct sockaddr *gateway,
2348 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt,
2349 unsigned int ifscope)
2350{
2351 int error;
5ba3f43e 2352 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
6d2010ae
A
2353 lck_mtx_lock(rnh_lock);
2354 error = rtrequest_scoped_locked(req, dst, gateway, netmask, flags,
2355 ret_nrt, ifscope);
2356 lck_mtx_unlock(rnh_lock);
2357 return (error);
2358}
2359
1c79356b
A
2360/*
2361 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
2362 * (i.e., the routes related to it by the operation of cloning). This
2363 * routine is iterated over all potential former-child-routes by way of
2364 * rnh->rnh_walktree_from() above, and those that actually are children of
2365 * the late parent (passed in as VP here) are themselves deleted.
2366 */
2367static int
2d21ac55 2368rt_fixdelete(struct radix_node *rn, void *vp)
1c79356b
A
2369{
2370 struct rtentry *rt = (struct rtentry *)rn;
2371 struct rtentry *rt0 = vp;
2372
5ba3f43e 2373 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
91447636 2374
b0d623f7 2375 RT_LOCK(rt);
2d21ac55 2376 if (rt->rt_parent == rt0 &&
39236c6e 2377 !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
b0d623f7
A
2378 /*
2379 * Safe to drop rt_lock and use rt_key, since holding
2380 * rnh_lock here prevents another thread from calling
2381 * rt_setgate() on this route.
2382 */
2383 RT_UNLOCK(rt);
2384 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
2385 rt_mask(rt), rt->rt_flags, NULL));
1c79356b 2386 }
b0d623f7 2387 RT_UNLOCK(rt);
39236c6e 2388 return (0);
1c79356b
A
2389}
2390
2391/*
2392 * This routine is called from rt_setgate() to do the analogous thing for
2393 * adds and changes. There is the added complication in this case of a
2394 * middle insert; i.e., insertion of a new network route between an older
2395 * network route and (cloned) host routes. For this reason, a simple check
2396 * of rt->rt_parent is insufficient; each candidate route must be tested
2397 * against the (mask, value) of the new route (passed as before in vp)
9bccf70c 2398 * to see if the new route matches it.
1c79356b
A
2399 *
2400 * XXX - it may be possible to do fixdelete() for changes and reserve this
2401 * routine just for adds. I'm not sure why I thought it was necessary to do
2402 * changes this way.
2403 */
1c79356b 2404static int
2d21ac55 2405rt_fixchange(struct radix_node *rn, void *vp)
1c79356b
A
2406{
2407 struct rtentry *rt = (struct rtentry *)rn;
2408 struct rtfc_arg *ap = vp;
2409 struct rtentry *rt0 = ap->rt0;
2410 struct radix_node_head *rnh = ap->rnh;
9bccf70c 2411 u_char *xk1, *xm1, *xk2, *xmp;
7e4a7d39 2412 int i, len;
1c79356b 2413
5ba3f43e 2414 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7
A
2415
2416 RT_LOCK(rt);
91447636 2417
2d21ac55 2418 if (!rt->rt_parent ||
39236c6e 2419 (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
b0d623f7 2420 RT_UNLOCK(rt);
c910b4d9 2421 return (0);
b0d623f7 2422 }
1c79356b 2423
c910b4d9
A
2424 if (rt->rt_parent == rt0)
2425 goto delete_rt;
1c79356b
A
2426
2427 /*
2428 * There probably is a function somewhere which does this...
2429 * if not, there should be.
2430 */
c910b4d9 2431 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
1c79356b
A
2432
2433 xk1 = (u_char *)rt_key(rt0);
2434 xm1 = (u_char *)rt_mask(rt0);
2435 xk2 = (u_char *)rt_key(rt);
2436
7e4a7d39
A
2437 /*
2438 * Avoid applying a less specific route; do this only if the parent
2439 * route (rt->rt_parent) is a network route, since otherwise its mask
2440 * will be NULL if it is a cloning host route.
2441 */
2442 if ((xmp = (u_char *)rt_mask(rt->rt_parent)) != NULL) {
2443 int mlen = rt_mask(rt->rt_parent)->sa_len;
2444 if (mlen > rt_mask(rt0)->sa_len) {
b0d623f7 2445 RT_UNLOCK(rt);
c910b4d9 2446 return (0);
b0d623f7 2447 }
7e4a7d39
A
2448
2449 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
2450 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
2451 RT_UNLOCK(rt);
2452 return (0);
2453 }
2454 }
9bccf70c
A
2455 }
2456
2457 for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
b0d623f7
A
2458 if ((xk2[i] & xm1[i]) != xk1[i]) {
2459 RT_UNLOCK(rt);
c910b4d9 2460 return (0);
b0d623f7 2461 }
1c79356b
A
2462 }
2463
2464 /*
2465 * OK, this node is a clone, and matches the node currently being
2466 * changed/added under the node's mask. So, get rid of it.
2467 */
c910b4d9 2468delete_rt:
b0d623f7
A
2469 /*
2470 * Safe to drop rt_lock and use rt_key, since holding rnh_lock here
2471 * prevents another thread from calling rt_setgate() on this route.
2472 */
2473 RT_UNLOCK(rt);
c910b4d9
A
2474 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
2475 rt_mask(rt), rt->rt_flags, NULL));
1c79356b
A
2476}
2477
b0d623f7
A
/*
 * Round a sockaddr length up to the next multiple of 32 bytes (0 stays 0).
 * Doing so reduces, or even eliminates, the need to re-allocate the chunk
 * of memory shared by rt_key and rt_gateway when the gateway portion
 * changes.  Certain code paths (e.g. IPsec) are notorious for caching the
 * address of rt_gateway; rounding up helps ensure that the gateway portion
 * never gets deallocated (though its contents may change), which greatly
 * simplifies things.
 */
#define SA_SIZE(x) ((((uintptr_t)(x)) + 31) & ~((uintptr_t)31))
2488
/*
 * Sets the gateway and/or gateway route portion of a route; may be
 * called on an existing route to modify the gateway portion.  Both
 * rt_key and rt_gateway are allocated out of the same memory chunk.
 * Route entry lock must be held by caller; this routine will return
 * with the lock held.
 *
 * rnh_lock must also be held (asserted).  Both dst and gate are
 * dereferenced unconditionally on entry, so neither may be NULL.
 * The route's lock is dropped and re-acquired internally around the
 * gateway lookup, a nested delete and the clone fix-up walk.
 *
 * Returns 0 on success, or:
 *	EINVAL		dst is neither AF_INET nor AF_INET6
 *	EBUSY		the route is marked RTF_CONDEMNED
 *	EADDRNOTAVAIL	gateway route whose destination equals its gateway
 *	EADDRINUSE	the route to the gateway is the route itself
 *	EHOSTUNREACH /
 *	ENETUNREACH	scoped route whose gateway route uses another interface
 *	ENOBUFS		the key/gateway buffer could not be allocated
 */
int
rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
{
	int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len);
	struct radix_node_head *rnh = NULL;
	boolean_t loop = FALSE;

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		return (EINVAL);
	}

	rnh = rt_tables[dst->sa_family];
	LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
	RT_LOCK_ASSERT_HELD(rt);

	/*
	 * If this is for a route that is on its way of being removed,
	 * or is temporarily frozen, reject the modification request.
	 */
	if (rt->rt_flags & RTF_CONDEMNED) {
		return (EBUSY);
	}

	/* Add an extra ref for ourselves */
	RT_ADDREF_LOCKED(rt);

	/*
	 * Detect dst == gate, comparing copies made by sa_copy().
	 * NOTE(review): given the family check at the top, the family
	 * test below is always true and the else branch can only be
	 * reached with differing lengths, where it yields FALSE.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if ((dst->sa_len == gate->sa_len) &&
		    (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) {
			struct sockaddr_storage dst_ss, gate_ss;

			(void) sa_copy(dst, &dst_ss, NULL);
			(void) sa_copy(gate, &gate_ss, NULL);

			loop = equal(SA(&dst_ss), SA(&gate_ss));
		} else {
			loop = (dst->sa_len == gate->sa_len &&
			    equal(dst, gate));
		}
	}

	/*
	 * A (cloning) network route with the destination equal to the gateway
	 * will create an endless loop (see notes below), so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    RTF_GATEWAY) && loop) {
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * A host route with the destination equal to the gateway
	 * will interfere with keeping LLINFO in the routing
	 * table, so disallow it.
	 */
	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    (RTF_HOST|RTF_GATEWAY)) && loop) {
		/*
		 * The route might already exist if this is an RTM_CHANGE
		 * or a routing redirect, so try to delete it.
		 */
		if (rt_key(rt) != NULL) {
			/*
			 * Safe to drop rt_lock and use rt_key, rt_gateway,
			 * since holding rnh_lock here prevents another thread
			 * from calling rt_setgate() on this route.
			 */
			RT_UNLOCK(rt);
			(void) rtrequest_locked(RTM_DELETE, rt_key(rt),
			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
			RT_LOCK(rt);
		}
		/* Release extra ref */
		RT_REMREF_LOCKED(rt);
		return (EADDRNOTAVAIL);
	}

	/*
	 * The destination is not directly reachable.  Get a route
	 * to the next-hop gateway and store it in rt_gwroute.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		struct rtentry *gwrt;
		unsigned int ifscope;

		/*
		 * The gateway lookup uses the scope embedded in dst.
		 * NOTE(review): the final else is unreachable given the
		 * family check at the top of this function.
		 */
		if (dst->sa_family == AF_INET)
			ifscope = sin_get_ifscope(dst);
		else if (dst->sa_family == AF_INET6)
			ifscope = sin6_get_ifscope(dst);
		else
			ifscope = IFSCOPE_NONE;

		RT_UNLOCK(rt);
		/*
		 * Don't ignore RTF_CLONING, since we prefer that rt_gwroute
		 * points to a clone rather than a cloning route; see above
		 * check for cloning loop avoidance (dst == gate).
		 */
		gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
		if (gwrt != NULL)
			RT_LOCK_ASSERT_NOTHELD(gwrt);
		RT_LOCK(rt);

		/*
		 * Cloning loop avoidance:
		 *
		 * In the presence of protocol-cloning and bad configuration,
		 * it is possible to get stuck in bottomless mutual recursion
		 * (rtrequest <-> rt_setgate <-> rtalloc1).  We avoid this by not
		 * allowing protocol-cloning to operate for gateways (which
		 * is probably the correct choice anyway), and avoid the
		 * resulting reference loops by disallowing any route to run
		 * through itself as a gateway.  This is obviously mandatory
		 * when we get rt->rt_output().  It implies that a route to
		 * the gateway must already be present in the system in order
		 * for the gateway to be referred to by another route.
		 */
		if (gwrt == rt) {
			/* gwrt is rt here, whose lock we already hold */
			RT_REMREF_LOCKED(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EADDRINUSE); /* failure */
		}

		/*
		 * If scoped, the gateway route must use the same interface;
		 * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt
		 * should not change and are freely accessible.
		 */
		if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
		    gwrt != NULL && gwrt->rt_ifp != NULL &&
		    gwrt->rt_ifp->if_index != ifscope) {
			rtfree_locked(gwrt);	/* rt != gwrt, no deadlock */
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return ((rt->rt_flags & RTF_HOST) ?
			    EHOSTUNREACH : ENETUNREACH);
		}

		/* Check again since we dropped the lock above */
		if (rt->rt_flags & RTF_CONDEMNED) {
			if (gwrt != NULL)
				rtfree_locked(gwrt);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (EBUSY);
		}

		/* Set gateway route; callee adds ref to gwrt if non-NULL */
		rt_set_gwroute(rt, dst, gwrt);

		/*
		 * In case the (non-scoped) default route gets modified via
		 * an ICMP redirect, record the interface index used for the
		 * primary ifscope.  Also done in rt_setif() to take care
		 * of the non-redirect cases.
		 */
		if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) {
			set_primary_ifscope(dst->sa_family,
			    rt->rt_ifp->if_index);
		}

#if NECP
		/*
		 * If this is a change in a default route, update
		 * necp client watchers to re-evaluate
		 */
		if (SA_DEFAULT(dst)) {
			necp_update_all_clients();
		}
#endif /* NECP */

		/*
		 * Tell the kernel debugger about the new default gateway
		 * if the gateway route uses the primary interface, or
		 * if we are in a transient state before the non-scoped
		 * default gateway is installed (similar to how the system
		 * was behaving in the past).  In future, it would be good
		 * to do all this only when KDP is enabled.
		 */
		if ((dst->sa_family == AF_INET) &&
		    gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
		    (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
		    get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) {
			kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)->
			    sdl_data);
		}

		/* Release extra ref from rtalloc1() */
		if (gwrt != NULL)
			RT_REMREF(gwrt);
	}

	/*
	 * Prepare to store the gateway in rt_gateway.  Both dst and gateway
	 * are stored one after the other in the same malloc'd chunk.  If we
	 * have room, reuse the old buffer since rt_gateway already points
	 * to the right place.  Otherwise, malloc a new block and update
	 * the 'dst' address and point rt_gateway to the right place.
	 */
	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) {
		caddr_t new;

		/* The underlying allocation is done with M_WAITOK set */
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == NULL) {
			/* Clear gateway route */
			rt_set_gwroute(rt, dst, NULL);
			/* Release extra ref */
			RT_REMREF_LOCKED(rt);
			return (ENOBUFS);
		}

		/*
		 * Copy from 'dst' and not rt_key(rt) because we can get
		 * here to initialize a newly allocated route entry, in
		 * which case rt_key(rt) is NULL (and so does rt_gateway).
		 */
		bzero(new, dlen + glen);
		Bcopy(dst, new, dst->sa_len);
		R_Free(rt_key(rt));	/* free old block; NULL is okay */
		/* Key occupies the first dlen bytes; gateway follows it */
		rt->rt_nodes->rn_key = new;
		rt->rt_gateway = (struct sockaddr *)(new + dlen);
	}

	/*
	 * Copy the new gateway value into the memory chunk.
	 */
	Bcopy(gate, rt->rt_gateway, gate->sa_len);

	/*
	 * For consistency between rt_gateway and rt_key(gwrt).
	 */
	if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
	    (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) {
		if (rt->rt_gateway->sa_family == AF_INET &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
			sin_set_ifscope(rt->rt_gateway,
			    sin_get_ifscope(rt_key(rt->rt_gwroute)));
		} else if (rt->rt_gateway->sa_family == AF_INET6 &&
		    rt_key(rt->rt_gwroute)->sa_family == AF_INET6) {
			sin6_set_ifscope(rt->rt_gateway,
			    sin6_get_ifscope(rt_key(rt->rt_gwroute)));
		}
	}

	/*
	 * This isn't going to do anything useful for host routes, so
	 * don't bother.  Also make sure we have a reasonable mask
	 * (we don't yet have one during adds).
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		/* rt_fixchange() locks each visited route itself */
		RT_UNLOCK(rt);
		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
		    rt_fixchange, &arg);
		RT_LOCK(rt);
	}

	/* Release extra ref */
	RT_REMREF_LOCKED(rt);
	return (0);
}

#undef SA_SIZE
2765
316670eb
A
2766void
2767rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
2768{
2769 boolean_t gwrt_isrouter;
2770
5ba3f43e 2771 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
316670eb
A
2772 RT_LOCK_ASSERT_HELD(rt);
2773
2774 if (gwrt != NULL)
2775 RT_ADDREF(gwrt); /* for this routine */
2776
2777 /*
2778 * Get rid of existing gateway route; if rt_gwroute is already
2779 * set to gwrt, this is slightly redundant (though safe since
2780 * we held an extra ref above) but makes the code simpler.
2781 */
2782 if (rt->rt_gwroute != NULL) {
2783 struct rtentry *ogwrt = rt->rt_gwroute;
2784
2785 VERIFY(rt != ogwrt); /* sanity check */
2786 rt->rt_gwroute = NULL;
2787 RT_UNLOCK(rt);
2788 rtfree_locked(ogwrt);
2789 RT_LOCK(rt);
2790 VERIFY(rt->rt_gwroute == NULL);
2791 }
2792
2793 /*
2794 * And associate the new gateway route.
2795 */
2796 if ((rt->rt_gwroute = gwrt) != NULL) {
2797 RT_ADDREF(gwrt); /* for rt */
2798
2799 if (rt->rt_flags & RTF_WASCLONED) {
2800 /* rt_parent might be NULL if rt is embryonic */
2801 gwrt_isrouter = (rt->rt_parent != NULL &&
2802 SA_DEFAULT(rt_key(rt->rt_parent)) &&
2803 !RT_HOST(rt->rt_parent));
2804 } else {
2805 gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt));
2806 }
2807
2808 /* If gwrt points to a default router, mark it accordingly */
2809 if (gwrt_isrouter && RT_HOST(gwrt) &&
2810 !(gwrt->rt_flags & RTF_ROUTER)) {
2811 RT_LOCK(gwrt);
2812 gwrt->rt_flags |= RTF_ROUTER;
2813 RT_UNLOCK(gwrt);
2814 }
2815
2816 RT_REMREF(gwrt); /* for this routine */
2817 }
2818}
2819
1c79356b 2820static void
3e170ce0
A
2821rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
2822 const struct sockaddr *netmask)
1c79356b 2823{
3e170ce0
A
2824 const char *netmaskp = &netmask->sa_data[0];
2825 const char *srcp = &src->sa_data[0];
2826 char *dstp = &dst->sa_data[0];
2827 const char *maskend = (char *)dst
2828 + MIN(netmask->sa_len, src->sa_len);
2829 const char *srcend = (char *)dst + src->sa_len;
2830
2831 dst->sa_len = src->sa_len;
2832 dst->sa_family = src->sa_family;
1c79356b 2833
3e170ce0
A
2834 while (dstp < maskend)
2835 *dstp++ = *srcp++ & *netmaskp++;
2836 if (dstp < srcend)
2837 memset(dstp, 0, (size_t)(srcend - dstp));
1c79356b
A
2838}
2839
c910b4d9 2840/*
6d2010ae
A
2841 * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the
2842 * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped).
c910b4d9
A
2843 */
2844static struct radix_node *
2845node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
2846 unsigned int ifscope)
2847{
6d2010ae 2848 struct radix_node_head *rnh;
c910b4d9 2849 struct radix_node *rn;
6d2010ae
A
2850 struct sockaddr_storage ss, mask;
2851 int af = dst->sa_family;
cb323159 2852 struct matchleaf_arg ma = { .ifscope = ifscope };
c910b4d9
A
2853 rn_matchf_t *f = rn_match_ifscope;
2854 void *w = &ma;
2855
6d2010ae 2856 if (af != AF_INET && af != AF_INET6)
c910b4d9
A
2857 return (NULL);
2858
6d2010ae
A
2859 rnh = rt_tables[af];
2860
c910b4d9 2861 /*
6d2010ae
A
2862 * Transform dst into the internal routing table form,
2863 * clearing out the scope ID field if ifscope isn't set.
c910b4d9 2864 */
6d2010ae 2865 dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? NULL : &ifscope);
c910b4d9 2866
6d2010ae 2867 /* Transform netmask into the internal routing table form */
c910b4d9 2868 if (netmask != NULL)
6d2010ae 2869 netmask = ma_copy(af, netmask, &mask, ifscope);
c910b4d9
A
2870
2871 if (ifscope == IFSCOPE_NONE)
2872 f = w = NULL;
2873
2874 rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w);
2875 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
2876 rn = NULL;
2877
2878 return (rn);
2879}
2880
2881/*
6d2010ae 2882 * Lookup the AF_INET/AF_INET6 non-scoped default route.
c910b4d9
A
2883 */
2884static struct radix_node *
6d2010ae 2885node_lookup_default(int af)
c910b4d9 2886{
6d2010ae
A
2887 struct radix_node_head *rnh;
2888
2889 VERIFY(af == AF_INET || af == AF_INET6);
2890 rnh = rt_tables[af];
2891
2892 return (af == AF_INET ? rnh->rnh_lookup(&sin_def, NULL, rnh) :
2893 rnh->rnh_lookup(&sin6_def, NULL, rnh));
c910b4d9
A
2894}
2895
3e170ce0
A
2896boolean_t
2897rt_ifa_is_dst(struct sockaddr *dst, struct ifaddr *ifa)
2898{
2899 boolean_t result = FALSE;
2900
2901 if (ifa == NULL || ifa->ifa_addr == NULL)
2902 return (result);
2903
2904 IFA_LOCK_SPIN(ifa);
2905
2906 if (dst->sa_family == ifa->ifa_addr->sa_family &&
2907 ((dst->sa_family == AF_INET &&
2908 SIN(dst)->sin_addr.s_addr ==
2909 SIN(ifa->ifa_addr)->sin_addr.s_addr) ||
2910 (dst->sa_family == AF_INET6 &&
2911 SA6_ARE_ADDR_EQUAL(SIN6(dst), SIN6(ifa->ifa_addr)))))
2912 result = TRUE;
2913
2914 IFA_UNLOCK(ifa);
2915
2916 return (result);
2917}
2918
c910b4d9
A
2919/*
2920 * Common routine to lookup/match a route. It invokes the lookup/matchaddr
2921 * callback which could be address family-specific. The main difference
2922 * between the two (at least for AF_INET/AF_INET6) is that a lookup does
2923 * not alter the expiring state of a route, whereas a match would unexpire
2924 * or revalidate the route.
2925 *
2926 * The optional scope or interface index property of a route allows for a
2927 * per-interface route instance. This permits multiple route entries having
2928 * the same destination (but not necessarily the same gateway) to exist in
2929 * the routing table; each of these entries is specific to the corresponding
6d2010ae 2930 * interface. This is made possible by storing the scope ID value into the
c910b4d9
A
2931 * radix key, thus making each route entry unique. These scoped entries
2932 * exist along with the regular, non-scoped entries in the same radix tree
6d2010ae 2933 * for a given address family (AF_INET/AF_INET6); the scope logically
c910b4d9
A
2934 * partitions it into multiple per-interface sub-trees.
2935 *
2936 * When a scoped route lookup is performed, the routing table is searched for
2937 * the best match that would result in a route using the same interface as the
2938 * one associated with the scope (the exception to this are routes that point
2939 * to the loopback interface). The search rule follows the longest matching
2940 * prefix with the additional interface constraint.
2941 */
39236c6e
A
2942static struct rtentry *
2943rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
2944 struct sockaddr *netmask, struct radix_node_head *rnh, unsigned int ifscope)
c910b4d9 2945{
39037602 2946 struct radix_node *rn0, *rn = NULL;
6d2010ae 2947 int af = dst->sa_family;
39037602
A
2948 struct sockaddr_storage dst_ss;
2949 struct sockaddr_storage mask_ss;
2950 boolean_t dontcare;
2951#if (DEVELOPMENT || DEBUG)
3e170ce0 2952 char dbuf[MAX_SCOPE_ADDR_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
39037602
A
2953 char s_dst[MAX_IPv6_STR_LEN], s_netmask[MAX_IPv6_STR_LEN];
2954#endif
39236c6e
A
2955 VERIFY(!coarse || ifscope == IFSCOPE_NONE);
2956
5ba3f43e 2957 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
39236c6e
A
2958#if INET6
2959 /*
2960 * While we have rnh_lock held, see if we need to schedule the timer.
2961 */
2962 if (nd6_sched_timeout_want)
2963 nd6_sched_timeout(NULL, NULL);
2964#endif /* INET6 */
c910b4d9
A
2965
2966 if (!lookup_only)
2967 netmask = NULL;
2968
2969 /*
2970 * Non-scoped route lookup.
2971 */
6d2010ae 2972#if INET6
39037602 2973 if (af != AF_INET && af != AF_INET6) {
6d2010ae 2974#else
39037602 2975 if (af != AF_INET) {
6d2010ae
A
2976#endif /* !INET6 */
2977 rn = rnh->rnh_matchaddr(dst, rnh);
b0d623f7
A
2978
2979 /*
2980 * Don't return a root node; also, rnh_matchaddr callback
2981 * would have done the necessary work to clear RTPRF_OURS
2982 * for certain protocol families.
2983 */
2984 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
2985 rn = NULL;
2986 if (rn != NULL) {
2987 RT_LOCK_SPIN(RT(rn));
2988 if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) {
2989 RT_ADDREF_LOCKED(RT(rn));
2990 RT_UNLOCK(RT(rn));
2991 } else {
2992 RT_UNLOCK(RT(rn));
2993 rn = NULL;
2994 }
2995 }
2996 return (RT(rn));
c910b4d9
A
2997 }
2998
6d2010ae
A
2999 /* Transform dst/netmask into the internal routing table form */
3000 dst = sa_copy(dst, &dst_ss, &ifscope);
3001 if (netmask != NULL)
3002 netmask = ma_copy(af, netmask, &mask_ss, ifscope);
3003 dontcare = (ifscope == IFSCOPE_NONE);
3004
39037602 3005#if (DEVELOPMENT || DEBUG)
3e170ce0
A
3006 if (rt_verbose) {
3007 if (af == AF_INET)
3008 (void) inet_ntop(af, &SIN(dst)->sin_addr.s_addr,
3009 s_dst, sizeof (s_dst));
3010 else
3011 (void) inet_ntop(af, &SIN6(dst)->sin6_addr,
3012 s_dst, sizeof (s_dst));
3013
3014 if (netmask != NULL && af == AF_INET)
3015 (void) inet_ntop(af, &SIN(netmask)->sin_addr.s_addr,
3016 s_netmask, sizeof (s_netmask));
3017 if (netmask != NULL && af == AF_INET6)
3018 (void) inet_ntop(af, &SIN6(netmask)->sin6_addr,
3019 s_netmask, sizeof (s_netmask));
3020 else
3021 *s_netmask = '\0';
3022 printf("%s (%d, %d, %s, %s, %u)\n",
3023 __func__, lookup_only, coarse, s_dst, s_netmask, ifscope);
3024 }
39037602 3025#endif
3e170ce0 3026
c910b4d9
A
3027 /*
3028 * Scoped route lookup:
3029 *
3030 * We first perform a non-scoped lookup for the original result.
3031 * Afterwards, depending on whether or not the caller has specified
3032 * a scope, we perform a more specific scoped search and fallback
3033 * to this original result upon failure.
3034 */
3035 rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE);
3036
3037 /*
3038 * If the caller did not specify a scope, use the primary scope
3039 * derived from the system's non-scoped default route. If, for
6d2010ae
A
3040 * any reason, there is no primary interface, ifscope will be
3041 * set to IFSCOPE_NONE; if the above lookup resulted in a route,
3042 * we'll do a more-specific search below, scoped to the interface
3043 * of that route.
c910b4d9 3044 */
6d2010ae
A
3045 if (dontcare)
3046 ifscope = get_primary_ifscope(af);
c910b4d9
A
3047
3048 /*
3049 * Keep the original result if either of the following is true:
3050 *
3051 * 1) The interface portion of the route has the same interface
3052 * index as the scope value and it is marked with RTF_IFSCOPE.
3053 * 2) The route uses the loopback interface, in which case the
3054 * destination (host/net) is local/loopback.
3055 *
b0d623f7
A
3056 * Otherwise, do a more specified search using the scope;
3057 * we're holding rnh_lock now, so rt_ifp should not change.
c910b4d9
A
3058 */
3059 if (rn != NULL) {
3060 struct rtentry *rt = RT(rn);
39037602 3061#if (DEVELOPMENT || DEBUG)
3e170ce0
A
3062 if (rt_verbose) {
3063 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3064 printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n",
3065 __func__, rt,
3066 dbuf, gbuf,
3067 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3068 (rt->rt_ifa->ifa_ifp != NULL) ?
3069 rt->rt_ifa->ifa_ifp->if_xname : "");
3070 }
39037602
A
3071#endif
3072 if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK) ||
3073 (rt->rt_flags & RTF_GATEWAY)) {
c910b4d9
A
3074 if (rt->rt_ifp->if_index != ifscope) {
3075 /*
3076 * Wrong interface; keep the original result
3077 * only if the caller did not specify a scope,
3078 * and do a more specific scoped search using
3079 * the scope of the found route. Otherwise,
3080 * start again from scratch.
3e170ce0
A
3081 *
3082 * For loopback scope we keep the unscoped
3083 * route for local addresses
c910b4d9
A
3084 */
3085 rn = NULL;
3086 if (dontcare)
3087 ifscope = rt->rt_ifp->if_index;
3e170ce0
A
3088 else if (ifscope != lo_ifp->if_index ||
3089 rt_ifa_is_dst(dst, rt->rt_ifa) == FALSE)
c910b4d9
A
3090 rn0 = NULL;
3091 } else if (!(rt->rt_flags & RTF_IFSCOPE)) {
3092 /*
3093 * Right interface, except that this route
3094 * isn't marked with RTF_IFSCOPE. Do a more
3095 * specific scoped search. Keep the original
3096 * result and return it it in case the scoped
3097 * search fails.
3098 */
3099 rn = NULL;
3100 }
3101 }
3102 }
3103
3104 /*
3105 * Scoped search. Find the most specific entry having the same
3106 * interface scope as the one requested. The following will result
3107 * in searching for the longest prefix scoped match.
3108 */
3e170ce0 3109 if (rn == NULL) {
c910b4d9 3110 rn = node_lookup(dst, netmask, ifscope);
39037602 3111#if (DEVELOPMENT || DEBUG)
3e170ce0
A
3112 if (rt_verbose && rn != NULL) {
3113 struct rtentry *rt = RT(rn);
3114
3115 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3116 printf("%s scoped search %p to %s->%s->%s ifa %s\n",
3117 __func__, rt,
3118 dbuf, gbuf,
3119 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3120 (rt->rt_ifa->ifa_ifp != NULL) ?
3121 rt->rt_ifa->ifa_ifp->if_xname : "");
3122 }
39037602 3123#endif
3e170ce0 3124 }
c910b4d9
A
3125 /*
3126 * Use the original result if either of the following is true:
3127 *
3128 * 1) The scoped search did not yield any result.
39236c6e
A
3129 * 2) The caller insists on performing a coarse-grained lookup.
3130 * 3) The result from the scoped search is a scoped default route,
c910b4d9
A
3131 * and the original (non-scoped) result is not a default route,
3132 * i.e. the original result is a more specific host/net route.
39236c6e 3133 * 4) The scoped search yielded a net route but the original
c910b4d9
A
3134 * result is a host route, i.e. the original result is treated
3135 * as a more specific route.
3136 */
39236c6e 3137 if (rn == NULL || coarse || (rn0 != NULL &&
6d2010ae 3138 ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) ||
c910b4d9
A
3139 (!RT_HOST(rn) && RT_HOST(rn0)))))
3140 rn = rn0;
3141
3142 /*
3143 * If we still don't have a route, use the non-scoped default
3144 * route as long as the interface portion satistifes the scope.
3145 */
6d2010ae 3146 if (rn == NULL && (rn = node_lookup_default(af)) != NULL &&
3e170ce0 3147 RT(rn)->rt_ifp->if_index != ifscope) {
c910b4d9 3148 rn = NULL;
3e170ce0 3149 }
c910b4d9 3150
b0d623f7
A
3151 if (rn != NULL) {
3152 /*
6d2010ae 3153 * Manually clear RTPRF_OURS using rt_validate() and
b0d623f7 3154 * bump up the reference count after, and not before;
6d2010ae
A
3155 * we only get here for AF_INET/AF_INET6. node_lookup()
3156 * has done the check against RNF_ROOT, so we can be sure
b0d623f7
A
3157 * that we're not returning a root node here.
3158 */
3159 RT_LOCK_SPIN(RT(rn));
6d2010ae 3160 if (rt_validate(RT(rn))) {
b0d623f7
A
3161 RT_ADDREF_LOCKED(RT(rn));
3162 RT_UNLOCK(RT(rn));
3163 } else {
3164 RT_UNLOCK(RT(rn));
3165 rn = NULL;
3166 }
3167 }
39037602 3168#if (DEVELOPMENT || DEBUG)
3e170ce0
A
3169 if (rt_verbose) {
3170 if (rn == NULL)
3171 printf("%s %u return NULL\n", __func__, ifscope);
3172 else {
3173 struct rtentry *rt = RT(rn);
3174
3175 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3176
3177 printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n",
3178 __func__, ifscope, rt,
3179 dbuf, gbuf,
3180 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3181 (rt->rt_ifa->ifa_ifp != NULL) ?
3182 rt->rt_ifa->ifa_ifp->if_xname : "");
3183 }
3184 }
39037602 3185#endif
c910b4d9
A
3186 return (RT(rn));
3187}
3188
39236c6e
A
3189struct rtentry *
3190rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
3191 struct radix_node_head *rnh, unsigned int ifscope)
3192{
3193 return (rt_lookup_common(lookup_only, FALSE, dst, netmask,
3194 rnh, ifscope));
3195}
3196
3197struct rtentry *
3198rt_lookup_coarse(boolean_t lookup_only, struct sockaddr *dst,
3199 struct sockaddr *netmask, struct radix_node_head *rnh)
3200{
3201 return (rt_lookup_common(lookup_only, TRUE, dst, netmask,
3202 rnh, IFSCOPE_NONE));
3203}
3204
6d2010ae
A
3205boolean_t
3206rt_validate(struct rtentry *rt)
3207{
3208 RT_LOCK_ASSERT_HELD(rt);
3209
316670eb 3210 if ((rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) == RTF_UP) {
6d2010ae
A
3211 int af = rt_key(rt)->sa_family;
3212
3213 if (af == AF_INET)
3214 (void) in_validate(RN(rt));
3215 else if (af == AF_INET6)
3216 (void) in6_validate(RN(rt));
3217 } else {
3218 rt = NULL;
3219 }
3220
3221 return (rt != NULL);
3222}
3223
1c79356b
A
3224/*
3225 * Set up a routing table entry, normally
3226 * for an interface.
3227 */
3228int
2d21ac55 3229rtinit(struct ifaddr *ifa, int cmd, int flags)
91447636
A
3230{
3231 int error;
39236c6e 3232
5ba3f43e 3233 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
39236c6e 3234
b0d623f7 3235 lck_mtx_lock(rnh_lock);
91447636 3236 error = rtinit_locked(ifa, cmd, flags);
b0d623f7 3237 lck_mtx_unlock(rnh_lock);
39236c6e 3238
91447636
A
3239 return (error);
3240}
3241
3242int
2d21ac55 3243rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
1c79356b 3244{
39236c6e
A
3245 struct radix_node_head *rnh;
3246 uint8_t nbuf[128]; /* long enough for IPv6 */
39037602 3247#if (DEVELOPMENT || DEBUG)
39236c6e
A
3248 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
3249 char abuf[MAX_IPv6_STR_LEN];
39037602 3250#endif
39236c6e 3251 struct rtentry *rt = NULL;
2d21ac55 3252 struct sockaddr *dst;
39236c6e
A
3253 struct sockaddr *netmask;
3254 int error = 0;
1c79356b 3255
6d2010ae
A
3256 /*
3257 * Holding rnh_lock here prevents the possibility of ifa from
3258 * changing (e.g. in_ifinit), so it is safe to access its
3259 * ifa_{dst}addr (here and down below) without locking.
3260 */
5ba3f43e 3261 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
39236c6e
A
3262
3263 if (flags & RTF_HOST) {
3264 dst = ifa->ifa_dstaddr;
3265 netmask = NULL;
3266 } else {
3267 dst = ifa->ifa_addr;
3268 netmask = ifa->ifa_netmask;
3269 }
3270
3271 if (dst->sa_len == 0) {
3272 log(LOG_ERR, "%s: %s failed, invalid dst sa_len %d\n",
3273 __func__, rtm2str(cmd), dst->sa_len);
3274 error = EINVAL;
3275 goto done;
3276 }
3277 if (netmask != NULL && netmask->sa_len > sizeof (nbuf)) {
3278 log(LOG_ERR, "%s: %s failed, mask sa_len %d too large\n",
3279 __func__, rtm2str(cmd), dst->sa_len);
3280 error = EINVAL;
3281 goto done;
3282 }
3283
39037602 3284#if (DEVELOPMENT || DEBUG)
39236c6e
A
3285 if (dst->sa_family == AF_INET) {
3286 (void) inet_ntop(AF_INET, &SIN(dst)->sin_addr.s_addr,
3287 abuf, sizeof (abuf));
3288 }
3289#if INET6
3290 else if (dst->sa_family == AF_INET6) {
3291 (void) inet_ntop(AF_INET6, &SIN6(dst)->sin6_addr,
3292 abuf, sizeof (abuf));
3293 }
3294#endif /* INET6 */
39037602 3295#endif /* (DEVELOPMENT || DEBUG) */
39236c6e
A
3296
3297 if ((rnh = rt_tables[dst->sa_family]) == NULL) {
3298 error = EINVAL;
3299 goto done;
3300 }
3301
1c79356b
A
3302 /*
3303 * If it's a delete, check that if it exists, it's on the correct
3304 * interface or we might scrub a route to another ifa which would
3305 * be confusing at best and possibly worse.
3306 */
3307 if (cmd == RTM_DELETE) {
9bccf70c 3308 /*
1c79356b
A
3309 * It's a delete, so it should already exist..
3310 * If it's a net, mask off the host bits
3311 * (Assuming we have a mask)
3312 */
39236c6e
A
3313 if (netmask != NULL) {
3314 rt_maskedcopy(dst, SA(nbuf), netmask);
3315 dst = SA(nbuf);
1c79356b
A
3316 }
3317 /*
39236c6e
A
3318 * Get an rtentry that is in the routing tree and contains
3319 * the correct info. Note that we perform a coarse-grained
3320 * lookup here, in case there is a scoped variant of the
3321 * subnet/prefix route which we should ignore, as we never
3322 * add a scoped subnet/prefix route as part of adding an
3323 * interface address.
1c79356b 3324 */
39236c6e
A
3325 rt = rt_lookup_coarse(TRUE, dst, NULL, rnh);
3326 if (rt != NULL) {
39037602 3327#if (DEVELOPMENT || DEBUG)
39236c6e 3328 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
39037602 3329#endif
1c79356b
A
3330 /*
3331 * Ok so we found the rtentry. it has an extra reference
3332 * for us at this stage. we won't need that so
3333 * lop that off now.
3334 */
39236c6e 3335 RT_LOCK(rt);
1c79356b 3336 if (rt->rt_ifa != ifa) {
39236c6e
A
3337 /*
3338 * If the interface address in the rtentry
3339 * doesn't match the interface we are using,
3340 * then we don't want to delete it, so return
3341 * an error. This seems to be the only point
3342 * of this whole RTM_DELETE clause.
3343 */
39037602 3344#if (DEVELOPMENT || DEBUG)
39236c6e
A
3345 if (rt_verbose) {
3346 log(LOG_DEBUG, "%s: not removing "
3347 "route to %s->%s->%s, flags %b, "
3348 "ifaddr %s, rt_ifa 0x%llx != "
3349 "ifa 0x%llx\n", __func__, dbuf,
3350 gbuf, ((rt->rt_ifp != NULL) ?
3351 rt->rt_ifp->if_xname : ""),
3352 rt->rt_flags, RTF_BITS, abuf,
3353 (uint64_t)VM_KERNEL_ADDRPERM(
3354 rt->rt_ifa),
3355 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3356 }
39037602 3357#endif /* (DEVELOPMENT || DEBUG) */
b0d623f7
A
3358 RT_REMREF_LOCKED(rt);
3359 RT_UNLOCK(rt);
39236c6e
A
3360 rt = NULL;
3361 error = ((flags & RTF_HOST) ?
3362 EHOSTUNREACH : ENETUNREACH);
3363 goto done;
3364 } else if (rt->rt_flags & RTF_STATIC) {
1c79356b 3365 /*
39236c6e
A
3366 * Don't remove the subnet/prefix route if
3367 * this was manually added from above.
1c79356b 3368 */
39037602 3369#if (DEVELOPMENT || DEBUG)
39236c6e
A
3370 if (rt_verbose) {
3371 log(LOG_DEBUG, "%s: not removing "
3372 "static route to %s->%s->%s, "
3373 "flags %b, ifaddr %s\n", __func__,
3374 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3375 rt->rt_ifp->if_xname : ""),
3376 rt->rt_flags, RTF_BITS, abuf);
3377 }
39037602 3378#endif /* (DEVELOPMENT || DEBUG) */
b0d623f7
A
3379 RT_REMREF_LOCKED(rt);
3380 RT_UNLOCK(rt);
39236c6e
A
3381 rt = NULL;
3382 error = EBUSY;
3383 goto done;
1c79356b 3384 }
39037602 3385#if (DEVELOPMENT || DEBUG)
39236c6e
A
3386 if (rt_verbose) {
3387 log(LOG_DEBUG, "%s: removing route to "
3388 "%s->%s->%s, flags %b, ifaddr %s\n",
3389 __func__, dbuf, gbuf,
3390 ((rt->rt_ifp != NULL) ?
3391 rt->rt_ifp->if_xname : ""),
3392 rt->rt_flags, RTF_BITS, abuf);
3393 }
39037602 3394#endif /* (DEVELOPMENT || DEBUG) */
39236c6e
A
3395 RT_REMREF_LOCKED(rt);
3396 RT_UNLOCK(rt);
3397 rt = NULL;
1c79356b 3398 }
1c79356b
A
3399 }
3400 /*
3401 * Do the actual request
3402 */
39236c6e
A
3403 if ((error = rtrequest_locked(cmd, dst, ifa->ifa_addr, netmask,
3404 flags | ifa->ifa_flags, &rt)) != 0)
3405 goto done;
3406
3407 VERIFY(rt != NULL);
39037602 3408#if (DEVELOPMENT || DEBUG)
39236c6e 3409 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
39037602 3410#endif /* (DEVELOPMENT || DEBUG) */
39236c6e
A
3411 switch (cmd) {
3412 case RTM_DELETE:
1c79356b 3413 /*
39236c6e
A
3414 * If we are deleting, and we found an entry, then it's
3415 * been removed from the tree. Notify any listening
3416 * routing agents of the change and throw it away.
1c79356b 3417 */
b0d623f7 3418 RT_LOCK(rt);
39236c6e 3419 rt_newaddrmsg(cmd, ifa, error, rt);
b0d623f7 3420 RT_UNLOCK(rt);
39037602 3421#if (DEVELOPMENT || DEBUG)
39236c6e
A
3422 if (rt_verbose) {
3423 log(LOG_DEBUG, "%s: removed route to %s->%s->%s, "
3424 "flags %b, ifaddr %s\n", __func__, dbuf, gbuf,
3425 ((rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : ""),
3426 rt->rt_flags, RTF_BITS, abuf);
3427 }
39037602 3428#endif /* (DEVELOPMENT || DEBUG) */
2d21ac55 3429 rtfree_locked(rt);
39236c6e 3430 break;
1c79356b 3431
39236c6e 3432 case RTM_ADD:
1c79356b 3433 /*
39236c6e
A
3434 * We are adding, and we have a returned routing entry.
3435 * We need to sanity check the result. If it came back
3436 * with an unexpected interface, then it must have already
3437 * existed or something.
1c79356b 3438 */
39236c6e 3439 RT_LOCK(rt);
1c79356b 3440 if (rt->rt_ifa != ifa) {
6d2010ae
A
3441 void (*ifa_rtrequest)
3442 (int, struct rtentry *, struct sockaddr *);
39037602 3443#if (DEVELOPMENT || DEBUG)
39236c6e 3444 if (rt_verbose) {
39037602
A
3445 if (!(rt->rt_ifa->ifa_ifp->if_flags &
3446 (IFF_POINTOPOINT|IFF_LOOPBACK))) {
3447 log(LOG_ERR, "%s: %s route to %s->%s->%s, "
3448 "flags %b, ifaddr %s, rt_ifa 0x%llx != "
3449 "ifa 0x%llx\n", __func__, rtm2str(cmd),
3450 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3451 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3452 RTF_BITS, abuf,
3453 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa),
3454 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3455 }
3456
39236c6e
A
3457 log(LOG_DEBUG, "%s: %s route to %s->%s->%s, "
3458 "flags %b, ifaddr %s, rt_ifa was 0x%llx "
3459 "now 0x%llx\n", __func__, rtm2str(cmd),
3460 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3461 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3462 RTF_BITS, abuf,
3463 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa),
3464 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3465 }
39037602 3466#endif /* (DEVELOPMENT || DEBUG) */
39236c6e 3467
1c79356b
A
3468 /*
3469 * Ask that the protocol in question
3470 * remove anything it has associated with
3471 * this route and ifaddr.
3472 */
6d2010ae 3473 ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
6d2010ae 3474 if (ifa_rtrequest != NULL)
39236c6e 3475 ifa_rtrequest(RTM_DELETE, rt, NULL);
9bccf70c
A
3476 /*
3477 * Set the route's ifa.
1c79356b 3478 */
9bccf70c 3479 rtsetifa(rt, ifa);
6d2010ae
A
3480
3481 if (rt->rt_ifp != ifa->ifa_ifp) {
3482 /*
3483 * Purge any link-layer info caching.
3484 */
3485 if (rt->rt_llinfo_purge != NULL)
3486 rt->rt_llinfo_purge(rt);
3487 /*
3488 * Adjust route ref count for the interfaces.
3489 */
3490 if (rt->rt_if_ref_fn != NULL) {
3491 rt->rt_if_ref_fn(ifa->ifa_ifp, 1);
3492 rt->rt_if_ref_fn(rt->rt_ifp, -1);
3493 }
d1ecb069 3494 }
6d2010ae 3495
1c79356b
A
3496 /*
3497 * And substitute in references to the ifaddr
3498 * we are adding.
3499 */
1c79356b 3500 rt->rt_ifp = ifa->ifa_ifp;
39236c6e
A
3501 /*
3502 * If rmx_mtu is not locked, update it
3503 * to the MTU used by the new interface.
3504 */
d9a64523 3505 if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
39236c6e 3506 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
d9a64523
A
3507 if (dst->sa_family == AF_INET &&
3508 INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
3509 rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
3510 /* Further adjust the size for CLAT46 expansion */
3511 rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
3512 }
3513 }
39236c6e 3514
1c79356b
A
3515 /*
3516 * Now ask the protocol to check if it needs
3517 * any special processing in its new form.
3518 */
6d2010ae 3519 ifa_rtrequest = ifa->ifa_rtrequest;
6d2010ae 3520 if (ifa_rtrequest != NULL)
39236c6e
A
3521 ifa_rtrequest(RTM_ADD, rt, NULL);
3522 } else {
39037602 3523#if (DEVELOPMENT || DEBUG)
39236c6e
A
3524 if (rt_verbose) {
3525 log(LOG_DEBUG, "%s: added route to %s->%s->%s, "
3526 "flags %b, ifaddr %s\n", __func__, dbuf,
3527 gbuf, ((rt->rt_ifp != NULL) ?
3528 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3529 RTF_BITS, abuf);
3530 }
39037602 3531#endif /* (DEVELOPMENT || DEBUG) */
1c79356b
A
3532 }
3533 /*
3534 * notify any listenning routing agents of the change
3535 */
39236c6e 3536 rt_newaddrmsg(cmd, ifa, error, rt);
2d21ac55
A
3537 /*
3538 * We just wanted to add it; we don't actually need a
3539 * reference. This will result in a route that's added
3540 * to the routing table without a reference count. The
3541 * RTM_DELETE code will do the necessary step to adjust
3542 * the reference count at deletion time.
3543 */
b0d623f7
A
3544 RT_REMREF_LOCKED(rt);
3545 RT_UNLOCK(rt);
39236c6e
A
3546 break;
3547
3548 default:
3549 VERIFY(0);
3550 /* NOTREACHED */
2d21ac55 3551 }
39236c6e 3552done:
1c79356b
A
3553 return (error);
3554}
6601e61a 3555
39236c6e 3556static void
6d2010ae
A
3557rt_set_idleref(struct rtentry *rt)
3558{
3559 RT_LOCK_ASSERT_HELD(rt);
3560
39236c6e
A
3561 /*
3562 * We currently keep idle refcnt only on unicast cloned routes
3563 * that aren't marked with RTF_NOIFREF.
3564 */
3565 if (rt->rt_parent != NULL && !(rt->rt_flags &
3566 (RTF_NOIFREF|RTF_BROADCAST | RTF_MULTICAST)) &&
3567 (rt->rt_flags & (RTF_UP|RTF_WASCLONED|RTF_IFREF)) ==
3568 (RTF_UP|RTF_WASCLONED)) {
3569 rt_clear_idleref(rt); /* drop existing refcnt if any */
3570 rt->rt_if_ref_fn = rte_if_ref;
3571 /* Become a regular mutex, just in case */
3572 RT_CONVERT_LOCK(rt);
3573 rt->rt_if_ref_fn(rt->rt_ifp, 1);
3574 rt->rt_flags |= RTF_IFREF;
3575 }
6d2010ae
A
3576}
3577
3578void
3579rt_clear_idleref(struct rtentry *rt)
3580{
3581 RT_LOCK_ASSERT_HELD(rt);
3582
3583 if (rt->rt_if_ref_fn != NULL) {
39236c6e
A
3584 VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF);
3585 /* Become a regular mutex, just in case */
3586 RT_CONVERT_LOCK(rt);
6d2010ae
A
3587 rt->rt_if_ref_fn(rt->rt_ifp, -1);
3588 rt->rt_flags &= ~RTF_IFREF;
3589 rt->rt_if_ref_fn = NULL;
3590 }
d1ecb069
A
3591}
3592
316670eb
A
3593void
3594rt_set_proxy(struct rtentry *rt, boolean_t set)
3595{
3596 lck_mtx_lock(rnh_lock);
3597 RT_LOCK(rt);
3598 /*
3599 * Search for any cloned routes which might have
3600 * been formed from this node, and delete them.
3601 */
3602 if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
3603 struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
3604
3605 if (set)
3606 rt->rt_flags |= RTF_PROXY;
3607 else
3608 rt->rt_flags &= ~RTF_PROXY;
3609
3610 RT_UNLOCK(rt);
3611 if (rnh != NULL && rt_mask(rt)) {
3612 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
3613 rt_fixdelete, rt);
3614 }
3615 } else {
3616 RT_UNLOCK(rt);
3617 }
3618 lck_mtx_unlock(rnh_lock);
3619}
3620
b0d623f7
A
3621static void
3622rte_lock_init(struct rtentry *rt)
3623{
3624 lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr);
3625}
3626
3627static void
3628rte_lock_destroy(struct rtentry *rt)
3629{
3630 RT_LOCK_ASSERT_NOTHELD(rt);
3631 lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp);
3632}
3633
3634void
3635rt_lock(struct rtentry *rt, boolean_t spin)
3636{
3637 RT_LOCK_ASSERT_NOTHELD(rt);
3638 if (spin)
3639 lck_mtx_lock_spin(&rt->rt_lock);
3640 else
3641 lck_mtx_lock(&rt->rt_lock);
3642 if (rte_debug & RTD_DEBUG)
3643 rte_lock_debug((struct rtentry_dbg *)rt);
3644}
3645
3646void
3647rt_unlock(struct rtentry *rt)
3648{
b0d623f7
A
3649 if (rte_debug & RTD_DEBUG)
3650 rte_unlock_debug((struct rtentry_dbg *)rt);
3651 lck_mtx_unlock(&rt->rt_lock);
3652
3653}
3654
3655static inline void
3656rte_lock_debug(struct rtentry_dbg *rte)
3657{
3658 uint32_t idx;
3659
39236c6e 3660 RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
b0d623f7
A
3661 idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE;
3662 if (rte_debug & RTD_TRACE)
3663 ctrace_record(&rte->rtd_lock[idx]);
3664}
3665
3666static inline void
3667rte_unlock_debug(struct rtentry_dbg *rte)
3668{
3669 uint32_t idx;
3670
39236c6e 3671 RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
b0d623f7
A
3672 idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE;
3673 if (rte_debug & RTD_TRACE)
3674 ctrace_record(&rte->rtd_unlock[idx]);
3675}
3676
3677static struct rtentry *
6601e61a
A
3678rte_alloc(void)
3679{
2d21ac55
A
3680 if (rte_debug & RTD_DEBUG)
3681 return (rte_alloc_debug());
3682
6601e61a
A
3683 return ((struct rtentry *)zalloc(rte_zone));
3684}
3685
b0d623f7 3686static void
6601e61a
A
3687rte_free(struct rtentry *p)
3688{
2d21ac55
A
3689 if (rte_debug & RTD_DEBUG) {
3690 rte_free_debug(p);
3691 return;
3692 }
3693
39236c6e 3694 if (p->rt_refcnt != 0) {
6601e61a 3695 panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
39236c6e
A
3696 /* NOTREACHED */
3697 }
3e170ce0 3698
6601e61a
A
3699 zfree(rte_zone, p);
3700}
0c530ab8 3701
d1ecb069
A
3702static void
3703rte_if_ref(struct ifnet *ifp, int cnt)
3704{
3705 struct kev_msg ev_msg;
3706 struct net_event_data ev_data;
3707 uint32_t old;
3708
3709 /* Force cnt to 1 increment/decrement */
39236c6e 3710 if (cnt < -1 || cnt > 1) {
d1ecb069 3711 panic("%s: invalid count argument (%d)", __func__, cnt);
39236c6e
A
3712 /* NOTREACHED */
3713 }
d1ecb069 3714 old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt);
39236c6e 3715 if (cnt < 0 && old == 0) {
d1ecb069 3716 panic("%s: ifp=%p negative route refcnt!", __func__, ifp);
39236c6e
A
3717 /* NOTREACHED */
3718 }
d1ecb069
A
3719 /*
3720 * The following is done without first holding the ifnet lock,
3721 * for performance reasons. The relevant ifnet fields, with
3722 * the exception of the if_idle_flags, are never changed
3723 * during the lifetime of the ifnet. The if_idle_flags
3724 * may possibly be modified, so in the event that the value
3725 * is stale because IFRF_IDLE_NOTIFY was cleared, we'd end up
3726 * sending the event anyway. This is harmless as it is just
3727 * a notification to the monitoring agent in user space, and
3728 * it is expected to check via SIOCGIFGETRTREFCNT again anyway.
3729 */
3730 if ((ifp->if_idle_flags & IFRF_IDLE_NOTIFY) && cnt < 0 && old == 1) {
3731 bzero(&ev_msg, sizeof (ev_msg));
3732 bzero(&ev_data, sizeof (ev_data));
3733
3734 ev_msg.vendor_code = KEV_VENDOR_APPLE;
3735 ev_msg.kev_class = KEV_NETWORK_CLASS;
3736 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3737 ev_msg.event_code = KEV_DL_IF_IDLE_ROUTE_REFCNT;
3738
3739 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3740
3741 ev_data.if_family = ifp->if_family;
3742 ev_data.if_unit = ifp->if_unit;
3743 ev_msg.dv[0].data_length = sizeof (struct net_event_data);
3744 ev_msg.dv[0].data_ptr = &ev_data;
3745
39037602 3746 dlil_post_complete_msg(NULL, &ev_msg);
d1ecb069
A
3747 }
3748}
d1ecb069 3749
2d21ac55
A
3750static inline struct rtentry *
3751rte_alloc_debug(void)
3752{
3753 struct rtentry_dbg *rte;
3754
3755 rte = ((struct rtentry_dbg *)zalloc(rte_zone));
3756 if (rte != NULL) {
3757 bzero(rte, sizeof (*rte));
b0d623f7
A
3758 if (rte_debug & RTD_TRACE)
3759 ctrace_record(&rte->rtd_alloc);
2d21ac55
A
3760 rte->rtd_inuse = RTD_INUSE;
3761 }
3762 return ((struct rtentry *)rte);
3763}
3764
3765static inline void
3766rte_free_debug(struct rtentry *p)
3767{
3768 struct rtentry_dbg *rte = (struct rtentry_dbg *)p;
3769
39236c6e 3770 if (p->rt_refcnt != 0) {
2d21ac55 3771 panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);
39236c6e
A
3772 /* NOTREACHED */
3773 }
3774 if (rte->rtd_inuse == RTD_FREED) {
2d21ac55 3775 panic("rte_free: double free rte=%p\n", rte);
39236c6e
A
3776 /* NOTREACHED */
3777 } else if (rte->rtd_inuse != RTD_INUSE) {
2d21ac55 3778 panic("rte_free: corrupted rte=%p\n", rte);
39236c6e
A
3779 /* NOTREACHED */
3780 }
2d21ac55 3781 bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
b0d623f7
A
3782 /* Preserve rt_lock to help catch use-after-free cases */
3783 bzero((caddr_t)p, offsetof(struct rtentry, rt_lock));
2d21ac55
A
3784
3785 rte->rtd_inuse = RTD_FREED;
3786
b0d623f7
A
3787 if (rte_debug & RTD_TRACE)
3788 ctrace_record(&rte->rtd_free);
2d21ac55
A
3789
3790 if (!(rte_debug & RTD_NO_FREE))
3791 zfree(rte_zone, p);
3792}
b0d623f7
A
3793
3794void
3795ctrace_record(ctrace_t *tr)
3796{
3797 tr->th = current_thread();
3798 bzero(tr->pc, sizeof (tr->pc));
3799 (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE);
3800}
6d2010ae 3801
39236c6e
A
3802void
3803route_copyout(struct route *dst, const struct route *src, size_t length)
6d2010ae 3804{
39236c6e 3805 /* Copy everything (rt, srcif, flags, dst) from src */
6d2010ae
A
3806 bcopy(src, dst, length);
3807
3808 /* Hold one reference for the local copy of struct route */
3809 if (dst->ro_rt != NULL)
3810 RT_ADDREF(dst->ro_rt);
39236c6e 3811
5ba3f43e
A
3812 /* Hold one reference for the local copy of struct lle */
3813 if (dst->ro_lle != NULL)
3814 LLE_ADDREF(dst->ro_lle);
3815
39236c6e
A
3816 /* Hold one reference for the local copy of struct ifaddr */
3817 if (dst->ro_srcia != NULL)
3818 IFA_ADDREF(dst->ro_srcia);
6d2010ae
A
3819}
3820
39236c6e
A
3821void
3822route_copyin(struct route *src, struct route *dst, size_t length)
6d2010ae 3823{
5ba3f43e
A
3824 /*
3825 * No cached route at the destination?
3826 * If none, then remove old references if present
3827 * and copy entire src route.
3828 */
6d2010ae 3829 if (dst->ro_rt == NULL) {
5ba3f43e
A
3830 /*
3831 * Ditch the cached link layer reference (dst)
3832 * since we're about to take everything there is in src
3833 */
3834 if (dst->ro_lle != NULL)
3835 LLE_REMREF(dst->ro_lle);
6d2010ae 3836 /*
39236c6e
A
3837 * Ditch the address in the cached copy (dst) since
3838 * we're about to take everything there is in src.
3839 */
3840 if (dst->ro_srcia != NULL)
3841 IFA_REMREF(dst->ro_srcia);
3842 /*
5ba3f43e 3843 * Copy everything (rt, ro_lle, srcia, flags, dst) from src; the
39236c6e
A
3844 * references to rt and/or srcia were held at the time
3845 * of storage and are kept intact.
6d2010ae
A
3846 */
3847 bcopy(src, dst, length);
5ba3f43e
A
3848 goto done;
3849 }
3850
3851 /*
3852 * We know dst->ro_rt is not NULL here.
3853 * If the src->ro_rt is the same, update ro_lle, srcia and flags
3854 * and ditch the route in the local copy.
3855 */
3856 if (dst->ro_rt == src->ro_rt) {
3857 dst->ro_flags = src->ro_flags;
3858
3859 if (dst->ro_lle != src->ro_lle) {
3860 if (dst->ro_lle != NULL)
3861 LLE_REMREF(dst->ro_lle);
3862 dst->ro_lle = src->ro_lle;
3863 } else if (src->ro_lle != NULL) {
3864 LLE_REMREF(src->ro_lle);
3865 }
3866
3867 if (dst->ro_srcia != src->ro_srcia) {
39236c6e
A
3868 if (dst->ro_srcia != NULL)
3869 IFA_REMREF(dst->ro_srcia);
5ba3f43e
A
3870 dst->ro_srcia = src->ro_srcia;
3871 } else if (src->ro_srcia != NULL) {
3872 IFA_REMREF(src->ro_srcia);
6d2010ae 3873 }
5ba3f43e
A
3874 rtfree(src->ro_rt);
3875 goto done;
3876 }
3877
3878 /*
3879 * If they are dst's ro_rt is not equal to src's,
3880 * and src'd rt is not NULL, then remove old references
3881 * if present and copy entire src route.
3882 */
3883 if (src->ro_rt != NULL) {
3884 rtfree(dst->ro_rt);
3885
3886 if (dst->ro_lle != NULL)
3887 LLE_REMREF(dst->ro_lle);
3888 if (dst->ro_srcia != NULL)
3889 IFA_REMREF(dst->ro_srcia);
3890 bcopy(src, dst, length);
3891 goto done;
3892 }
3893
3894 /*
3895 * Here, dst's cached route is not NULL but source's is.
3896 * Just get rid of all the other cached reference in src.
3897 */
3898 if (src->ro_srcia != NULL) {
39236c6e
A
3899 /*
3900 * Ditch src address in the local copy (src) since we're
3901 * not caching the route entry anyway (ro_rt is NULL).
3902 */
3903 IFA_REMREF(src->ro_srcia);
6d2010ae 3904 }
5ba3f43e
A
3905 if (src->ro_lle != NULL) {
3906 /*
3907 * Ditch cache lle in the local copy (src) since we're
3908 * not caching the route anyway (ro_rt is NULL).
3909 */
3910 LLE_REMREF(src->ro_lle);
3911 }
3912done:
39236c6e 3913 /* This function consumes the references on src */
5ba3f43e 3914 src->ro_lle = NULL;
6d2010ae 3915 src->ro_rt = NULL;
39236c6e 3916 src->ro_srcia = NULL;
6d2010ae 3917}
316670eb
A
3918
3919/*
3920 * route_to_gwroute will find the gateway route for a given route.
3921 *
3922 * If the route is down, look the route up again.
3923 * If the route goes through a gateway, get the route to the gateway.
3924 * If the gateway route is down, look it up again.
3925 * If the route is set to reject, verify it hasn't expired.
3926 *
3927 * If the returned route is non-NULL, the caller is responsible for
3928 * releasing the reference and unlocking the route.
 *
 * net_dest is only used to look up a replacement when the hinted route
 * is no longer RTF_UP.  A NULL hint0 yields a return value of 0 with
 * *out_route set to NULL.  On failure the return value is EHOSTUNREACH
 * or EHOSTDOWN and *out_route stays NULL; the route is returned unlocked
 * in that case and the reference taken by this routine is dropped at
 * the "bad" label.
3929 */
/* Record the error code and jump to the common cleanup at "bad" */
39236c6e 3930#define senderr(e) { error = (e); goto bad; }
316670eb
A
3931errno_t
3932route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0,
39236c6e 3933 struct rtentry **out_route)
316670eb
A
3934{
3935 uint64_t timenow;
3936 struct rtentry *rt = hint0, *hint = hint0;
3937 errno_t error = 0;
3938 unsigned int ifindex;
3939 boolean_t gwroute;
3940
3941 *out_route = NULL;
3942
3943 if (rt == NULL)
3944 return (0);
3945
3946 /*
3947 * Next hop determination. Because we may involve the gateway route
3948 * in addition to the original route, locking is rather complicated.
3949 * The general concept is that regardless of whether the route points
3950 * to the original route or to the gateway route, this routine takes
3951 * an extra reference on such a route. This extra reference will be
3952 * released at the end.
3953 *
3954 * Care must be taken to ensure that the "hint0" route never gets freed
3955 * via rtfree(), since the caller may have stored it inside a struct
3956 * route with a reference held for that placeholder.
3957 */
3958 RT_LOCK_SPIN(rt);
3959 ifindex = rt->rt_ifp->if_index;
3960 RT_ADDREF_LOCKED(rt);
3961 if (!(rt->rt_flags & RTF_UP)) {
3962 RT_REMREF_LOCKED(rt);
3963 RT_UNLOCK(rt);
3964 /* route is down, find a new one */
3965 hint = rt = rtalloc1_scoped((struct sockaddr *)
3966 (size_t)net_dest, 1, 0, ifindex);
3967 if (hint != NULL) {
3968 RT_LOCK_SPIN(rt);
3969 ifindex = rt->rt_ifp->if_index;
3970 } else {
 /* rt is NULL here, so the cleanup at "bad" has nothing to release */
3971 senderr(EHOSTUNREACH);
3972 }
3973 }
3974
3975 /*
3976 * We have a reference to "rt" by now; it will either
3977 * be released or freed at the end of this routine.
3978 */
3979 RT_LOCK_ASSERT_HELD(rt);
3980 if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) {
3981 struct rtentry *gwrt = rt->rt_gwroute;
3982 struct sockaddr_storage ss;
3983 struct sockaddr *gw = (struct sockaddr *)&ss;
3984
3985 VERIFY(rt == hint);
3986 RT_ADDREF_LOCKED(hint);
3987
3988 /* If there's no gateway rt, look it up */
3989 if (gwrt == NULL) {
 /* Snapshot the gateway address before dropping rt's lock */
3990 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
3991 rt->rt_gateway->sa_len));
3992 RT_UNLOCK(rt);
3993 goto lookup;
3994 }
3995 /* Become a regular mutex */
3996 RT_CONVERT_LOCK(rt);
3997
3998 /*
3999 * Take gwrt's lock while holding route's lock;
4000 * this is okay since gwrt never points back
4001 * to "rt", so no lock ordering issues.
4002 */
4003 RT_LOCK_SPIN(gwrt);
4004 if (!(gwrt->rt_flags & RTF_UP)) {
4005 rt->rt_gwroute = NULL;
4006 RT_UNLOCK(gwrt);
4007 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
4008 rt->rt_gateway->sa_len));
4009 RT_UNLOCK(rt);
4010 rtfree(gwrt);
4011lookup:
 /* Reached with rt unlocked and "gw" holding a copy of rt_gateway */
4012 lck_mtx_lock(rnh_lock);
4013 gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex);
4014
4015 RT_LOCK(rt);
4016 /*
4017 * Bail out if the route is down, no route
4018 * to gateway, circular route, or if the
4019 * gateway portion of "rt" has changed.
4020 */
4021 if (!(rt->rt_flags & RTF_UP) || gwrt == NULL ||
4022 gwrt == rt || !equal(gw, rt->rt_gateway)) {
4023 if (gwrt == rt) {
4024 RT_REMREF_LOCKED(gwrt);
4025 gwrt = NULL;
4026 }
4027 VERIFY(rt == hint);
4028 RT_REMREF_LOCKED(hint);
4029 hint = NULL;
4030 RT_UNLOCK(rt);
4031 if (gwrt != NULL)
4032 rtfree_locked(gwrt);
4033 lck_mtx_unlock(rnh_lock);
4034 senderr(EHOSTUNREACH);
4035 }
4036 VERIFY(gwrt != NULL);
4037 /*
4038 * Set gateway route; callee adds ref to gwrt;
4039 * gwrt has an extra ref from rtalloc1() for
4040 * this routine.
4041 */
4042 rt_set_gwroute(rt, rt_key(rt), gwrt);
4043 VERIFY(rt == hint);
4044 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
4045 RT_UNLOCK(rt);
4046 lck_mtx_unlock(rnh_lock);
4047 rt = gwrt;
4048 } else {
4049 RT_ADDREF_LOCKED(gwrt);
4050 RT_UNLOCK(gwrt);
4051 VERIFY(rt == hint);
4052 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
4053 RT_UNLOCK(rt);
4054 rt = gwrt;
4055 }
4056 VERIFY(rt == gwrt && rt != hint);
4057
4058 /*
4059 * This is an opportunity to revalidate the parent route's
4060 * rt_gwroute, in case it now points to a dead route entry.
4061 * Parent route won't go away since the clone (hint) holds
4062 * a reference to it. rt == gwrt.
4063 */
4064 RT_LOCK_SPIN(hint);
4065 if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
4066 (RTF_WASCLONED | RTF_UP)) {
4067 struct rtentry *prt = hint->rt_parent;
4068 VERIFY(prt != NULL);
4069
4070 RT_CONVERT_LOCK(hint);
4071 RT_ADDREF(prt);
4072 RT_UNLOCK(hint);
4073 rt_revalidate_gwroute(prt, rt);
4074 RT_REMREF(prt);
4075 } else {
4076 RT_UNLOCK(hint);
4077 }
4078
4079 /* Clean up "hint" now; see notes above regarding hint0 */
4080 if (hint == hint0)
4081 RT_REMREF(hint);
4082 else
4083 rtfree(hint);
4084 hint = NULL;
4085
4086 /* rt == gwrt; if it is now down, give up */
4087 RT_LOCK_SPIN(rt);
4088 if (!(rt->rt_flags & RTF_UP)) {
4089 RT_UNLOCK(rt);
4090 senderr(EHOSTUNREACH);
4091 }
4092 }
4093
4094 if (rt->rt_flags & RTF_REJECT) {
4095 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
4096 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
4097 timenow = net_uptime();
 /*
  * A reject route with no expiration, or one whose expiration is
  * still in the future, fails the request: EHOSTDOWN when the
  * original route had no RTF_GATEWAY flag, EHOSTUNREACH otherwise.
  */
4098 if (rt->rt_expire == 0 || timenow < rt->rt_expire) {
4099 RT_UNLOCK(rt);
4100 senderr(!gwroute ? EHOSTDOWN : EHOSTUNREACH);
4101 }
4102 }
4103
4104 /* Become a regular mutex */
4105 RT_CONVERT_LOCK(rt);
4106
4107 /* Caller is responsible for cleaning up "rt" */
4108 *out_route = rt;
4109 return (0);
4110
4111bad:
4112 /* Clean up route (either it is "rt" or "gwrt") */
4113 if (rt != NULL) {
4114 RT_LOCK_SPIN(rt);
 /* hint0 only has its count dropped; any other route goes through rtfree() */
4115 if (rt == hint0) {
4116 RT_REMREF_LOCKED(rt);
4117 RT_UNLOCK(rt);
4118 } else {
4119 RT_UNLOCK(rt);
4120 rtfree(rt);
4121 }
4122 }
4123 return (error);
4124}
4125#undef senderr
4126
4127void
4128rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt)
4129{
316670eb
A
4130 VERIFY(gwrt != NULL);
4131
4132 RT_LOCK_SPIN(rt);
4133 if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) &&
4134 rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family ==
4135 rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL ||
4136 !(rt->rt_gwroute->rt_flags & RTF_UP))) {
4137 boolean_t isequal;
fe8ab488 4138 VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING));
316670eb
A
4139
4140 if (rt->rt_gateway->sa_family == AF_INET ||
4141 rt->rt_gateway->sa_family == AF_INET6) {
4142 struct sockaddr_storage key_ss, gw_ss;
4143 /*
4144 * We need to compare rt_key and rt_gateway; create
4145 * local copies to get rid of any ifscope association.
4146 */
4147 (void) sa_copy(rt_key(gwrt), &key_ss, NULL);
4148 (void) sa_copy(rt->rt_gateway, &gw_ss, NULL);
4149
4150 isequal = equal(SA(&key_ss), SA(&gw_ss));
4151 } else {
4152 isequal = equal(rt_key(gwrt), rt->rt_gateway);
4153 }
4154
4155 /* If they are the same, update gwrt */
4156 if (isequal) {
4157 RT_UNLOCK(rt);
4158 lck_mtx_lock(rnh_lock);
4159 RT_LOCK(rt);
4160 rt_set_gwroute(rt, rt_key(rt), gwrt);
4161 RT_UNLOCK(rt);
4162 lck_mtx_unlock(rnh_lock);
4163 } else {
4164 RT_UNLOCK(rt);
4165 }
4166 } else {
4167 RT_UNLOCK(rt);
4168 }
4169}
39236c6e
A
4170
4171static void
4172rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
4173{
4174 VERIFY(rt_key(rt)->sa_family == AF_INET);
4175
3e170ce0 4176 if (ds != NULL) {
39236c6e
A
4177 (void) inet_ntop(AF_INET,
4178 &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen);
3e170ce0
A
4179 if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
4180 SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) {
4181 char scpstr[16];
4182
4183 snprintf(scpstr, sizeof(scpstr), "@%u",
4184 SINIFSCOPE(rt_key(rt))->sin_scope_id);
4185
4186 strlcat(ds, scpstr, dslen);
4187 }
4188 }
4189
39236c6e
A
4190 if (gs != NULL) {
4191 if (rt->rt_flags & RTF_GATEWAY) {
4192 (void) inet_ntop(AF_INET,
4193 &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen);
4194 } else if (rt->rt_ifp != NULL) {
4195 snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
4196 } else {
4197 snprintf(gs, gslen, "%s", "link");
4198 }
4199 }
4200}
4201
#if INET6
/*
 * Format the destination and gateway of an AF_INET6 route as text.
 *
 * ds/dslen: optional buffer for the destination.  When the buffer is at
 *	least MAX_SCOPE_ADDR_STR_LEN bytes and the key carries a scope
 *	other than IFSCOPE_NONE, "@<scope id>" is appended.
 * gs/gslen: optional buffer for the gateway: the gateway address for
 *	RTF_GATEWAY routes, otherwise "link#<if_unit>" when the route
 *	has an interface, or plain "link".
 *
 * Either buffer may be NULL, in which case that part is skipped.
 */
static void
rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
{
	VERIFY(rt_key(rt)->sa_family == AF_INET6);

	if (ds != NULL) {
		(void) inet_ntop(AF_INET6,
		    &SIN6(rt_key(rt))->sin6_addr, ds, dslen);

		/* Tag scoped destinations, room permitting */
		if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
		    SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) {
			char suffix[16];

			snprintf(suffix, sizeof (suffix), "@%u",
			    SIN6IFSCOPE(rt_key(rt))->sin6_scope_id);
			strlcat(ds, suffix, dslen);
		}
	}

	if (gs == NULL) {
		return;
	}

	if (rt->rt_flags & RTF_GATEWAY) {
		(void) inet_ntop(AF_INET6,
		    &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen);
	} else if (rt->rt_ifp != NULL) {
		snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
	} else {
		snprintf(gs, gslen, "%s", "link");
	}
}
#endif /* INET6 */
4234
4235
4236void
4237rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
4238{
4239 switch (rt_key(rt)->sa_family) {
4240 case AF_INET:
4241 rt_str4(rt, ds, dslen, gs, gslen);
4242 break;
4243#if INET6
4244 case AF_INET6:
4245 rt_str6(rt, ds, dslen, gs, gslen);
4246 break;
4247#endif /* INET6 */
4248 default:
4249 if (ds != NULL)
4250 bzero(ds, dslen);
4251 if (gs != NULL)
4252 bzero(gs, gslen);
4253 break;
4254 }
4255}
5ba3f43e
A
4256
4257void route_event_init(struct route_event *p_route_ev, struct rtentry *rt,
4258 struct rtentry *gwrt, int route_ev_code)
4259{
4260 VERIFY(p_route_ev != NULL);
4261 bzero(p_route_ev, sizeof(*p_route_ev));
4262
4263 p_route_ev->rt = rt;
4264 p_route_ev->gwrt = gwrt;
4265 p_route_ev->route_event_code = route_ev_code;
4266}
4267
4268static void
4269route_event_callback(void *arg)
4270{
4271 struct route_event *p_rt_ev = (struct route_event *)arg;
4272 struct rtentry *rt = p_rt_ev->rt;
4273 eventhandler_tag evtag = p_rt_ev->evtag;
4274 int route_ev_code = p_rt_ev->route_event_code;
4275
4276 if (route_ev_code == ROUTE_EVHDLR_DEREGISTER) {
4277 VERIFY(evtag != NULL);
4278 EVENTHANDLER_DEREGISTER(&rt->rt_evhdlr_ctxt, route_event,
4279 evtag);
4280 rtfree(rt);
4281 return;
4282 }
4283
4284 EVENTHANDLER_INVOKE(&rt->rt_evhdlr_ctxt, route_event, rt_key(rt),
4285 route_ev_code, (struct sockaddr *)&p_rt_ev->rt_addr,
4286 rt->rt_flags);
4287
4288 /* The code enqueuing the route event held a reference */
4289 rtfree(rt);
4290 /* XXX No reference is taken on gwrt */
4291}
4292
4293int
4294route_event_walktree(struct radix_node *rn, void *arg)
4295{
4296 struct route_event *p_route_ev = (struct route_event *)arg;
4297 struct rtentry *rt = (struct rtentry *)rn;
4298 struct rtentry *gwrt = p_route_ev->rt;
4299
4300 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
4301
4302 RT_LOCK(rt);
4303
4304 /* Return if the entry is pending cleanup */
4305 if (rt->rt_flags & RTPRF_OURS) {
4306 RT_UNLOCK(rt);
4307 return (0);
4308 }
4309
4310 /* Return if it is not an indirect route */
4311 if (!(rt->rt_flags & RTF_GATEWAY)) {
4312 RT_UNLOCK(rt);
4313 return (0);
4314 }
4315
4316 if (rt->rt_gwroute != gwrt) {
4317 RT_UNLOCK(rt);
4318 return (0);
4319 }
4320
4321 route_event_enqueue_nwk_wq_entry(rt, gwrt, p_route_ev->route_event_code,
4322 NULL, TRUE);
4323 RT_UNLOCK(rt);
4324
4325 return (0);
4326}
4327
/*
 * Work queue item carrying a route event.  route_event_enqueue_nwk_wq_entry()
 * casts a pointer to this structure to struct nwk_wq_entry * when enqueuing,
 * so nwk_wqe has to remain the first member.
 */
4328struct route_event_nwk_wq_entry
4329{
 /* Generic work queue header; must stay first (see above) */
4330 struct nwk_wq_entry nwk_wqe;
 /* Event descriptor that is passed to the callback as its argument */
4331 struct route_event rt_ev_arg;
4332};
4333
4334void
4335route_event_enqueue_nwk_wq_entry(struct rtentry *rt, struct rtentry *gwrt,
4336 uint32_t route_event_code, eventhandler_tag evtag, boolean_t rt_locked)
4337{
4338 struct route_event_nwk_wq_entry *p_rt_ev = NULL;
4339 struct sockaddr *p_gw_saddr = NULL;
4340
4341 MALLOC(p_rt_ev, struct route_event_nwk_wq_entry *,
4342 sizeof(struct route_event_nwk_wq_entry),
4343 M_NWKWQ, M_WAITOK | M_ZERO);
4344
4345 /*
4346 * If the intent is to de-register, don't take
4347 * reference, route event registration already takes
4348 * a reference on route.
4349 */
4350 if (route_event_code != ROUTE_EVHDLR_DEREGISTER) {
4351 /* The reference is released by route_event_callback */
4352 if (rt_locked)
4353 RT_ADDREF_LOCKED(rt);
4354 else
4355 RT_ADDREF(rt);
4356 }
4357
4358 p_rt_ev->rt_ev_arg.rt = rt;
4359 p_rt_ev->rt_ev_arg.gwrt = gwrt;
4360 p_rt_ev->rt_ev_arg.evtag = evtag;
4361
4362 if (gwrt != NULL)
4363 p_gw_saddr = gwrt->rt_gateway;
4364 else
4365 p_gw_saddr = rt->rt_gateway;
4366
4367 VERIFY(p_gw_saddr->sa_len <= sizeof(p_rt_ev->rt_ev_arg.rt_addr));
4368 bcopy(p_gw_saddr, &(p_rt_ev->rt_ev_arg.rt_addr), p_gw_saddr->sa_len);
4369
4370 p_rt_ev->rt_ev_arg.route_event_code = route_event_code;
4371 p_rt_ev->nwk_wqe.func = route_event_callback;
4372 p_rt_ev->nwk_wqe.is_arg_managed = TRUE;
4373 p_rt_ev->nwk_wqe.arg = &p_rt_ev->rt_ev_arg;
4374 nwk_wq_enqueue((struct nwk_wq_entry*)p_rt_ev);
4375}
4376
4377const char *
4378route_event2str(int route_event)
4379{
4380 const char *route_event_str = "ROUTE_EVENT_UNKNOWN";
4381 switch (route_event) {
4382 case ROUTE_STATUS_UPDATE:
4383 route_event_str = "ROUTE_STATUS_UPDATE";
4384 break;
4385 case ROUTE_ENTRY_REFRESH:
4386 route_event_str = "ROUTE_ENTRY_REFRESH";
4387 break;
4388 case ROUTE_ENTRY_DELETED:
4389 route_event_str = "ROUTE_ENTRY_DELETED";
4390 break;
4391 case ROUTE_LLENTRY_RESOLVED:
4392 route_event_str = "ROUTE_LLENTRY_RESOLVED";
4393 break;
4394 case ROUTE_LLENTRY_UNREACH:
4395 route_event_str = "ROUTE_LLENTRY_UNREACH";
4396 break;
4397 case ROUTE_LLENTRY_CHANGED:
4398 route_event_str = "ROUTE_LLENTRY_CHANGED";
4399 break;
4400 case ROUTE_LLENTRY_STALE:
4401 route_event_str = "ROUTE_LLENTRY_STALE";
4402 break;
4403 case ROUTE_LLENTRY_TIMEDOUT:
4404 route_event_str = "ROUTE_LLENTRY_TIMEDOUT";
4405 break;
4406 case ROUTE_LLENTRY_DELETED:
4407 route_event_str = "ROUTE_LLENTRY_DELETED";
4408 break;
4409 case ROUTE_LLENTRY_EXPIRED:
4410 route_event_str = "ROUTE_LLENTRY_EXPIRED";
4411 break;
4412 case ROUTE_LLENTRY_PROBED:
4413 route_event_str = "ROUTE_LLENTRY_PROBED";
4414 break;
4415 case ROUTE_EVHDLR_DEREGISTER:
4416 route_event_str = "ROUTE_EVHDLR_DEREGISTER";
4417 break;
4418 default:
4419 /* Init'd to ROUTE_EVENT_UNKNOWN */
4420 break;
4421 }
4422 return route_event_str;
4423}
4424
a39ff7e2
A
4425int
4426route_op_entitlement_check(struct socket *so,
4427 kauth_cred_t cred,
4428 int route_op_type,
4429 boolean_t allow_root)
4430{
4431 if (so != NULL) {
4432 if (route_op_type == ROUTE_OP_READ) {
4433 /*
4434 * If needed we can later extend this for more
4435 * granular entitlements and return a bit set of
4436 * allowed accesses.
4437 */
4438 if (soopt_cred_check(so, PRIV_NET_RESTRICTED_ROUTE_NC_READ,
cb323159 4439 allow_root, false) == 0)
a39ff7e2
A
4440 return (0);
4441 else
4442 return (-1);
4443 }
4444 } else if (cred != NULL) {
4445 uid_t uid = kauth_cred_getuid(cred);
4446
4447 /* uid is 0 for root */
4448 if (uid != 0 || !allow_root) {
4449 if (route_op_type == ROUTE_OP_READ) {
4450 if (priv_check_cred(cred,
4451 PRIV_NET_RESTRICTED_ROUTE_NC_READ, 0) == 0)
4452 return (0);
4453 else
4454 return (-1);
4455 }
4456 }
4457 }
4458 return (-1);
4459}