bsd/net/route.c (apple/xnu, xnu-6153.61.1)
/*
 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1980, 1986, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)route.c     8.2 (Berkeley) 11/15/93
 * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $
 */

#include <sys/param.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/stat.h>
#include <sys/ubc.h>
#include <sys/vnode.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/mcache.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/sdt.h>
#include <sys/kernel.h>
#include <kern/locks.h>
#include <kern/zalloc.h>

#include <net/dlil.h>
#include <net/if.h>
#include <net/route.h>
#include <net/ntstat.h>
#include <net/nwk_wq.h>
#if NECP
#include <net/necp.h>
#endif /* NECP */

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/in_arp.h>

#if INET6
#include <netinet6/ip6_var.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
#endif /* INET6 */

#include <net/if_dl.h>

#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <pexpert/pexpert.h>

#if CONFIG_MACF
#include <sys/kauth.h>
#endif
116
/*
 * Synchronization notes:
 *
 * Routing entries fall under two locking domains: the global routing table
 * lock (rnh_lock) and the per-entry lock (rt_lock); the latter is a mutex that
 * resides (statically defined) in the rtentry structure.
 *
 * The locking domains for routing are defined as follows:
 *
 * The global routing lock is used to serialize all accesses to the radix
 * trees defined by rt_tables[], as well as the tree of masks.  This includes
 * lookups, insertions and removals of nodes to/from the respective tree.
 * It is also used to protect certain fields in the route entry that aren't
 * often modified and/or require global serialization (more details below.)
 *
 * The per-route entry lock is used to serialize accesses to several routing
 * entry fields (more details below.)  Acquiring and releasing this lock is
 * done via the RT_LOCK() and RT_UNLOCK() routines.
 *
 * It is not a requirement that both locks always be held, but in cases where
 * both rnh_lock and rt_lock must be held in succession, the former must be
 * acquired first in order to maintain the correct lock ordering.
 *
 * The fields of the rtentry structure are protected in the following way:
 *
 * rt_nodes[]
 *
 *      - Routing table lock (rnh_lock).
 *
 * rt_parent, rt_mask, rt_llinfo_free, rt_tree_genid
 *
 *      - Set once during creation and never change; no locks needed to read.
 *
 * rt_flags, rt_genmask, rt_llinfo, rt_rmx, rt_refcnt, rt_gwroute
 *
 *      - Routing entry lock (rt_lock) for read/write access.
 *
 *      - Some values of rt_flags are either set once at creation time,
 *        or aren't currently used, and thus checking against them can
 *        be done without rt_lock: RTF_GATEWAY, RTF_HOST, RTF_DYNAMIC,
 *        RTF_DONE, RTF_XRESOLVE, RTF_STATIC, RTF_BLACKHOLE, RTF_ANNOUNCE,
 *        RTF_USETRAILERS, RTF_WASCLONED, RTF_PINNED, RTF_LOCAL,
 *        RTF_BROADCAST, RTF_MULTICAST, RTF_IFSCOPE, RTF_IFREF.
 *
 * rt_key, rt_gateway, rt_ifp, rt_ifa
 *
 *      - Always written/modified with both rnh_lock and rt_lock held.
 *
 *      - May be read freely with rnh_lock held; otherwise rt_lock must be
 *        held for read access.  Holding both locks for read is also okay.
 *
 *      - In the event rnh_lock is not acquired, or cannot be acquired
 *        across the operation, setting RTF_CONDEMNED on a route entry
 *        will prevent its rt_key, rt_gateway, rt_ifp and rt_ifa from
 *        being modified.  This is typically done on a route that has
 *        been chosen for removal (from the tree) prior to dropping the
 *        rt_lock, so that those values will remain the same until the
 *        route is freed.
 *
 *        When rnh_lock is held, rt_setgate(), rt_setif() and rtsetifa()
 *        are single-threaded, thus exclusive.  This flag will also
 *        prevent the route from being looked up via rt_lookup().
 *
 * rt_genid
 *
 *      - Assumes that 32-bit writes are atomic; no locks.
 *
 * rt_dlt, rt_output
 *
 *      - Currently unused; no locks.
 *
 * Operations on a route entry can be described as follows:
 *
 * CREATE an entry with reference count set to 0 as part of RTM_ADD/RESOLVE.
 *
 * INSERTION of an entry into the radix tree holds the rnh_lock, checks
 * for duplicates and then adds the entry.  rtrequest returns the entry
 * after bumping up the reference count to 1 (for the caller).
 *
 * LOOKUP of an entry holds the rnh_lock and bumps up the reference count
 * before returning; it is valid to also bump up the reference count using
 * RT_ADDREF after the lookup has returned an entry.
 *
 * REMOVAL of an entry from the radix tree holds the rnh_lock, removes the
 * entry but does not decrement the reference count.  Removal happens when
 * the route is explicitly deleted (RTM_DELETE) or when it is in the cached
 * state and it expires.  The route is said to be "down" when it is no
 * longer present in the tree.  Freeing the entry will happen on the last
 * reference release of such a "down" route.
 *
 * RT_ADDREF/RT_REMREF operates on the routing entry which increments/
 * decrements the reference count, rt_refcnt, atomically on the rtentry.
 * rt_refcnt is modified only using this routine.  The general rule is to
 * do RT_ADDREF in the function that is passing the entry as an argument,
 * in order to prevent the entry from being freed by the callee.
 */
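
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): per the ordering rule above, a writer that needs both locks
 * takes the global routing table lock first, e.g.:
 *
 *      lck_mtx_lock(rnh_lock);         // global routing table lock first
 *      RT_LOCK(rt);                    // then the per-entry lock
 *      // ... modify rt_key, rt_gateway, rt_ifp, rt_ifa ...
 *      RT_UNLOCK(rt);
 *      lck_mtx_unlock(rnh_lock);
 */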
214
#define equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)

extern void kdp_set_gateway_mac(void *gatewaymac);

__private_extern__ struct rtstat rtstat = {
        .rts_badredirect = 0,
        .rts_dynamic = 0,
        .rts_newgateway = 0,
        .rts_unreach = 0,
        .rts_wildcard = 0,
        .rts_badrtgwroute = 0
};
struct radix_node_head *rt_tables[AF_MAX+1];

decl_lck_mtx_data(, rnh_lock_data);     /* global routing tables mutex */
lck_mtx_t *rnh_lock = &rnh_lock_data;
static lck_attr_t *rnh_lock_attr;
static lck_grp_t *rnh_lock_grp;
static lck_grp_attr_t *rnh_lock_grp_attr;

/* Lock group and attribute for routing entry locks */
static lck_attr_t *rte_mtx_attr;
static lck_grp_t *rte_mtx_grp;
static lck_grp_attr_t *rte_mtx_grp_attr;

int rttrash = 0;        /* routes not in table but not freed */

boolean_t trigger_v6_defrtr_select = FALSE;
unsigned int rte_debug = 0;

/* Possible flags for rte_debug */
#define RTD_DEBUG       0x1     /* enable or disable rtentry debug facility */
#define RTD_TRACE       0x2     /* trace alloc, free, refcnt and lock */
#define RTD_NO_FREE     0x4     /* don't free (good to catch corruptions) */

#define RTE_NAME        "rtentry"       /* name for zone and rt_lock */

static struct zone *rte_zone;           /* special zone for rtentry */
#define RTE_ZONE_MAX    65536           /* maximum elements in zone */
#define RTE_ZONE_NAME   RTE_NAME        /* name of rtentry zone */

#define RTD_INUSE       0xFEEDFACE      /* entry is in use */
#define RTD_FREED       0xDEADBEEF      /* entry is freed */

#define MAX_SCOPE_ADDR_STR_LEN  (MAX_IPv6_STR_LEN + 6)

/* For gdb */
__private_extern__ unsigned int ctrace_stack_size = CTRACE_STACK_SIZE;
__private_extern__ unsigned int ctrace_hist_size = CTRACE_HIST_SIZE;

/*
 * Debug variant of rtentry structure.
 */
struct rtentry_dbg {
        struct rtentry  rtd_entry;              /* rtentry */
        struct rtentry  rtd_entry_saved;        /* saved rtentry */
        uint32_t        rtd_inuse;              /* in use pattern */
        uint16_t        rtd_refhold_cnt;        /* # of rtref */
        uint16_t        rtd_refrele_cnt;        /* # of rtunref */
        uint32_t        rtd_lock_cnt;           /* # of locks */
        uint32_t        rtd_unlock_cnt;         /* # of unlocks */
        /*
         * Alloc and free callers.
         */
        ctrace_t        rtd_alloc;
        ctrace_t        rtd_free;
        /*
         * Circular lists of rtref and rtunref callers.
         */
        ctrace_t        rtd_refhold[CTRACE_HIST_SIZE];
        ctrace_t        rtd_refrele[CTRACE_HIST_SIZE];
        /*
         * Circular lists of locks and unlocks.
         */
        ctrace_t        rtd_lock[CTRACE_HIST_SIZE];
        ctrace_t        rtd_unlock[CTRACE_HIST_SIZE];
        /*
         * Trash list linkage.
         */
        TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
};

/* List of trash route entries protected by rnh_lock */
static TAILQ_HEAD(, rtentry_dbg) rttrash_head;

static void rte_lock_init(struct rtentry *);
static void rte_lock_destroy(struct rtentry *);
static inline struct rtentry *rte_alloc_debug(void);
static inline void rte_free_debug(struct rtentry *);
static inline void rte_lock_debug(struct rtentry_dbg *);
static inline void rte_unlock_debug(struct rtentry_dbg *);
static void rt_maskedcopy(const struct sockaddr *,
    struct sockaddr *, const struct sockaddr *);
static void rtable_init(void **);
static inline void rtref_audit(struct rtentry_dbg *);
static inline void rtunref_audit(struct rtentry_dbg *);
static struct rtentry *rtalloc1_common_locked(struct sockaddr *, int, uint32_t,
    unsigned int);
static int rtrequest_common_locked(int, struct sockaddr *,
    struct sockaddr *, struct sockaddr *, int, struct rtentry **,
    unsigned int);
static struct rtentry *rtalloc1_locked(struct sockaddr *, int, uint32_t);
static void rtalloc_ign_common_locked(struct route *, uint32_t, unsigned int);
static inline void sin6_set_ifscope(struct sockaddr *, unsigned int);
static inline void sin6_set_embedded_ifscope(struct sockaddr *, unsigned int);
static inline unsigned int sin6_get_embedded_ifscope(struct sockaddr *);
static struct sockaddr *ma_copy(int, struct sockaddr *,
    struct sockaddr_storage *, unsigned int);
static struct sockaddr *sa_trim(struct sockaddr *, int);
static struct radix_node *node_lookup(struct sockaddr *, struct sockaddr *,
    unsigned int);
static struct radix_node *node_lookup_default(int);
static struct rtentry *rt_lookup_common(boolean_t, boolean_t, struct sockaddr *,
    struct sockaddr *, struct radix_node_head *, unsigned int);
static int rn_match_ifscope(struct radix_node *, void *);
static struct ifaddr *ifa_ifwithroute_common_locked(int,
    const struct sockaddr *, const struct sockaddr *, unsigned int);
static struct rtentry *rte_alloc(void);
static void rte_free(struct rtentry *);
static void rtfree_common(struct rtentry *, boolean_t);
static void rte_if_ref(struct ifnet *, int);
static void rt_set_idleref(struct rtentry *);
static void rt_clear_idleref(struct rtentry *);
static void route_event_callback(void *);
static void rt_str4(struct rtentry *, char *, uint32_t, char *, uint32_t);
#if INET6
static void rt_str6(struct rtentry *, char *, uint32_t, char *, uint32_t);
#endif /* INET6 */

uint32_t route_genid_inet = 0;
#if INET6
uint32_t route_genid_inet6 = 0;
#endif /* INET6 */

#define ASSERT_SINIFSCOPE(sa) {                                         \
        if ((sa)->sa_family != AF_INET ||                               \
            (sa)->sa_len < sizeof (struct sockaddr_in))                 \
                panic("%s: bad sockaddr_in %p\n", __func__, sa);        \
}

#define ASSERT_SIN6IFSCOPE(sa) {                                        \
        if ((sa)->sa_family != AF_INET6 ||                              \
            (sa)->sa_len < sizeof (struct sockaddr_in6))                \
                panic("%s: bad sockaddr_in6 %p\n", __func__, sa);       \
}
360
/*
 * Argument to leaf-matching routine; at present it is scoped routing
 * specific but can be expanded in future to include other search filters.
 */
struct matchleaf_arg {
        unsigned int ifscope;           /* interface scope */
};

/*
 * For looking up the non-scoped default route (sockaddr instead
 * of sockaddr_in for convenience).
 */
static struct sockaddr sin_def = {
        .sa_len = sizeof (struct sockaddr_in),
        .sa_family = AF_INET,
        .sa_data = { 0, }
};

static struct sockaddr_in6 sin6_def = {
        .sin6_len = sizeof (struct sockaddr_in6),
        .sin6_family = AF_INET6,
        .sin6_port = 0,
        .sin6_flowinfo = 0,
        .sin6_addr = IN6ADDR_ANY_INIT,
        .sin6_scope_id = 0
};

/*
 * Interface index (scope) of the primary interface; determined at
 * the time when the default, non-scoped route gets added, changed
 * or deleted.  Protected by rnh_lock.
 */
static unsigned int primary_ifscope = IFSCOPE_NONE;
static unsigned int primary6_ifscope = IFSCOPE_NONE;

#define INET_DEFAULT(sa) \
        ((sa)->sa_family == AF_INET && SIN(sa)->sin_addr.s_addr == 0)

#define INET6_DEFAULT(sa) \
        ((sa)->sa_family == AF_INET6 && \
        IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))

#define SA_DEFAULT(sa)  (INET_DEFAULT(sa) || INET6_DEFAULT(sa))
#define RT(r)           ((struct rtentry *)r)
#define RN(r)           ((struct radix_node *)r)
#define RT_HOST(r)      (RT(r)->rt_flags & RTF_HOST)

unsigned int rt_verbose = 0;
#if (DEVELOPMENT || DEBUG)
SYSCTL_DECL(_net_route);
SYSCTL_UINT(_net_route, OID_AUTO, verbose, CTLFLAG_RW | CTLFLAG_LOCKED,
    &rt_verbose, 0, "");
#endif /* (DEVELOPMENT || DEBUG) */
414
static void
rtable_init(void **table)
{
        struct domain *dom;

        domain_proto_mtx_lock_assert_held();

        TAILQ_FOREACH(dom, &domains, dom_entry) {
                if (dom->dom_rtattach != NULL)
                        dom->dom_rtattach(&table[dom->dom_family],
                            dom->dom_rtoffset);
        }
}

/*
 * Called by route_dinit().
 */
void
route_init(void)
{
        int size;

#if INET6
        _CASSERT(offsetof(struct route, ro_rt) ==
            offsetof(struct route_in6, ro_rt));
        _CASSERT(offsetof(struct route, ro_lle) ==
            offsetof(struct route_in6, ro_lle));
        _CASSERT(offsetof(struct route, ro_srcia) ==
            offsetof(struct route_in6, ro_srcia));
        _CASSERT(offsetof(struct route, ro_flags) ==
            offsetof(struct route_in6, ro_flags));
        _CASSERT(offsetof(struct route, ro_dst) ==
            offsetof(struct route_in6, ro_dst));
#endif /* INET6 */

        PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
        if (rte_debug != 0)
                rte_debug |= RTD_DEBUG;

        rnh_lock_grp_attr = lck_grp_attr_alloc_init();
        rnh_lock_grp = lck_grp_alloc_init("route", rnh_lock_grp_attr);
        rnh_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(rnh_lock, rnh_lock_grp, rnh_lock_attr);

        rte_mtx_grp_attr = lck_grp_attr_alloc_init();
        rte_mtx_grp = lck_grp_alloc_init(RTE_NAME, rte_mtx_grp_attr);
        rte_mtx_attr = lck_attr_alloc_init();

        lck_mtx_lock(rnh_lock);
        rn_init();      /* initialize all zeroes, all ones, mask table */
        lck_mtx_unlock(rnh_lock);
        rtable_init((void **)rt_tables);

        if (rte_debug & RTD_DEBUG)
                size = sizeof (struct rtentry_dbg);
        else
                size = sizeof (struct rtentry);

        rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
        if (rte_zone == NULL) {
                panic("%s: failed allocating rte_zone", __func__);
                /* NOTREACHED */
        }
        zone_change(rte_zone, Z_EXPAND, TRUE);
        zone_change(rte_zone, Z_CALLERACCT, FALSE);
        zone_change(rte_zone, Z_NOENCRYPT, TRUE);

        TAILQ_INIT(&rttrash_head);
}
484
/*
 * Given a route, determine whether or not it is the non-scoped default
 * route; dst typically comes from rt_key(rt) but may be coming from
 * a separate place when rt is in the process of being created.
 */
boolean_t
rt_primary_default(struct rtentry *rt, struct sockaddr *dst)
{
        return (SA_DEFAULT(dst) && !(rt->rt_flags & RTF_IFSCOPE));
}

/*
 * Set the ifscope of the primary interface; caller holds rnh_lock.
 */
void
set_primary_ifscope(int af, unsigned int ifscope)
{
        if (af == AF_INET)
                primary_ifscope = ifscope;
        else
                primary6_ifscope = ifscope;
}

/*
 * Return the ifscope of the primary interface; caller holds rnh_lock.
 */
unsigned int
get_primary_ifscope(int af)
{
        return (af == AF_INET ? primary_ifscope : primary6_ifscope);
}
516
/*
 * Set the scope ID of a given sockaddr_in.
 */
void
sin_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
{
        /* Caller must pass in sockaddr_in */
        ASSERT_SINIFSCOPE(sa);

        SINIFSCOPE(sa)->sin_scope_id = ifscope;
}

/*
 * Set the scope ID of a given sockaddr_in6.
 */
static inline void
sin6_set_ifscope(struct sockaddr *sa, unsigned int ifscope)
{
        /* Caller must pass in sockaddr_in6 */
        ASSERT_SIN6IFSCOPE(sa);

        SIN6IFSCOPE(sa)->sin6_scope_id = ifscope;
}
540
/*
 * Given a sockaddr_in, return the scope ID to the caller.
 */
unsigned int
sin_get_ifscope(struct sockaddr *sa)
{
        /* Caller must pass in sockaddr_in */
        ASSERT_SINIFSCOPE(sa);

        return (SINIFSCOPE(sa)->sin_scope_id);
}

/*
 * Given a sockaddr_in6, return the scope ID to the caller.
 */
unsigned int
sin6_get_ifscope(struct sockaddr *sa)
{
        /* Caller must pass in sockaddr_in6 */
        ASSERT_SIN6IFSCOPE(sa);

        return (SIN6IFSCOPE(sa)->sin6_scope_id);
}

static inline void
sin6_set_embedded_ifscope(struct sockaddr *sa, unsigned int ifscope)
{
        /* Caller must pass in sockaddr_in6 */
        ASSERT_SIN6IFSCOPE(sa);
        VERIFY(IN6_IS_SCOPE_EMBED(&(SIN6(sa)->sin6_addr)));

        SIN6(sa)->sin6_addr.s6_addr16[1] = htons(ifscope);
}

static inline unsigned int
sin6_get_embedded_ifscope(struct sockaddr *sa)
{
        /* Caller must pass in sockaddr_in6 */
        ASSERT_SIN6IFSCOPE(sa);

        return (ntohs(SIN6(sa)->sin6_addr.s6_addr16[1]));
}
583
/*
 * Copy a sockaddr_{in,in6} src to a dst storage and set scope ID into dst.
 *
 * To clear the scope ID, pass in a NULL pifscope.  To set the scope ID, pass
 * in a non-NULL pifscope with non-zero ifscope.  Otherwise if pifscope is
 * non-NULL and ifscope is IFSCOPE_NONE, the existing scope ID is left intact.
 * In any case, the effective scope ID value is returned to the caller via
 * pifscope, if it is non-NULL.
 */
struct sockaddr *
sa_copy(struct sockaddr *src, struct sockaddr_storage *dst,
    unsigned int *pifscope)
{
        int af = src->sa_family;
        unsigned int ifscope = (pifscope != NULL) ? *pifscope : IFSCOPE_NONE;

        VERIFY(af == AF_INET || af == AF_INET6);

        bzero(dst, sizeof (*dst));

        if (af == AF_INET) {
                bcopy(src, dst, sizeof (struct sockaddr_in));
                dst->ss_len = sizeof (struct sockaddr_in);
                if (pifscope == NULL || ifscope != IFSCOPE_NONE)
                        sin_set_ifscope(SA(dst), ifscope);
        } else {
                bcopy(src, dst, sizeof (struct sockaddr_in6));
                dst->ss_len = sizeof (struct sockaddr_in6);
                if (pifscope != NULL &&
                    IN6_IS_SCOPE_EMBED(&SIN6(dst)->sin6_addr)) {
                        unsigned int eifscope;
                        /*
                         * If the address contains the embedded scope ID,
                         * use that as the value for sin6_scope_id as long
                         * as the caller doesn't insist on clearing it (by
                         * passing NULL) or setting it.
                         */
                        eifscope = sin6_get_embedded_ifscope(SA(dst));
                        if (eifscope != IFSCOPE_NONE && ifscope == IFSCOPE_NONE)
                                ifscope = eifscope;
                        if (ifscope != IFSCOPE_NONE) {
                                /* Set ifscope from pifscope or eifscope */
                                sin6_set_ifscope(SA(dst), ifscope);
                        } else {
                                /* If sin6_scope_id has a value, use that one */
                                ifscope = sin6_get_ifscope(SA(dst));
                        }
                        /*
                         * If sin6_scope_id is set but the address doesn't
                         * contain the equivalent embedded value, set it.
                         */
                        if (ifscope != IFSCOPE_NONE && eifscope != ifscope)
                                sin6_set_embedded_ifscope(SA(dst), ifscope);
                } else if (pifscope == NULL || ifscope != IFSCOPE_NONE) {
                        sin6_set_ifscope(SA(dst), ifscope);
                }
        }

        if (pifscope != NULL) {
                *pifscope = (af == AF_INET) ? sin_get_ifscope(SA(dst)) :
                    sin6_get_ifscope(SA(dst));
        }

        return (SA(dst));
}
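
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): a typical caller scopes a destination to an interface by
 * passing a non-NULL pifscope holding the desired index (ifp is a
 * hypothetical interface pointer here):
 *
 *      struct sockaddr_storage ss;
 *      unsigned int ifscope = ifp->if_index;
 *      struct sockaddr *scoped_dst = sa_copy(dst, &ss, &ifscope);
 *      // ifscope now holds the effective scope ID stored in ss
 *
 * Passing a NULL pifscope instead clears any scope ID in the copy.
 */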
649
/*
 * Copy a mask from src to a dst storage and set scope ID into dst.
 */
static struct sockaddr *
ma_copy(int af, struct sockaddr *src, struct sockaddr_storage *dst,
    unsigned int ifscope)
{
        VERIFY(af == AF_INET || af == AF_INET6);

        bzero(dst, sizeof (*dst));
        rt_maskedcopy(src, SA(dst), src);

        /*
         * The length of the mask sockaddr would need to be adjusted
         * to cover the additional {sin,sin6}_ifscope field; when ifscope
         * is IFSCOPE_NONE, we'd end up clearing the scope ID field on
         * the destination mask in addition to extending the length
         * of the sockaddr, as a side effect.  This is okay, as any
         * trailing zeroes would be skipped by rn_addmask prior to
         * inserting or looking up the mask in the mask tree.
         */
        if (af == AF_INET) {
                SINIFSCOPE(dst)->sin_scope_id = ifscope;
                SINIFSCOPE(dst)->sin_len =
                    offsetof(struct sockaddr_inifscope, sin_scope_id) +
                    sizeof (SINIFSCOPE(dst)->sin_scope_id);
        } else {
                SIN6IFSCOPE(dst)->sin6_scope_id = ifscope;
                SIN6IFSCOPE(dst)->sin6_len =
                    offsetof(struct sockaddr_in6, sin6_scope_id) +
                    sizeof (SIN6IFSCOPE(dst)->sin6_scope_id);
        }

        return (SA(dst));
}

/*
 * Trim trailing zeroes on a sockaddr and update its length.
 */
static struct sockaddr *
sa_trim(struct sockaddr *sa, int skip)
{
        caddr_t cp, base = (caddr_t)sa + skip;

        if (sa->sa_len <= skip)
                return (sa);

        for (cp = base + (sa->sa_len - skip); cp > base && cp[-1] == 0; )
                cp--;

        sa->sa_len = (cp - base) + skip;
        if (sa->sa_len < skip) {
                /* Must not happen, and if so, panic */
                panic("%s: broken logic (sa_len %d < skip %d)", __func__,
                    sa->sa_len, skip);
                /* NOTREACHED */
        } else if (sa->sa_len == skip) {
                /* If we end up with all zeroes, then there's no mask */
                sa->sa_len = 0;
        }

        return (sa);
}
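
/*
 * Worked example (editor's addition, not part of the original source):
 * for an AF_INET mask of 255.255.255.0, ma_copy() extends sin_len so it
 * covers sin_scope_id; with ifscope == IFSCOPE_NONE that field is zero,
 * so a later sa_trim() with skip = offsetof(struct sockaddr_in, sin_addr)
 * drops the trailing zero bytes again, reversing the extension.
 */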
713
/*
 * Called by rtm_msg{1,2} routines to "scrub" socket address structures of
 * kernel private information, so that clients of the routing socket will
 * not be confused by the presence of the information, or the side effect of
 * the increased length due to that.  The source sockaddr is not modified;
 * instead, the scrubbing happens on the destination sockaddr storage that
 * is passed in by the caller.
 *
 * Scrubbing entails:
 * - removing embedded scope identifiers from network mask and destination
 *   IPv4 and IPv6 socket addresses
 * - optionally removing global scope interface hardware addresses from
 *   link-layer interface addresses when the MAC framework check fails.
 */
struct sockaddr *
rtm_scrub(int type, int idx, struct sockaddr *hint, struct sockaddr *sa,
    void *buf, uint32_t buflen, kauth_cred_t *credp)
{
        struct sockaddr_storage *ss = (struct sockaddr_storage *)buf;
        struct sockaddr *ret = sa;

        VERIFY(buf != NULL && buflen >= sizeof (*ss));
        bzero(buf, buflen);

        switch (idx) {
        case RTAX_DST:
                /*
                 * If this is for an AF_INET/AF_INET6 destination address,
                 * call sa_copy() to clear the scope ID field.
                 */
                if (sa->sa_family == AF_INET &&
                    SINIFSCOPE(sa)->sin_scope_id != IFSCOPE_NONE) {
                        ret = sa_copy(sa, ss, NULL);
                } else if (sa->sa_family == AF_INET6 &&
                    SIN6IFSCOPE(sa)->sin6_scope_id != IFSCOPE_NONE) {
                        ret = sa_copy(sa, ss, NULL);
                }
                break;

        case RTAX_NETMASK: {
                int skip, af;
                /*
                 * If this is for a mask, we can't tell whether or not there
                 * is a valid scope ID value, as the span of bytes between
                 * sa_len and the beginning of the mask (offset of sin_addr in
                 * the case of AF_INET, or sin6_addr for AF_INET6) may be
                 * filled with all-ones by rn_addmask(), and hence we cannot
                 * rely on sa_family.  Because of this, we use the sa_family
                 * of the hint sockaddr (RTAX_{DST,IFA}) as an indicator of
                 * whether the mask is to be treated as one for AF_INET
                 * or AF_INET6.  Clearing the scope ID field involves setting
                 * it to IFSCOPE_NONE followed by calling sa_trim() to trim
                 * trailing zeroes from the storage sockaddr, which reverses
                 * what was done earlier by ma_copy() on the source sockaddr.
                 */
                if (hint == NULL ||
                    ((af = hint->sa_family) != AF_INET && af != AF_INET6))
                        break;  /* nothing to do */

                skip = (af == AF_INET) ?
                    offsetof(struct sockaddr_in, sin_addr) :
                    offsetof(struct sockaddr_in6, sin6_addr);

                if (sa->sa_len > skip && sa->sa_len <= sizeof (*ss)) {
                        bcopy(sa, ss, sa->sa_len);
                        /*
                         * Don't use {sin,sin6}_set_ifscope() as sa_family
                         * and sa_len for the netmask might not be set to
                         * the corresponding expected values of the hint.
                         */
                        if (hint->sa_family == AF_INET)
                                SINIFSCOPE(ss)->sin_scope_id = IFSCOPE_NONE;
                        else
                                SIN6IFSCOPE(ss)->sin6_scope_id = IFSCOPE_NONE;
                        ret = sa_trim(SA(ss), skip);

                        /*
                         * For an AF_INET6 mask, set sa_len appropriately
                         * unless this is requested via sysctl_dumpentry(),
                         * in which case we return the raw value.
                         */
                        if (hint->sa_family == AF_INET6 &&
                            type != RTM_GET && type != RTM_GET2)
                                SA(ret)->sa_len = sizeof (struct sockaddr_in6);
                }
                break;
        }
        case RTAX_GATEWAY: {
                /*
                 * If the gateway is not of AF_LINK type (indirect route),
                 * there is nothing to scrub, so break.  Otherwise, check
                 * whether it has been resolved; if not yet resolved, simply
                 * break, else scrub the link-layer address below.
                 */
                if ((sa->sa_family != AF_LINK) || (SDL(sa)->sdl_alen == 0))
                        break;
                /* fallthrough */
        }
        case RTAX_IFP: {
                if (sa->sa_family == AF_LINK && credp) {
                        struct sockaddr_dl *sdl = SDL(buf);
                        const void *bytes;
                        size_t size;

                        /* caller should handle worst case: SOCK_MAXADDRLEN */
                        VERIFY(buflen >= sa->sa_len);

                        bcopy(sa, sdl, sa->sa_len);
                        bytes = dlil_ifaddr_bytes(sdl, &size, credp);
                        if (bytes != CONST_LLADDR(sdl)) {
                                VERIFY(sdl->sdl_alen == size);
                                bcopy(bytes, LLADDR(sdl), size);
                        }
                        ret = (struct sockaddr *)sdl;
                }
                break;
        }
        default:
                break;
        }

        return (ret);
}
837
/*
 * Callback leaf-matching routine for rn_matchaddr_args used
 * for looking up an exact match for a scoped route entry.
 */
static int
rn_match_ifscope(struct radix_node *rn, void *arg)
{
        struct rtentry *rt = (struct rtentry *)rn;
        struct matchleaf_arg *ma = arg;
        int af = rt_key(rt)->sa_family;

        if (!(rt->rt_flags & RTF_IFSCOPE) || (af != AF_INET && af != AF_INET6))
                return (0);

        return (af == AF_INET ?
            (SINIFSCOPE(rt_key(rt))->sin_scope_id == ma->ifscope) :
            (SIN6IFSCOPE(rt_key(rt))->sin6_scope_id == ma->ifscope));
}

/*
 * Atomically increment route generation counter
 */
void
routegenid_update(void)
{
        routegenid_inet_update();
#if INET6
        routegenid_inet6_update();
#endif /* INET6 */
}

void
routegenid_inet_update(void)
{
        atomic_add_32(&route_genid_inet, 1);
}

#if INET6
void
routegenid_inet6_update(void)
{
        atomic_add_32(&route_genid_inet6, 1);
}
#endif /* INET6 */
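
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): consumers snapshot the generation count via RT_GENID_SYNC()
 * after a lookup and later compare it to detect stale cached routes,
 * typically through a check of the following shape:
 *
 *      if (ROUTE_UNUSABLE(ro))
 *              ROUTE_RELEASE(ro);      // drop the stale cached entry
 */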
882
/*
 * Packet routing routines.
 */
void
rtalloc(struct route *ro)
{
        rtalloc_ign(ro, 0);
}

void
rtalloc_scoped(struct route *ro, unsigned int ifscope)
{
        rtalloc_scoped_ign(ro, 0, ifscope);
}

static void
rtalloc_ign_common_locked(struct route *ro, uint32_t ignore,
    unsigned int ifscope)
{
        struct rtentry *rt;

        if ((rt = ro->ro_rt) != NULL) {
                RT_LOCK_SPIN(rt);
                if (rt->rt_ifp != NULL && !ROUTE_UNUSABLE(ro)) {
                        RT_UNLOCK(rt);
                        return;
                }
                RT_UNLOCK(rt);
                ROUTE_RELEASE_LOCKED(ro);       /* rnh_lock already held */
        }
        ro->ro_rt = rtalloc1_common_locked(&ro->ro_dst, 1, ignore, ifscope);
        if (ro->ro_rt != NULL) {
                RT_GENID_SYNC(ro->ro_rt);
                RT_LOCK_ASSERT_NOTHELD(ro->ro_rt);
        }
}

void
rtalloc_ign(struct route *ro, uint32_t ignore)
{
        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(rnh_lock);
        rtalloc_ign_common_locked(ro, ignore, IFSCOPE_NONE);
        lck_mtx_unlock(rnh_lock);
}

void
rtalloc_scoped_ign(struct route *ro, uint32_t ignore, unsigned int ifscope)
{
        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(rnh_lock);
        rtalloc_ign_common_locked(ro, ignore, ifscope);
        lck_mtx_unlock(rnh_lock);
}
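
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): a typical user of the route-cache variants fills in ro_dst
 * and lets rtalloc_ign() attach a reference-held rtentry to the cache
 * (dst_sin is a hypothetical sockaddr_in with the destination):
 *
 *      struct route ro;
 *      bzero(&ro, sizeof (ro));
 *      bcopy(dst_sin, &ro.ro_dst, sizeof (struct sockaddr_in));
 *      rtalloc_ign(&ro, RTF_PRCLONING);        // skip protocol cloning
 *      if (ro.ro_rt != NULL) {
 *              // ... use ro.ro_rt ...
 *              ROUTE_RELEASE(&ro);             // drop the reference
 *      }
 */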
937
static struct rtentry *
rtalloc1_locked(struct sockaddr *dst, int report, uint32_t ignflags)
{
        return (rtalloc1_common_locked(dst, report, ignflags, IFSCOPE_NONE));
}

struct rtentry *
rtalloc1_scoped_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
        return (rtalloc1_common_locked(dst, report, ignflags, ifscope));
}

struct rtentry *
rtalloc1_common_locked(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
        struct radix_node_head *rnh = rt_tables[dst->sa_family];
        struct rtentry *rt, *newrt = NULL;
        struct rt_addrinfo info;
        uint32_t nflags;
        int err = 0, msgtype = RTM_MISS;

        if (rnh == NULL)
                goto unreachable;

        /*
         * Find the longest prefix or exact (in the scoped case) address match;
         * callee adds a reference to entry and checks for root node as well
         */
        rt = rt_lookup(FALSE, dst, NULL, rnh, ifscope);
        if (rt == NULL)
                goto unreachable;

        RT_LOCK_SPIN(rt);
        newrt = rt;
        nflags = rt->rt_flags & ~ignflags;
        RT_UNLOCK(rt);
        if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
                /*
                 * We are apparently adding (report = 0 in delete).
                 * If it requires that it be cloned, do so.
                 * (This implies it wasn't a HOST route.)
                 */
                err = rtrequest_locked(RTM_RESOLVE, dst, NULL, NULL, 0, &newrt);
                if (err) {
                        /*
                         * If the cloning didn't succeed, maybe what we
                         * have from lookup above will do.  Return that;
                         * no need to hold another reference since it's
                         * already done.
                         */
                        newrt = rt;
                        goto miss;
                }

                /*
                 * We cloned it; drop the original route found during lookup.
                 * The resulting cloned route (newrt) now has an extra
                 * reference held during rtrequest.
                 */
                rtfree_locked(rt);

                /*
                 * If the newly created cloned route is a direct host route
                 * then also check if it is to a router or not.
                 * If it is, then set the RTF_ROUTER flag on the host route
                 * for the gateway.
                 *
                 * XXX It is possible for the default route to be created
                 * after the cloned route to the router's IP has been created.
                 * We can handle that corner case by special handling for
                 * RTM_ADD of the default route.
                 */
                if ((newrt->rt_flags & (RTF_HOST | RTF_LLINFO)) ==
                    (RTF_HOST | RTF_LLINFO)) {
                        struct rtentry *defrt = NULL;
                        struct sockaddr_storage def_key;

                        bzero(&def_key, sizeof (def_key));
                        def_key.ss_len = rt_key(newrt)->sa_len;
                        def_key.ss_family = rt_key(newrt)->sa_family;

                        defrt = rtalloc1_scoped_locked(
                            (struct sockaddr *)&def_key,
                            0, 0, newrt->rt_ifp->if_index);

                        if (defrt) {
                                if (equal(rt_key(newrt), defrt->rt_gateway)) {
                                        newrt->rt_flags |= RTF_ROUTER;
                                }
                                rtfree_locked(defrt);
                        }
                }

                if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
                        /*
                         * If the new route specifies it be
                         * externally resolved, then go do that.
                         */
                        msgtype = RTM_RESOLVE;
                        goto miss;
                }
        }
        goto done;

unreachable:
        /*
         * Either we hit the root or couldn't find any match,
         * which basically means "can't get there from here".
         */
        rtstat.rts_unreach++;

miss:
        if (report) {
                /*
                 * If required, report the failure to the supervising
                 * authorities.  For a delete (report == 0), this is
                 * not an error.
                 */
                bzero((caddr_t)&info, sizeof (info));
                info.rti_info[RTAX_DST] = dst;
                rt_missmsg(msgtype, &info, 0, err);
        }
done:
        return (newrt);
}
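
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): rtalloc1() below is the common non-locked entry point; the
 * returned entry carries a reference that the caller must release:
 *
 *      struct rtentry *rt = rtalloc1(dst, 1, 0);
 *      if (rt != NULL) {
 *              RT_LOCK(rt);
 *              // ... examine rt->rt_flags, rt->rt_ifp, etc. ...
 *              RT_UNLOCK(rt);
 *              rtfree(rt);             // release the lookup reference
 *      }
 */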
1064
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, uint32_t ignflags)
{
        struct rtentry *entry;
        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(rnh_lock);
        entry = rtalloc1_locked(dst, report, ignflags);
        lck_mtx_unlock(rnh_lock);
        return (entry);
}

struct rtentry *
rtalloc1_scoped(struct sockaddr *dst, int report, uint32_t ignflags,
    unsigned int ifscope)
{
        struct rtentry *entry;
        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(rnh_lock);
        entry = rtalloc1_scoped_locked(dst, report, ignflags, ifscope);
        lck_mtx_unlock(rnh_lock);
        return (entry);
}

/*
 * Remove a reference count from an rtentry.
 * If the count gets low enough, take it out of the routing table.
 */
void
rtfree_locked(struct rtentry *rt)
{
        rtfree_common(rt, TRUE);
}

static void
rtfree_common(struct rtentry *rt, boolean_t locked)
{
        struct radix_node_head *rnh;

        LCK_MTX_ASSERT(rnh_lock, locked ?
            LCK_MTX_ASSERT_OWNED : LCK_MTX_ASSERT_NOTOWNED);

        /*
         * Atomically decrement the reference count and if it reaches 0,
         * and there is a close function defined, call the close function.
         */
        RT_LOCK_SPIN(rt);
        if (rtunref(rt) > 0) {
                RT_UNLOCK(rt);
                return;
        }

        /*
         * To avoid violating lock ordering, we must drop rt_lock before
         * trying to acquire the global rnh_lock.  If we are called with
         * rnh_lock held, then we already have exclusive access; otherwise
         * we do the lock dance.
         */
        if (!locked) {
                /*
                 * Note that we check it again below after grabbing rnh_lock,
                 * since it is possible that another thread doing a lookup
                 * wins the race, grabs the rnh_lock first, and bumps up the
                 * reference count, in which case the route should be left
                 * alone as it is still in use.  It's also possible that
                 * another thread frees the route after we drop rt_lock; to
                 * prevent the route from being freed, we hold an extra
                 * reference.
                 */
                RT_ADDREF_LOCKED(rt);
                RT_UNLOCK(rt);
                lck_mtx_lock(rnh_lock);
                RT_LOCK_SPIN(rt);
                if (rtunref(rt) > 0) {
                        /* We've lost the race, so abort */
                        RT_UNLOCK(rt);
                        goto done;
                }
        }

        /*
         * We may be blocked on other lock(s) as part of freeing
         * the entry below, so convert from spin to full mutex.
         */
        RT_CONVERT_LOCK(rt);

        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

        /* Negative refcnt must never happen */
        if (rt->rt_refcnt != 0) {
                panic("rt %p invalid refcnt %d", rt, rt->rt_refcnt);
                /* NOTREACHED */
        }
        /* Idle refcnt must have been dropped during rtunref() */
        VERIFY(!(rt->rt_flags & RTF_IFREF));

        /*
         * Find the tree for that address family.
         * Note: in the case of igmp packets, there might not be an rnh.
         */
        rnh = rt_tables[rt_key(rt)->sa_family];

        /*
         * On the last reference give the "close method" a chance to clean
         * up private state.  This also permits (for IPv4 and IPv6) a chance
         * to decide if the routing table entry should be purged immediately
         * or at a later time.  When an immediate purge is to happen the
         * close routine typically issues RTM_DELETE which clears the RTF_UP
         * flag on the entry so that the code below reclaims the storage.
         */
        if (rnh != NULL && rnh->rnh_close != NULL)
                rnh->rnh_close((struct radix_node *)rt, rnh);

        /*
         * If we are no longer "up" (and ref == 0) then we can free the
         * resources associated with the route.
         */
        if (!(rt->rt_flags & RTF_UP)) {
                struct rtentry *rt_parent;
                struct ifaddr *rt_ifa;

                rt->rt_flags |= RTF_DEAD;
                if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) {
                        panic("rt %p freed while in radix tree\n", rt);
                        /* NOTREACHED */
                }
                /*
                 * The rtentry must have been removed from the routing table,
                 * so it is accounted for in rttrash; remove that now.
                 */
                (void) OSDecrementAtomic(&rttrash);
                if (rte_debug & RTD_DEBUG) {
                        TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
                            rtd_trash_link);
                }

                /*
                 * Release references on items we hold, e.g. other
                 * routes and ifaddrs.
                 */
                if ((rt_parent = rt->rt_parent) != NULL)
                        rt->rt_parent = NULL;

                if ((rt_ifa = rt->rt_ifa) != NULL)
                        rt->rt_ifa = NULL;

                /*
                 * Now free any attached link-layer info.
                 */
                if (rt->rt_llinfo != NULL) {
                        if (rt->rt_llinfo_free != NULL)
                                (*rt->rt_llinfo_free)(rt->rt_llinfo);
                        else
                                R_Free(rt->rt_llinfo);
                        rt->rt_llinfo = NULL;
                }

                /* Destroy eventhandler lists context */
                eventhandler_lists_ctxt_destroy(&rt->rt_evhdlr_ctxt);

                /*
                 * Route is no longer in the tree and refcnt is 0;
                 * we have exclusive access, so destroy it.
                 */
                RT_UNLOCK(rt);
                rte_lock_destroy(rt);

                if (rt_parent != NULL)
                        rtfree_locked(rt_parent);

                if (rt_ifa != NULL)
                        IFA_REMREF(rt_ifa);

                /*
                 * The key is separately alloc'd so free it (see rt_setgate()).
                 * This also frees the gateway, as they are always malloc'd
                 * together.
                 */
                R_Free(rt_key(rt));

                /*
                 * Free any statistics that may have been allocated.
                 */
                nstat_route_detach(rt);

                /*
                 * And the rtentry itself, of course.
                 */
                rte_free(rt);
        } else {
                /*
                 * The "close method" has been called, but the route is
                 * still in the radix tree with zero refcnt, i.e. "up"
                 * and in the cached state.
                 */
                RT_UNLOCK(rt);
        }
done:
        if (!locked)
                lck_mtx_unlock(rnh_lock);
}

void
rtfree(struct rtentry *rt)
{
        rtfree_common(rt, FALSE);
}
1270
/*
 * Decrement the refcount without freeing the route even when the
 * refcount reaches zero.  Unless you have a really good reason,
 * use rtfree, not rtunref.
 */
int
rtunref(struct rtentry *p)
{
        RT_LOCK_ASSERT_HELD(p);

        if (p->rt_refcnt == 0) {
                panic("%s(%p) bad refcnt\n", __func__, p);
                /* NOTREACHED */
        } else if (--p->rt_refcnt == 0) {
                /*
                 * Release any idle reference count held on the interface;
                 * if the route is eligible, still UP and the refcnt becomes
                 * non-zero at some point in the future before it is purged
                 * from the routing table, rt_set_idleref() will undo this.
                 */
                rt_clear_idleref(p);
        }

        if (rte_debug & RTD_DEBUG)
                rtunref_audit((struct rtentry_dbg *)p);

        /* Return new value */
        return (p->rt_refcnt);
}

static inline void
rtunref_audit(struct rtentry_dbg *rte)
{
        uint16_t idx;

        if (rte->rtd_inuse != RTD_INUSE) {
                panic("rtunref: on freed rte=%p\n", rte);
                /* NOTREACHED */
        }
        idx = atomic_add_16_ov(&rte->rtd_refrele_cnt, 1) % CTRACE_HIST_SIZE;
        if (rte_debug & RTD_TRACE)
                ctrace_record(&rte->rtd_refrele[idx]);
}

/*
 * Add a reference count to an rtentry.
 */
void
rtref(struct rtentry *p)
{
        RT_LOCK_ASSERT_HELD(p);

        VERIFY((p->rt_flags & RTF_DEAD) == 0);
        if (++p->rt_refcnt == 0) {
                panic("%s(%p) bad refcnt\n", __func__, p);
                /* NOTREACHED */
        } else if (p->rt_refcnt == 1) {
                /*
                 * Hold an idle reference count on the interface,
                 * if the route is eligible for it.
                 */
                rt_set_idleref(p);
        }

        if (rte_debug & RTD_DEBUG)
                rtref_audit((struct rtentry_dbg *)p);
}

static inline void
rtref_audit(struct rtentry_dbg *rte)
{
        uint16_t idx;

        if (rte->rtd_inuse != RTD_INUSE) {
                panic("rtref_audit: on freed rte=%p\n", rte);
                /* NOTREACHED */
        }
        idx = atomic_add_16_ov(&rte->rtd_refhold_cnt, 1) % CTRACE_HIST_SIZE;
        if (rte_debug & RTD_TRACE)
                ctrace_record(&rte->rtd_refhold[idx]);
}
1352
void
rtsetifa(struct rtentry *rt, struct ifaddr *ifa)
{
        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

        RT_LOCK_ASSERT_HELD(rt);

        if (rt->rt_ifa == ifa)
                return;

        /* Become a regular mutex, just in case */
        RT_CONVERT_LOCK(rt);

        /* Release the old ifa */
        if (rt->rt_ifa)
                IFA_REMREF(rt->rt_ifa);

        /* Set rt_ifa */
        rt->rt_ifa = ifa;

        /* Take a reference to the ifa */
        if (rt->rt_ifa)
                IFA_ADDREF(rt->rt_ifa);
}
1377
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 */
void
rtredirect(struct ifnet *ifp, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src,
    struct rtentry **rtp)
{
        struct rtentry *rt = NULL;
        int error = 0;
        short *stat = 0;
        struct rt_addrinfo info;
        struct ifaddr *ifa = NULL;
        unsigned int ifscope = (ifp != NULL) ? ifp->if_index : IFSCOPE_NONE;
        struct sockaddr_storage ss;
        int af = src->sa_family;

        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
        lck_mtx_lock(rnh_lock);

        /*
         * Transform src into the internal routing table form for
         * comparison against rt_gateway below.
         */
#if INET6
        if ((af == AF_INET) || (af == AF_INET6)) {
#else
        if (af == AF_INET) {
#endif /* !INET6 */
                src = sa_copy(src, &ss, &ifscope);
        }

        /*
         * Verify the gateway is directly reachable; if scoped routing
         * is enabled, verify that it is reachable from the interface
         * on which the ICMP redirect arrived.
         */
        if ((ifa = ifa_ifwithnet_scoped(gateway, ifscope)) == NULL) {
                error = ENETUNREACH;
                goto out;
        }

        /* Lookup route to the destination (from the original IP header) */
        rt = rtalloc1_scoped_locked(dst, 0, RTF_CLONING|RTF_PRCLONING, ifscope);
        if (rt != NULL)
                RT_LOCK(rt);

        /*
         * If the redirect isn't from our current router for this dst,
         * it's either old or wrong.  If it redirects us to ourselves,
         * we have a routing loop, perhaps as a result of an interface
         * going down recently.  Holding rnh_lock here prevents the
         * possibility of rt_ifa/ifa's ifa_addr from changing (e.g.
         * in_ifinit), so it is okay to access ifa_addr without locking.
         */
        if (!(flags & RTF_DONE) && rt != NULL &&
            (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
            ifa->ifa_addr))) {
                error = EINVAL;
        } else {
                IFA_REMREF(ifa);
                if ((ifa = ifa_ifwithaddr(gateway))) {
                        IFA_REMREF(ifa);
                        ifa = NULL;
                        error = EHOSTUNREACH;
                }
        }

        if (ifa) {
                IFA_REMREF(ifa);
                ifa = NULL;
        }

        if (error) {
                if (rt != NULL)
                        RT_UNLOCK(rt);
                goto done;
        }

        /*
         * Create a new entry if we just got back a wildcard entry
         * or the lookup failed.  This is necessary for hosts
         * which use routing redirects generated by smart gateways
         * to dynamically build the routing tables.
         */
        if ((rt == NULL) || (rt_mask(rt) != NULL && rt_mask(rt)->sa_len < 2))
                goto create;
        /*
         * Don't listen to the redirect if it's
         * for a route to an interface.
         */
        RT_LOCK_ASSERT_HELD(rt);
        if (rt->rt_flags & RTF_GATEWAY) {
                if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
                        /*
                         * Changing from route to net => route to host.
                         * Create new route, rather than smashing route
                         * to net; similar to cloned routes, the newly
                         * created host route is scoped as well.
                         */
create:
                        if (rt != NULL)
                                RT_UNLOCK(rt);
                        flags |= RTF_GATEWAY | RTF_DYNAMIC;
                        error = rtrequest_scoped_locked(RTM_ADD, dst,
                            gateway, netmask, flags, NULL, ifscope);
                        stat = &rtstat.rts_dynamic;
                } else {
                        /*
                         * Smash the current notion of the gateway to
                         * this destination.  Should check about netmask!!!
                         */
                        rt->rt_flags |= RTF_MODIFIED;
                        flags |= RTF_MODIFIED;
                        stat = &rtstat.rts_newgateway;
                        /*
                         * Add the key and gateway (in one malloc'd chunk).
                         */
                        error = rt_setgate(rt, rt_key(rt), gateway);
                        RT_UNLOCK(rt);
                }
        } else {
                RT_UNLOCK(rt);
                error = EHOSTUNREACH;
        }
done:
        if (rt != NULL) {
                RT_LOCK_ASSERT_NOTHELD(rt);
                if (!error) {
                        /* Enqueue event to refresh flow route entries */
                        route_event_enqueue_nwk_wq_entry(rt, NULL,
                            ROUTE_ENTRY_REFRESH, NULL, FALSE);
                        if (rtp)
                                *rtp = rt;
                        else
                                rtfree_locked(rt);
                } else {
                        rtfree_locked(rt);
                }
        }
out:
        if (error) {
                rtstat.rts_badredirect++;
        } else {
                if (stat != NULL)
                        (*stat)++;

                if (af == AF_INET)
                        routegenid_inet_update();
#if INET6
                else if (af == AF_INET6)
                        routegenid_inet6_update();
#endif /* INET6 */
        }
        lck_mtx_unlock(rnh_lock);
        bzero((caddr_t)&info, sizeof (info));
        info.rti_info[RTAX_DST] = dst;
        info.rti_info[RTAX_GATEWAY] = gateway;
        info.rti_info[RTAX_NETMASK] = netmask;
        info.rti_info[RTAX_AUTHOR] = src;
        rt_missmsg(RTM_REDIRECT, &info, flags, error);
}
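
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): an ICMP redirect handler would typically invoke this with
 * the destination, new gateway and advertising router taken from the
 * packet (sdst, sgw, ssrc and inifp are hypothetical names here):
 *
 *      rtredirect(inifp, (struct sockaddr *)&sdst,
 *          (struct sockaddr *)&sgw, NULL, RTF_GATEWAY | RTF_HOST,
 *          (struct sockaddr *)&ssrc, NULL);
 */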
1542
/*
 * Routing table ioctl interface.
 */
int
rtioctl(unsigned long req, caddr_t data, struct proc *p)
{
#pragma unused(p, req, data)
        return (ENXIO);
}

struct ifaddr *
ifa_ifwithroute(
        int flags,
        const struct sockaddr *dst,
        const struct sockaddr *gateway)
{
        struct ifaddr *ifa;

        lck_mtx_lock(rnh_lock);
        ifa = ifa_ifwithroute_locked(flags, dst, gateway);
        lck_mtx_unlock(rnh_lock);

        return (ifa);
}

struct ifaddr *
ifa_ifwithroute_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway)
{
        return (ifa_ifwithroute_common_locked((flags & ~RTF_IFSCOPE), dst,
            gateway, IFSCOPE_NONE));
}

struct ifaddr *
ifa_ifwithroute_scoped_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gateway, unsigned int ifscope)
{
        if (ifscope != IFSCOPE_NONE)
                flags |= RTF_IFSCOPE;
        else
                flags &= ~RTF_IFSCOPE;

        return (ifa_ifwithroute_common_locked(flags, dst, gateway, ifscope));
}

static struct ifaddr *
ifa_ifwithroute_common_locked(int flags, const struct sockaddr *dst,
    const struct sockaddr *gw, unsigned int ifscope)
{
        struct ifaddr *ifa = NULL;
        struct rtentry *rt = NULL;
        struct sockaddr_storage dst_ss, gw_ss;

        LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);

        /*
         * Just in case the sockaddr passed in by the caller
         * contains a scope ID, make sure to clear it since
         * interface addresses aren't scoped.
         */
#if INET6
        if (dst != NULL &&
            ((dst->sa_family == AF_INET) ||
            (dst->sa_family == AF_INET6)))
#else
        if (dst != NULL && dst->sa_family == AF_INET)
#endif /* !INET6 */
                dst = sa_copy(SA((uintptr_t)dst), &dst_ss, NULL);

#if INET6
        if (gw != NULL &&
            ((gw->sa_family == AF_INET) ||
            (gw->sa_family == AF_INET6)))
#else
        if (gw != NULL && gw->sa_family == AF_INET)
#endif /* !INET6 */
                gw = sa_copy(SA((uintptr_t)gw), &gw_ss, NULL);

        if (!(flags & RTF_GATEWAY)) {
                /*
                 * If we are adding a route to an interface,
                 * and the interface is a point-to-point link,
                 * we should search for the destination
                 * as our clue to the interface.  Otherwise
                 * we can use the local address.
                 */
                if (flags & RTF_HOST) {
                        ifa = ifa_ifwithdstaddr(dst);
                }
                if (ifa == NULL)
                        ifa = ifa_ifwithaddr_scoped(gw, ifscope);
        } else {
                /*
                 * If we are adding a route to a remote net
                 * or host, the gateway may still be on the
                 * other end of a point-to-point link.
                 */
                ifa = ifa_ifwithdstaddr(gw);
        }
        if (ifa == NULL)
                ifa = ifa_ifwithnet_scoped(gw, ifscope);
        if (ifa == NULL) {
                /* Workaround to avoid gcc warning regarding const variable */
                rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)dst,
                    0, 0, ifscope);
                if (rt != NULL) {
                        RT_LOCK_SPIN(rt);
                        ifa = rt->rt_ifa;
                        if (ifa != NULL) {
                                /* Become a regular mutex */
                                RT_CONVERT_LOCK(rt);
                                IFA_ADDREF(ifa);
                        }
                        RT_REMREF_LOCKED(rt);
                        RT_UNLOCK(rt);
                        rt = NULL;
                }
        }
        /*
         * Holding rnh_lock here prevents the possibility of ifa from
         * changing (e.g. in_ifinit), so it is safe to access its
         * ifa_addr (here and down below) without locking.
         */
        if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
                struct ifaddr *newifa;
                /* Callee adds reference to newifa upon success */
                newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
                if (newifa != NULL) {
                        IFA_REMREF(ifa);
                        ifa = newifa;
                }
        }
        /*
         * If we are adding a gateway, it is quite possible that the
         * routing table has a static entry in place for the gateway,
         * that may not agree with info garnered from the interfaces.
         * The routing table should take precedence over the interfaces
         * in this matter.  Must be careful not to stomp on new entries
         * from rtinit, hence (ifa->ifa_addr != gw).
         */
        if ((ifa == NULL ||
            !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gw)) &&
            (rt = rtalloc1_scoped_locked((struct sockaddr *)(size_t)gw,
            0, 0, ifscope)) != NULL) {
                if (ifa != NULL)
                        IFA_REMREF(ifa);
                RT_LOCK_SPIN(rt);
                ifa = rt->rt_ifa;
                if (ifa != NULL) {
                        /* Become a regular mutex */
                        RT_CONVERT_LOCK(rt);
                        IFA_ADDREF(ifa);
                }
                RT_REMREF_LOCKED(rt);
                RT_UNLOCK(rt);
        }
        /*
         * If an interface scope was specified, the interface index of
         * the found ifaddr must be equivalent to that of the scope;
         * otherwise there is no match.
         */
        if ((flags & RTF_IFSCOPE) &&
            ifa != NULL && ifa->ifa_ifp->if_index != ifscope) {
                IFA_REMREF(ifa);
                ifa = NULL;
        }

        /*
         * ifa's address family must match destination's address family
         * after all is said and done.
         */
        if (ifa != NULL &&
            ifa->ifa_addr->sa_family != dst->sa_family) {
                IFA_REMREF(ifa);
                ifa = NULL;
        }

        return (ifa);
}
1722
static int rt_fixdelete(struct radix_node *, void *);
static int rt_fixchange(struct radix_node *, void *);

struct rtfc_arg {
        struct rtentry *rt0;
        struct radix_node_head *rnh;
};

int
rtrequest_locked(int req, struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
{
        return (rtrequest_common_locked(req, dst, gateway, netmask,
            (flags & ~RTF_IFSCOPE), ret_nrt, IFSCOPE_NONE));
}

int
rtrequest_scoped_locked(int req, struct sockaddr *dst,
    struct sockaddr *gateway, struct sockaddr *netmask, int flags,
    struct rtentry **ret_nrt, unsigned int ifscope)
{
        if (ifscope != IFSCOPE_NONE)
                flags |= RTF_IFSCOPE;
        else
                flags &= ~RTF_IFSCOPE;

        return (rtrequest_common_locked(req, dst, gateway, netmask,
            flags, ret_nrt, ifscope));
}
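
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source): adding a host route pinned to a particular interface goes
 * through the scoped variant, which sets RTF_IFSCOPE and stores the
 * scope ID in the radix key internally (ifp is hypothetical):
 *
 *      struct rtentry *rt = NULL;
 *      int error;
 *
 *      lck_mtx_lock(rnh_lock);
 *      error = rtrequest_scoped_locked(RTM_ADD, dst, gateway, NULL,
 *          RTF_HOST | RTF_GATEWAY | RTF_STATIC, &rt, ifp->if_index);
 *      lck_mtx_unlock(rnh_lock);
 *      if (error == 0 && rt != NULL)
 *              rtfree(rt);             // drop the reference from RTM_ADD
 */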
1752
1753 /*
1754 * Do appropriate manipulations of a routing tree given all the bits of
1755 * info needed.
1756 *
1757 * Storing the scope ID in the radix key is an internal job that should be
1758 * left to routines in this module. Callers should specify the scope value
1759 * to the "scoped" variants of route routines instead of manipulating the
1760 * key itself. This is typically done when creating a scoped route, e.g.
1761 * rtrequest(RTM_ADD). Once such a route is created and marked with the
1762 * RTF_IFSCOPE flag, callers can simply use its rt_key(rt) to clone it
1763 * (RTM_RESOLVE) or to remove it (RTM_DELETE). An exception to this is
1764 * during certain routing socket operations where the search key might be
1765 * derived from the routing message itself, in which case the caller must
1766 * specify the destination address and scope value for RTM_ADD/RTM_DELETE.
1767 */
1768 static int
1769 rtrequest_common_locked(int req, struct sockaddr *dst0,
1770 struct sockaddr *gateway, struct sockaddr *netmask, int flags,
1771 struct rtentry **ret_nrt, unsigned int ifscope)
1772 {
1773 int error = 0;
1774 struct rtentry *rt;
1775 struct radix_node *rn;
1776 struct radix_node_head *rnh;
1777 struct ifaddr *ifa = NULL;
1778 struct sockaddr *ndst, *dst = dst0;
1779 struct sockaddr_storage ss, mask;
1780 struct timeval caltime;
1781 int af = dst->sa_family;
1782 void (*ifa_rtrequest)(int, struct rtentry *, struct sockaddr *);
1783
1784 #define senderr(x) { error = x; goto bad; }
1785
1786 DTRACE_ROUTE6(rtrequest, int, req, struct sockaddr *, dst0,
1787 struct sockaddr *, gateway, struct sockaddr *, netmask,
1788 int, flags, unsigned int, ifscope);
1789
1790 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
1791 /*
1792 * Find the correct routing tree to use for this Address Family
1793 */
1794 if ((rnh = rt_tables[af]) == NULL)
1795 senderr(ESRCH);
1796 /*
1797 * If we are adding a host route then we don't want to put
1798 * a netmask in the tree
1799 */
1800 if (flags & RTF_HOST)
1801 netmask = NULL;
1802
1803 /*
1804 * If Scoped Routing is enabled, use a local copy of the destination
1805 * address to store the scope ID into. This logic is repeated below
1806 * in the RTM_RESOLVE handler since the caller does not normally
1807 * specify such a flag during a resolve, as well as for the handling
1808 * of IPv4 link-local addresses; instead, it passes in the route used
1809 * for cloning, from which the scope info is derived. Note also that
1810 * in the case of RTM_DELETE, the address passed in by the caller
1811 * might already contain the scope ID info when it is the key itself,
1812 * thus making RTF_IFSCOPE unnecessary; one instance where it is
1813 * explicitly set is inside route_output() as part of handling a
1814 * routing socket request.
1815 */
1816 #if INET6
1817 if (req != RTM_RESOLVE && ((af == AF_INET) || (af == AF_INET6))) {
1818 #else
1819 if (req != RTM_RESOLVE && af == AF_INET) {
1820 #endif /* !INET6 */
1821 /* Transform dst into the internal routing table form */
1822 dst = sa_copy(dst, &ss, &ifscope);
1823
1824 /* Transform netmask into the internal routing table form */
1825 if (netmask != NULL)
1826 netmask = ma_copy(af, netmask, &mask, ifscope);
1827
1828 if (ifscope != IFSCOPE_NONE)
1829 flags |= RTF_IFSCOPE;
1830 } else if ((flags & RTF_IFSCOPE) &&
1831 (af != AF_INET && af != AF_INET6)) {
1832 senderr(EINVAL);
1833 }
1834
1835 if (ifscope == IFSCOPE_NONE)
1836 flags &= ~RTF_IFSCOPE;
1837
1838 switch (req) {
1839 case RTM_DELETE: {
1840 struct rtentry *gwrt = NULL;
1841 boolean_t was_router = FALSE;
1842 uint32_t old_rt_refcnt = 0;
1843 /*
1844 * Remove the item from the tree and return it.
1845 * Complain if it is not there and do no more processing.
1846 */
1847 if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == NULL)
1848 senderr(ESRCH);
1849 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) {
1850 panic("rtrequest delete");
1851 /* NOTREACHED */
1852 }
1853 rt = (struct rtentry *)rn;
1854
1855 RT_LOCK(rt);
1856 old_rt_refcnt = rt->rt_refcnt;
1857 rt->rt_flags &= ~RTF_UP;
1858 /*
1859 * Release any idle reference count held on the interface
1860 * as this route is no longer externally visible.
1861 */
1862 rt_clear_idleref(rt);
1863 /*
1864 * Take an extra reference to handle the deletion of a route
1865 * entry whose reference count is already 0; e.g. an expiring
1866 * cloned route entry or an entry that was added to the table
1867 * with 0 reference. If the caller is interested in this route,
1868 * we will return it with the reference intact. Otherwise we
1869 * will decrement the reference via rtfree_locked() and then
1870 * possibly deallocate it.
1871 */
1872 RT_ADDREF_LOCKED(rt);
1873
1874 /*
1875 * For consistency, in case the caller didn't set the flag.
1876 */
1877 rt->rt_flags |= RTF_CONDEMNED;
1878
1879 /*
1880 * Clear RTF_ROUTER if it's set.
1881 */
1882 if (rt->rt_flags & RTF_ROUTER) {
1883 was_router = TRUE;
1884 VERIFY(rt->rt_flags & RTF_HOST);
1885 rt->rt_flags &= ~RTF_ROUTER;
1886 }
1887
1888 /*
1889 * Enqueue work item to invoke callback for this route entry
1890 *
1891 * If the old count is 0, it implies that the last reference is being
1892 * removed and there's no one listening for this route event.
1893 */
1894 if (old_rt_refcnt != 0)
1895 route_event_enqueue_nwk_wq_entry(rt, NULL,
1896 ROUTE_ENTRY_DELETED, NULL, TRUE);
1897
1898 /*
1899 * Now search what's left of the subtree for any cloned
1900 * routes which might have been formed from this node.
1901 */
1902 if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
1903 rt_mask(rt)) {
1904 RT_UNLOCK(rt);
1905 rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
1906 rt_fixdelete, rt);
1907 RT_LOCK(rt);
1908 }
1909
1910 if (was_router) {
1911 struct route_event rt_ev;
1912 route_event_init(&rt_ev, rt, NULL, ROUTE_LLENTRY_DELETED);
1913 RT_UNLOCK(rt);
1914 (void) rnh->rnh_walktree(rnh,
1915 route_event_walktree, (void *)&rt_ev);
1916 RT_LOCK(rt);
1917 }
1918
1919 /*
1920 * Remove any external references we may have.
1921 */
1922 if ((gwrt = rt->rt_gwroute) != NULL)
1923 rt->rt_gwroute = NULL;
1924
1925 /*
1926 * give the protocol a chance to keep things in sync.
1927 */
1928 if ((ifa = rt->rt_ifa) != NULL) {
1929 IFA_LOCK_SPIN(ifa);
1930 ifa_rtrequest = ifa->ifa_rtrequest;
1931 IFA_UNLOCK(ifa);
1932 if (ifa_rtrequest != NULL)
1933 ifa_rtrequest(RTM_DELETE, rt, NULL);
1934 /* keep reference on rt_ifa */
1935 ifa = NULL;
1936 }
1937
1938 /*
1939 * one more rtentry floating around that is not
1940 * linked to the routing table.
1941 */
1942 (void) OSIncrementAtomic(&rttrash);
1943 if (rte_debug & RTD_DEBUG) {
1944 TAILQ_INSERT_TAIL(&rttrash_head,
1945 (struct rtentry_dbg *)rt, rtd_trash_link);
1946 }
1947
1948 /*
1949 * If this is the (non-scoped) default route, clear
1950 * the interface index used for the primary ifscope.
1951 */
1952 if (rt_primary_default(rt, rt_key(rt))) {
1953 set_primary_ifscope(rt_key(rt)->sa_family,
1954 IFSCOPE_NONE);
1955 if ((rt->rt_flags & RTF_STATIC) &&
1956 rt_key(rt)->sa_family == PF_INET6) {
1957 trigger_v6_defrtr_select = TRUE;
1958 }
1959 }
1960
1961 #if NECP
1962 /*
1963 * If this is a change in a default route, update
1964 * necp client watchers to re-evaluate
1965 */
1966 if (SA_DEFAULT(rt_key(rt))) {
1967 if (rt->rt_ifp != NULL) {
1968 ifnet_touch_lastupdown(rt->rt_ifp);
1969 }
1970 necp_update_all_clients();
1971 }
1972 #endif /* NECP */
1973
1974 RT_UNLOCK(rt);
1975
1976 /*
1977 * This might result in another rtentry being freed if
1978 * we held its last reference. Do this after the rtentry
1979 * lock is dropped above, as it could lead to the same
1980 * lock being acquired if gwrt is a clone of rt.
1981 */
1982 if (gwrt != NULL)
1983 rtfree_locked(gwrt);
1984
1985 /*
1986 * If the caller wants it, then it can have it,
1987 * but it's up to it to free the rtentry as we won't be
1988 * doing it.
1989 */
1990 if (ret_nrt != NULL) {
1991 /* Return the route to caller with reference intact */
1992 *ret_nrt = rt;
1993 } else {
1994 /* Dereference or deallocate the route */
1995 rtfree_locked(rt);
1996 }
1997 if (af == AF_INET)
1998 routegenid_inet_update();
1999 #if INET6
2000 else if (af == AF_INET6)
2001 routegenid_inet6_update();
2002 #endif /* INET6 */
2003 break;
2004 }
2005 case RTM_RESOLVE:
2006 if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
2007 senderr(EINVAL);
2008 /*
2009 * According to the UNIX conformance tests, we need to return
2010 * ENETUNREACH when the parent route is RTF_REJECT.
2011 * However, there isn't any point in cloning RTF_REJECT
2012 * routes, so we immediately return an error.
2013 */
2014 if (rt->rt_flags & RTF_REJECT) {
2015 if (rt->rt_flags & RTF_HOST) {
2016 senderr(EHOSTUNREACH);
2017 } else {
2018 senderr(ENETUNREACH);
2019 }
2020 }
2021 /*
2022 * If cloning, we have the parent route given by the caller
2023 * and will use its rt_gateway, rt_rmx as part of the cloning
2024 * process below. Since rnh_lock is held at this point, the
2025 * parent's rt_ifa and rt_gateway will not change, and its
2026 * relevant rt_flags will not change as well. The only thing
2027 * that could change are the metrics, and thus we hold the
2028 * parent route's rt_lock later on during the actual copying
2029 * of rt_rmx.
2030 */
2031 ifa = rt->rt_ifa;
2032 IFA_ADDREF(ifa);
2033 flags = rt->rt_flags &
2034 ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
2035 flags |= RTF_WASCLONED;
2036 gateway = rt->rt_gateway;
2037 if ((netmask = rt->rt_genmask) == NULL)
2038 flags |= RTF_HOST;
2039
2040 #if INET6
2041 if (af != AF_INET && af != AF_INET6)
2042 #else
2043 if (af != AF_INET)
2044 #endif /* !INET6 */
2045 goto makeroute;
2046
2047 /*
2048 * When scoped routing is enabled, cloned entries are
2049 * always scoped according to the interface portion of
2050 * the parent route. The exception to this are IPv4
2051 * link local addresses, or those routes that are cloned
2052 * from a RTF_PROXY route. For the latter, the clone
2053 * gets to keep the RTF_PROXY flag.
2054 */
2055 if ((af == AF_INET &&
2056 IN_LINKLOCAL(ntohl(SIN(dst)->sin_addr.s_addr))) ||
2057 (rt->rt_flags & RTF_PROXY)) {
2058 ifscope = IFSCOPE_NONE;
2059 flags &= ~RTF_IFSCOPE;
2060 /*
2061 * These types of cloned routes aren't currently
2062 * eligible for idle interface reference counting.
2063 */
2064 flags |= RTF_NOIFREF;
2065 } else {
2066 if (flags & RTF_IFSCOPE) {
2067 ifscope = (af == AF_INET) ?
2068 sin_get_ifscope(rt_key(rt)) :
2069 sin6_get_ifscope(rt_key(rt));
2070 } else {
2071 ifscope = rt->rt_ifp->if_index;
2072 flags |= RTF_IFSCOPE;
2073 }
2074 VERIFY(ifscope != IFSCOPE_NONE);
2075 }
2076
2077 /*
2078 * Transform dst into the internal routing table form,
2079 * clearing out the scope ID field if ifscope isn't set.
2080 */
2081 dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ?
2082 NULL : &ifscope);
2083
2084 /* Transform netmask into the internal routing table form */
2085 if (netmask != NULL)
2086 netmask = ma_copy(af, netmask, &mask, ifscope);
2087
2088 goto makeroute;
2089
2090 case RTM_ADD:
2091 if ((flags & RTF_GATEWAY) && !gateway) {
2092 panic("rtrequest: RTF_GATEWAY but no gateway");
2093 /* NOTREACHED */
2094 }
2095 if (flags & RTF_IFSCOPE) {
2096 ifa = ifa_ifwithroute_scoped_locked(flags, dst0,
2097 gateway, ifscope);
2098 } else {
2099 ifa = ifa_ifwithroute_locked(flags, dst0, gateway);
2100 }
2101 if (ifa == NULL)
2102 senderr(ENETUNREACH);
2103 makeroute:
2104 /*
2105 * We end up here for both RTM_RESOLVE and RTM_ADD
2106 * when we decide to create a route.
2107 */
2108 if ((rt = rte_alloc()) == NULL)
2109 senderr(ENOBUFS);
2110 Bzero(rt, sizeof(*rt));
2111 rte_lock_init(rt);
2112 eventhandler_lists_ctxt_init(&rt->rt_evhdlr_ctxt);
2113 getmicrotime(&caltime);
2114 rt->base_calendartime = caltime.tv_sec;
2115 rt->base_uptime = net_uptime();
2116 RT_LOCK(rt);
2117 rt->rt_flags = RTF_UP | flags;
2118
2119 /*
2120 * Point the generation ID to the tree's.
2121 */
2122 switch (af) {
2123 case AF_INET:
2124 rt->rt_tree_genid = &route_genid_inet;
2125 break;
2126 #if INET6
2127 case AF_INET6:
2128 rt->rt_tree_genid = &route_genid_inet6;
2129 break;
2130 #endif /* INET6 */
2131 default:
2132 break;
2133 }
2134
2135 /*
2136 * Add the gateway, possibly re-malloc-ing the storage for it;
2137 * also add the rt_gwroute if possible.
2138 */
2139 if ((error = rt_setgate(rt, dst, gateway)) != 0) {
2140 int tmp = error;
2141 RT_UNLOCK(rt);
2142 nstat_route_detach(rt);
2143 rte_lock_destroy(rt);
2144 rte_free(rt);
2145 senderr(tmp);
2146 }
2147
2148 /*
2149 * point to the (possibly newly malloc'd) dest address.
2150 */
2151 ndst = rt_key(rt);
2152
2153 /*
2154 * make sure it contains the value we want (masked if needed).
2155 */
2156 if (netmask)
2157 rt_maskedcopy(dst, ndst, netmask);
2158 else
2159 Bcopy(dst, ndst, dst->sa_len);
2160
2161 /*
2162 * Note that we now have a reference to the ifa.
2163 * This moved from below so that rnh->rnh_addaddr() can
2164 * examine the ifa and ifa->ifa_ifp if it so desires.
2165 */
2166 rtsetifa(rt, ifa);
2167 rt->rt_ifp = rt->rt_ifa->ifa_ifp;
2168
2169 /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
2170
2171 rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
2172 rnh, rt->rt_nodes);
2173 if (rn == NULL) {
2174 struct rtentry *rt2;
2175 /*
2176 * Uh-oh, we already have one of these in the tree.
2177 * We do a special hack: if the route that's already
2178 * there was generated by the protocol-cloning
2179 * mechanism, then we just blow it away and retry
2180 * the insertion of the new one.
2181 */
2182 if (flags & RTF_IFSCOPE) {
2183 rt2 = rtalloc1_scoped_locked(dst0, 0,
2184 RTF_CLONING | RTF_PRCLONING, ifscope);
2185 } else {
2186 rt2 = rtalloc1_locked(dst, 0,
2187 RTF_CLONING | RTF_PRCLONING);
2188 }
2189 if (rt2 && rt2->rt_parent) {
2190 /*
2191 * rnh_lock is held here, so rt_key and
2192 * rt_gateway of rt2 will not change.
2193 */
2194 (void) rtrequest_locked(RTM_DELETE, rt_key(rt2),
2195 rt2->rt_gateway, rt_mask(rt2),
2196 rt2->rt_flags, NULL);
2197 rtfree_locked(rt2);
2198 rn = rnh->rnh_addaddr((caddr_t)ndst,
2199 (caddr_t)netmask, rnh, rt->rt_nodes);
2200 } else if (rt2) {
2201 /* undo the extra ref we got */
2202 rtfree_locked(rt2);
2203 }
2204 }
2205
2206 /*
2207 * If it still failed to go into the tree,
2208 * then un-make it (this should be a function)
2209 */
2210 if (rn == NULL) {
2211 /* Clear gateway route */
2212 rt_set_gwroute(rt, rt_key(rt), NULL);
2213 if (rt->rt_ifa) {
2214 IFA_REMREF(rt->rt_ifa);
2215 rt->rt_ifa = NULL;
2216 }
2217 R_Free(rt_key(rt));
2218 RT_UNLOCK(rt);
2219 nstat_route_detach(rt);
2220 rte_lock_destroy(rt);
2221 rte_free(rt);
2222 senderr(EEXIST);
2223 }
2224
2225 rt->rt_parent = NULL;
2226
2227 /*
2228 * If we got here from RESOLVE, then we are cloning so clone
2229 * the rest, and note that we are a clone (and increment the
2230 * parent's references). rnh_lock is still held, which prevents
2231 * a lookup from returning the newly-created route. Hence
2232 * holding and releasing the parent's rt_lock while still
2233 * holding the route's rt_lock is safe since the new route
2234 * is not yet externally visible.
2235 */
2236 if (req == RTM_RESOLVE) {
2237 RT_LOCK_SPIN(*ret_nrt);
2238 VERIFY((*ret_nrt)->rt_expire == 0 ||
2239 (*ret_nrt)->rt_rmx.rmx_expire != 0);
2240 VERIFY((*ret_nrt)->rt_expire != 0 ||
2241 (*ret_nrt)->rt_rmx.rmx_expire == 0);
2242 rt->rt_rmx = (*ret_nrt)->rt_rmx;
2243 rt_setexpire(rt, (*ret_nrt)->rt_expire);
2244 if ((*ret_nrt)->rt_flags &
2245 (RTF_CLONING | RTF_PRCLONING)) {
2246 rt->rt_parent = (*ret_nrt);
2247 RT_ADDREF_LOCKED(*ret_nrt);
2248 }
2249 RT_UNLOCK(*ret_nrt);
2250 }
2251
2252 /*
2253 * if this protocol has something to add to this then
2254 * allow it to do that as well.
2255 */
2256 IFA_LOCK_SPIN(ifa);
2257 ifa_rtrequest = ifa->ifa_rtrequest;
2258 IFA_UNLOCK(ifa);
2259 if (ifa_rtrequest != NULL)
2260 ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : NULL));
2261 IFA_REMREF(ifa);
2262 ifa = NULL;
2263
2264 /*
2265 * If this is the (non-scoped) default route, record
2266 * the interface index used for the primary ifscope.
2267 */
2268 if (rt_primary_default(rt, rt_key(rt))) {
2269 set_primary_ifscope(rt_key(rt)->sa_family,
2270 rt->rt_ifp->if_index);
2271 }
2272
2273 #if NECP
2274 /*
2275 * If this is a change in a default route, update
2276 * necp client watchers to re-evaluate
2277 */
2278 if (SA_DEFAULT(rt_key(rt))) {
2279 if (rt->rt_ifp != NULL) {
2280 ifnet_touch_lastupdown(rt->rt_ifp);
2281 }
2282 necp_update_all_clients();
2283 }
2284 #endif /* NECP */
2285
2286 /*
2287 * actually return a resultant rtentry and
2288 * give the caller a single reference.
2289 */
2290 if (ret_nrt) {
2291 *ret_nrt = rt;
2292 RT_ADDREF_LOCKED(rt);
2293 }
2294
2295 if (af == AF_INET)
2296 routegenid_inet_update();
2297 #if INET6
2298 else if (af == AF_INET6)
2299 routegenid_inet6_update();
2300 #endif /* INET6 */
2301
2302 RT_GENID_SYNC(rt);
2303
2304 /*
2305 * We repeat the same procedures from rt_setgate() here
2306 * because they weren't completed when we called it earlier,
2307 * since the node was embryonic.
2308 */
2309 if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL)
2310 rt_set_gwroute(rt, rt_key(rt), rt->rt_gwroute);
2311
2312 if (req == RTM_ADD &&
2313 !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
2314 struct rtfc_arg arg;
2315 arg.rnh = rnh;
2316 arg.rt0 = rt;
2317 RT_UNLOCK(rt);
2318 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
2319 rt_fixchange, &arg);
2320 } else {
2321 RT_UNLOCK(rt);
2322 }
2323
2324 nstat_route_new_entry(rt);
2325 break;
2326 }
2327 bad:
2328 if (ifa)
2329 IFA_REMREF(ifa);
2330 return (error);
2331 }
2332 #undef senderr
2333
2334 int
2335 rtrequest(int req, struct sockaddr *dst, struct sockaddr *gateway,
2336 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt)
2337 {
2338 int error;
2339 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2340 lck_mtx_lock(rnh_lock);
2341 error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt);
2342 lck_mtx_unlock(rnh_lock);
2343 return (error);
2344 }
2345
2346 int
2347 rtrequest_scoped(int req, struct sockaddr *dst, struct sockaddr *gateway,
2348 struct sockaddr *netmask, int flags, struct rtentry **ret_nrt,
2349 unsigned int ifscope)
2350 {
2351 int error;
2352 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
2353 lck_mtx_lock(rnh_lock);
2354 error = rtrequest_scoped_locked(req, dst, gateway, netmask, flags,
2355 ret_nrt, ifscope);
2356 lck_mtx_unlock(rnh_lock);
2357 return (error);
2358 }
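
/*
 * Illustrative sketch (not part of the build): deleting a host route
 * through the unlocked wrapper above.  rtrequest() takes rnh_lock
 * itself, so the caller must not hold it; ESRCH comes back if no
 * matching entry exists.  `dst' is assumed to be a fully formed
 * sockaddr for the destination being removed.
 *
 *	int error;
 *
 *	error = rtrequest(RTM_DELETE, SA(&dst), NULL, NULL,
 *	    RTF_HOST, NULL);
 */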
2359
2360 /*
2361 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
2362 * (i.e., the routes related to it by the operation of cloning). This
2363 * routine is iterated over all potential former-child-routes by way of
2364 * rnh->rnh_walktree_from() above, and those that actually are children of
2365 * the late parent (passed in as VP here) are themselves deleted.
2366 */
2367 static int
2368 rt_fixdelete(struct radix_node *rn, void *vp)
2369 {
2370 struct rtentry *rt = (struct rtentry *)rn;
2371 struct rtentry *rt0 = vp;
2372
2373 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2374
2375 RT_LOCK(rt);
2376 if (rt->rt_parent == rt0 &&
2377 !(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
2378 /*
2379 * Safe to drop rt_lock and use rt_key, since holding
2380 * rnh_lock here prevents another thread from calling
2381 * rt_setgate() on this route.
2382 */
2383 RT_UNLOCK(rt);
2384 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
2385 rt_mask(rt), rt->rt_flags, NULL));
2386 }
2387 RT_UNLOCK(rt);
2388 return (0);
2389 }
2390
2391 /*
2392 * This routine is called from rt_setgate() to do the analogous thing for
2393 * adds and changes. There is the added complication in this case of a
2394 * middle insert; i.e., insertion of a new network route between an older
2395 * network route and (cloned) host routes. For this reason, a simple check
2396 * of rt->rt_parent is insufficient; each candidate route must be tested
2397 * against the (mask, value) of the new route (passed as before in vp)
2398 * to see if the new route matches it.
2399 *
2400 * XXX - it may be possible to do fixdelete() for changes and reserve this
2401 * routine just for adds. I'm not sure why I thought it was necessary to do
2402 * changes this way.
2403 */
2404 static int
2405 rt_fixchange(struct radix_node *rn, void *vp)
2406 {
2407 struct rtentry *rt = (struct rtentry *)rn;
2408 struct rtfc_arg *ap = vp;
2409 struct rtentry *rt0 = ap->rt0;
2410 struct radix_node_head *rnh = ap->rnh;
2411 u_char *xk1, *xm1, *xk2, *xmp;
2412 int i, len;
2413
2414 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2415
2416 RT_LOCK(rt);
2417
2418 if (!rt->rt_parent ||
2419 (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING))) {
2420 RT_UNLOCK(rt);
2421 return (0);
2422 }
2423
2424 if (rt->rt_parent == rt0)
2425 goto delete_rt;
2426
2427 /*
2428 * There probably is a function somewhere which does this...
2429 * if not, there should be.
2430 */
2431 len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
2432
2433 xk1 = (u_char *)rt_key(rt0);
2434 xm1 = (u_char *)rt_mask(rt0);
2435 xk2 = (u_char *)rt_key(rt);
2436
2437 /*
2438 * Avoid applying a less specific route; do this only if the parent
2439 * route (rt->rt_parent) is a network route, since otherwise its mask
2440 * will be NULL if it is a cloning host route.
2441 */
2442 if ((xmp = (u_char *)rt_mask(rt->rt_parent)) != NULL) {
2443 int mlen = rt_mask(rt->rt_parent)->sa_len;
2444 if (mlen > rt_mask(rt0)->sa_len) {
2445 RT_UNLOCK(rt);
2446 return (0);
2447 }
2448
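		/*
		 * The byte test below is equivalent to
		 * (xmp[i] & xm1[i]) != xmp[i]: it bails out when the
		 * parent's mask has a bit that the new route's mask
		 * lacks, i.e. when the new route is less specific than
		 * the parent.
		 */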
2449 for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
2450 if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
2451 RT_UNLOCK(rt);
2452 return (0);
2453 }
2454 }
2455 }
2456
2457 for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
2458 if ((xk2[i] & xm1[i]) != xk1[i]) {
2459 RT_UNLOCK(rt);
2460 return (0);
2461 }
2462 }
2463
2464 /*
2465 * OK, this node is a clone, and matches the node currently being
2466 * changed/added under the node's mask. So, get rid of it.
2467 */
2468 delete_rt:
2469 /*
2470 * Safe to drop rt_lock and use rt_key, since holding rnh_lock here
2471 * prevents another thread from calling rt_setgate() on this route.
2472 */
2473 RT_UNLOCK(rt);
2474 return (rtrequest_locked(RTM_DELETE, rt_key(rt), NULL,
2475 rt_mask(rt), rt->rt_flags, NULL));
2476 }
2477
2478 /*
2479 * Round up sockaddr len to a multiple of 32 bytes. This will reduce
2480 * or even eliminate the need to re-allocate the chunk of memory used
2481 * for rt_key and rt_gateway in the event the gateway portion changes.
2482 * Certain code paths (e.g. IPsec) are notorious for caching the address
2483 * of rt_gateway; this rounding-up would help ensure that the gateway
2484 * portion never gets deallocated (though it may change contents) and
2485 * thus greatly simplifies things.
2486 */
2487 #define SA_SIZE(x) (-(-((uintptr_t)(x)) & -(32)))
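
/*
 * For illustration, SA_SIZE() rounds its argument up to the next
 * multiple of 32 (and SA_SIZE(0) is 0); -(-x & -32) behaves like
 * (x + 31) & ~31 for x > 0:
 *
 *	SA_SIZE(16) == 32	e.g. sizeof (struct sockaddr_in)
 *	SA_SIZE(28) == 32	e.g. sizeof (struct sockaddr_in6)
 *	SA_SIZE(32) == 32
 *	SA_SIZE(33) == 64
 */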
2488
2489 /*
2490 * Sets the gateway and/or gateway route portion of a route; may be
2491 * called on an existing route to modify the gateway portion. Both
2492 * rt_key and rt_gateway are allocated out of the same memory chunk.
2493 * Route entry lock must be held by caller; this routine will return
2494 * with the lock held.
2495 */
2496 int
2497 rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
2498 {
2499 int dlen = SA_SIZE(dst->sa_len), glen = SA_SIZE(gate->sa_len);
2500 struct radix_node_head *rnh = NULL;
2501 boolean_t loop = FALSE;
2502
2503 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
2504 return (EINVAL);
2505 }
2506
2507 rnh = rt_tables[dst->sa_family];
2508 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2509 RT_LOCK_ASSERT_HELD(rt);
2510
2511 /*
2512 * If this is for a route that is on its way to being removed,
2513 * or is temporarily frozen, reject the modification request.
2514 */
2515 if (rt->rt_flags & RTF_CONDEMNED) {
2516 return (EBUSY);
2517 }
2518
2519 /* Add an extra ref for ourselves */
2520 RT_ADDREF_LOCKED(rt);
2521
2522 if (rt->rt_flags & RTF_GATEWAY) {
2523 if ((dst->sa_len == gate->sa_len) &&
2524 (dst->sa_family == AF_INET || dst->sa_family == AF_INET6)) {
2525 struct sockaddr_storage dst_ss, gate_ss;
2526
2527 (void) sa_copy(dst, &dst_ss, NULL);
2528 (void) sa_copy(gate, &gate_ss, NULL);
2529
2530 loop = equal(SA(&dst_ss), SA(&gate_ss));
2531 } else {
2532 loop = (dst->sa_len == gate->sa_len &&
2533 equal(dst, gate));
2534 }
2535 }
2536
2537 /*
2538 * A (cloning) network route with the destination equal to the gateway
2539 * will create an endless loop (see notes below), so disallow it.
2540 */
2541 if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
2542 RTF_GATEWAY) && loop) {
2543 /* Release extra ref */
2544 RT_REMREF_LOCKED(rt);
2545 return (EADDRNOTAVAIL);
2546 }
2547
2548 /*
2549 * A host route with the destination equal to the gateway
2550 * will interfere with keeping LLINFO in the routing
2551 * table, so disallow it.
2552 */
2553 if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
2554 (RTF_HOST|RTF_GATEWAY)) && loop) {
2555 /*
2556 * The route might already exist if this is an RTM_CHANGE
2557 * or a routing redirect, so try to delete it.
2558 */
2559 if (rt_key(rt) != NULL) {
2560 /*
2561 * Safe to drop rt_lock and use rt_key, rt_gateway,
2562 * since holding rnh_lock here prevents another thread
2563 * from calling rt_setgate() on this route.
2564 */
2565 RT_UNLOCK(rt);
2566 (void) rtrequest_locked(RTM_DELETE, rt_key(rt),
2567 rt->rt_gateway, rt_mask(rt), rt->rt_flags, NULL);
2568 RT_LOCK(rt);
2569 }
2570 /* Release extra ref */
2571 RT_REMREF_LOCKED(rt);
2572 return (EADDRNOTAVAIL);
2573 }
2574
2575 /*
2576 * The destination is not directly reachable. Get a route
2577 * to the next-hop gateway and store it in rt_gwroute.
2578 */
2579 if (rt->rt_flags & RTF_GATEWAY) {
2580 struct rtentry *gwrt;
2581 unsigned int ifscope;
2582
2583 if (dst->sa_family == AF_INET)
2584 ifscope = sin_get_ifscope(dst);
2585 else if (dst->sa_family == AF_INET6)
2586 ifscope = sin6_get_ifscope(dst);
2587 else
2588 ifscope = IFSCOPE_NONE;
2589
2590 RT_UNLOCK(rt);
2591 /*
2592 * Don't ignore RTF_CLONING, since we prefer that rt_gwroute
2593 * points to a clone rather than a cloning route; see above
2594 * check for cloning loop avoidance (dst == gate).
2595 */
2596 gwrt = rtalloc1_scoped_locked(gate, 1, RTF_PRCLONING, ifscope);
2597 if (gwrt != NULL)
2598 RT_LOCK_ASSERT_NOTHELD(gwrt);
2599 RT_LOCK(rt);
2600
2601 /*
2602 * Cloning loop avoidance:
2603 *
2604 * In the presence of protocol-cloning and bad configuration,
2605 * it is possible to get stuck in bottomless mutual recursion
2606 * (rtrequest rt_setgate rtalloc1). We avoid this by not
2607 * allowing protocol-cloning to operate for gateways (which
2608 * is probably the correct choice anyway), and avoid the
2609 * resulting reference loops by disallowing any route to run
2610 * through itself as a gateway. This is obviously mandatory
2611 * when we get rt->rt_output(). It implies that a route to
2612 * the gateway must already be present in the system in order
2613 * for the gateway to be referred to by another route.
2614 */
2615 if (gwrt == rt) {
2616 RT_REMREF_LOCKED(gwrt);
2617 /* Release extra ref */
2618 RT_REMREF_LOCKED(rt);
2619 return (EADDRINUSE); /* failure */
2620 }
2621
2622 /*
2623 * If scoped, the gateway route must use the same interface;
2624 * we're holding rnh_lock now, so rt_gateway and rt_ifp of gwrt
2625 * should not change and are freely accessible.
2626 */
2627 if (ifscope != IFSCOPE_NONE && (rt->rt_flags & RTF_IFSCOPE) &&
2628 gwrt != NULL && gwrt->rt_ifp != NULL &&
2629 gwrt->rt_ifp->if_index != ifscope) {
2630 rtfree_locked(gwrt); /* rt != gwrt, no deadlock */
2631 /* Release extra ref */
2632 RT_REMREF_LOCKED(rt);
2633 return ((rt->rt_flags & RTF_HOST) ?
2634 EHOSTUNREACH : ENETUNREACH);
2635 }
2636
2637 /* Check again since we dropped the lock above */
2638 if (rt->rt_flags & RTF_CONDEMNED) {
2639 if (gwrt != NULL)
2640 rtfree_locked(gwrt);
2641 /* Release extra ref */
2642 RT_REMREF_LOCKED(rt);
2643 return (EBUSY);
2644 }
2645
2646 /* Set gateway route; callee adds ref to gwrt if non-NULL */
2647 rt_set_gwroute(rt, dst, gwrt);
2648
2649 /*
2650 * In case the (non-scoped) default route gets modified via
2651 * an ICMP redirect, record the interface index used for the
2652 * primary ifscope. Also done in rt_setif() to take care
2653 * of the non-redirect cases.
2654 */
2655 if (rt_primary_default(rt, dst) && rt->rt_ifp != NULL) {
2656 set_primary_ifscope(dst->sa_family,
2657 rt->rt_ifp->if_index);
2658 }
2659
2660 #if NECP
2661 /*
2662 * If this is a change in a default route, update
2663 * necp client watchers to re-evaluate
2664 */
2665 if (SA_DEFAULT(dst)) {
2666 necp_update_all_clients();
2667 }
2668 #endif /* NECP */
2669
2670 /*
2671 * Tell the kernel debugger about the new default gateway
2672 * if the gateway route uses the primary interface, or
2673 * if we are in a transient state before the non-scoped
2674 * default gateway is installed (similar to how the system
2675 * was behaving in the past). In the future, it would be good
2676 * to do all this only when KDP is enabled.
2677 */
2678 if ((dst->sa_family == AF_INET) &&
2679 gwrt != NULL && gwrt->rt_gateway->sa_family == AF_LINK &&
2680 (gwrt->rt_ifp->if_index == get_primary_ifscope(AF_INET) ||
2681 get_primary_ifscope(AF_INET) == IFSCOPE_NONE)) {
2682 kdp_set_gateway_mac(SDL((void *)gwrt->rt_gateway)->
2683 sdl_data);
2684 }
2685
2686 /* Release extra ref from rtalloc1() */
2687 if (gwrt != NULL)
2688 RT_REMREF(gwrt);
2689 }
2690
2691 /*
2692 * Prepare to store the gateway in rt_gateway. Both dst and gateway
2693 * are stored one after the other in the same malloc'd chunk. If we
2694 * have room, reuse the old buffer since rt_gateway already points
2695 * to the right place. Otherwise, malloc a new block and update
2696 * the 'dst' address and point rt_gateway to the right place.
2697 */
2698 if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway->sa_len)) {
2699 caddr_t new;
2700
2701 /* The underlying allocation is done with M_WAITOK set */
2702 R_Malloc(new, caddr_t, dlen + glen);
2703 if (new == NULL) {
2704 /* Clear gateway route */
2705 rt_set_gwroute(rt, dst, NULL);
2706 /* Release extra ref */
2707 RT_REMREF_LOCKED(rt);
2708 return (ENOBUFS);
2709 }
2710
2711 /*
2712 * Copy from 'dst' and not rt_key(rt) because we can get
2713 * here to initialize a newly allocated route entry, in
2714 * which case rt_key(rt) is NULL (as is rt_gateway).
2715 */
2716 bzero(new, dlen + glen);
2717 Bcopy(dst, new, dst->sa_len);
2718 R_Free(rt_key(rt)); /* free old block; NULL is okay */
2719 rt->rt_nodes->rn_key = new;
2720 rt->rt_gateway = (struct sockaddr *)(new + dlen);
2721 }
2722
2723 /*
2724 * Copy the new gateway value into the memory chunk.
2725 */
2726 Bcopy(gate, rt->rt_gateway, gate->sa_len);
2727
2728 /*
2729 * For consistency between rt_gateway and rt_key(gwrt).
2730 */
2731 if ((rt->rt_flags & RTF_GATEWAY) && rt->rt_gwroute != NULL &&
2732 (rt->rt_gwroute->rt_flags & RTF_IFSCOPE)) {
2733 if (rt->rt_gateway->sa_family == AF_INET &&
2734 rt_key(rt->rt_gwroute)->sa_family == AF_INET) {
2735 sin_set_ifscope(rt->rt_gateway,
2736 sin_get_ifscope(rt_key(rt->rt_gwroute)));
2737 } else if (rt->rt_gateway->sa_family == AF_INET6 &&
2738 rt_key(rt->rt_gwroute)->sa_family == AF_INET6) {
2739 sin6_set_ifscope(rt->rt_gateway,
2740 sin6_get_ifscope(rt_key(rt->rt_gwroute)));
2741 }
2742 }
2743
2744 /*
2745 * This isn't going to do anything useful for host routes, so
2746 * don't bother. Also make sure we have a reasonable mask
2747 * (we don't yet have one during adds).
2748 */
2749 if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
2750 struct rtfc_arg arg;
2751 arg.rnh = rnh;
2752 arg.rt0 = rt;
2753 RT_UNLOCK(rt);
2754 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
2755 rt_fixchange, &arg);
2756 RT_LOCK(rt);
2757 }
2758
2759 /* Release extra ref */
2760 RT_REMREF_LOCKED(rt);
2761 return (0);
2762 }
2763
2764 #undef SA_SIZE
2765
2766 void
2767 rt_set_gwroute(struct rtentry *rt, struct sockaddr *dst, struct rtentry *gwrt)
2768 {
2769 boolean_t gwrt_isrouter;
2770
2771 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2772 RT_LOCK_ASSERT_HELD(rt);
2773
2774 if (gwrt != NULL)
2775 RT_ADDREF(gwrt); /* for this routine */
2776
2777 /*
2778 * Get rid of existing gateway route; if rt_gwroute is already
2779 * set to gwrt, this is slightly redundant (though safe since
2780 * we held an extra ref above) but makes the code simpler.
2781 */
2782 if (rt->rt_gwroute != NULL) {
2783 struct rtentry *ogwrt = rt->rt_gwroute;
2784
2785 VERIFY(rt != ogwrt); /* sanity check */
2786 rt->rt_gwroute = NULL;
2787 RT_UNLOCK(rt);
2788 rtfree_locked(ogwrt);
2789 RT_LOCK(rt);
2790 VERIFY(rt->rt_gwroute == NULL);
2791 }
2792
2793 /*
2794 * And associate the new gateway route.
2795 */
2796 if ((rt->rt_gwroute = gwrt) != NULL) {
2797 RT_ADDREF(gwrt); /* for rt */
2798
2799 if (rt->rt_flags & RTF_WASCLONED) {
2800 /* rt_parent might be NULL if rt is embryonic */
2801 gwrt_isrouter = (rt->rt_parent != NULL &&
2802 SA_DEFAULT(rt_key(rt->rt_parent)) &&
2803 !RT_HOST(rt->rt_parent));
2804 } else {
2805 gwrt_isrouter = (SA_DEFAULT(dst) && !RT_HOST(rt));
2806 }
2807
2808 /* If gwrt points to a default router, mark it accordingly */
2809 if (gwrt_isrouter && RT_HOST(gwrt) &&
2810 !(gwrt->rt_flags & RTF_ROUTER)) {
2811 RT_LOCK(gwrt);
2812 gwrt->rt_flags |= RTF_ROUTER;
2813 RT_UNLOCK(gwrt);
2814 }
2815
2816 RT_REMREF(gwrt); /* for this routine */
2817 }
2818 }
2819
2820 static void
2821 rt_maskedcopy(const struct sockaddr *src, struct sockaddr *dst,
2822 const struct sockaddr *netmask)
2823 {
2824 const char *netmaskp = &netmask->sa_data[0];
2825 const char *srcp = &src->sa_data[0];
2826 char *dstp = &dst->sa_data[0];
2827 const char *maskend = (char *)dst
2828 + MIN(netmask->sa_len, src->sa_len);
2829 const char *srcend = (char *)dst + src->sa_len;
2830
2831 dst->sa_len = src->sa_len;
2832 dst->sa_family = src->sa_family;
2833
2834 while (dstp < maskend)
2835 *dstp++ = *srcp++ & *netmaskp++;
2836 if (dstp < srcend)
2837 memset(dstp, 0, (size_t)(srcend - dstp));
2838 }
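
/*
 * Worked example: with src 192.168.1.42 and netmask 255.255.255.0
 * (both as sockaddr_in), rt_maskedcopy() yields dst 192.168.1.0:
 * each data byte is ANDed with the corresponding mask byte, and any
 * bytes of src beyond the mask length are zeroed.
 */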
2839
2840 /*
2841 * Lookup an AF_INET/AF_INET6 scoped or non-scoped route depending on the
2842 * ifscope value passed in by the caller (IFSCOPE_NONE implies non-scoped).
2843 */
2844 static struct radix_node *
2845 node_lookup(struct sockaddr *dst, struct sockaddr *netmask,
2846 unsigned int ifscope)
2847 {
2848 struct radix_node_head *rnh;
2849 struct radix_node *rn;
2850 struct sockaddr_storage ss, mask;
2851 int af = dst->sa_family;
2852 struct matchleaf_arg ma = { .ifscope = ifscope };
2853 rn_matchf_t *f = rn_match_ifscope;
2854 void *w = &ma;
2855
2856 if (af != AF_INET && af != AF_INET6)
2857 return (NULL);
2858
2859 rnh = rt_tables[af];
2860
2861 /*
2862 * Transform dst into the internal routing table form,
2863 * clearing out the scope ID field if ifscope isn't set.
2864 */
2865 dst = sa_copy(dst, &ss, (ifscope == IFSCOPE_NONE) ? NULL : &ifscope);
2866
2867 /* Transform netmask into the internal routing table form */
2868 if (netmask != NULL)
2869 netmask = ma_copy(af, netmask, &mask, ifscope);
2870
2871 if (ifscope == IFSCOPE_NONE)
2872 f = w = NULL;
2873
2874 rn = rnh->rnh_lookup_args(dst, netmask, rnh, f, w);
2875 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
2876 rn = NULL;
2877
2878 return (rn);
2879 }
2880
2881 /*
2882 * Lookup the AF_INET/AF_INET6 non-scoped default route.
2883 */
2884 static struct radix_node *
2885 node_lookup_default(int af)
2886 {
2887 struct radix_node_head *rnh;
2888
2889 VERIFY(af == AF_INET || af == AF_INET6);
2890 rnh = rt_tables[af];
2891
2892 return (af == AF_INET ? rnh->rnh_lookup(&sin_def, NULL, rnh) :
2893 rnh->rnh_lookup(&sin6_def, NULL, rnh));
2894 }
2895
2896 boolean_t
2897 rt_ifa_is_dst(struct sockaddr *dst, struct ifaddr *ifa)
2898 {
2899 boolean_t result = FALSE;
2900
2901 if (ifa == NULL || ifa->ifa_addr == NULL)
2902 return (result);
2903
2904 IFA_LOCK_SPIN(ifa);
2905
2906 if (dst->sa_family == ifa->ifa_addr->sa_family &&
2907 ((dst->sa_family == AF_INET &&
2908 SIN(dst)->sin_addr.s_addr ==
2909 SIN(ifa->ifa_addr)->sin_addr.s_addr) ||
2910 (dst->sa_family == AF_INET6 &&
2911 SA6_ARE_ADDR_EQUAL(SIN6(dst), SIN6(ifa->ifa_addr)))))
2912 result = TRUE;
2913
2914 IFA_UNLOCK(ifa);
2915
2916 return (result);
2917 }
2918
2919 /*
2920 * Common routine to lookup/match a route. It invokes the lookup/matchaddr
2921 * callback which could be address family-specific. The main difference
2922 * between the two (at least for AF_INET/AF_INET6) is that a lookup does
2923 * not alter the expiring state of a route, whereas a match would unexpire
2924 * or revalidate the route.
2925 *
2926 * The optional scope or interface index property of a route allows for a
2927 * per-interface route instance. This permits multiple route entries having
2928 * the same destination (but not necessarily the same gateway) to exist in
2929 * the routing table; each of these entries is specific to the corresponding
2930 * interface. This is made possible by storing the scope ID value into the
2931 * radix key, thus making each route entry unique. These scoped entries
2932 * exist along with the regular, non-scoped entries in the same radix tree
2933 * for a given address family (AF_INET/AF_INET6); the scope logically
2934 * partitions it into multiple per-interface sub-trees.
2935 *
2936 * When a scoped route lookup is performed, the routing table is searched for
2937 * the best match that would result in a route using the same interface as the
2938 * one associated with the scope (the exception to this are routes that point
2939 * to the loopback interface). The search rule follows the longest matching
2940 * prefix with the additional interface constraint.
2941 */
2942 static struct rtentry *
2943 rt_lookup_common(boolean_t lookup_only, boolean_t coarse, struct sockaddr *dst,
2944 struct sockaddr *netmask, struct radix_node_head *rnh, unsigned int ifscope)
2945 {
2946 struct radix_node *rn0, *rn = NULL;
2947 int af = dst->sa_family;
2948 struct sockaddr_storage dst_ss;
2949 struct sockaddr_storage mask_ss;
2950 boolean_t dontcare;
2951 #if (DEVELOPMENT || DEBUG)
2952 char dbuf[MAX_SCOPE_ADDR_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
2953 char s_dst[MAX_IPv6_STR_LEN], s_netmask[MAX_IPv6_STR_LEN];
2954 #endif
2955 VERIFY(!coarse || ifscope == IFSCOPE_NONE);
2956
2957 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
2958 #if INET6
2959 /*
2960 * While we have rnh_lock held, see if we need to schedule the timer.
2961 */
2962 if (nd6_sched_timeout_want)
2963 nd6_sched_timeout(NULL, NULL);
2964 #endif /* INET6 */
2965
2966 if (!lookup_only)
2967 netmask = NULL;
2968
2969 /*
2970 * Non-scoped route lookup.
2971 */
2972 #if INET6
2973 if (af != AF_INET && af != AF_INET6) {
2974 #else
2975 if (af != AF_INET) {
2976 #endif /* !INET6 */
2977 rn = rnh->rnh_matchaddr(dst, rnh);
2978
2979 /*
2980 * Don't return a root node; also, rnh_matchaddr callback
2981 * would have done the necessary work to clear RTPRF_OURS
2982 * for certain protocol families.
2983 */
2984 if (rn != NULL && (rn->rn_flags & RNF_ROOT))
2985 rn = NULL;
2986 if (rn != NULL) {
2987 RT_LOCK_SPIN(RT(rn));
2988 if (!(RT(rn)->rt_flags & RTF_CONDEMNED)) {
2989 RT_ADDREF_LOCKED(RT(rn));
2990 RT_UNLOCK(RT(rn));
2991 } else {
2992 RT_UNLOCK(RT(rn));
2993 rn = NULL;
2994 }
2995 }
2996 return (RT(rn));
2997 }
2998
2999 /* Transform dst/netmask into the internal routing table form */
3000 dst = sa_copy(dst, &dst_ss, &ifscope);
3001 if (netmask != NULL)
3002 netmask = ma_copy(af, netmask, &mask_ss, ifscope);
3003 dontcare = (ifscope == IFSCOPE_NONE);
3004
3005 #if (DEVELOPMENT || DEBUG)
3006 if (rt_verbose) {
3007 if (af == AF_INET)
3008 (void) inet_ntop(af, &SIN(dst)->sin_addr.s_addr,
3009 s_dst, sizeof (s_dst));
3010 else
3011 (void) inet_ntop(af, &SIN6(dst)->sin6_addr,
3012 s_dst, sizeof (s_dst));
3013
3014 if (netmask != NULL && af == AF_INET)
3015 (void) inet_ntop(af, &SIN(netmask)->sin_addr.s_addr,
3016 s_netmask, sizeof (s_netmask));
3017 else if (netmask != NULL && af == AF_INET6)
3018 (void) inet_ntop(af, &SIN6(netmask)->sin6_addr,
3019 s_netmask, sizeof (s_netmask));
3020 else
3021 *s_netmask = '\0';
3022 printf("%s (%d, %d, %s, %s, %u)\n",
3023 __func__, lookup_only, coarse, s_dst, s_netmask, ifscope);
3024 }
3025 #endif
3026
3027 /*
3028 * Scoped route lookup:
3029 *
3030 * We first perform a non-scoped lookup for the original result.
3031 * Afterwards, depending on whether or not the caller has specified
3032 * a scope, we perform a more specific scoped search and fallback
3033 * to this original result upon failure.
3034 */
3035 rn0 = rn = node_lookup(dst, netmask, IFSCOPE_NONE);
3036
3037 /*
3038 * If the caller did not specify a scope, use the primary scope
3039 * derived from the system's non-scoped default route. If, for
3040 * any reason, there is no primary interface, ifscope will be
3041 * set to IFSCOPE_NONE; if the above lookup resulted in a route,
3042 * we'll do a more-specific search below, scoped to the interface
3043 * of that route.
3044 */
3045 if (dontcare)
3046 ifscope = get_primary_ifscope(af);
3047
3048 /*
3049 * Keep the original result if either of the following is true:
3050 *
3051 * 1) The interface portion of the route has the same interface
3052 * index as the scope value and it is marked with RTF_IFSCOPE.
3053 * 2) The route uses the loopback interface, in which case the
3054 * destination (host/net) is local/loopback.
3055 *
3056 * Otherwise, do a more specific search using the scope;
3057 * we're holding rnh_lock now, so rt_ifp should not change.
3058 */
3059 if (rn != NULL) {
3060 struct rtentry *rt = RT(rn);
3061 #if (DEVELOPMENT || DEBUG)
3062 if (rt_verbose) {
3063 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3064 printf("%s unscoped search %p to %s->%s->%s ifa_ifp %s\n",
3065 __func__, rt,
3066 dbuf, gbuf,
3067 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3068 (rt->rt_ifa->ifa_ifp != NULL) ?
3069 rt->rt_ifa->ifa_ifp->if_xname : "");
3070 }
3071 #endif
3072 if (!(rt->rt_ifp->if_flags & IFF_LOOPBACK) ||
3073 (rt->rt_flags & RTF_GATEWAY)) {
3074 if (rt->rt_ifp->if_index != ifscope) {
3075 /*
3076 * Wrong interface; keep the original result
3077 * only if the caller did not specify a scope,
3078 * and do a more specific scoped search using
3079 * the scope of the found route. Otherwise,
3080 * start again from scratch.
3081 *
3082 * For loopback scope we keep the unscoped
3083 * route for local addresses
3084 */
3085 rn = NULL;
3086 if (dontcare)
3087 ifscope = rt->rt_ifp->if_index;
3088 else if (ifscope != lo_ifp->if_index ||
3089 rt_ifa_is_dst(dst, rt->rt_ifa) == FALSE)
3090 rn0 = NULL;
3091 } else if (!(rt->rt_flags & RTF_IFSCOPE)) {
3092 /*
3093 * Right interface, except that this route
3094 * isn't marked with RTF_IFSCOPE. Do a more
3095 * specific scoped search. Keep the original
3096 * result and return it in case the scoped
3097 * search fails.
3098 */
3099 rn = NULL;
3100 }
3101 }
3102 }
3103
3104 /*
3105 * Scoped search. Find the most specific entry having the same
3106 * interface scope as the one requested. The following will result
3107 * in searching for the longest prefix scoped match.
3108 */
3109 if (rn == NULL) {
3110 rn = node_lookup(dst, netmask, ifscope);
3111 #if (DEVELOPMENT || DEBUG)
3112 if (rt_verbose && rn != NULL) {
3113 struct rtentry *rt = RT(rn);
3114
3115 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3116 printf("%s scoped search %p to %s->%s->%s ifa %s\n",
3117 __func__, rt,
3118 dbuf, gbuf,
3119 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3120 (rt->rt_ifa->ifa_ifp != NULL) ?
3121 rt->rt_ifa->ifa_ifp->if_xname : "");
3122 }
3123 #endif
3124 }
3125 /*
3126 * Use the original result if either of the following is true:
3127 *
3128 * 1) The scoped search did not yield any result.
3129 * 2) The caller insists on performing a coarse-grained lookup.
3130 * 3) The result from the scoped search is a scoped default route,
3131 * and the original (non-scoped) result is not a default route,
3132 * i.e. the original result is a more specific host/net route.
3133 * 4) The scoped search yielded a net route but the original
3134 * result is a host route, i.e. the original result is treated
3135 * as a more specific route.
3136 */
3137 if (rn == NULL || coarse || (rn0 != NULL &&
3138 ((SA_DEFAULT(rt_key(RT(rn))) && !SA_DEFAULT(rt_key(RT(rn0)))) ||
3139 (!RT_HOST(rn) && RT_HOST(rn0)))))
3140 rn = rn0;
3141
3142 /*
3143 * If we still don't have a route, use the non-scoped default
3144 * route as long as the interface portion satisfies the scope.
3145 */
3146 if (rn == NULL && (rn = node_lookup_default(af)) != NULL &&
3147 RT(rn)->rt_ifp->if_index != ifscope) {
3148 rn = NULL;
3149 }
3150
3151 if (rn != NULL) {
3152 /*
3153 * Manually clear RTPRF_OURS using rt_validate() and
3154 * bump up the reference count after, and not before;
3155 * we only get here for AF_INET/AF_INET6. node_lookup()
3156 * has done the check against RNF_ROOT, so we can be sure
3157 * that we're not returning a root node here.
3158 */
3159 RT_LOCK_SPIN(RT(rn));
3160 if (rt_validate(RT(rn))) {
3161 RT_ADDREF_LOCKED(RT(rn));
3162 RT_UNLOCK(RT(rn));
3163 } else {
3164 RT_UNLOCK(RT(rn));
3165 rn = NULL;
3166 }
3167 }
3168 #if (DEVELOPMENT || DEBUG)
3169 if (rt_verbose) {
3170 if (rn == NULL)
3171 printf("%s %u return NULL\n", __func__, ifscope);
3172 else {
3173 struct rtentry *rt = RT(rn);
3174
3175 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3176
3177 printf("%s %u return %p to %s->%s->%s ifa_ifp %s\n",
3178 __func__, ifscope, rt,
3179 dbuf, gbuf,
3180 (rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : "",
3181 (rt->rt_ifa->ifa_ifp != NULL) ?
3182 rt->rt_ifa->ifa_ifp->if_xname : "");
3183 }
3184 }
3185 #endif
3186 return (RT(rn));
3187 }
3188
3189 struct rtentry *
3190 rt_lookup(boolean_t lookup_only, struct sockaddr *dst, struct sockaddr *netmask,
3191 struct radix_node_head *rnh, unsigned int ifscope)
3192 {
3193 return (rt_lookup_common(lookup_only, FALSE, dst, netmask,
3194 rnh, ifscope));
3195 }
3196
3197 struct rtentry *
3198 rt_lookup_coarse(boolean_t lookup_only, struct sockaddr *dst,
3199 struct sockaddr *netmask, struct radix_node_head *rnh)
3200 {
3201 return (rt_lookup_common(lookup_only, TRUE, dst, netmask,
3202 rnh, IFSCOPE_NONE));
3203 }
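
/*
 * Illustrative sketch (not part of the build): a scoped lookup with
 * rnh_lock held.  With ifscope set to an interface index, the search
 * prefers the longest-prefix entry bound to that interface, falling
 * back to the non-scoped result as described above rt_lookup_common().
 * A reference is added to the returned route on our behalf.
 *
 *	struct rtentry *rt;
 *
 *	lck_mtx_lock(rnh_lock);
 *	rt = rt_lookup(TRUE, SA(&dst), NULL, rt_tables[AF_INET],
 *	    ifp->if_index);
 *	if (rt != NULL)
 *		rtfree_locked(rt);
 *	lck_mtx_unlock(rnh_lock);
 */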
3204
3205 boolean_t
3206 rt_validate(struct rtentry *rt)
3207 {
3208 RT_LOCK_ASSERT_HELD(rt);
3209
3210 if ((rt->rt_flags & (RTF_UP | RTF_CONDEMNED)) == RTF_UP) {
3211 int af = rt_key(rt)->sa_family;
3212
3213 if (af == AF_INET)
3214 (void) in_validate(RN(rt));
3215 else if (af == AF_INET6)
3216 (void) in6_validate(RN(rt));
3217 } else {
3218 rt = NULL;
3219 }
3220
3221 return (rt != NULL);
3222 }
3223
3224 /*
3225 * Set up a routing table entry, normally
3226 * for an interface.
3227 */
3228 int
3229 rtinit(struct ifaddr *ifa, int cmd, int flags)
3230 {
3231 int error;
3232
3233 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_NOTOWNED);
3234
3235 lck_mtx_lock(rnh_lock);
3236 error = rtinit_locked(ifa, cmd, flags);
3237 lck_mtx_unlock(rnh_lock);
3238
3239 return (error);
3240 }
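
/*
 * Illustrative sketch (not part of the build): protocols typically
 * call rtinit() when an interface address comes up or goes down,
 * e.g. to install the subnet route covering a newly configured
 * address (the dst/netmask pair is taken from the ifaddr itself,
 * as shown in rtinit_locked() below):
 *
 *	error = rtinit(ifa, RTM_ADD, RTF_UP | RTF_CLONING);
 *
 * with a matching rtinit(ifa, RTM_DELETE, 0) when the address is
 * removed.
 */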
3241
3242 int
3243 rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
3244 {
3245 struct radix_node_head *rnh;
3246 uint8_t nbuf[128]; /* long enough for IPv6 */
3247 #if (DEVELOPMENT || DEBUG)
3248 char dbuf[MAX_IPv6_STR_LEN], gbuf[MAX_IPv6_STR_LEN];
3249 char abuf[MAX_IPv6_STR_LEN];
3250 #endif
3251 struct rtentry *rt = NULL;
3252 struct sockaddr *dst;
3253 struct sockaddr *netmask;
3254 int error = 0;
3255
3256 /*
3257 * Holding rnh_lock here prevents the possibility of ifa from
3258 * changing (e.g. in_ifinit), so it is safe to access its
3259 * ifa_{dst}addr (here and down below) without locking.
3260 */
3261 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
3262
3263 if (flags & RTF_HOST) {
3264 dst = ifa->ifa_dstaddr;
3265 netmask = NULL;
3266 } else {
3267 dst = ifa->ifa_addr;
3268 netmask = ifa->ifa_netmask;
3269 }
3270
3271 if (dst->sa_len == 0) {
3272 log(LOG_ERR, "%s: %s failed, invalid dst sa_len %d\n",
3273 __func__, rtm2str(cmd), dst->sa_len);
3274 error = EINVAL;
3275 goto done;
3276 }
3277 if (netmask != NULL && netmask->sa_len > sizeof (nbuf)) {
3278 log(LOG_ERR, "%s: %s failed, mask sa_len %d too large\n",
3279 __func__, rtm2str(cmd), netmask->sa_len);
3280 error = EINVAL;
3281 goto done;
3282 }
3283
3284 #if (DEVELOPMENT || DEBUG)
3285 if (dst->sa_family == AF_INET) {
3286 (void) inet_ntop(AF_INET, &SIN(dst)->sin_addr.s_addr,
3287 abuf, sizeof (abuf));
3288 }
3289 #if INET6
3290 else if (dst->sa_family == AF_INET6) {
3291 (void) inet_ntop(AF_INET6, &SIN6(dst)->sin6_addr,
3292 abuf, sizeof (abuf));
3293 }
3294 #endif /* INET6 */
3295 #endif /* (DEVELOPMENT || DEBUG) */
3296
3297 if ((rnh = rt_tables[dst->sa_family]) == NULL) {
3298 error = EINVAL;
3299 goto done;
3300 }
3301
3302 /*
3303 * If it's a delete, check that the route, if it exists, is on the
3304 * correct interface; otherwise we might scrub a route to another
3305 * ifa, which would be confusing at best and possibly worse.
3306 */
3307 if (cmd == RTM_DELETE) {
3308 /*
3309 * It's a delete, so it should already exist. If it's a net,
3310 * mask off the host bits (assuming we have a mask).
3312 */
3313 if (netmask != NULL) {
3314 rt_maskedcopy(dst, SA(nbuf), netmask);
3315 dst = SA(nbuf);
3316 }
3317 /*
3318 * Get an rtentry that is in the routing tree and contains
3319 * the correct info. Note that we perform a coarse-grained
3320 * lookup here, in case there is a scoped variant of the
3321 * subnet/prefix route which we should ignore, as we never
3322 * add a scoped subnet/prefix route as part of adding an
3323 * interface address.
3324 */
3325 rt = rt_lookup_coarse(TRUE, dst, NULL, rnh);
3326 if (rt != NULL) {
3327 #if (DEVELOPMENT || DEBUG)
3328 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3329 #endif
3330 /*
3331 * OK, so we found the rtentry. It has an extra reference
3332 * for us at this stage, which we won't need, so
3333 * drop it now.
3334 */
3335 RT_LOCK(rt);
3336 if (rt->rt_ifa != ifa) {
3337 /*
3338 * If the interface address in the rtentry
3339 * doesn't match the interface we are using,
3340 * then we don't want to delete it, so return
3341 * an error. This seems to be the only point
3342 * of this whole RTM_DELETE clause.
3343 */
3344 #if (DEVELOPMENT || DEBUG)
3345 if (rt_verbose) {
3346 log(LOG_DEBUG, "%s: not removing "
3347 "route to %s->%s->%s, flags %b, "
3348 "ifaddr %s, rt_ifa 0x%llx != "
3349 "ifa 0x%llx\n", __func__, dbuf,
3350 gbuf, ((rt->rt_ifp != NULL) ?
3351 rt->rt_ifp->if_xname : ""),
3352 rt->rt_flags, RTF_BITS, abuf,
3353 (uint64_t)VM_KERNEL_ADDRPERM(
3354 rt->rt_ifa),
3355 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3356 }
3357 #endif /* (DEVELOPMENT || DEBUG) */
3358 RT_REMREF_LOCKED(rt);
3359 RT_UNLOCK(rt);
3360 rt = NULL;
3361 error = ((flags & RTF_HOST) ?
3362 EHOSTUNREACH : ENETUNREACH);
3363 goto done;
3364 } else if (rt->rt_flags & RTF_STATIC) {
3365 /*
3366 * Don't remove the subnet/prefix route if
3367 * this was manually added from above.
3368 */
3369 #if (DEVELOPMENT || DEBUG)
3370 if (rt_verbose) {
3371 log(LOG_DEBUG, "%s: not removing "
3372 "static route to %s->%s->%s, "
3373 "flags %b, ifaddr %s\n", __func__,
3374 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3375 rt->rt_ifp->if_xname : ""),
3376 rt->rt_flags, RTF_BITS, abuf);
3377 }
3378 #endif /* (DEVELOPMENT || DEBUG) */
3379 RT_REMREF_LOCKED(rt);
3380 RT_UNLOCK(rt);
3381 rt = NULL;
3382 error = EBUSY;
3383 goto done;
3384 }
3385 #if (DEVELOPMENT || DEBUG)
3386 if (rt_verbose) {
3387 log(LOG_DEBUG, "%s: removing route to "
3388 "%s->%s->%s, flags %b, ifaddr %s\n",
3389 __func__, dbuf, gbuf,
3390 ((rt->rt_ifp != NULL) ?
3391 rt->rt_ifp->if_xname : ""),
3392 rt->rt_flags, RTF_BITS, abuf);
3393 }
3394 #endif /* (DEVELOPMENT || DEBUG) */
3395 RT_REMREF_LOCKED(rt);
3396 RT_UNLOCK(rt);
3397 rt = NULL;
3398 }
3399 }
3400 /*
3401 * Do the actual request
3402 */
3403 if ((error = rtrequest_locked(cmd, dst, ifa->ifa_addr, netmask,
3404 flags | ifa->ifa_flags, &rt)) != 0)
3405 goto done;
3406
3407 VERIFY(rt != NULL);
3408 #if (DEVELOPMENT || DEBUG)
3409 rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
3410 #endif /* (DEVELOPMENT || DEBUG) */
3411 switch (cmd) {
3412 case RTM_DELETE:
3413 /*
3414 * If we are deleting, and we found an entry, then it's
3415 * been removed from the tree. Notify any listening
3416 * routing agents of the change and throw it away.
3417 */
3418 RT_LOCK(rt);
3419 rt_newaddrmsg(cmd, ifa, error, rt);
3420 RT_UNLOCK(rt);
3421 #if (DEVELOPMENT || DEBUG)
3422 if (rt_verbose) {
3423 log(LOG_DEBUG, "%s: removed route to %s->%s->%s, "
3424 "flags %b, ifaddr %s\n", __func__, dbuf, gbuf,
3425 ((rt->rt_ifp != NULL) ? rt->rt_ifp->if_xname : ""),
3426 rt->rt_flags, RTF_BITS, abuf);
3427 }
3428 #endif /* (DEVELOPMENT || DEBUG) */
3429 rtfree_locked(rt);
3430 break;
3431
3432 case RTM_ADD:
3433 /*
3434 * We are adding, and we have a returned routing entry.
3435 * We need to sanity check the result. If it came back
3436 * with an unexpected interface, then it must have already
3437 * existed.
3438 */
3439 RT_LOCK(rt);
3440 if (rt->rt_ifa != ifa) {
3441 void (*ifa_rtrequest)
3442 (int, struct rtentry *, struct sockaddr *);
3443 #if (DEVELOPMENT || DEBUG)
3444 if (rt_verbose) {
3445 if (!(rt->rt_ifa->ifa_ifp->if_flags &
3446 (IFF_POINTOPOINT|IFF_LOOPBACK))) {
3447 log(LOG_ERR, "%s: %s route to %s->%s->%s, "
3448 "flags %b, ifaddr %s, rt_ifa 0x%llx != "
3449 "ifa 0x%llx\n", __func__, rtm2str(cmd),
3450 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3451 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3452 RTF_BITS, abuf,
3453 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa),
3454 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3455 }
3456
3457 log(LOG_DEBUG, "%s: %s route to %s->%s->%s, "
3458 "flags %b, ifaddr %s, rt_ifa was 0x%llx "
3459 "now 0x%llx\n", __func__, rtm2str(cmd),
3460 dbuf, gbuf, ((rt->rt_ifp != NULL) ?
3461 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3462 RTF_BITS, abuf,
3463 (uint64_t)VM_KERNEL_ADDRPERM(rt->rt_ifa),
3464 (uint64_t)VM_KERNEL_ADDRPERM(ifa));
3465 }
3466 #endif /* (DEVELOPMENT || DEBUG) */
3467
3468 /*
3469 * Ask that the protocol in question
3470 * remove anything it has associated with
3471 * this route and ifaddr.
3472 */
3473 ifa_rtrequest = rt->rt_ifa->ifa_rtrequest;
3474 if (ifa_rtrequest != NULL)
3475 ifa_rtrequest(RTM_DELETE, rt, NULL);
3476 /*
3477 * Set the route's ifa.
3478 */
3479 rtsetifa(rt, ifa);
3480
3481 if (rt->rt_ifp != ifa->ifa_ifp) {
3482 /*
3483 * Purge any link-layer info caching.
3484 */
3485 if (rt->rt_llinfo_purge != NULL)
3486 rt->rt_llinfo_purge(rt);
3487 /*
3488 * Adjust route ref count for the interfaces.
3489 */
3490 if (rt->rt_if_ref_fn != NULL) {
3491 rt->rt_if_ref_fn(ifa->ifa_ifp, 1);
3492 rt->rt_if_ref_fn(rt->rt_ifp, -1);
3493 }
3494 }
3495
3496 /*
3497 * And substitute in references to the ifaddr
3498 * we are adding.
3499 */
3500 rt->rt_ifp = ifa->ifa_ifp;
3501 /*
3502 * If rmx_mtu is not locked, update it
3503 * to the MTU used by the new interface.
3504 */
3505 if (!(rt->rt_rmx.rmx_locks & RTV_MTU)) {
3506 rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu;
3507 if (dst->sa_family == AF_INET &&
3508 INTF_ADJUST_MTU_FOR_CLAT46(rt->rt_ifp)) {
3509 rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
3510 /* Further adjust the size for CLAT46 expansion */
3511 rt->rt_rmx.rmx_mtu -= CLAT46_HDR_EXPANSION_OVERHD;
3512 }
3513 }
3514
3515 /*
3516 * Now ask the protocol to check if it needs
3517 * any special processing in its new form.
3518 */
3519 ifa_rtrequest = ifa->ifa_rtrequest;
3520 if (ifa_rtrequest != NULL)
3521 ifa_rtrequest(RTM_ADD, rt, NULL);
3522 } else {
3523 #if (DEVELOPMENT || DEBUG)
3524 if (rt_verbose) {
3525 log(LOG_DEBUG, "%s: added route to %s->%s->%s, "
3526 "flags %b, ifaddr %s\n", __func__, dbuf,
3527 gbuf, ((rt->rt_ifp != NULL) ?
3528 rt->rt_ifp->if_xname : ""), rt->rt_flags,
3529 RTF_BITS, abuf);
3530 }
3531 #endif /* (DEVELOPMENT || DEBUG) */
3532 }
3533 /*
3534 * notify any listening routing agents of the change
3535 */
3536 rt_newaddrmsg(cmd, ifa, error, rt);
3537 /*
3538 * We just wanted to add it; we don't actually need a
3539 * reference. This will result in a route that's added
3540 * to the routing table without a reference count. The
3541 * RTM_DELETE code will do the necessary step to adjust
3542 * the reference count at deletion time.
3543 */
3544 RT_REMREF_LOCKED(rt);
3545 RT_UNLOCK(rt);
3546 break;
3547
3548 default:
3549 VERIFY(0);
3550 /* NOTREACHED */
3551 }
3552 done:
3553 return (error);
3554 }
3555
3556 static void
3557 rt_set_idleref(struct rtentry *rt)
3558 {
3559 RT_LOCK_ASSERT_HELD(rt);
3560
3561 /*
3562 * We currently keep idle refcnt only on unicast cloned routes
3563 * that aren't marked with RTF_NOIFREF.
3564 */
3565 if (rt->rt_parent != NULL && !(rt->rt_flags &
3566 (RTF_NOIFREF | RTF_BROADCAST | RTF_MULTICAST)) &&
3567 (rt->rt_flags & (RTF_UP|RTF_WASCLONED|RTF_IFREF)) ==
3568 (RTF_UP|RTF_WASCLONED)) {
3569 rt_clear_idleref(rt); /* drop existing refcnt if any */
3570 rt->rt_if_ref_fn = rte_if_ref;
3571 /* Become a regular mutex, just in case */
3572 RT_CONVERT_LOCK(rt);
3573 rt->rt_if_ref_fn(rt->rt_ifp, 1);
3574 rt->rt_flags |= RTF_IFREF;
3575 }
3576 }
3577
3578 void
3579 rt_clear_idleref(struct rtentry *rt)
3580 {
3581 RT_LOCK_ASSERT_HELD(rt);
3582
3583 if (rt->rt_if_ref_fn != NULL) {
3584 VERIFY((rt->rt_flags & (RTF_NOIFREF | RTF_IFREF)) == RTF_IFREF);
3585 /* Become a regular mutex, just in case */
3586 RT_CONVERT_LOCK(rt);
3587 rt->rt_if_ref_fn(rt->rt_ifp, -1);
3588 rt->rt_flags &= ~RTF_IFREF;
3589 rt->rt_if_ref_fn = NULL;
3590 }
3591 }
3592
3593 void
3594 rt_set_proxy(struct rtentry *rt, boolean_t set)
3595 {
3596 lck_mtx_lock(rnh_lock);
3597 RT_LOCK(rt);
3598 /*
3599 * Search for any cloned routes which might have
3600 * been formed from this node, and delete them.
3601 */
3602 if (rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
3603 struct radix_node_head *rnh = rt_tables[rt_key(rt)->sa_family];
3604
3605 if (set)
3606 rt->rt_flags |= RTF_PROXY;
3607 else
3608 rt->rt_flags &= ~RTF_PROXY;
3609
3610 RT_UNLOCK(rt);
3611 if (rnh != NULL && rt_mask(rt)) {
3612 rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
3613 rt_fixdelete, rt);
3614 }
3615 } else {
3616 RT_UNLOCK(rt);
3617 }
3618 lck_mtx_unlock(rnh_lock);
3619 }
3620
3621 static void
3622 rte_lock_init(struct rtentry *rt)
3623 {
3624 lck_mtx_init(&rt->rt_lock, rte_mtx_grp, rte_mtx_attr);
3625 }
3626
3627 static void
3628 rte_lock_destroy(struct rtentry *rt)
3629 {
3630 RT_LOCK_ASSERT_NOTHELD(rt);
3631 lck_mtx_destroy(&rt->rt_lock, rte_mtx_grp);
3632 }
3633
3634 void
3635 rt_lock(struct rtentry *rt, boolean_t spin)
3636 {
3637 RT_LOCK_ASSERT_NOTHELD(rt);
3638 if (spin)
3639 lck_mtx_lock_spin(&rt->rt_lock);
3640 else
3641 lck_mtx_lock(&rt->rt_lock);
3642 if (rte_debug & RTD_DEBUG)
3643 rte_lock_debug((struct rtentry_dbg *)rt);
3644 }
3645
3646 void
3647 rt_unlock(struct rtentry *rt)
3648 {
3649 if (rte_debug & RTD_DEBUG)
3650 rte_unlock_debug((struct rtentry_dbg *)rt);
3651 lck_mtx_unlock(&rt->rt_lock);
3653 }
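/*
 * A minimal sketch (assumed usage, not from the original source) of
 * the spin/convert locking pattern used throughout this file: take
 * the lock in spin mode for a short check, and convert it to a full
 * mutex before doing anything that may block.
 */
#if 0
static void
example_inspect_route(struct rtentry *rt)
{
	RT_LOCK_SPIN(rt);		/* cheap acquisition, no blocking yet */
	if (rt->rt_flags & RTF_UP) {
		RT_CONVERT_LOCK(rt);	/* become a regular mutex */
		/* ... potentially blocking work on rt ... */
	}
	RT_UNLOCK(rt);
}
#endif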
3654
3655 static inline void
3656 rte_lock_debug(struct rtentry_dbg *rte)
3657 {
3658 uint32_t idx;
3659
3660 RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
3661 idx = atomic_add_32_ov(&rte->rtd_lock_cnt, 1) % CTRACE_HIST_SIZE;
3662 if (rte_debug & RTD_TRACE)
3663 ctrace_record(&rte->rtd_lock[idx]);
3664 }
3665
3666 static inline void
3667 rte_unlock_debug(struct rtentry_dbg *rte)
3668 {
3669 uint32_t idx;
3670
3671 RT_LOCK_ASSERT_HELD((struct rtentry *)rte);
3672 idx = atomic_add_32_ov(&rte->rtd_unlock_cnt, 1) % CTRACE_HIST_SIZE;
3673 if (rte_debug & RTD_TRACE)
3674 ctrace_record(&rte->rtd_unlock[idx]);
3675 }
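/*
 * The two helpers above keep a fixed-size circular history:
 * atomic_add_32_ov() returns the counter's value prior to the update,
 * so successive events fill slots 0..CTRACE_HIST_SIZE-1 and then wrap,
 * overwriting the oldest record. The same pattern in isolation
 * (example_* names are hypothetical):
 */
#if 0
static ctrace_t example_hist[CTRACE_HIST_SIZE];
static uint32_t example_cnt;

static void
example_trace_event(void)
{
	uint32_t idx;

	idx = atomic_add_32_ov(&example_cnt, 1) % CTRACE_HIST_SIZE;
	ctrace_record(&example_hist[idx]);	/* clobbers oldest slot */
}
#endif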
3676
3677 static struct rtentry *
3678 rte_alloc(void)
3679 {
3680 if (rte_debug & RTD_DEBUG)
3681 return (rte_alloc_debug());
3682
3683 return ((struct rtentry *)zalloc(rte_zone));
3684 }
3685
3686 static void
3687 rte_free(struct rtentry *p)
3688 {
3689 if (rte_debug & RTD_DEBUG) {
3690 rte_free_debug(p);
3691 return;
3692 }
3693
3694 if (p->rt_refcnt != 0) {
3695 panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);
3696 /* NOTREACHED */
3697 }
3698
3699 zfree(rte_zone, p);
3700 }
3701
3702 static void
3703 rte_if_ref(struct ifnet *ifp, int cnt)
3704 {
3705 struct kev_msg ev_msg;
3706 struct net_event_data ev_data;
3707 uint32_t old;
3708
3709 /* cnt must be a single increment or decrement (+1 or -1) */
3710 if (cnt < -1 || cnt > 1) {
3711 panic("%s: invalid count argument (%d)", __func__, cnt);
3712 /* NOTREACHED */
3713 }
3714 old = atomic_add_32_ov(&ifp->if_route_refcnt, cnt);
3715 if (cnt < 0 && old == 0) {
3716 panic("%s: ifp=%p negative route refcnt!", __func__, ifp);
3717 /* NOTREACHED */
3718 }
3719 /*
3720 * The following is done without first holding the ifnet lock,
3721 * for performance reasons. The relevant ifnet fields, with
3722 * the exception of the if_idle_flags, are never changed
3723 * during the lifetime of the ifnet. The if_idle_flags
3724 * may possibly be modified, so in the event that the value
3725 * may be modified, so in the event that the value
3726 * sending the event anyway. This is harmless as it is just
3727 * a notification to the monitoring agent in user space, and
3728 * it is expected to check via SIOCGIFGETRTREFCNT again anyway.
3729 */
3730 if ((ifp->if_idle_flags & IFRF_IDLE_NOTIFY) && cnt < 0 && old == 1) {
3731 bzero(&ev_msg, sizeof (ev_msg));
3732 bzero(&ev_data, sizeof (ev_data));
3733
3734 ev_msg.vendor_code = KEV_VENDOR_APPLE;
3735 ev_msg.kev_class = KEV_NETWORK_CLASS;
3736 ev_msg.kev_subclass = KEV_DL_SUBCLASS;
3737 ev_msg.event_code = KEV_DL_IF_IDLE_ROUTE_REFCNT;
3738
3739 strlcpy(&ev_data.if_name[0], ifp->if_name, IFNAMSIZ);
3740
3741 ev_data.if_family = ifp->if_family;
3742 ev_data.if_unit = ifp->if_unit;
3743 ev_msg.dv[0].data_length = sizeof (struct net_event_data);
3744 ev_msg.dv[0].data_ptr = &ev_data;
3745
3746 dlil_post_complete_msg(NULL, &ev_msg);
3747 }
3748 }
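/*
 * Note the transition test above: since atomic_add_32_ov() returns the
 * value prior to the update, "cnt < 0 && old == 0" catches an underflow,
 * while "cnt < 0 && old == 1" detects exactly the 1 -> 0 transition,
 * i.e. the last route reference on the interface going away, which is
 * the only point worth notifying the idle-monitoring agent about.
 */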
3749
3750 static inline struct rtentry *
3751 rte_alloc_debug(void)
3752 {
3753 struct rtentry_dbg *rte;
3754
3755 rte = ((struct rtentry_dbg *)zalloc(rte_zone));
3756 if (rte != NULL) {
3757 bzero(rte, sizeof (*rte));
3758 if (rte_debug & RTD_TRACE)
3759 ctrace_record(&rte->rtd_alloc);
3760 rte->rtd_inuse = RTD_INUSE;
3761 }
3762 return ((struct rtentry *)rte);
3763 }
3764
3765 static inline void
3766 rte_free_debug(struct rtentry *p)
3767 {
3768 struct rtentry_dbg *rte = (struct rtentry_dbg *)p;
3769
3770 if (p->rt_refcnt != 0) {
3771 panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);
3772 /* NOTREACHED */
3773 }
3774 if (rte->rtd_inuse == RTD_FREED) {
3775 panic("rte_free: double free rte=%p\n", rte);
3776 /* NOTREACHED */
3777 } else if (rte->rtd_inuse != RTD_INUSE) {
3778 panic("rte_free: corrupted rte=%p\n", rte);
3779 /* NOTREACHED */
3780 }
3781 bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
3782 /* Preserve rt_lock to help catch use-after-free cases */
3783 bzero((caddr_t)p, offsetof(struct rtentry, rt_lock));
3784
3785 rte->rtd_inuse = RTD_FREED;
3786
3787 if (rte_debug & RTD_TRACE)
3788 ctrace_record(&rte->rtd_free);
3789
3790 if (!(rte_debug & RTD_NO_FREE))
3791 zfree(rte_zone, p);
3792 }
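/*
 * The debug allocator pair above catches route memory misuse with a
 * sentinel: rtd_inuse is RTD_INUSE while the entry is live and
 * RTD_FREED afterwards, so a second rte_free() trips the "double free"
 * panic and any other value trips "corrupted". On free, the final
 * contents are saved in rtd_entry_saved for post-mortem inspection and
 * the entry is zeroed only up to rt_lock, which is deliberately
 * preserved to help catch use-after-free on the lock itself.
 */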
3793
3794 void
3795 ctrace_record(ctrace_t *tr)
3796 {
3797 tr->th = current_thread();
3798 bzero(tr->pc, sizeof (tr->pc));
3799 (void) OSBacktrace(tr->pc, CTRACE_STACK_SIZE);
3800 }
3801
3802 void
3803 route_copyout(struct route *dst, const struct route *src, size_t length)
3804 {
3805 /* Copy everything (rt, lle, srcia, flags, dst) from src */
3806 bcopy(src, dst, length);
3807
3808 /* Hold one reference for the local copy of struct route */
3809 if (dst->ro_rt != NULL)
3810 RT_ADDREF(dst->ro_rt);
3811
3812 /* Hold one reference for the local copy of struct lle */
3813 if (dst->ro_lle != NULL)
3814 LLE_ADDREF(dst->ro_lle);
3815
3816 /* Hold one reference for the local copy of struct ifaddr */
3817 if (dst->ro_srcia != NULL)
3818 IFA_ADDREF(dst->ro_srcia);
3819 }
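/*
 * A minimal sketch (hypothetical caller, not from the original source)
 * of the intended copyout/copyin pairing: snapshot a shared struct
 * route into a local copy while holding whatever lock protects the
 * shared one, work on the local copy unlocked, then fold it back and
 * let route_copyin() reconcile the references.
 */
#if 0
static void
example_use_cached_route(struct route *shared_ro)
{
	struct route local_ro;

	route_copyout(&local_ro, shared_ro, sizeof (local_ro));
	/* ... use local_ro.ro_rt without the owner's lock held ... */
	route_copyin(&local_ro, shared_ro, sizeof (local_ro));
}
#endif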
3820
3821 void
3822 route_copyin(struct route *src, struct route *dst, size_t length)
3823 {
3824 /*
3825 * No cached route at the destination?
3826 * Then remove any old references still present
3827 * and copy the entire src route.
3828 */
3829 if (dst->ro_rt == NULL) {
3830 /*
3831 * Ditch the cached link layer reference (dst)
3832 * since we're about to take everything there is in src
3833 */
3834 if (dst->ro_lle != NULL)
3835 LLE_REMREF(dst->ro_lle);
3836 /*
3837 * Ditch the address in the cached copy (dst) since
3838 * we're about to take everything there is in src.
3839 */
3840 if (dst->ro_srcia != NULL)
3841 IFA_REMREF(dst->ro_srcia);
3842 /*
3843 * Copy everything (rt, ro_lle, srcia, flags, dst) from src; the
3844 * references to rt and/or srcia were held at the time
3845 * of storage and are kept intact.
3846 */
3847 bcopy(src, dst, length);
3848 goto done;
3849 }
3850
3851 /*
3852 * We know dst->ro_rt is not NULL here.
3853 * If the src->ro_rt is the same, update ro_lle, srcia and flags
3854 * and ditch the route in the local copy.
3855 */
3856 if (dst->ro_rt == src->ro_rt) {
3857 dst->ro_flags = src->ro_flags;
3858
3859 if (dst->ro_lle != src->ro_lle) {
3860 if (dst->ro_lle != NULL)
3861 LLE_REMREF(dst->ro_lle);
3862 dst->ro_lle = src->ro_lle;
3863 } else if (src->ro_lle != NULL) {
3864 LLE_REMREF(src->ro_lle);
3865 }
3866
3867 if (dst->ro_srcia != src->ro_srcia) {
3868 if (dst->ro_srcia != NULL)
3869 IFA_REMREF(dst->ro_srcia);
3870 dst->ro_srcia = src->ro_srcia;
3871 } else if (src->ro_srcia != NULL) {
3872 IFA_REMREF(src->ro_srcia);
3873 }
3874 rtfree(src->ro_rt);
3875 goto done;
3876 }
3877
3878 /*
3879 * dst's ro_rt is not equal to src's, and src's
3880 * ro_rt is not NULL: remove old references
3881 * if present and copy the entire src route.
3882 */
3883 if (src->ro_rt != NULL) {
3884 rtfree(dst->ro_rt);
3885
3886 if (dst->ro_lle != NULL)
3887 LLE_REMREF(dst->ro_lle);
3888 if (dst->ro_srcia != NULL)
3889 IFA_REMREF(dst->ro_srcia);
3890 bcopy(src, dst, length);
3891 goto done;
3892 }
3893
3894 /*
3895 * Here, dst's cached route is not NULL but src's is.
3896 * Just get rid of any other cached references in src.
3897 */
3898 if (src->ro_srcia != NULL) {
3899 /*
3900 * Ditch src address in the local copy (src) since we're
3901 * not caching the route entry anyway (ro_rt is NULL).
3902 */
3903 IFA_REMREF(src->ro_srcia);
3904 }
3905 if (src->ro_lle != NULL) {
3906 /*
3907 * Ditch the cached lle in the local copy (src) since we're
3908 * not caching the route anyway (ro_rt is NULL).
3909 */
3910 LLE_REMREF(src->ro_lle);
3911 }
3912 done:
3913 /* This function consumes the references on src */
3914 src->ro_lle = NULL;
3915 src->ro_rt = NULL;
3916 src->ro_srcia = NULL;
3917 }
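/*
 * Case summary for route_copyin() (derived from the code above):
 *   1. dst caches no route: take everything from src wholesale.
 *   2. dst and src cache the same route: keep dst's rt, reconcile
 *      lle/srcia/flags, and drop src's now-redundant references.
 *   3. the routes differ and src has one: release dst's references
 *      and copy src wholesale.
 *   4. src has no route but dst does: keep dst and just drop any
 *      stray references src still holds.
 * In all cases src's pointers are cleared on return, since the
 * function consumes src's references.
 */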
3918
3919 /*
3920 * route_to_gwroute will find the gateway route for a given route.
3921 *
3922 * If the route is down, look the route up again.
3923 * If the route goes through a gateway, get the route to the gateway.
3924 * If the gateway route is down, look it up again.
3925 * If the route is set to reject, verify it hasn't expired.
3926 *
3927 * If the returned route is non-NULL, the caller is responsible for
3928 * releasing the reference and unlocking the route.
3929 */
3930 #define senderr(e) { error = (e); goto bad; }
3931 errno_t
3932 route_to_gwroute(const struct sockaddr *net_dest, struct rtentry *hint0,
3933 struct rtentry **out_route)
3934 {
3935 uint64_t timenow;
3936 struct rtentry *rt = hint0, *hint = hint0;
3937 errno_t error = 0;
3938 unsigned int ifindex;
3939 boolean_t gwroute;
3940
3941 *out_route = NULL;
3942
3943 if (rt == NULL)
3944 return (0);
3945
3946 /*
3947 * Next hop determination. Because we may involve the gateway route
3948 * in addition to the original route, locking is rather complicated.
3949 * The general concept is that regardless of whether "rt" ends up
3950 * pointing to the original route or to the gateway route, this
3951 * routine takes an extra reference on it. That extra reference
3952 * is released at the end.
3953 *
3954 * Care must be taken to ensure that the "hint0" route never gets freed
3955 * via rtfree(), since the caller may have stored it inside a struct
3956 * route with a reference held for that placeholder.
3957 */
3958 RT_LOCK_SPIN(rt);
3959 ifindex = rt->rt_ifp->if_index;
3960 RT_ADDREF_LOCKED(rt);
3961 if (!(rt->rt_flags & RTF_UP)) {
3962 RT_REMREF_LOCKED(rt);
3963 RT_UNLOCK(rt);
3964 /* route is down, find a new one */
3965 hint = rt = rtalloc1_scoped((struct sockaddr *)
3966 (size_t)net_dest, 1, 0, ifindex);
3967 if (hint != NULL) {
3968 RT_LOCK_SPIN(rt);
3969 ifindex = rt->rt_ifp->if_index;
3970 } else {
3971 senderr(EHOSTUNREACH);
3972 }
3973 }
3974
3975 /*
3976 * We have a reference to "rt" by now; it will either
3977 * be released or freed at the end of this routine.
3978 */
3979 RT_LOCK_ASSERT_HELD(rt);
3980 if ((gwroute = (rt->rt_flags & RTF_GATEWAY))) {
3981 struct rtentry *gwrt = rt->rt_gwroute;
3982 struct sockaddr_storage ss;
3983 struct sockaddr *gw = (struct sockaddr *)&ss;
3984
3985 VERIFY(rt == hint);
3986 RT_ADDREF_LOCKED(hint);
3987
3988 /* If there's no gateway rt, look it up */
3989 if (gwrt == NULL) {
3990 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
3991 rt->rt_gateway->sa_len));
3992 RT_UNLOCK(rt);
3993 goto lookup;
3994 }
3995 /* Become a regular mutex */
3996 RT_CONVERT_LOCK(rt);
3997
3998 /*
3999 * Take gwrt's lock while holding route's lock;
4000 * this is okay since gwrt never points back
4001 * to "rt", so no lock ordering issues.
4002 */
4003 RT_LOCK_SPIN(gwrt);
4004 if (!(gwrt->rt_flags & RTF_UP)) {
4005 rt->rt_gwroute = NULL;
4006 RT_UNLOCK(gwrt);
4007 bcopy(rt->rt_gateway, gw, MIN(sizeof (ss),
4008 rt->rt_gateway->sa_len));
4009 RT_UNLOCK(rt);
4010 rtfree(gwrt);
4011 lookup:
4012 lck_mtx_lock(rnh_lock);
4013 gwrt = rtalloc1_scoped_locked(gw, 1, 0, ifindex);
4014
4015 RT_LOCK(rt);
4016 /*
4017 * Bail out if the route is down, no route
4018 * to gateway, circular route, or if the
4019 * gateway portion of "rt" has changed.
4020 */
4021 if (!(rt->rt_flags & RTF_UP) || gwrt == NULL ||
4022 gwrt == rt || !equal(gw, rt->rt_gateway)) {
4023 if (gwrt == rt) {
4024 RT_REMREF_LOCKED(gwrt);
4025 gwrt = NULL;
4026 }
4027 VERIFY(rt == hint);
4028 RT_REMREF_LOCKED(hint);
4029 hint = NULL;
4030 RT_UNLOCK(rt);
4031 if (gwrt != NULL)
4032 rtfree_locked(gwrt);
4033 lck_mtx_unlock(rnh_lock);
4034 senderr(EHOSTUNREACH);
4035 }
4036 VERIFY(gwrt != NULL);
4037 /*
4038 * Set gateway route; callee adds ref to gwrt;
4039 * gwrt has an extra ref from rtalloc1() for
4040 * this routine.
4041 */
4042 rt_set_gwroute(rt, rt_key(rt), gwrt);
4043 VERIFY(rt == hint);
4044 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
4045 RT_UNLOCK(rt);
4046 lck_mtx_unlock(rnh_lock);
4047 rt = gwrt;
4048 } else {
4049 RT_ADDREF_LOCKED(gwrt);
4050 RT_UNLOCK(gwrt);
4051 VERIFY(rt == hint);
4052 RT_REMREF_LOCKED(rt); /* hint still holds a refcnt */
4053 RT_UNLOCK(rt);
4054 rt = gwrt;
4055 }
4056 VERIFY(rt == gwrt && rt != hint);
4057
4058 /*
4059 * This is an opportunity to revalidate the parent route's
4060 * rt_gwroute, in case it now points to a dead route entry.
4061 * Parent route won't go away since the clone (hint) holds
4062 * a reference to it. rt == gwrt.
4063 */
4064 RT_LOCK_SPIN(hint);
4065 if ((hint->rt_flags & (RTF_WASCLONED | RTF_UP)) ==
4066 (RTF_WASCLONED | RTF_UP)) {
4067 struct rtentry *prt = hint->rt_parent;
4068 VERIFY(prt != NULL);
4069
4070 RT_CONVERT_LOCK(hint);
4071 RT_ADDREF(prt);
4072 RT_UNLOCK(hint);
4073 rt_revalidate_gwroute(prt, rt);
4074 RT_REMREF(prt);
4075 } else {
4076 RT_UNLOCK(hint);
4077 }
4078
4079 /* Clean up "hint" now; see notes above regarding hint0 */
4080 if (hint == hint0)
4081 RT_REMREF(hint);
4082 else
4083 rtfree(hint);
4084 hint = NULL;
4085
4086 /* rt == gwrt; if it is now down, give up */
4087 RT_LOCK_SPIN(rt);
4088 if (!(rt->rt_flags & RTF_UP)) {
4089 RT_UNLOCK(rt);
4090 senderr(EHOSTUNREACH);
4091 }
4092 }
4093
4094 if (rt->rt_flags & RTF_REJECT) {
4095 VERIFY(rt->rt_expire == 0 || rt->rt_rmx.rmx_expire != 0);
4096 VERIFY(rt->rt_expire != 0 || rt->rt_rmx.rmx_expire == 0);
4097 timenow = net_uptime();
4098 if (rt->rt_expire == 0 || timenow < rt->rt_expire) {
4099 RT_UNLOCK(rt);
4100 senderr(!gwroute ? EHOSTDOWN : EHOSTUNREACH);
4101 }
4102 }
4103
4104 /* Become a regular mutex */
4105 RT_CONVERT_LOCK(rt);
4106
4107 /* Caller is responsible for cleaning up "rt" */
4108 *out_route = rt;
4109 return (0);
4110
4111 bad:
4112 /* Clean up route (either it is "rt" or "gwrt") */
4113 if (rt != NULL) {
4114 RT_LOCK_SPIN(rt);
4115 if (rt == hint0) {
4116 RT_REMREF_LOCKED(rt);
4117 RT_UNLOCK(rt);
4118 } else {
4119 RT_UNLOCK(rt);
4120 rtfree(rt);
4121 }
4122 }
4123 return (error);
4124 }
4125 #undef senderr
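/*
 * A minimal sketch of a route_to_gwroute() caller (hypothetical, not
 * from the original source): resolve the next hop for a cached route,
 * use it, then unlock and release it per the contract above. The
 * returned route is locked (as a full mutex) with a reference held.
 */
#if 0
static errno_t
example_resolve_next_hop(const struct sockaddr *dst, struct rtentry *cached)
{
	struct rtentry *nh = NULL;
	errno_t err;

	err = route_to_gwroute(dst, cached, &nh);
	if (err == 0 && nh != NULL) {
		/* ... transmit via nh->rt_ifp ... */
		RT_UNLOCK(nh);
		rtfree(nh);	/* drop the reference taken for us */
	}
	return (err);
}
#endif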
4126
4127 void
4128 rt_revalidate_gwroute(struct rtentry *rt, struct rtentry *gwrt)
4129 {
4130 VERIFY(gwrt != NULL);
4131
4132 RT_LOCK_SPIN(rt);
4133 if ((rt->rt_flags & (RTF_GATEWAY | RTF_UP)) == (RTF_GATEWAY | RTF_UP) &&
4134 rt->rt_ifp == gwrt->rt_ifp && rt->rt_gateway->sa_family ==
4135 rt_key(gwrt)->sa_family && (rt->rt_gwroute == NULL ||
4136 !(rt->rt_gwroute->rt_flags & RTF_UP))) {
4137 boolean_t isequal;
4138 VERIFY(rt->rt_flags & (RTF_CLONING | RTF_PRCLONING));
4139
4140 if (rt->rt_gateway->sa_family == AF_INET ||
4141 rt->rt_gateway->sa_family == AF_INET6) {
4142 struct sockaddr_storage key_ss, gw_ss;
4143 /*
4144 * We need to compare rt_key and rt_gateway; create
4145 * local copies to get rid of any ifscope association.
4146 */
4147 (void) sa_copy(rt_key(gwrt), &key_ss, NULL);
4148 (void) sa_copy(rt->rt_gateway, &gw_ss, NULL);
4149
4150 isequal = equal(SA(&key_ss), SA(&gw_ss));
4151 } else {
4152 isequal = equal(rt_key(gwrt), rt->rt_gateway);
4153 }
4154
4155 /* If they are the same, update gwrt */
4156 if (isequal) {
4157 RT_UNLOCK(rt);
4158 lck_mtx_lock(rnh_lock);
4159 RT_LOCK(rt);
4160 rt_set_gwroute(rt, rt_key(rt), gwrt);
4161 RT_UNLOCK(rt);
4162 lck_mtx_unlock(rnh_lock);
4163 } else {
4164 RT_UNLOCK(rt);
4165 }
4166 } else {
4167 RT_UNLOCK(rt);
4168 }
4169 }
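/*
 * Note (derived from the checks above): revalidation only rewires
 * rt_gwroute when the parent is an up gateway route on the same ifp
 * and address family whose current gwroute is missing or down, and
 * when gwrt's key actually matches rt's gateway address; for
 * AF_INET/AF_INET6 the comparison is done on local copies with any
 * ifscope association stripped.
 */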
4170
4171 static void
4172 rt_str4(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
4173 {
4174 VERIFY(rt_key(rt)->sa_family == AF_INET);
4175
4176 if (ds != NULL) {
4177 (void) inet_ntop(AF_INET,
4178 &SIN(rt_key(rt))->sin_addr.s_addr, ds, dslen);
4179 if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
4180 SINIFSCOPE(rt_key(rt))->sin_scope_id != IFSCOPE_NONE) {
4181 char scpstr[16];
4182
4183 snprintf(scpstr, sizeof(scpstr), "@%u",
4184 SINIFSCOPE(rt_key(rt))->sin_scope_id);
4185
4186 strlcat(ds, scpstr, dslen);
4187 }
4188 }
4189
4190 if (gs != NULL) {
4191 if (rt->rt_flags & RTF_GATEWAY) {
4192 (void) inet_ntop(AF_INET,
4193 &SIN(rt->rt_gateway)->sin_addr.s_addr, gs, gslen);
4194 } else if (rt->rt_ifp != NULL) {
4195 snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
4196 } else {
4197 snprintf(gs, gslen, "%s", "link");
4198 }
4199 }
4200 }
4201
4202 #if INET6
4203 static void
4204 rt_str6(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
4205 {
4206 VERIFY(rt_key(rt)->sa_family == AF_INET6);
4207
4208 if (ds != NULL) {
4209 (void) inet_ntop(AF_INET6,
4210 &SIN6(rt_key(rt))->sin6_addr, ds, dslen);
4211 if (dslen >= MAX_SCOPE_ADDR_STR_LEN &&
4212 SIN6IFSCOPE(rt_key(rt))->sin6_scope_id != IFSCOPE_NONE) {
4213 char scpstr[16];
4214
4215 snprintf(scpstr, sizeof(scpstr), "@%u",
4216 SIN6IFSCOPE(rt_key(rt))->sin6_scope_id);
4217
4218 strlcat(ds, scpstr, dslen);
4219 }
4220 }
4221
4222 if (gs != NULL) {
4223 if (rt->rt_flags & RTF_GATEWAY) {
4224 (void) inet_ntop(AF_INET6,
4225 &SIN6(rt->rt_gateway)->sin6_addr, gs, gslen);
4226 } else if (rt->rt_ifp != NULL) {
4227 snprintf(gs, gslen, "link#%u", rt->rt_ifp->if_unit);
4228 } else {
4229 snprintf(gs, gslen, "%s", "link");
4230 }
4231 }
4232 }
4233 #endif /* INET6 */
4234
4236 void
4237 rt_str(struct rtentry *rt, char *ds, uint32_t dslen, char *gs, uint32_t gslen)
4238 {
4239 switch (rt_key(rt)->sa_family) {
4240 case AF_INET:
4241 rt_str4(rt, ds, dslen, gs, gslen);
4242 break;
4243 #if INET6
4244 case AF_INET6:
4245 rt_str6(rt, ds, dslen, gs, gslen);
4246 break;
4247 #endif /* INET6 */
4248 default:
4249 if (ds != NULL)
4250 bzero(ds, dslen);
4251 if (gs != NULL)
4252 bzero(gs, gslen);
4253 break;
4254 }
4255 }
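/*
 * A minimal sketch (hypothetical caller, not from the original source)
 * of formatting a route for a log line with rt_str(); sizing both
 * buffers with MAX_SCOPE_ADDR_STR_LEN is an assumption, chosen so the
 * "@scope" suffix can be appended on the destination side.
 */
#if 0
static void
example_log_route(struct rtentry *rt)
{
	char dbuf[MAX_SCOPE_ADDR_STR_LEN];
	char gbuf[MAX_SCOPE_ADDR_STR_LEN];

	rt_str(rt, dbuf, sizeof (dbuf), gbuf, sizeof (gbuf));
	log(LOG_DEBUG, "%s: route %s -> %s\n", __func__, dbuf, gbuf);
}
#endif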
4256
4257 void route_event_init(struct route_event *p_route_ev, struct rtentry *rt,
4258 struct rtentry *gwrt, int route_ev_code)
4259 {
4260 VERIFY(p_route_ev != NULL);
4261 bzero(p_route_ev, sizeof(*p_route_ev));
4262
4263 p_route_ev->rt = rt;
4264 p_route_ev->gwrt = gwrt;
4265 p_route_ev->route_event_code = route_ev_code;
4266 }
4267
4268 static void
4269 route_event_callback(void *arg)
4270 {
4271 struct route_event *p_rt_ev = (struct route_event *)arg;
4272 struct rtentry *rt = p_rt_ev->rt;
4273 eventhandler_tag evtag = p_rt_ev->evtag;
4274 int route_ev_code = p_rt_ev->route_event_code;
4275
4276 if (route_ev_code == ROUTE_EVHDLR_DEREGISTER) {
4277 VERIFY(evtag != NULL);
4278 EVENTHANDLER_DEREGISTER(&rt->rt_evhdlr_ctxt, route_event,
4279 evtag);
4280 rtfree(rt);
4281 return;
4282 }
4283
4284 EVENTHANDLER_INVOKE(&rt->rt_evhdlr_ctxt, route_event, rt_key(rt),
4285 route_ev_code, (struct sockaddr *)&p_rt_ev->rt_addr,
4286 rt->rt_flags);
4287
4288 /* The code enqueuing the route event held a reference */
4289 rtfree(rt);
4290 /* XXX No reference is taken on gwrt */
4291 }
4292
4293 int
4294 route_event_walktree(struct radix_node *rn, void *arg)
4295 {
4296 struct route_event *p_route_ev = (struct route_event *)arg;
4297 struct rtentry *rt = (struct rtentry *)rn;
4298 struct rtentry *gwrt = p_route_ev->rt;
4299
4300 LCK_MTX_ASSERT(rnh_lock, LCK_MTX_ASSERT_OWNED);
4301
4302 RT_LOCK(rt);
4303
4304 /* Return if the entry is pending cleanup */
4305 if (rt->rt_flags & RTPRF_OURS) {
4306 RT_UNLOCK(rt);
4307 return (0);
4308 }
4309
4310 /* Return if it is not an indirect route */
4311 if (!(rt->rt_flags & RTF_GATEWAY)) {
4312 RT_UNLOCK(rt);
4313 return (0);
4314 }
4315
4316 if (rt->rt_gwroute != gwrt) {
4317 RT_UNLOCK(rt);
4318 return (0);
4319 }
4320
4321 route_event_enqueue_nwk_wq_entry(rt, gwrt, p_route_ev->route_event_code,
4322 NULL, TRUE);
4323 RT_UNLOCK(rt);
4324
4325 return (0);
4326 }
4327
4328 struct route_event_nwk_wq_entry
4329 {
4330 struct nwk_wq_entry nwk_wqe;
4331 struct route_event rt_ev_arg;
4332 };
4333
4334 void
4335 route_event_enqueue_nwk_wq_entry(struct rtentry *rt, struct rtentry *gwrt,
4336 uint32_t route_event_code, eventhandler_tag evtag, boolean_t rt_locked)
4337 {
4338 struct route_event_nwk_wq_entry *p_rt_ev = NULL;
4339 struct sockaddr *p_gw_saddr = NULL;
4340
4341 MALLOC(p_rt_ev, struct route_event_nwk_wq_entry *,
4342 sizeof(struct route_event_nwk_wq_entry),
4343 M_NWKWQ, M_WAITOK | M_ZERO);
4344
4345 /*
4346 * If the intent is to de-register, don't take a
4347 * reference; route event registration already took
4348 * one on the route.
4349 */
4350 if (route_event_code != ROUTE_EVHDLR_DEREGISTER) {
4351 /* The reference is released by route_event_callback */
4352 if (rt_locked)
4353 RT_ADDREF_LOCKED(rt);
4354 else
4355 RT_ADDREF(rt);
4356 }
4357
4358 p_rt_ev->rt_ev_arg.rt = rt;
4359 p_rt_ev->rt_ev_arg.gwrt = gwrt;
4360 p_rt_ev->rt_ev_arg.evtag = evtag;
4361
4362 if (gwrt != NULL)
4363 p_gw_saddr = gwrt->rt_gateway;
4364 else
4365 p_gw_saddr = rt->rt_gateway;
4366
4367 VERIFY(p_gw_saddr->sa_len <= sizeof(p_rt_ev->rt_ev_arg.rt_addr));
4368 bcopy(p_gw_saddr, &(p_rt_ev->rt_ev_arg.rt_addr), p_gw_saddr->sa_len);
4369
4370 p_rt_ev->rt_ev_arg.route_event_code = route_event_code;
4371 p_rt_ev->nwk_wqe.func = route_event_callback;
4372 p_rt_ev->nwk_wqe.is_arg_managed = TRUE;
4373 p_rt_ev->nwk_wqe.arg = &p_rt_ev->rt_ev_arg;
4374 nwk_wq_enqueue((struct nwk_wq_entry*)p_rt_ev);
4375 }
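/*
 * Flow summary (derived from the code above): an event source holding
 * rnh_lock typically walks the tree with route_event_walktree(), which
 * matches live indirect routes whose rt_gwroute is the affected
 * gateway and enqueues one work item per match; nwk_wq later runs
 * route_event_callback() on each item, which either invokes the
 * registered handlers or, for ROUTE_EVHDLR_DEREGISTER, tears the
 * registration down. The rtentry reference taken at enqueue time is
 * dropped in the callback on both paths.
 */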
4376
4377 const char *
4378 route_event2str(int route_event)
4379 {
4380 const char *route_event_str = "ROUTE_EVENT_UNKNOWN";
4381 switch (route_event) {
4382 case ROUTE_STATUS_UPDATE:
4383 route_event_str = "ROUTE_STATUS_UPDATE";
4384 break;
4385 case ROUTE_ENTRY_REFRESH:
4386 route_event_str = "ROUTE_ENTRY_REFRESH";
4387 break;
4388 case ROUTE_ENTRY_DELETED:
4389 route_event_str = "ROUTE_ENTRY_DELETED";
4390 break;
4391 case ROUTE_LLENTRY_RESOLVED:
4392 route_event_str = "ROUTE_LLENTRY_RESOLVED";
4393 break;
4394 case ROUTE_LLENTRY_UNREACH:
4395 route_event_str = "ROUTE_LLENTRY_UNREACH";
4396 break;
4397 case ROUTE_LLENTRY_CHANGED:
4398 route_event_str = "ROUTE_LLENTRY_CHANGED";
4399 break;
4400 case ROUTE_LLENTRY_STALE:
4401 route_event_str = "ROUTE_LLENTRY_STALE";
4402 break;
4403 case ROUTE_LLENTRY_TIMEDOUT:
4404 route_event_str = "ROUTE_LLENTRY_TIMEDOUT";
4405 break;
4406 case ROUTE_LLENTRY_DELETED:
4407 route_event_str = "ROUTE_LLENTRY_DELETED";
4408 break;
4409 case ROUTE_LLENTRY_EXPIRED:
4410 route_event_str = "ROUTE_LLENTRY_EXPIRED";
4411 break;
4412 case ROUTE_LLENTRY_PROBED:
4413 route_event_str = "ROUTE_LLENTRY_PROBED";
4414 break;
4415 case ROUTE_EVHDLR_DEREGISTER:
4416 route_event_str = "ROUTE_EVHDLR_DEREGISTER";
4417 break;
4418 default:
4419 /* Init'd to ROUTE_EVENT_UNKNOWN */
4420 break;
4421 }
4422 return route_event_str;
4423 }
4424
4425 int
4426 route_op_entitlement_check(struct socket *so,
4427 kauth_cred_t cred,
4428 int route_op_type,
4429 boolean_t allow_root)
4430 {
4431 if (so != NULL) {
4432 if (route_op_type == ROUTE_OP_READ) {
4433 /*
4434 * If needed, this can later be extended for more
4435 * granular entitlements, returning a bitmask of
4436 * allowed accesses.
4437 */
4438 if (soopt_cred_check(so, PRIV_NET_RESTRICTED_ROUTE_NC_READ,
4439 allow_root, false) == 0)
4440 return (0);
4441 else
4442 return (-1);
4443 }
4444 } else if (cred != NULL) {
4445 uid_t uid = kauth_cred_getuid(cred);
4446
4447 /* uid is 0 for root */
4448 if (uid != 0 || !allow_root) {
4449 if (route_op_type == ROUTE_OP_READ) {
4450 if (priv_check_cred(cred,
4451 PRIV_NET_RESTRICTED_ROUTE_NC_READ, 0) == 0)
4452 return (0);
4453 else
4454 return (-1);
4455 }
4456 }
4457 }
4458 return (-1);
4459 }
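/*
 * A minimal sketch of a caller (hypothetical, not from the original
 * source): gate a routing-socket read on the entitlement check;
 * allow_root is passed through to the underlying credential check,
 * and 0 means access is permitted.
 */
#if 0
static boolean_t
example_can_read_rt(struct socket *so)
{
	return (route_op_entitlement_check(so, NULL, ROUTE_OP_READ,
	    TRUE) == 0);
}
#endif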