/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1980, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)route.c	8.2 (Berkeley) 11/15/93
 * $FreeBSD: src/sys/net/route.c,v 1.59.2.3 2001/07/29 19:18:02 ume Exp $
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/domain.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <kern/lock.h>
#include <kern/zalloc.h>

#include <net/if.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/ip_mroute.h>

#include <net/if_dl.h>

#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <pexpert/pexpert.h>
#define	equal(a1, a2) (bcmp((caddr_t)(a1), (caddr_t)(a2), (a1)->sa_len) == 0)
#define	SA(p) ((struct sockaddr *)(p))

extern void kdp_set_gateway_mac(void *gatewaymac);

extern struct domain routedomain;
struct route_cb route_cb;
__private_extern__ struct rtstat rtstat = { 0, 0, 0, 0, 0 };
struct radix_node_head *rt_tables[AF_MAX+1];

lck_mtx_t	*rt_mtx;	/*### global routing tables mutex for now */
lck_attr_t	*rt_mtx_attr;
lck_grp_t	*rt_mtx_grp;
lck_grp_attr_t	*rt_mtx_grp_attr;

lck_mtx_t	*route_domain_mtx;	/*### global routing tables mutex for now */
int		rttrash = 0;		/* routes not in table but not freed */

static unsigned int rte_debug;

/* Possible flags for rte_debug */
#define	RTD_DEBUG	0x1	/* enable or disable rtentry debug facility */
#define	RTD_TRACE	0x2	/* trace alloc, free and refcnt */
#define	RTD_NO_FREE	0x4	/* don't free (good to catch corruptions) */

static struct zone *rte_zone;			/* special zone for rtentry */
#define	RTE_ZONE_MAX		65536		/* maximum elements in zone */
#define	RTE_ZONE_NAME		"rtentry"	/* name of rtentry zone */

#define	RTD_INUSE		0xFEEDFACE	/* entry is in use */
#define	RTD_FREED		0xDEADBEEF	/* entry is freed */

#define	RTD_TRSTACK_SIZE	8		/* depth of stack trace */
#define	RTD_REFHIST_SIZE	4		/* refcnt history size */
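/*
 * Editorial note (not part of the original source): the rtentry debug
 * facility below is driven by the "rte_debug" boot argument parsed in
 * route_init(); for example, booting with rte_debug=0x3 enables the
 * RTD_DEBUG sanity checks plus the RTD_TRACE alloc/free/refcnt backtraces
 * that are recorded in struct rtentry_dbg.
 */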
/*
 * Debug variant of rtentry structure.
 */
struct rtentry_dbg {
	struct rtentry	rtd_entry;			/* rtentry */
	struct rtentry	rtd_entry_saved;		/* saved rtentry */
	u_int32_t	rtd_inuse;			/* in use pattern */
	u_int16_t	rtd_refhold_cnt;		/* # of rtref */
	u_int16_t	rtd_refrele_cnt;		/* # of rtunref */
	/*
	 * Thread and PC stack trace up to RTD_TRSTACK_SIZE
	 * deep during alloc and free.
	 */
	struct thread	*rtd_alloc_thread;
	void		*rtd_alloc_stk_pc[RTD_TRSTACK_SIZE];
	struct thread	*rtd_free_thread;
	void		*rtd_free_stk_pc[RTD_TRSTACK_SIZE];
	/*
	 * Circular lists of rtref and rtunref callers.
	 */
	u_int16_t	rtd_refhold_next;
	u_int16_t	rtd_refrele_next;
	struct {
		struct thread	*th;			/* thread taking the ref */
		void		*pc[RTD_TRSTACK_SIZE];	/* PC stack trace */
	} rtd_refhold[RTD_REFHIST_SIZE];
	struct {
		struct thread	*th;			/* thread dropping the ref */
		void		*pc[RTD_TRSTACK_SIZE];	/* PC stack trace */
	} rtd_refrele[RTD_REFHIST_SIZE];
	/*
	 * Trash list linkage.
	 */
	TAILQ_ENTRY(rtentry_dbg) rtd_trash_link;
};

/* List of trash route entries protected by rt_mtx */
static TAILQ_HEAD(, rtentry_dbg) rttrash_head;
static struct rtentry *rte_alloc(void);
static void rte_free(struct rtentry *);
static inline struct rtentry *rte_alloc_debug(void);
static inline void rte_free_debug(struct rtentry *);
static void rt_maskedcopy(struct sockaddr *,
	    struct sockaddr *, struct sockaddr *);
static void rtable_init(void **);
static inline void rtref_audit(struct rtentry_dbg *);
static inline void rtunref_audit(struct rtentry_dbg *);

__private_extern__ u_long route_generation = 0;
extern int use_routegenid;
static void
rtable_init(void **table)
{
	struct domain *dom;

	for (dom = domains; dom; dom = dom->dom_next)
		if (dom->dom_rtattach)
			dom->dom_rtattach(&table[dom->dom_family],
			    dom->dom_rtoffset);
}
void
route_init(void)
{
	int size;

	PE_parse_boot_arg("rte_debug", &rte_debug);
	if (rte_debug != 0)
		rte_debug |= RTD_DEBUG;

	rt_mtx_grp_attr = lck_grp_attr_alloc_init();

	rt_mtx_grp = lck_grp_alloc_init("route", rt_mtx_grp_attr);

	rt_mtx_attr = lck_attr_alloc_init();

	if ((rt_mtx = lck_mtx_alloc_init(rt_mtx_grp, rt_mtx_attr)) == NULL) {
		printf("route_init: can't alloc rt_mtx\n");
		return;
	}

	lck_mtx_lock(rt_mtx);
	rn_init();	/* initialize all zeroes, all ones, mask table */
	lck_mtx_unlock(rt_mtx);
	rtable_init((void **)rt_tables);
	route_domain_mtx = routedomain.dom_mtx;

	if (rte_debug & RTD_DEBUG)
		size = sizeof (struct rtentry_dbg);
	else
		size = sizeof (struct rtentry);

	rte_zone = zinit(size, RTE_ZONE_MAX * size, 0, RTE_ZONE_NAME);
	if (rte_zone == NULL)
		panic("route_init: failed allocating rte_zone");

	zone_change(rte_zone, Z_EXPAND, TRUE);

	TAILQ_INIT(&rttrash_head);
}
/*
 * Packet routing routines.
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign(ro, 0UL);
}

void
rtalloc_ign_locked(struct route *ro, u_long ignore)
{
	struct rtentry *rt;

	if ((rt = ro->ro_rt) != NULL) {
		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
			return;
		/* XXX - We are probably always at splnet here already. */
		rtfree_locked(rt);
		ro->ro_rt = NULL;
	}
	ro->ro_rt = rtalloc1_locked(&ro->ro_dst, 1, ignore);
	if (ro->ro_rt)
		ro->ro_rt->generation_id = route_generation;
}

void
rtalloc_ign(struct route *ro, u_long ignore)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	rtalloc_ign_locked(ro, ignore);
	lck_mtx_unlock(rt_mtx);
}
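/*
 * Illustrative sketch (editorial, not part of the original source): a
 * typical caller embeds a struct route, fills in ro_dst, and lets
 * rtalloc_ign() populate ro_rt.  The destination value used here is
 * hypothetical.
 *
 *	struct route ro;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&ro.ro_dst;
 *
 *	bzero(&ro, sizeof (ro));
 *	sin->sin_len = sizeof (struct sockaddr_in);
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr = some_destination;	// hypothetical address
 *	rtalloc_ign(&ro, RTF_PRCLONING);
 *	if (ro.ro_rt != NULL) {
 *		... use ro.ro_rt, then rtfree(ro.ro_rt) when finished ...
 *	}
 */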
/*
 * Look up the route that matches the address given
 * Or, at least try.. Create a cloned route if needed.
 */
struct rtentry *
rtalloc1_locked(struct sockaddr *dst, int report, u_long ignflags)
{
	struct radix_node_head *rnh = rt_tables[dst->sa_family];
	struct rtentry *rt;
	struct radix_node *rn;
	struct rtentry *newrt = 0;
	struct rt_addrinfo info;
	u_long nflags;
	int  err = 0, msgtype = RTM_MISS;
	/*
	 * Look up the address in the table for that Address Family
	 */
	if (rnh && (rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
	    ((rn->rn_flags & RNF_ROOT) == 0)) {
		/*
		 * If we find it and it's not the root node, then
		 * get a reference on the rtentry associated.
		 */
		newrt = rt = (struct rtentry *)rn;
		nflags = rt->rt_flags & ~ignflags;
		if (report && (nflags & (RTF_CLONING | RTF_PRCLONING))) {
			/*
			 * We are apparently adding (report = 0 in delete).
			 * If it requires that it be cloned, do so.
			 * (This implies it wasn't a HOST route.)
			 */
			err = rtrequest_locked(RTM_RESOLVE, dst, SA(0),
			    SA(0), 0, &newrt);
			if (err) {
				/*
				 * If the cloning didn't succeed, maybe
				 * what we have will do. Return that.
				 */
				newrt = rt;
				rtref(rt);
				goto miss;
			}
			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
				/*
				 * If the new route specifies it be
				 * externally resolved, then go do that.
				 */
				msgtype = RTM_RESOLVE;
				goto miss;
			}
		} else
			rtref(rt);
	} else {
		/*
		 * Either we hit the root or couldn't find any match,
		 * Which basically means
		 * "caint get there frm here"
		 */
		rtstat.rts_unreach++;
	miss:	if (report) {
			/*
			 * If required, report the failure to the supervising
			 * authorities.
			 * For a delete, this is not an error. (report == 0)
			 */
			bzero((caddr_t)&info, sizeof(info));
			info.rti_info[RTAX_DST] = dst;
			rt_missmsg(msgtype, &info, 0, err);
		}
	}
	return (newrt);
}
struct rtentry *
rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
{
	struct rtentry *entry;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	entry = rtalloc1_locked(dst, report, ignflags);
	lck_mtx_unlock(rt_mtx);
	return (entry);
}
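/*
 * Editorial note (not from the original source): rtalloc1() hands back the
 * matching rtentry with its reference count bumped; the caller owns that
 * reference and is expected to drop it with rtfree() when done, e.g.:
 *
 *	struct rtentry *rt = rtalloc1(dst, 1, 0UL);
 *	if (rt != NULL) {
 *		... examine rt ...
 *		rtfree(rt);
 *	}
 */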
/*
 * Remove a reference count from an rtentry.
 * If the count gets low enough, take it out of the routing table
 */
void
rtfree_locked(struct rtentry *rt)
{
	/*
	 * find the tree for that address family
	 * Note: in the case of igmp packets, there might not be an rnh
	 */
	struct radix_node_head *rnh;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	/* See 3582620 - We hit this during the transition from funnels to locks */
	if (rt == NULL) {
		printf("rtfree - rt is NULL\n");
		return;
	}

	rnh = rt_tables[rt_key(rt)->sa_family];

	/*
	 * decrement the reference count by one and if it reaches 0,
	 * and there is a close function defined, call the close function
	 */
	rtunref(rt);
	if (rt->rt_refcnt > 0)
		return;

	if ((rt->rt_flags & RTF_TRACKREFS) != 0)
		printf("%s rt(%p)->rt_refcnt(%d), caller=%p\n", __FUNCTION__,
		    rt, rt->rt_refcnt, __builtin_return_address(0));

	/*
	 * On last reference give the "close method" a chance to cleanup
	 * private state.  This also permits (for IPv4 and IPv6) a chance
	 * to decide if the routing table entry should be purged immediately
	 * or at a later time.  When an immediate purge is to happen the
	 * close routine typically issues RTM_DELETE which clears the RTF_UP
	 * flag on the entry so that the code below reclaims the storage.
	 */
	if (rnh && rnh->rnh_close && rt->rt_refcnt == 0)
		rnh->rnh_close((struct radix_node *)rt, rnh);

	/*
	 * If we are no longer "up" (and ref == 0)
	 * then we can free the resources associated
	 * with the route.
	 */
	if (!(rt->rt_flags & RTF_UP)) {
		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic("rtfree: entry still in the radix tree");
		/*
		 * the rtentry must have been removed from the routing table
		 * so it is represented in rttrash.. remove that now.
		 */
		(void) OSDecrementAtomic((SInt32 *)&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_REMOVE(&rttrash_head, (struct rtentry_dbg *)rt,
			    rtd_trash_link);
		}

		if (rt->rt_refcnt < 0) {
			printf("rtfree: %p not freed (neg refs) cnt=%d\n",
			    rt, rt->rt_refcnt);
			return;
		}

		/*
		 * release references on items we hold them on..
		 * e.g. other routes and ifaddrs.
		 */
		if (rt->rt_parent)
			rtfree_locked(rt->rt_parent);

		if (rt->rt_ifa) {
			ifafree(rt->rt_ifa);
			rt->rt_ifa = NULL;
		}

		/*
		 * The key is separately alloc'd so free it (see rt_setgate()).
		 * This also frees the gateway, as they are always malloc'd
		 * together.
		 */
		R_Free(rt_key(rt));

		/*
		 * and the rtentry itself of course
		 */
		rte_free(rt);
	}
}

void
rtfree(struct rtentry *rt)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	rtfree_locked(rt);
	lck_mtx_unlock(rt_mtx);
}
/*
 * Decrements the refcount but does not free the route when
 * the refcount reaches zero. Unless you have really good reason,
 * use rtfree not rtunref.
 */
void
rtunref(struct rtentry *p)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (p->rt_refcnt <= 0)
		panic("rtunref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);

	if (rte_debug & RTD_DEBUG)
		rtunref_audit((struct rtentry_dbg *)p);

	p->rt_refcnt--;
}

static inline void
rtunref_audit(struct rtentry_dbg *rte)
{
	if (rte->rtd_inuse != RTD_INUSE)
		panic("rtunref: on freed rte=%p\n", rte);

	rte->rtd_refrele_cnt++;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_refrele[rte->rtd_refrele_next].th = current_thread();
		bzero(rte->rtd_refrele[rte->rtd_refrele_next].pc,
		    sizeof (rte->rtd_refrele[rte->rtd_refrele_next].pc));
		(void) OSBacktrace(rte->rtd_refrele[rte->rtd_refrele_next].pc,
		    RTD_TRSTACK_SIZE);

		rte->rtd_refrele_next =
		    (rte->rtd_refrele_next + 1) % RTD_REFHIST_SIZE;
	}
}
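/*
 * Editorial sketch (not in the original source): rtunref() is for callers
 * that already hold rt_mtx and will dispose of the entry themselves;
 * ordinary callers drop their reference with rtfree():
 *
 *	lck_mtx_lock(rt_mtx);
 *	rtunref(rt);		// just drop the count; entry not reclaimed
 *	lck_mtx_unlock(rt_mtx);
 *
 *	rtfree(rt);		// drop the count and reclaim if it hits zero
 */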
/*
 * Add a reference to an rtentry.
 */
void
rtref(struct rtentry *p)
{
	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (p->rt_refcnt < 0)
		panic("rtref: bad refcnt %d for rt=%p\n", p->rt_refcnt, p);

	if (rte_debug & RTD_DEBUG)
		rtref_audit((struct rtentry_dbg *)p);

	p->rt_refcnt++;

	if ((p->rt_flags & RTF_TRACKREFS) != 0)
		printf("%s rt(%p)->rt_refcnt(%d), caller=%p\n", __FUNCTION__,
		    p, p->rt_refcnt, __builtin_return_address(0));
}

static inline void
rtref_audit(struct rtentry_dbg *rte)
{
	if (rte->rtd_inuse != RTD_INUSE)
		panic("rtref_audit: on freed rte=%p\n", rte);

	rte->rtd_refhold_cnt++;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_refhold[rte->rtd_refhold_next].th = current_thread();
		bzero(rte->rtd_refhold[rte->rtd_refhold_next].pc,
		    sizeof (rte->rtd_refhold[rte->rtd_refhold_next].pc));
		(void) OSBacktrace(rte->rtd_refhold[rte->rtd_refhold_next].pc,
		    RTD_TRSTACK_SIZE);

		rte->rtd_refhold_next =
		    (rte->rtd_refhold_next + 1) % RTD_REFHIST_SIZE;
	}
}
static void
rtsetifa(struct rtentry *rt, struct ifaddr *ifa)
{
	if (rt->rt_ifa == ifa)
		return;

	/* Release the old ifa */
	if (rt->rt_ifa)
		ifafree(rt->rt_ifa);

	/* Set rt_ifa */
	rt->rt_ifa = ifa;

	/* Take a reference to the ifa */
	if (rt->rt_ifa)
		ifaref(rt->rt_ifa);
}

void
ifafree(struct ifaddr *ifa)
{
	int oldval;

	oldval = OSAddAtomic(-1, (SInt32 *)&ifa->ifa_refcnt);

	if (oldval == 0) {
		if ((ifa->ifa_debug & IFA_ATTACHED) != 0) {
			panic("ifa attached to ifp is being freed\n");
		}
		FREE(ifa, M_IFADDR);
	}
}

void
ifaref(struct ifaddr *ifa)
{
	if (OSAddAtomic(1, (SInt32 *)&ifa->ifa_refcnt) == 0xffffffff)
		panic("ifaref - reference count rolled over!");
}
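/*
 * Editorial sketch (not part of the original source): any long-lived copy
 * of an ifaddr pointer is expected to be bracketed by an ifaref()/ifafree()
 * pair.  The structure and field names below are hypothetical.
 *
 *	ifaref(ifa);			// taking a long-lived pointer
 *	some_pcb->cached_ifa = ifa;
 *	...
 *	ifafree(some_pcb->cached_ifa);	// dropping it again
 *	some_pcb->cached_ifa = NULL;
 */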
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * N.B.: must be called at splnet
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
	struct sockaddr *netmask, int flags, struct sockaddr *src,
	struct rtentry **rtp)
{
	struct rtentry *rt;
	int error = 0;
	short *stat = 0;
	struct rt_addrinfo info;
	struct ifaddr *ifa = NULL;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);

	/* verify the gateway is directly reachable */
	if ((ifa = ifa_ifwithnet(gateway)) == 0) {
		error = ENETUNREACH;
		goto out;
	}

	rt = rtalloc1_locked(dst, 0, RTF_CLONING | RTF_PRCLONING);
	/*
	 * If the redirect isn't from our current router for this dst,
	 * it's either old or wrong.  If it redirects us to ourselves,
	 * we have a routing loop, perhaps as a result of an interface
	 * going down recently.
	 */
	if (!(flags & RTF_DONE) && rt &&
	    (!equal(src, rt->rt_gateway) || !equal(rt->rt_ifa->ifa_addr,
	    ifa->ifa_addr))) {
		error = EINVAL;
	} else {
		ifafree(ifa);
		if ((ifa = ifa_ifwithaddr(gateway))) {
			error = EHOSTUNREACH;
		}
	}

	if (ifa) {
		ifafree(ifa);
		ifa = NULL;
	}

	if (error)
		goto done;
	/*
	 * Create a new entry if we just got back a wildcard entry
	 * or the lookup failed.  This is necessary for hosts
	 * which use routing redirects generated by smart gateways
	 * to dynamically build the routing tables.
	 */
	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
		goto create;
	/*
	 * Don't listen to the redirect if it's
	 * for a route to an interface.
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
			/*
			 * Changing from route to net => route to host.
			 * Create new route, rather than smashing route to net.
			 */
		create:
			flags |= RTF_GATEWAY | RTF_DYNAMIC;
			error = rtrequest_locked((int)RTM_ADD, dst, gateway,
			    netmask, flags,
			    (struct rtentry **)0);
			stat = &rtstat.rts_dynamic;
		} else {
			/*
			 * Smash the current notion of the gateway to
			 * this destination.  Should check about netmask!!!
			 */
			rt->rt_flags |= RTF_MODIFIED;
			flags |= RTF_MODIFIED;
			stat = &rtstat.rts_newgateway;
			/*
			 * add the key and gateway (in one malloc'd chunk).
			 */
			rt_setgate(rt, rt_key(rt), gateway);
		}
	} else
		error = EHOSTUNREACH;
done:
	if (rt) {
		if (rtp && !error)
			*rtp = rt;
		else
			rtfree_locked(rt);
	}
out:
	if (error)
		rtstat.rts_badredirect++;
	else if (stat != NULL)
		(*stat)++;
	bzero((caddr_t)&info, sizeof(info));
	info.rti_info[RTAX_DST] = dst;
	info.rti_info[RTAX_GATEWAY] = gateway;
	info.rti_info[RTAX_NETMASK] = netmask;
	info.rti_info[RTAX_AUTHOR] = src;
	rt_missmsg(RTM_REDIRECT, &info, flags, error);
	lck_mtx_unlock(rt_mtx);
}
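/*
 * Illustrative sketch (not from this file): a protocol's redirect handler,
 * e.g. ICMP, typically invokes rtredirect() roughly like this; the sockaddr
 * names are schematic and assumed to be filled in by the caller:
 *
 *	rtredirect((struct sockaddr *)&sdst,	// destination being redirected
 *	    (struct sockaddr *)&sgw,		// new gateway to use
 *	    NULL, RTF_GATEWAY | RTF_HOST,	// no netmask: host redirect
 *	    (struct sockaddr *)&ssrc,		// router that sent the redirect
 *	    NULL);
 */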
/*
 * Routing table ioctl interface.
 */
int
rtioctl(int req, caddr_t data, struct proc *p)
{
#if INET && MROUTING
	return mrt_ioctl(req, data);
#else
	return ENXIO;
#endif
}

struct ifaddr *
ifa_ifwithroute(
	int flags,
	const struct sockaddr *dst,
	const struct sockaddr *gateway)
{
	struct ifaddr *ifa;

	lck_mtx_lock(rt_mtx);
	ifa = ifa_ifwithroute_locked(flags, dst, gateway);
	lck_mtx_unlock(rt_mtx);

	return (ifa);
}
struct ifaddr *
ifa_ifwithroute_locked(
	int flags,
	const struct sockaddr *dst,
	const struct sockaddr *gateway)
{
	struct ifaddr *ifa = NULL;
	struct rtentry *rt = NULL;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (!(flags & RTF_GATEWAY)) {
		/*
		 * If we are adding a route to an interface,
		 * and the interface is a pt to pt link
		 * we should search for the destination
		 * as our clue to the interface.  Otherwise
		 * we can use the local address.
		 */
		if (flags & RTF_HOST) {
			ifa = ifa_ifwithdstaddr(dst);
		}
		if (ifa == NULL)
			ifa = ifa_ifwithaddr(gateway);
	} else {
		/*
		 * If we are adding a route to a remote net
		 * or host, the gateway may still be on the
		 * other end of a pt to pt link.
		 */
		ifa = ifa_ifwithdstaddr(gateway);
	}
	if (ifa == NULL)
		ifa = ifa_ifwithnet(gateway);
	if (ifa == NULL) {
		/* Workaround to avoid gcc warning regarding const variable */
		rt = rtalloc1_locked((struct sockaddr *)(size_t)dst, 0, 0UL);
		if (rt != NULL) {
			ifa = rt->rt_ifa;
			if (ifa != NULL)
				ifaref(ifa);
			rtunref(rt);
			rt = NULL;
		}
	}
	if (ifa != NULL && ifa->ifa_addr->sa_family != dst->sa_family) {
		struct ifaddr *newifa;
		/* Callee adds reference to newifa upon success */
		newifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
		if (newifa != NULL) {
			ifafree(ifa);
			ifa = newifa;
		}
	}
	/*
	 * If we are adding a gateway, it is quite possible that the
	 * routing table has a static entry in place for the gateway,
	 * that may not agree with info garnered from the interfaces.
	 * The routing table should carry more precedence than the
	 * interfaces in this matter.  Must be careful not to stomp
	 * on new entries from rtinit, hence (ifa->ifa_addr != gateway).
	 */
	if ((ifa == NULL ||
	    !equal(ifa->ifa_addr, (struct sockaddr *)(size_t)gateway)) &&
	    (rt = rtalloc1_locked((struct sockaddr *)(size_t)gateway,
	    0, 0UL)) != NULL) {
		if (ifa != NULL)
			ifafree(ifa);
		ifa = rt->rt_ifa;
		if (ifa != NULL)
			ifaref(ifa);
		rtunref(rt);
	}

	return (ifa);
}
#define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
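/*
 * Worked example (editorial): with a 4-byte long, ROUNDUP(7) == 8 and
 * ROUNDUP(16) == 16, i.e. sockaddr lengths are rounded up to the next
 * multiple of sizeof(long); ROUNDUP(0) yields sizeof(long) so that a
 * zero-length sockaddr still reserves one aligned slot.
 */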
static int rt_fixdelete __P((struct radix_node *, void *));
static int rt_fixchange __P((struct radix_node *, void *));

struct rtfc_arg {
	struct rtentry *rt0;
	struct radix_node_head *rnh;
};
/*
 * Do appropriate manipulations of a routing tree given
 * all the bits of info needed
 */
int
rtrequest_locked(
	int req,
	struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct rtentry **ret_nrt)
{
	int error = 0;
	struct rtentry *rt;
	struct radix_node *rn;
	struct radix_node_head *rnh;
	struct ifaddr *ifa = NULL;
	struct sockaddr *ndst;
#define senderr(x) { error = x ; goto bad; }

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);
	/*
	 * Find the correct routing tree to use for this Address Family
	 */
	if ((rnh = rt_tables[dst->sa_family]) == 0)
		senderr(ESRCH);
	/*
	 * If we are adding a host route then we don't want to put
	 * a netmask in the tree
	 */
	if (flags & RTF_HOST)
		netmask = 0;
	switch (req) {
	case RTM_DELETE:
		/*
		 * Remove the item from the tree and return it.
		 * Complain if it is not there and do no more processing.
		 */
		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
			senderr(ESRCH);
		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
			panic ("rtrequest delete");
		rt = (struct rtentry *)rn;

		/*
		 * Take an extra reference to handle the deletion of a route
		 * entry whose reference count is already 0; e.g. an expiring
		 * cloned route entry or an entry that was added to the table
		 * with 0 reference. If the caller is interested in this route,
		 * we will return it with the reference intact. Otherwise we
		 * will decrement the reference via rtfree_locked() and then
		 * possibly deallocate it.
		 */
		rtref(rt);
		rt->rt_flags &= ~RTF_UP;

		/*
		 * Now search what's left of the subtree for any cloned
		 * routes which might have been formed from this node.
		 */
		if ((rt->rt_flags & (RTF_CLONING | RTF_PRCLONING)) &&
		    rt_mask(rt)) {
			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
			    rt_fixdelete, rt);
		}

		/*
		 * Remove any external references we may have.
		 * This might result in another rtentry being freed if
		 * we held its last reference.
		 */
		if (rt->rt_gwroute) {
			rt = rt->rt_gwroute; rtfree_locked(rt);
			(rt = (struct rtentry *)rn)->rt_gwroute = 0;
		}

		/*
		 * give the protocol a chance to keep things in sync.
		 */
		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
		ifa = NULL;

		/*
		 * one more rtentry floating around that is not
		 * linked to the routing table.
		 */
		(void) OSIncrementAtomic((SInt32 *)&rttrash);
		if (rte_debug & RTD_DEBUG) {
			TAILQ_INSERT_TAIL(&rttrash_head,
			    (struct rtentry_dbg *)rt, rtd_trash_link);
		}

		/*
		 * If the caller wants it, then it can have it,
		 * but it's up to it to free the rtentry as we won't be
		 * doing it.
		 */
		if (ret_nrt != NULL) {
			/* Return the route to caller with reference intact */
			*ret_nrt = rt;
		} else {
			/* Dereference or deallocate the route */
			rtfree_locked(rt);
		}
		break;

	case RTM_RESOLVE:
		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
			senderr(EINVAL);
		ifa = rt->rt_ifa;
		ifaref(ifa);
		flags = rt->rt_flags &
		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
		flags |= RTF_WASCLONED;
		gateway = rt->rt_gateway;
		if ((netmask = rt->rt_genmask) == 0)
			flags |= RTF_HOST;
		goto makeroute;

	case RTM_ADD:
		if ((flags & RTF_GATEWAY) && !gateway)
			panic("rtrequest: GATEWAY but no gateway");

		if ((ifa = ifa_ifwithroute_locked(flags, dst, gateway)) == 0)
			senderr(ENETUNREACH);

	makeroute:
		if ((rt = rte_alloc()) == NULL)
			senderr(ENOBUFS);
		Bzero(rt, sizeof(*rt));
		rt->rt_flags = RTF_UP | flags;
		/*
		 * Add the gateway. Possibly re-malloc-ing the storage for it
		 * also add the rt_gwroute if possible.
		 */
		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
			ifafree(ifa);
			rte_free(rt);
			senderr(error);
		}

		/*
		 * point to the (possibly newly malloc'd) dest address.
		 */
		ndst = rt_key(rt);

		/*
		 * make sure it contains the value we want (masked if needed).
		 */
		if (netmask) {
			rt_maskedcopy(dst, ndst, netmask);
		} else
			Bcopy(dst, ndst, dst->sa_len);

		/*
		 * Note that we now have a reference to the ifa.
		 * This moved from below so that rnh->rnh_addaddr() can
		 * examine the ifa and  ifa->ifa_ifp if it so desires.
		 */
		rtsetifa(rt, ifa);
		rt->rt_ifp = rt->rt_ifa->ifa_ifp;

		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */

		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
		    rnh, rt->rt_nodes);
		if (rn == 0) {
			struct rtentry *rt2;
			/*
			 * Uh-oh, we already have one of these in the tree.
			 * We do a special hack: if the route that's already
			 * there was generated by the protocol-cloning
			 * mechanism, then we just blow it away and retry
			 * the insertion of the new one.
			 */
			rt2 = rtalloc1_locked(dst, 0,
			    RTF_CLONING | RTF_PRCLONING);
			if (rt2 && rt2->rt_parent) {
				rtrequest_locked(RTM_DELETE,
				    (struct sockaddr *)rt_key(rt2),
				    rt2->rt_gateway,
				    rt_mask(rt2), rt2->rt_flags, 0);
				rtfree_locked(rt2);
				rn = rnh->rnh_addaddr((caddr_t)ndst,
				    (caddr_t)netmask, rnh, rt->rt_nodes);
			} else if (rt2) {
				/* undo the extra ref we got */
				rtfree_locked(rt2);
			}
		}

		/*
		 * If it still failed to go into the tree,
		 * then un-make it (this should be a function)
		 */
		if (rn == 0) {
			if (rt->rt_gwroute)
				rtfree_locked(rt->rt_gwroute);
			if (rt->rt_ifa) {
				ifafree(rt->rt_ifa);
			}
			R_Free(rt_key(rt));
			rte_free(rt);
			senderr(EEXIST);
		}

		rt->rt_parent = 0;

		/*
		 * If we got here from RESOLVE, then we are cloning
		 * so clone the rest, and note that we
		 * are a clone (and increment the parent's references)
		 */
		if (req == RTM_RESOLVE) {
			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
				rt->rt_parent = (*ret_nrt);
				rtref(*ret_nrt);
			}
		}

		/*
		 * if this protocol has something to add to this then
		 * allow it to do that as well.
		 */
		if (ifa->ifa_rtrequest)
			ifa->ifa_rtrequest(req, rt, SA(ret_nrt ? *ret_nrt : 0));
		ifafree(ifa);
		ifa = NULL;

		/*
		 * We repeat the same procedure from rt_setgate() here because
		 * it doesn't fire when we call it there because the node
		 * hasn't been added to the tree yet.
		 */
		if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
			struct rtfc_arg arg;
			arg.rnh = rnh;
			arg.rt0 = rt;
			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
			    rt_fixchange, &arg);
		}

		/*
		 * actually return a resultant rtentry and
		 * give the caller a single reference.
		 */
		if (ret_nrt) {
			*ret_nrt = rt;
			rtref(rt);
		}
		break;
	}
bad:
	if (ifa)
		ifafree(ifa);
	return (error);
}

int
rtrequest(
	int req,
	struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct rtentry **ret_nrt)
{
	int error;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	error = rtrequest_locked(req, dst, gateway, netmask, flags, ret_nrt);
	lck_mtx_unlock(rt_mtx);
	return (error);
}
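/*
 * Illustrative sketch (editorial, not from the original source): adding a
 * static host route through the public wrapper looks roughly like this;
 * the sockaddr values are assumed to be filled in by the caller:
 *
 *	struct rtentry *nrt = NULL;
 *	int err = rtrequest(RTM_ADD, (struct sockaddr *)&dst_sa,
 *	    (struct sockaddr *)&gw_sa, NULL,
 *	    RTF_HOST | RTF_GATEWAY | RTF_STATIC, &nrt);
 *	if (err == 0 && nrt != NULL)
 *		rtfree(nrt);	// drop the reference rtrequest() handed back
 */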
/*
 * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
 * (i.e., the routes related to it by the operation of cloning).  This
 * routine is iterated over all potential former-child-routes by way of
 * rnh->rnh_walktree_from() above, and those that actually are children of
 * the late parent (passed in as VP here) are themselves deleted.
 */
static int
rt_fixdelete(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtentry *rt0 = vp;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (rt->rt_parent == rt0 &&
	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
		return rtrequest_locked(RTM_DELETE, rt_key(rt),
		    (struct sockaddr *)0, rt_mask(rt),
		    rt->rt_flags, (struct rtentry **)0);
	}
	return 0;
}
/*
 * This routine is called from rt_setgate() to do the analogous thing for
 * adds and changes.  There is the added complication in this case of a
 * middle insert; i.e., insertion of a new network route between an older
 * network route and (cloned) host routes.  For this reason, a simple check
 * of rt->rt_parent is insufficient; each candidate route must be tested
 * against the (mask, value) of the new route (passed as before in vp)
 * to see if the new route matches it.
 *
 * XXX - it may be possible to do fixdelete() for changes and reserve this
 * routine just for adds.  I'm not sure why I thought it was necessary to do
 * changes this way.
 */
#ifdef DEBUG
static int rtfcdebug = 0;
#endif

static int
rt_fixchange(struct radix_node *rn, void *vp)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct rtfc_arg *ap = vp;
	struct rtentry *rt0 = ap->rt0;
	struct radix_node_head *rnh = ap->rnh;
	u_char *xk1, *xm1, *xk2, *xmp;
	int i, len, mlen;

#ifdef DEBUG
	if (rtfcdebug)
		printf("rt_fixchange: rt %p, rt0 %p\n", rt, rt0);
#endif

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (!rt->rt_parent ||
	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING | RTF_PRCLONING))) {
#ifdef DEBUG
		if (rtfcdebug) printf("no parent or pinned\n");
#endif
		return 0;
	}

	if (rt->rt_parent == rt0) {
#ifdef DEBUG
		if (rtfcdebug) printf("parent match\n");
#endif
		return rtrequest_locked(RTM_DELETE, rt_key(rt),
		    (struct sockaddr *)0, rt_mask(rt),
		    rt->rt_flags, (struct rtentry **)0);
	}

	/*
	 * There probably is a function somewhere which does this...
	 * if not, there should be.
	 */
	len = imin(((struct sockaddr *)rt_key(rt0))->sa_len,
	    ((struct sockaddr *)rt_key(rt))->sa_len);

	xk1 = (u_char *)rt_key(rt0);
	xm1 = (u_char *)rt_mask(rt0);
	xk2 = (u_char *)rt_key(rt);

	/* avoid applying a less specific route */
	xmp = (u_char *)rt_mask(rt->rt_parent);
	mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len;
	if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) {
#ifdef DEBUG
		if (rtfcdebug)
			printf("rt_fixchange: inserting a less "
			    "specific route\n");
#endif
		return 0;
	}
	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++) {
		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i]) {
#ifdef DEBUG
			if (rtfcdebug)
				printf("rt_fixchange: inserting a less "
				    "specific route\n");
#endif
			return 0;
		}
	}

	for (i = rnh->rnh_treetop->rn_offset; i < len; i++) {
		if ((xk2[i] & xm1[i]) != xk1[i]) {
#ifdef DEBUG
			if (rtfcdebug) printf("no match\n");
#endif
			return 0;
		}
	}

	/*
	 * OK, this node is a clone, and matches the node currently being
	 * changed/added under the node's mask.  So, get rid of it.
	 */
#ifdef DEBUG
	if (rtfcdebug) printf("deleting\n");
#endif
	return rtrequest_locked(RTM_DELETE, rt_key(rt), (struct sockaddr *)0,
	    rt_mask(rt), rt->rt_flags, (struct rtentry **)0);
}
int
rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
{
	caddr_t new, old;
	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
	struct rtentry *rt = rt0;
	struct radix_node_head *rnh = rt_tables[dst->sa_family];
	/*
	 * A host route with the destination equal to the gateway
	 * will interfere with keeping LLINFO in the routing
	 * table, so disallow it.
	 */

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_OWNED);

	if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
	    (RTF_HOST|RTF_GATEWAY)) &&
	    (dst->sa_len == gate->sa_len) &&
	    (bcmp(dst, gate, dst->sa_len) == 0)) {
		/*
		 * The route might already exist if this is an RTM_CHANGE
		 * or a routing redirect, so try to delete it.
		 */
		if (rt_key(rt0))
			rtrequest_locked(RTM_DELETE, (struct sockaddr *)rt_key(rt0),
			    rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0);
		return EADDRNOTAVAIL;
	}

	/*
	 * Both dst and gateway are stored in the same malloc'd chunk
	 * (If I ever get my hands on....)
	 * if we need to malloc a new chunk, then keep the old one around
	 * till we don't need it any more.
	 */
	if (rt->rt_gateway == 0 || glen > ROUNDUP(rt->rt_gateway->sa_len)) {
		old = (caddr_t)rt_key(rt);
		R_Malloc(new, caddr_t, dlen + glen);
		if (new == 0)
			return ENOBUFS;
		rt->rt_nodes->rn_key = new;
	} else {
		/*
		 * otherwise just overwrite the old one
		 */
		new = rt->rt_nodes->rn_key;
		old = 0;
	}

	/*
	 * copy the new gateway value into the memory chunk
	 */
	Bcopy(gate, (rt->rt_gateway = (struct sockaddr *)(new + dlen)), glen);

	/*
	 * if we are replacing the chunk (or it's new) we need to
	 * replace the dst as well
	 */
	if (old) {
		Bcopy(dst, new, dlen);
		R_Free(old);
	}

	/*
	 * If there is already a gwroute, it's now almost definitely wrong
	 * so drop it.
	 */
	if (rt->rt_gwroute) {
		rt = rt->rt_gwroute; rtfree_locked(rt);
		rt = rt0; rt->rt_gwroute = 0;
	}
	/*
	 * Cloning loop avoidance:
	 * In the presence of protocol-cloning and bad configuration,
	 * it is possible to get stuck in bottomless mutual recursion
	 * (rtrequest rt_setgate rtalloc1).  We avoid this by not allowing
	 * protocol-cloning to operate for gateways (which is probably the
	 * correct choice anyway), and avoid the resulting reference loops
	 * by disallowing any route to run through itself as a gateway.
	 * This is obviously mandatory when we get rt->rt_output().
	 */
	if (rt->rt_flags & RTF_GATEWAY) {
		rt->rt_gwroute = rtalloc1_locked(gate, 1, RTF_PRCLONING);
		if (rt->rt_gwroute == rt) {
			rtfree_locked(rt->rt_gwroute);
			rt->rt_gwroute = 0;
			return EDQUOT; /* failure */
		}
		/* Tell the kernel debugger about the new default gateway */
		if ((AF_INET == rt->rt_gateway->sa_family) &&
		    rt->rt_gwroute && rt->rt_gwroute->rt_gateway &&
		    (AF_LINK == rt->rt_gwroute->rt_gateway->sa_family)) {
			kdp_set_gateway_mac(((struct sockaddr_dl *)
			    rt0->rt_gwroute->rt_gateway)->sdl_data);
		}
	}

	/*
	 * This isn't going to do anything useful for host routes, so
	 * don't bother.  Also make sure we have a reasonable mask
	 * (we don't yet have one during adds).
	 */
	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
		struct rtfc_arg arg;
		arg.rnh = rnh;
		arg.rt0 = rt;
		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
		    rt_fixchange, &arg);
	}

	return 0;
}
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst,
	struct sockaddr *netmask)
{
	u_char *cp1 = (u_char *)src;
	u_char *cp2 = (u_char *)dst;
	u_char *cp3 = (u_char *)netmask;
	u_char *cplim = cp2 + *cp3;
	u_char *cplim2 = cp2 + *cp1;

	*cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */
	cp3 += 2;
	if (cplim > cplim2)
		cplim = cplim2;
	while (cp2 < cplim)
		*cp2++ = *cp1++ & *cp3++;
	if (cp2 < cplim2)
		bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2));
}
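/*
 * Worked example (editorial): copying a sockaddr_in for 192.168.5.77
 * through a 255.255.255.0 netmask with rt_maskedcopy() yields 192.168.5.0
 * in dst, with any bytes beyond the netmask's sa_len zeroed.  This is how
 * network routes get their host bits cleared before insertion.
 */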
/*
 * Set up a routing table entry, normally
 * for an interface.
 */
int
rtinit(struct ifaddr *ifa, int cmd, int flags)
{
	int error;

	lck_mtx_assert(rt_mtx, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_lock(rt_mtx);
	error = rtinit_locked(ifa, cmd, flags);
	lck_mtx_unlock(rt_mtx);
	return (error);
}

int
rtinit_locked(struct ifaddr *ifa, int cmd, int flags)
{
	struct rtentry *rt;
	struct sockaddr *dst;
	struct sockaddr *deldst;
	struct mbuf *m = 0;
	struct rtentry *nrt = 0;
	int error;

	dst = flags & RTF_HOST ? ifa->ifa_dstaddr : ifa->ifa_addr;
	/*
	 * If it's a delete, check that if it exists, it's on the correct
	 * interface or we might scrub a route to another ifa which would
	 * be confusing at best and possibly worse.
	 */
	if (cmd == RTM_DELETE) {
		/*
		 * It's a delete, so it should already exist..
		 * If it's a net, mask off the host bits
		 * (Assuming we have a mask)
		 */
		if ((flags & RTF_HOST) == 0 && ifa->ifa_netmask) {
			m = m_get(M_DONTWAIT, MT_SONAME);
			if (m == NULL) {
				return (ENOBUFS);
			}
			deldst = mtod(m, struct sockaddr *);
			rt_maskedcopy(dst, deldst, ifa->ifa_netmask);
			dst = deldst;
		}
		/*
		 * Get an rtentry that is in the routing tree and
		 * contains the correct info. (if this fails, can't get there).
		 * We set "report" to FALSE so that if it doesn't exist,
		 * it doesn't report an error or clone a route, etc. etc.
		 */
		rt = rtalloc1_locked(dst, 0, 0UL);
		if (rt) {
			/*
			 * Ok so we found the rtentry. it has an extra reference
			 * for us at this stage. we won't need that so
			 * lop that off now.
			 */
			rtunref(rt);
			if (rt->rt_ifa != ifa) {
				/*
				 * If the interface in the rtentry doesn't match
				 * the interface we are using, then we don't
				 * want to delete it, so return an error.
				 * This seems to be the only point of
				 * this whole RTM_DELETE clause.
				 */
				if (m)
					(void) m_free(m);
				return (flags & RTF_HOST ? EHOSTUNREACH
							: ENETUNREACH);
			}
		}
		/* XXX */
#if 0
		else {
			/*
			 * One would think that as we are deleting, and we know
			 * it doesn't exist, we could just return at this point
			 * with an "ELSE" clause, but apparently not..
			 */
			lck_mtx_unlock(rt_mtx);
			return (flags & RTF_HOST ? EHOSTUNREACH
						: ENETUNREACH);
		}
#endif
	}
	/*
	 * Do the actual request
	 */
	error = rtrequest_locked(cmd, dst, ifa->ifa_addr, ifa->ifa_netmask,
	    flags | ifa->ifa_flags, &nrt);
	if (m)
		(void) m_free(m);
	/*
	 * If we are deleting, and we found an entry, then
	 * it's been removed from the tree.. now throw it away.
	 */
	if (cmd == RTM_DELETE && error == 0 && (rt = nrt)) {
		/*
		 * notify any listening routing agents of the change
		 */
		rt_newaddrmsg(cmd, ifa, error, nrt);
		if (use_routegenid)
			route_generation++;
		rtfree_locked(rt);
	}

	/*
	 * We are adding, and we have a returned routing entry.
	 * We need to sanity check the result.
	 */
	if (cmd == RTM_ADD && error == 0 && (rt = nrt)) {
		/*
		 * If it came back with an unexpected interface, then it must
		 * have already existed or something. (XXX)
		 */
		if (rt->rt_ifa != ifa) {
			if (!(rt->rt_ifa->ifa_ifp->if_flags &
			    (IFF_POINTOPOINT|IFF_LOOPBACK)))
				printf("rtinit: wrong ifa (%p) was (%p)\n",
				    ifa, rt->rt_ifa);
			/*
			 * Ask that the protocol in question
			 * remove anything it has associated with
			 * this route and ifaddr.
			 */
			if (rt->rt_ifa->ifa_rtrequest)
				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, SA(0));
			/*
			 * Set the route's ifa.
			 */
			rtsetifa(rt, ifa);
			/*
			 * And substitute in references to the ifaddr
			 * we are adding.
			 */
			rt->rt_ifp = ifa->ifa_ifp;
			rt->rt_rmx.rmx_mtu = ifa->ifa_ifp->if_mtu;	/*XXX*/
			/*
			 * Now ask the protocol to check if it needs
			 * any special processing in its new form.
			 */
			if (ifa->ifa_rtrequest)
				ifa->ifa_rtrequest(RTM_ADD, rt, SA(0));
		}
		/*
		 * notify any listening routing agents of the change
		 */
		rt_newaddrmsg(cmd, ifa, error, nrt);
		if (use_routegenid)
			route_generation++;
		/*
		 * We just wanted to add it; we don't actually need a
		 * reference.  This will result in a route that's added
		 * to the routing table without a reference count.  The
		 * RTM_DELETE code will do the necessary step to adjust
		 * the reference count at deletion time.
		 */
		rtunref(rt);
	}
	return (error);
}
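/*
 * Illustrative sketch (editorial, not from the original source): protocols
 * typically call rtinit() when an interface address is configured or torn
 * down; the flag combinations below are schematic examples only:
 *
 *	rtinit(ifa, RTM_ADD, RTF_UP | RTF_HOST);	// install host route
 *	...
 *	rtinit(ifa, RTM_DELETE, RTF_HOST);		// remove it again
 */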
static struct rtentry *
rte_alloc(void)
{
	if (rte_debug & RTD_DEBUG)
		return (rte_alloc_debug());

	return ((struct rtentry *)zalloc(rte_zone));
}

static void
rte_free(struct rtentry *p)
{
	if (rte_debug & RTD_DEBUG) {
		rte_free_debug(p);
		return;
	}

	if (p->rt_refcnt != 0)
		panic("rte_free: rte=%p refcnt=%d non-zero\n", p, p->rt_refcnt);

	zfree(rte_zone, p);
}

static inline struct rtentry *
rte_alloc_debug(void)
{
	struct rtentry_dbg *rte;

	rte = ((struct rtentry_dbg *)zalloc(rte_zone));
	if (rte != NULL) {
		bzero(rte, sizeof (*rte));
		if (rte_debug & RTD_TRACE) {
			rte->rtd_alloc_thread = current_thread();
			(void) OSBacktrace(rte->rtd_alloc_stk_pc,
			    RTD_TRSTACK_SIZE);
		}
		rte->rtd_inuse = RTD_INUSE;
	}
	return ((struct rtentry *)rte);
}

static inline void
rte_free_debug(struct rtentry *p)
{
	struct rtentry_dbg *rte = (struct rtentry_dbg *)p;

	if (p->rt_refcnt != 0)
		panic("rte_free: rte=%p refcnt=%d\n", p, p->rt_refcnt);

	if (rte->rtd_inuse == RTD_FREED)
		panic("rte_free: double free rte=%p\n", rte);
	else if (rte->rtd_inuse != RTD_INUSE)
		panic("rte_free: corrupted rte=%p\n", rte);

	bcopy((caddr_t)p, (caddr_t)&rte->rtd_entry_saved, sizeof (*p));
	bzero((caddr_t)p, sizeof (*p));

	rte->rtd_inuse = RTD_FREED;

	if (rte_debug & RTD_TRACE) {
		rte->rtd_free_thread = current_thread();
		(void) OSBacktrace(rte->rtd_free_stk_pc, RTD_TRSTACK_SIZE);
	}

	if (!(rte_debug & RTD_NO_FREE))
		zfree(rte_zone, p);
}