2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/kernel.h>
67 #include <sys/sysctl.h>
69 #include <sys/malloc.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/syslog.h>
76 #include <kern/lock.h>
79 #include <net/route.h>
80 #include <net/raw_cb.h>
81 #include <netinet/in.h>
83 #include <machine/spl.h>
85 extern struct rtstat rtstat
;
86 extern u_long route_generation
;
87 extern int use_routegenid
;
88 extern int check_routeselfref
;
90 MALLOC_DEFINE(M_RTABLE
, "routetbl", "routing tables");
92 extern lck_mtx_t
*rt_mtx
;
93 static struct sockaddr route_dst
= { 2, PF_ROUTE
, { 0, } };
94 static struct sockaddr route_src
= { 2, PF_ROUTE
, { 0, } };
95 static struct sockaddr sa_zero
= { sizeof(sa_zero
), AF_INET
, { 0, } };
96 static struct sockproto route_proto
= { PF_ROUTE
, 0 };
102 struct sysctl_req
*w_req
;
106 rt_msg1(int, struct rt_addrinfo
*);
107 static int rt_msg2(int, struct rt_addrinfo
*, caddr_t
, struct walkarg
*);
108 static int rt_xaddrs(caddr_t
, caddr_t
, struct rt_addrinfo
*);
109 static int sysctl_dumpentry(struct radix_node
*rn
, void *vw
);
110 static int sysctl_iflist(int af
, struct walkarg
*w
);
111 static int sysctl_iflist2(int af
, struct walkarg
*w
);
112 static int route_output(struct mbuf
*, struct socket
*);
113 static void rt_setmetrics(u_long
, struct rt_metrics
*, struct rt_metrics
*);
114 static void rt_setif(struct rtentry
*, struct sockaddr
*, struct sockaddr
*,
115 struct sockaddr
*, unsigned int);
/* SIN(sa): view a generic sockaddr pointer as a struct sockaddr_in pointer; the value is passed through a size_t cast first. */
117 #define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
119 /* Sleazy use of local variables throughout file, warning!!!! */
/*
 * Each name below expands to one slot of info.rti_info[], indexed by the
 * matching RTAX_* constant.  The expansion refers to a variable literally
 * named `info`, so these macros are only usable where such a variable
 * (holding an rti_info array) is in scope.
 */
120 #define dst info.rti_info[RTAX_DST]
121 #define gate info.rti_info[RTAX_GATEWAY]
122 #define netmask info.rti_info[RTAX_NETMASK]
123 #define genmask info.rti_info[RTAX_GENMASK]
124 #define ifpaddr info.rti_info[RTAX_IFP]
125 #define ifaaddr info.rti_info[RTAX_IFA]
126 #define brdaddr info.rti_info[RTAX_BRD]
129 * It really doesn't make any sense at all for this code to share much
130 * with raw_usrreq.c, since its functionality is so restricted. XXX
/*
 * rts_abort: abort request for a routing socket.
 * Passes `so` to raw_usrreqs.pru_abort and stores the result in `error`.
 */
133 rts_abort(struct socket
*so
)
137 error
= raw_usrreqs
.pru_abort(so
);
141 /* pru_accept is EOPNOTSUPP */
144 rts_attach(struct socket
*so
, int proto
, __unused
struct proc
*p
)
149 if (sotorawcb(so
) != 0)
150 return EISCONN
; /* XXX panic? */
151 MALLOC(rp
, struct rawcb
*, sizeof *rp
, M_PCB
, M_WAITOK
); /* XXX */
154 bzero(rp
, sizeof *rp
);
157 * The splnet() is necessary to block protocols from sending
158 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
159 * this PCB is extant but incompletely initialized.
160 * Probably we should try to do more of this work beforehand and
163 so
->so_pcb
= (caddr_t
)rp
;
164 error
= raw_attach(so
, proto
); /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
169 so
->so_flags
|= SOF_PCBCLEARING
;
173 switch(rp
->rcb_proto
.sp_protocol
) {
174 //####LD route_cb needs looking
179 route_cb
.ip6_count
++;
182 route_cb
.ipx_count
++;
188 rp
->rcb_faddr
= &route_src
;
189 route_cb
.any_count
++;
190 /* the socket is already locked when we enter rts_attach */
192 so
->so_options
|= SO_USELOOPBACK
;
/*
 * rts_bind: bind request for a routing socket.
 * Forwards the socket, address and proc unchanged to raw_usrreqs.pru_bind
 * and stores the result in `error`.
 */
197 rts_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
200 error
= raw_usrreqs
.pru_bind(so
, nam
, p
); /* xxx just EINVAL */
/*
 * rts_connect: connect request for a routing socket.
 * Forwards the socket, address and proc unchanged to raw_usrreqs.pru_connect
 * and stores the result in `error`.
 */
205 rts_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
208 error
= raw_usrreqs
.pru_connect(so
, nam
, p
); /* XXX just EINVAL */
212 /* pru_connect2 is EOPNOTSUPP */
213 /* pru_control is EOPNOTSUPP */
/*
 * rts_detach: detach request for a routing socket.
 * Fetches the raw control block with sotorawcb(so), decrements counters in
 * route_cb, then hands the socket to raw_usrreqs.pru_detach.
 */
216 rts_detach(struct socket
*so
)
218 struct rawcb
*rp
= sotorawcb(so
);
/* The per-protocol counter to drop is selected by the control block's protocol number. */
222 switch(rp
->rcb_proto
.sp_protocol
) {
227 route_cb
.ip6_count
--;
230 route_cb
.ipx_count
--;
/* any_count is decremented as well; rts_attach increments the same counter. */
236 route_cb
.any_count
--;
238 error
= raw_usrreqs
.pru_detach(so
);
/*
 * rts_disconnect: disconnect request for a routing socket.
 * Passes `so` to raw_usrreqs.pru_disconnect and stores the result in `error`.
 */
243 rts_disconnect(struct socket
*so
)
246 error
= raw_usrreqs
.pru_disconnect(so
);
250 /* pru_listen is EOPNOTSUPP */
/*
 * rts_peeraddr: peer-address request for a routing socket.
 * Forwards `so` and the `nam` out-pointer to raw_usrreqs.pru_peeraddr and
 * stores the result in `error`.
 */
253 rts_peeraddr(struct socket
*so
, struct sockaddr
**nam
)
256 error
= raw_usrreqs
.pru_peeraddr(so
, nam
);
260 /* pru_rcvd is EOPNOTSUPP */
261 /* pru_rcvoob is EOPNOTSUPP */
/*
 * rts_send: send request for a routing socket.
 * All six arguments are forwarded unchanged, in the same order, to
 * raw_usrreqs.pru_send; the result is stored in `error`.
 */
264 rts_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
,
265 struct mbuf
*control
, struct proc
*p
)
268 error
= raw_usrreqs
.pru_send(so
, flags
, m
, nam
, control
, p
);
272 /* pru_sense is null */
/*
 * rts_shutdown: shutdown request for a routing socket.
 * Passes `so` to raw_usrreqs.pru_shutdown and stores the result in `error`.
 */
275 rts_shutdown(struct socket
*so
)
278 error
= raw_usrreqs
.pru_shutdown(so
);
/*
 * rts_sockaddr: local-address request for a routing socket.
 * Forwards `so` and the `nam` out-pointer to raw_usrreqs.pru_sockaddr and
 * stores the result in `error`.
 */
283 rts_sockaddr(struct socket
*so
, struct sockaddr
**nam
)
286 error
= raw_usrreqs
.pru_sockaddr(so
, nam
);
290 static struct pr_usrreqs route_usrreqs
= {
291 rts_abort
, pru_accept_notsupp
, rts_attach
, rts_bind
,
292 rts_connect
, pru_connect2_notsupp
, pru_control_notsupp
,
293 rts_detach
, rts_disconnect
, pru_listen_notsupp
, rts_peeraddr
,
294 pru_rcvd_notsupp
, pru_rcvoob_notsupp
, rts_send
, pru_sense_null
,
295 rts_shutdown
, rts_sockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
300 route_output(struct mbuf
*m
, struct socket
*so
)
302 struct rt_msghdr
*rtm
= NULL
;
303 struct rtentry
*rt
= NULL
;
304 struct rtentry
*saved_nrt
= NULL
;
305 struct radix_node_head
*rnh
;
306 struct rt_addrinfo info
;
308 struct ifnet
*ifp
= NULL
;
310 struct proc
*curproc
= current_proc();
312 struct sockaddr_in dst_in
, gate_in
;
313 int sendonlytoself
= 0;
314 unsigned int ifscope
= IFSCOPE_NONE
;
316 #define senderr(e) { error = e; goto flush;}
318 ((m
->m_len
< sizeof(long)) && (m
= m_pullup(m
, sizeof(long))) == 0))
320 if ((m
->m_flags
& M_PKTHDR
) == 0)
321 panic("route_output");
323 /* Unlock the socket (but keep a reference); it won't be accessed until raw_input appends to it. */
324 socket_unlock(so
, 0);
325 lck_mtx_lock(rt_mtx
);
327 len
= m
->m_pkthdr
.len
;
328 if (len
< sizeof(*rtm
) ||
329 len
!= mtod(m
, struct rt_msghdr
*)->rtm_msglen
) {
333 R_Malloc(rtm
, struct rt_msghdr
*, len
);
338 m_copydata(m
, 0, len
, (caddr_t
)rtm
);
339 if (rtm
->rtm_version
!= RTM_VERSION
) {
341 senderr(EPROTONOSUPPORT
);
345 * Silent version of RTM_GET for Reachability APIs. We may change
346 * all RTM_GETs to be silent in the future, so this is private for now.
348 if (rtm
->rtm_type
== RTM_GET_SILENT
) {
349 if ((so
->so_options
& SO_USELOOPBACK
) == 0)
352 rtm
->rtm_type
= RTM_GET
;
356 * Perform permission checking, only privileged sockets
357 * may perform operations other than RTM_GET
359 if (rtm
->rtm_type
!= RTM_GET
&& (so
->so_state
& SS_PRIV
) == 0) {
364 rtm
->rtm_pid
= proc_selfpid();
365 info
.rti_addrs
= rtm
->rtm_addrs
;
366 if (rt_xaddrs((caddr_t
)(rtm
+ 1), len
+ (caddr_t
)rtm
, &info
)) {
370 if (dst
== NULL
|| (dst
->sa_family
>= AF_MAX
) ||
371 (gate
!= NULL
&& (gate
->sa_family
>= AF_MAX
))) {
375 if (dst
->sa_family
== AF_INET
&& dst
->sa_len
!= sizeof (dst_in
)) {
376 /* At minimum, we need up to sin_addr */
377 if (dst
->sa_len
< offsetof(struct sockaddr_in
, sin_zero
))
379 bzero(&dst_in
, sizeof (dst_in
));
380 dst_in
.sin_len
= sizeof (dst_in
);
381 dst_in
.sin_family
= AF_INET
;
382 dst_in
.sin_port
= SIN(dst
)->sin_port
;
383 dst_in
.sin_addr
= SIN(dst
)->sin_addr
;
384 dst
= (struct sockaddr
*)&dst_in
;
388 gate
->sa_family
== AF_INET
&& gate
->sa_len
!= sizeof (gate_in
)) {
389 /* At minimum, we need up to sin_addr */
390 if (gate
->sa_len
< offsetof(struct sockaddr_in
, sin_zero
))
392 bzero(&gate_in
, sizeof (gate_in
));
393 gate_in
.sin_len
= sizeof (gate_in
);
394 gate_in
.sin_family
= AF_INET
;
395 gate_in
.sin_port
= SIN(gate
)->sin_port
;
396 gate_in
.sin_addr
= SIN(gate
)->sin_addr
;
397 gate
= (struct sockaddr
*)&gate_in
;
401 struct radix_node
*t
;
402 t
= rn_addmask((caddr_t
)genmask
, 0, 1);
403 if (t
&& Bcmp(genmask
, t
->rn_key
, *(u_char
*)genmask
) == 0)
404 genmask
= (struct sockaddr
*)(t
->rn_key
);
410 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
412 if (rtm
->rtm_flags
& RTF_IFSCOPE
) {
413 /* Scoped routing is for AF_INET only */
414 if (dst
->sa_family
!= AF_INET
)
416 ifscope
= rtm
->rtm_index
;
419 switch (rtm
->rtm_type
) {
426 /* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954)
427 * AOL is adding a circular route ("10.0.1.1/32 10.0.1.1") when establishing its ppp tunnel
428 * to the AP BaseStation by removing the default gateway and replacing it with their tunnel entry point.
429 * There is no apparent reason to add this route as there is a valid 10.0.1.1/24 route to the BS.
430 * That circular route was ignored on previous version of MacOS X because of a routing bug
431 * corrected with the merge to FreeBSD4.4 (a route generated from an RTF_CLONING route had the RTF_WASCLONED
432 * flag set but did not have a reference to the parent route) and that entry was left in the RT. This workaround is
433 * made in order to provide binary compatibility with AOL.
434 * If we catch a process adding a circular route with a /32 from the routing socket, we error it out instead of
435 * confusing the routing table with a wrong route to the previous default gateway
438 #define satosinaddr(sa) (((struct sockaddr_in *)sa)->sin_addr.s_addr)
440 if (check_routeselfref
&& (dst
&& dst
->sa_family
== AF_INET
) &&
441 (netmask
&& satosinaddr(netmask
) == INADDR_BROADCAST
) &&
442 (gate
&& satosinaddr(dst
) == satosinaddr(gate
))) {
443 log(LOG_WARNING
, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n",
444 (ntohl(satosinaddr(gate
)>>24))&0xff,
445 (ntohl(satosinaddr(gate
)>>16))&0xff,
446 (ntohl(satosinaddr(gate
)>>8))&0xff,
447 (ntohl(satosinaddr(gate
)))&0xff);
453 error
= rtrequest_scoped_locked(RTM_ADD
, dst
, gate
,
454 netmask
, rtm
->rtm_flags
, &saved_nrt
, ifscope
);
455 if (error
== 0 && saved_nrt
) {
458 * If the route request specified an interface with
459 * IFA and/or IFP, we set the requested interface on
460 * the route with rt_setif. It would be much better
461 * to do this inside rtrequest, but that would
462 * require passing the desired interface, in some
463 * form, to rtrequest. Since rtrequest is called in
464 * so many places (roughly 40 in our source), adding
465 * a parameter is too much for us to swallow; this is
466 * something for the FreeBSD developers to tackle.
467 * Instead, we let rtrequest compute whatever
468 * interface it wants, then come in behind it and
469 * stick in the interface that we really want. This
470 * works reasonably well except when rtrequest can't
471 * figure out what interface to use (with
472 * ifa_withroute) and returns ENETUNREACH. Ideally
473 * it shouldn't matter if rtrequest can't figure out
474 * the interface if we're going to explicitly set it
475 * ourselves anyway. But practically we can't
476 * recover here because rtrequest will not do any of
477 * the work necessary to add the route if it can't
478 * find an interface. As long as there is a default
479 * route that leads to some interface, rtrequest will
480 * find an interface, so this problem should be
481 * rarely encountered.
485 rt_setif(saved_nrt
, ifpaddr
, ifaaddr
, gate
,
488 rt_setmetrics(rtm
->rtm_inits
,
489 &rtm
->rtm_rmx
, &saved_nrt
->rt_rmx
);
490 saved_nrt
->rt_rmx
.rmx_locks
&= ~(rtm
->rtm_inits
);
491 saved_nrt
->rt_rmx
.rmx_locks
|=
492 (rtm
->rtm_inits
& rtm
->rtm_rmx
.rmx_locks
);
493 saved_nrt
->rt_genmask
= genmask
;
499 error
= rtrequest_scoped_locked(RTM_DELETE
, dst
,
500 gate
, netmask
, rtm
->rtm_flags
, &saved_nrt
, ifscope
);
510 if ((rnh
= rt_tables
[dst
->sa_family
]) == NULL
)
511 senderr(EAFNOSUPPORT
);
514 * Lookup the best match based on the key-mask pair;
515 * callee adds a reference and checks for root node.
517 rt
= rt_lookup(TRUE
, dst
, netmask
, rnh
, ifscope
);
521 switch(rtm
->rtm_type
) {
527 gate
= rt
->rt_gateway
;
528 netmask
= rt_mask(rt
);
529 genmask
= rt
->rt_genmask
;
530 if (rtm
->rtm_addrs
& (RTA_IFP
| RTA_IFA
)) {
533 ifnet_lock_shared(ifp
);
534 ifa2
= ifp
->if_addrhead
.tqh_first
;
535 ifpaddr
= ifa2
->ifa_addr
;
536 ifnet_lock_done(ifp
);
537 ifaaddr
= rt
->rt_ifa
->ifa_addr
;
538 rtm
->rtm_index
= ifp
->if_index
;
544 len
= rt_msg2(rtm
->rtm_type
, &info
, (caddr_t
)0,
545 (struct walkarg
*)0);
546 if (len
> rtm
->rtm_msglen
) {
547 struct rt_msghdr
*new_rtm
;
548 R_Malloc(new_rtm
, struct rt_msghdr
*, len
);
552 Bcopy(rtm
, new_rtm
, rtm
->rtm_msglen
);
553 R_Free(rtm
); rtm
= new_rtm
;
555 (void)rt_msg2(rtm
->rtm_type
, &info
, (caddr_t
)rtm
,
556 (struct walkarg
*)0);
557 rtm
->rtm_flags
= rt
->rt_flags
;
558 rtm
->rtm_rmx
= rt
->rt_rmx
;
559 rtm
->rtm_addrs
= info
.rti_addrs
;
564 if (gate
&& (error
= rt_setgate(rt
, rt_key(rt
), gate
)))
568 * If they tried to change things but didn't specify
569 * the required gateway, then just use the old one.
570 * This can happen if the user tries to change the
571 * flags on the default route without changing the
572 * default gateway. Changing flags still doesn't work.
574 if ((rt
->rt_flags
& RTF_GATEWAY
) && !gate
)
575 gate
= rt
->rt_gateway
;
579 * On Darwin, we call rt_setif which contains the
580 * equivalent to the code found at this very spot
583 rt_setif(rt
, ifpaddr
, ifaaddr
, gate
,
587 rt_setmetrics(rtm
->rtm_inits
, &rtm
->rtm_rmx
,
590 /* rt_setif, called above does this for us on darwin */
591 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
)
592 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, gate
);
595 rt
->rt_genmask
= genmask
;
600 rt
->rt_rmx
.rmx_locks
&= ~(rtm
->rtm_inits
);
601 rt
->rt_rmx
.rmx_locks
|=
602 (rtm
->rtm_inits
& rtm
->rtm_rmx
.rmx_locks
);
613 rtm
->rtm_errno
= error
;
615 rtm
->rtm_flags
|= RTF_DONE
;
619 lck_mtx_unlock(rt_mtx
);
620 socket_lock(so
, 0); /* relock the socket now */
622 struct rawcb
*rp
= 0;
624 * Check to see if we don't want our own messages.
626 if ((so
->so_options
& SO_USELOOPBACK
) == 0) {
627 if (route_cb
.any_count
<= 1) {
633 /* There is another listener, so construct message */
637 m_copyback(m
, 0, rtm
->rtm_msglen
, (caddr_t
)rtm
);
638 if (m
->m_pkthdr
.len
< rtm
->rtm_msglen
) {
641 } else if (m
->m_pkthdr
.len
> rtm
->rtm_msglen
)
642 m_adj(m
, rtm
->rtm_msglen
- m
->m_pkthdr
.len
);
645 if (sendonlytoself
&& m
) {
647 if (sbappendaddr(&so
->so_rcv
, &route_src
, m
, (struct mbuf
*)0, &error
) != 0) {
654 rp
->rcb_proto
.sp_family
= 0; /* Avoid us */
656 route_proto
.sp_protocol
= dst
->sa_family
;
658 socket_unlock(so
, 0);
659 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
663 rp
->rcb_proto
.sp_family
= PF_ROUTE
;
/*
 * rt_setmetrics: selectively copy route metrics from `in` to `out`.
 *   which - bit mask of RTV_* flags naming the fields to copy
 *   in    - source metrics
 *   out   - destination metrics; only the selected fields are written
 */
670 rt_setmetrics(u_long which
, struct rt_metrics
*in
, struct rt_metrics
*out
)
/* metric(f, e): copy field e from in to out only when flag f is set in `which`. */
672 #define metric(f, e) if (which & (f)) out->e = in->e;
673 metric(RTV_RPIPE
, rmx_recvpipe
);
674 metric(RTV_SPIPE
, rmx_sendpipe
);
675 metric(RTV_SSTHRESH
, rmx_ssthresh
);
676 metric(RTV_RTT
, rmx_rtt
);
677 metric(RTV_RTTVAR
, rmx_rttvar
);
678 metric(RTV_HOPCOUNT
, rmx_hopcount
);
679 metric(RTV_MTU
, rmx_mtu
);
680 metric(RTV_EXPIRE
, rmx_expire
);
685 * Set route's interface given ifpaddr, ifaaddr, and gateway.
688 rt_setif(struct rtentry
*rt
, struct sockaddr
*Ifpaddr
, struct sockaddr
*Ifaaddr
,
689 struct sockaddr
*Gate
, unsigned int ifscope
)
691 struct ifaddr
*ifa
= 0;
692 struct ifnet
*ifp
= 0;
694 lck_mtx_assert(rt_mtx
, LCK_MTX_ASSERT_OWNED
);
696 /* trigger route cache reevaluation */
701 * New gateway could require new ifaddr, ifp; flags may also
702 * be different; ifp may be specified by ll sockaddr when
703 * protocol address is ambiguous.
705 if (Ifpaddr
&& (ifa
= ifa_ifwithnet_scoped(Ifpaddr
, ifscope
)) &&
706 (ifp
= ifa
->ifa_ifp
) && (Ifaaddr
|| Gate
)) {
708 ifa
= ifaof_ifpforaddr(Ifaaddr
? Ifaaddr
: Gate
, ifp
);
714 if (Ifpaddr
&& (ifp
= if_withname(Ifpaddr
)) ) {
716 ifa
= ifaof_ifpforaddr(Gate
, ifp
);
718 ifnet_lock_shared(ifp
);
719 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
721 ifnet_lock_done(ifp
);
723 } else if (Ifaaddr
&&
724 (ifa
= ifa_ifwithaddr_scoped(Ifaaddr
, ifscope
))) {
727 (ifa
= ifa_ifwithroute_scoped_locked(rt
->rt_flags
,
728 rt_key(rt
), Gate
, ifscope
))) {
733 struct ifaddr
*oifa
= rt
->rt_ifa
;
735 if (oifa
&& oifa
->ifa_rtrequest
)
736 oifa
->ifa_rtrequest(RTM_DELETE
, rt
, Gate
);
740 * If this is the (non-scoped) default route, record
741 * the interface index used for the primary ifscope.
743 if (rt_inet_default(rt
, rt_key(rt
)))
744 set_primary_ifscope(rt
->rt_ifp
->if_index
);
745 rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
746 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
)
747 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, Gate
);
756 /* XXX: to reset gateway to correct value, at RTM_CHANGE */
757 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
)
758 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, Gate
);
763 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
764 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
768 * Extract the addresses of the passed sockaddrs.
769 * Do a little sanity checking so as to avoid bad memory references.
770 * This data is derived straight from userland.
773 rt_xaddrs(caddr_t cp
, caddr_t cplim
, struct rt_addrinfo
*rtinfo
)
778 bzero(rtinfo
->rti_info
, sizeof(rtinfo
->rti_info
));
779 for (i
= 0; (i
< RTAX_MAX
) && (cp
< cplim
); i
++) {
780 if ((rtinfo
->rti_addrs
& (1 << i
)) == 0)
782 sa
= (struct sockaddr
*)cp
;
786 if ( (cp
+ sa
->sa_len
) > cplim
) {
791 * there are no more.. quit now
792 * If there are more bits, they are in error.
793 * I've seen this. route(1) can evidently generate these.
794 * This causes kernel to core dump.
795 * for compatibility, If we see this, point to a safe address.
797 if (sa
->sa_len
== 0) {
798 rtinfo
->rti_info
[i
] = &sa_zero
;
799 return (0); /* should be EINVAL but for compat */
803 rtinfo
->rti_info
[i
] = sa
;
812 struct rt_addrinfo
*rtinfo
)
814 struct rt_msghdr
*rtm
;
824 len
= sizeof(struct ifa_msghdr
);
829 len
= sizeof(struct ifma_msghdr
);
833 len
= sizeof(struct if_msghdr
);
837 len
= sizeof(struct rt_msghdr
);
841 m
= m_gethdr(M_DONTWAIT
, MT_DATA
);
842 if (m
&& len
> MHLEN
) {
843 MCLGET(m
, M_DONTWAIT
);
844 if ((m
->m_flags
& M_EXT
) == 0) {
851 m
->m_pkthdr
.len
= m
->m_len
= len
;
852 m
->m_pkthdr
.rcvif
= 0;
853 rtm
= mtod(m
, struct rt_msghdr
*);
854 bzero((caddr_t
)rtm
, len
);
855 for (i
= 0; i
< RTAX_MAX
; i
++) {
856 if ((sa
= rtinfo
->rti_info
[i
]) == NULL
)
858 rtinfo
->rti_addrs
|= (1 << i
);
859 dlen
= ROUNDUP(sa
->sa_len
);
860 m_copyback(m
, len
, dlen
, (caddr_t
)sa
);
863 if (m
->m_pkthdr
.len
!= len
) {
867 rtm
->rtm_msglen
= len
;
868 rtm
->rtm_version
= RTM_VERSION
;
869 rtm
->rtm_type
= type
;
874 rt_msg2(int type
, struct rt_addrinfo
*rtinfo
, caddr_t cp
, struct walkarg
*w
)
877 int len
, dlen
, second_time
= 0;
880 rtinfo
->rti_addrs
= 0;
886 len
= sizeof(struct ifa_msghdr
);
891 len
= sizeof(struct ifma_msghdr
);
895 len
= sizeof(struct if_msghdr
);
899 len
= sizeof(struct if_msghdr2
);
903 len
= sizeof(struct ifma_msghdr2
);
907 len
= sizeof(struct rt_msghdr2
);
911 len
= sizeof(struct rt_msghdr
);
916 for (i
= 0; i
< RTAX_MAX
; i
++) {
919 if ((sa
= rtinfo
->rti_info
[i
]) == 0)
921 rtinfo
->rti_addrs
|= (1 << i
);
922 dlen
= ROUNDUP(sa
->sa_len
);
924 bcopy((caddr_t
)sa
, cp
, (unsigned)dlen
);
929 if (cp
== 0 && w
!= NULL
&& !second_time
) {
930 struct walkarg
*rw
= w
;
933 if (rw
->w_tmemsize
< len
) {
935 FREE(rw
->w_tmem
, M_RTABLE
);
936 rw
->w_tmem
= (caddr_t
)
937 _MALLOC(len
, M_RTABLE
, M_WAITOK
); /*###LD0412 was NOWAIT */
939 rw
->w_tmemsize
= len
;
949 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)cp0
;
951 rtm
->rtm_version
= RTM_VERSION
;
952 rtm
->rtm_type
= type
;
953 rtm
->rtm_msglen
= len
;
959 * This routine is called to generate a message from the routing
960 * socket indicating that a redirect has occurred, a routing lookup
961 * has failed, or that a protocol has detected timeouts to a particular
/*
 * rt_missmsg: build a routing message of the given type and deliver it
 * through raw_input.
 *   type   - message type handed to rt_msg1
 *   rtinfo - address set handed to rt_msg1; its rti_addrs value is copied
 *            into the message header
 *   flags  - OR-ed with RTF_DONE to form rtm_flags
 *   error  - stored in rtm_errno
 */
965 rt_missmsg(int type
, struct rt_addrinfo
*rtinfo
, int flags
, int error
)
967 struct rt_msghdr
*rtm
;
/* The destination sockaddr, if present, supplies the protocol family used for delivery below. */
969 struct sockaddr
*sa
= rtinfo
->rti_info
[RTAX_DST
];
/* The caller is asserted to own rt_mtx. */
971 lck_mtx_assert(rt_mtx
, LCK_MTX_ASSERT_OWNED
);
/* Test for the case where route_cb.any_count is zero. NOTE(review): the guarded statement is not visible in this excerpt. */
973 if (route_cb
.any_count
== 0)
975 m
= rt_msg1(type
, rtinfo
);
978 rtm
= mtod(m
, struct rt_msghdr
*);
979 rtm
->rtm_flags
= RTF_DONE
| flags
;
980 rtm
->rtm_errno
= error
;
981 rtm
->rtm_addrs
= rtinfo
->rti_addrs
;
/* sp_protocol is the destination's address family, or 0 when no destination was supplied. */
982 route_proto
.sp_protocol
= sa
? sa
->sa_family
: 0;
983 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
987 * This routine is called to generate a message from the routing
988 * socket indicating that the status of a network interface has changed.
994 struct if_msghdr
*ifm
;
996 struct rt_addrinfo info
;
998 if (route_cb
.any_count
== 0)
1000 bzero((caddr_t
)&info
, sizeof(info
));
1001 m
= rt_msg1(RTM_IFINFO
, &info
);
1004 ifm
= mtod(m
, struct if_msghdr
*);
1005 ifm
->ifm_index
= ifp
->if_index
;
1006 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1007 if_data_internal_to_if_data(ifp
, &ifp
->if_data
, &ifm
->ifm_data
);
1009 route_proto
.sp_protocol
= 0;
1010 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1014 * This is called to generate messages from the routing socket
1015 * indicating a network interface has had addresses associated with it.
1016 * if we ever reverse the logic and replace messages TO the routing
1017 * socket indicate a request to configure interfaces, then it will
1018 * be unnecessary as the routing socket will automatically generate
1021 * Since this is coming from the interface, it is expected that the
1022 * interface will be locked.
1025 rt_newaddrmsg(int cmd
, struct ifaddr
*ifa
, int error
, struct rtentry
*rt
)
1027 struct rt_addrinfo info
;
1028 struct sockaddr
*sa
= 0;
1031 struct ifnet
*ifp
= ifa
->ifa_ifp
;
1033 if (route_cb
.any_count
== 0)
1035 for (pass
= 1; pass
< 3; pass
++) {
1036 bzero((caddr_t
)&info
, sizeof(info
));
1037 if ((cmd
== RTM_ADD
&& pass
== 1) ||
1038 (cmd
== RTM_DELETE
&& pass
== 2)) {
1039 struct ifa_msghdr
*ifam
;
1040 int ncmd
= cmd
== RTM_ADD
? RTM_NEWADDR
: RTM_DELADDR
;
1042 ifaaddr
= sa
= ifa
->ifa_addr
;
1043 ifpaddr
= ifp
->if_addrhead
.tqh_first
->ifa_addr
;
1044 netmask
= ifa
->ifa_netmask
;
1045 brdaddr
= ifa
->ifa_dstaddr
;
1046 if ((m
= rt_msg1(ncmd
, &info
)) == NULL
)
1048 ifam
= mtod(m
, struct ifa_msghdr
*);
1049 ifam
->ifam_index
= ifp
->if_index
;
1050 ifam
->ifam_metric
= ifa
->ifa_metric
;
1051 ifam
->ifam_flags
= ifa
->ifa_flags
;
1052 ifam
->ifam_addrs
= info
.rti_addrs
;
1054 if ((cmd
== RTM_ADD
&& pass
== 2) ||
1055 (cmd
== RTM_DELETE
&& pass
== 1)) {
1056 struct rt_msghdr
*rtm
;
1060 netmask
= rt_mask(rt
);
1061 dst
= sa
= rt_key(rt
);
1062 gate
= rt
->rt_gateway
;
1063 if ((m
= rt_msg1(cmd
, &info
)) == NULL
)
1065 rtm
= mtod(m
, struct rt_msghdr
*);
1066 rtm
->rtm_index
= ifp
->if_index
;
1067 rtm
->rtm_flags
|= rt
->rt_flags
;
1068 rtm
->rtm_errno
= error
;
1069 rtm
->rtm_addrs
= info
.rti_addrs
;
1071 route_proto
.sp_protocol
= sa
? sa
->sa_family
: 0;
1072 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1077 * This is the analogue to the rt_newaddrmsg which performs the same
1078 * function but for multicast group memberships. This is easier since
1079 * there is no route state to worry about.
1082 rt_newmaddrmsg(int cmd
, struct ifmultiaddr
*ifma
)
1084 struct rt_addrinfo info
;
1086 struct ifnet
*ifp
= ifma
->ifma_ifp
;
1087 struct ifma_msghdr
*ifmam
;
1089 if (route_cb
.any_count
== 0)
1092 bzero((caddr_t
)&info
, sizeof(info
));
1093 ifaaddr
= ifma
->ifma_addr
;
1094 if (ifp
&& ifp
->if_addrhead
.tqh_first
)
1095 ifpaddr
= ifp
->if_addrhead
.tqh_first
->ifa_addr
;
1099 * If a link-layer address is present, present it as a ``gateway''
1100 * (similarly to how ARP entries, e.g., are presented).
1102 gate
= ifma
->ifma_ll
->ifma_addr
;
1103 if ((m
= rt_msg1(cmd
, &info
)) == NULL
)
1105 ifmam
= mtod(m
, struct ifma_msghdr
*);
1106 ifmam
->ifmam_index
= ifp
? ifp
->if_index
: 0;
1107 ifmam
->ifmam_addrs
= info
.rti_addrs
;
1108 route_proto
.sp_protocol
= ifma
->ifma_addr
->sa_family
;
1109 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1113 * This is used in dumping the kernel table via sysctl().
1116 sysctl_dumpentry(struct radix_node
*rn
, void *vw
)
1118 struct walkarg
*w
= vw
;
1119 struct rtentry
*rt
= (struct rtentry
*)rn
;
1120 int error
= 0, size
;
1121 struct rt_addrinfo info
;
1123 if (w
->w_op
== NET_RT_FLAGS
&& !(rt
->rt_flags
& w
->w_arg
))
1125 bzero((caddr_t
)&info
, sizeof(info
));
1127 gate
= rt
->rt_gateway
;
1128 netmask
= rt_mask(rt
);
1129 genmask
= rt
->rt_genmask
;
1130 if (w
->w_op
!= NET_RT_DUMP2
) {
1131 size
= rt_msg2(RTM_GET
, &info
, 0, w
);
1132 if (w
->w_req
&& w
->w_tmem
) {
1133 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)w
->w_tmem
;
1135 rtm
->rtm_flags
= rt
->rt_flags
;
1136 rtm
->rtm_use
= rt
->rt_use
;
1137 rtm
->rtm_rmx
= rt
->rt_rmx
;
1138 rtm
->rtm_index
= rt
->rt_ifp
->if_index
;
1142 rtm
->rtm_addrs
= info
.rti_addrs
;
1143 error
= SYSCTL_OUT(w
->w_req
, (caddr_t
)rtm
, size
);
1147 size
= rt_msg2(RTM_GET2
, &info
, 0, w
);
1148 if (w
->w_req
&& w
->w_tmem
) {
1149 struct rt_msghdr2
*rtm
= (struct rt_msghdr2
*)w
->w_tmem
;
1151 rtm
->rtm_flags
= rt
->rt_flags
;
1152 rtm
->rtm_use
= rt
->rt_use
;
1153 rtm
->rtm_rmx
= rt
->rt_rmx
;
1154 rtm
->rtm_index
= rt
->rt_ifp
->if_index
;
1155 rtm
->rtm_refcnt
= rt
->rt_refcnt
;
1157 rtm
->rtm_parentflags
= rt
->rt_parent
->rt_flags
;
1159 rtm
->rtm_parentflags
= 0;
1160 rtm
->rtm_reserved
= 0;
1161 rtm
->rtm_addrs
= info
.rti_addrs
;
1162 error
= SYSCTL_OUT(w
->w_req
, (caddr_t
)rtm
, size
);
1177 struct rt_addrinfo info
;
1180 bzero((caddr_t
)&info
, sizeof(info
));
1181 ifnet_head_lock_shared();
1182 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
1185 if (w
->w_arg
&& w
->w_arg
!= ifp
->if_index
)
1187 ifnet_lock_shared(ifp
);
1188 ifa
= ifp
->if_addrhead
.tqh_first
;
1189 ifpaddr
= ifa
->ifa_addr
;
1190 len
= rt_msg2(RTM_IFINFO
, &info
, (caddr_t
)0, w
);
1192 if (w
->w_req
&& w
->w_tmem
) {
1193 struct if_msghdr
*ifm
;
1195 ifm
= (struct if_msghdr
*)w
->w_tmem
;
1196 ifm
->ifm_index
= ifp
->if_index
;
1197 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1198 if_data_internal_to_if_data(ifp
, &ifp
->if_data
, &ifm
->ifm_data
);
1199 ifm
->ifm_addrs
= info
.rti_addrs
;
1200 error
= SYSCTL_OUT(w
->w_req
,(caddr_t
)ifm
, len
);
1202 ifnet_lock_done(ifp
);
1206 while ((ifa
= ifa
->ifa_link
.tqe_next
) != 0) {
1207 if (af
&& af
!= ifa
->ifa_addr
->sa_family
)
1209 ifaaddr
= ifa
->ifa_addr
;
1210 netmask
= ifa
->ifa_netmask
;
1211 brdaddr
= ifa
->ifa_dstaddr
;
1212 len
= rt_msg2(RTM_NEWADDR
, &info
, 0, w
);
1213 if (w
->w_req
&& w
->w_tmem
) {
1214 struct ifa_msghdr
*ifam
;
1216 ifam
= (struct ifa_msghdr
*)w
->w_tmem
;
1217 ifam
->ifam_index
= ifa
->ifa_ifp
->if_index
;
1218 ifam
->ifam_flags
= ifa
->ifa_flags
;
1219 ifam
->ifam_metric
= ifa
->ifa_metric
;
1220 ifam
->ifam_addrs
= info
.rti_addrs
;
1221 error
= SYSCTL_OUT(w
->w_req
, w
->w_tmem
, len
);
1226 ifnet_lock_done(ifp
);
1227 ifaaddr
= netmask
= brdaddr
= 0;
1240 struct rt_addrinfo info
;
1243 bzero((caddr_t
)&info
, sizeof(info
));
1244 ifnet_head_lock_shared();
1245 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
1248 if (w
->w_arg
&& w
->w_arg
!= ifp
->if_index
)
1250 ifnet_lock_shared(ifp
);
1251 ifa
= ifp
->if_addrhead
.tqh_first
;
1252 ifpaddr
= ifa
->ifa_addr
;
1253 len
= rt_msg2(RTM_IFINFO2
, &info
, (caddr_t
)0, w
);
1255 if (w
->w_req
&& w
->w_tmem
) {
1256 struct if_msghdr2
*ifm
;
1258 ifm
= (struct if_msghdr2
*)w
->w_tmem
;
1259 ifm
->ifm_addrs
= info
.rti_addrs
;
1260 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1261 ifm
->ifm_index
= ifp
->if_index
;
1262 ifm
->ifm_snd_len
= ifp
->if_snd
.ifq_len
;
1263 ifm
->ifm_snd_maxlen
= ifp
->if_snd
.ifq_maxlen
;
1264 ifm
->ifm_snd_drops
= ifp
->if_snd
.ifq_drops
;
1265 ifm
->ifm_timer
= ifp
->if_timer
;
1266 if_data_internal_to_if_data64(ifp
, &ifp
->if_data
, &ifm
->ifm_data
);
1267 error
= SYSCTL_OUT(w
->w_req
, w
->w_tmem
, len
);
1269 ifnet_lock_done(ifp
);
1273 while ((ifa
= ifa
->ifa_link
.tqe_next
) != 0) {
1274 if (af
&& af
!= ifa
->ifa_addr
->sa_family
)
1276 ifaaddr
= ifa
->ifa_addr
;
1277 netmask
= ifa
->ifa_netmask
;
1278 brdaddr
= ifa
->ifa_dstaddr
;
1279 len
= rt_msg2(RTM_NEWADDR
, &info
, 0, w
);
1280 if (w
->w_req
&& w
->w_tmem
) {
1281 struct ifa_msghdr
*ifam
;
1283 ifam
= (struct ifa_msghdr
*)w
->w_tmem
;
1284 ifam
->ifam_index
= ifa
->ifa_ifp
->if_index
;
1285 ifam
->ifam_flags
= ifa
->ifa_flags
;
1286 ifam
->ifam_metric
= ifa
->ifa_metric
;
1287 ifam
->ifam_addrs
= info
.rti_addrs
;
1288 error
= SYSCTL_OUT(w
->w_req
, w
->w_tmem
, len
);
1294 ifnet_lock_done(ifp
);
1298 struct ifmultiaddr
*ifma
;
1300 for (ifma
= ifp
->if_multiaddrs
.lh_first
; ifma
;
1301 ifma
= ifma
->ifma_link
.le_next
) {
1302 if (af
&& af
!= ifma
->ifma_addr
->sa_family
)
1304 bzero((caddr_t
)&info
, sizeof(info
));
1305 ifaaddr
= ifma
->ifma_addr
;
1306 if (ifp
->if_addrhead
.tqh_first
)
1307 ifpaddr
= ifp
->if_addrhead
.tqh_first
->ifa_addr
;
1309 gate
= ifma
->ifma_ll
->ifma_addr
;
1310 len
= rt_msg2(RTM_NEWMADDR2
, &info
, 0, w
);
1311 if (w
->w_req
&& w
->w_tmem
) {
1312 struct ifma_msghdr2
*ifmam
;
1314 ifmam
= (struct ifma_msghdr2
*)w
->w_tmem
;
1315 ifmam
->ifmam_addrs
= info
.rti_addrs
;
1316 ifmam
->ifmam_flags
= 0;
1317 ifmam
->ifmam_index
= ifma
->ifma_ifp
->if_index
;
1318 ifmam
->ifmam_refcount
= ifma
->ifma_refcount
;
1319 error
= SYSCTL_OUT(w
->w_req
, w
->w_tmem
, len
);
1325 ifnet_lock_done(ifp
);
1326 ifaaddr
= netmask
= brdaddr
= 0;
/*
 * sysctl_rtstat: copy the global rtstat structure out to the sysctl
 * request `req` with SYSCTL_OUT, storing the result in `error`.
 */
1334 sysctl_rtstat(struct sysctl_req
*req
)
1338 error
= SYSCTL_OUT(req
, &rtstat
, sizeof(struct rtstat
));
/*
 * sysctl_rttrash: copy the rttrash value out to the sysctl request `req`
 * with SYSCTL_OUT, storing the result in `error`.
 */
1346 sysctl_rttrash(struct sysctl_req
*req
)
1350 error
= SYSCTL_OUT(req
, &rttrash
, sizeof(rttrash
));
1359 sysctl_rtsock SYSCTL_HANDLER_ARGS
1361 #pragma unused(oidp)
1362 int *name
= (int *)arg1
;
1363 u_int namelen
= arg2
;
1364 struct radix_node_head
*rnh
;
1365 int i
, error
= EINVAL
;
1376 Bzero(&w
, sizeof(w
));
1386 lck_mtx_lock(rt_mtx
);
1387 for (i
= 1; i
<= AF_MAX
; i
++)
1388 if ((rnh
= rt_tables
[i
]) && (af
== 0 || af
== i
) &&
1389 (error
= rnh
->rnh_walktree(rnh
,
1390 sysctl_dumpentry
, &w
)))
1392 lck_mtx_unlock(rt_mtx
);
1395 error
= sysctl_iflist(af
, &w
);
1397 case NET_RT_IFLIST2
:
1398 error
= sysctl_iflist2(af
, &w
);
1401 error
= sysctl_rtstat(req
);
1404 error
= sysctl_rttrash(req
);
1408 FREE(w
.w_tmem
, M_RTABLE
);
1412 SYSCTL_NODE(_net
, PF_ROUTE
, routetable
, CTLFLAG_RD
, sysctl_rtsock
, "");
1415 * Definitions of protocols supported in the ROUTE domain.
1418 struct domain routedomain
; /* or at least forward */
1420 static struct protosw routesw
[] = {
1421 { SOCK_RAW
, &routedomain
, 0, PR_ATOMIC
|PR_ADDR
,
1422 0, route_output
, raw_ctlinput
, 0,
1432 struct domain routedomain
=
1433 { PF_ROUTE
, "route", route_init
, 0, 0,
1435 NULL
, NULL
, 0, 0, 0, 0, NULL
, 0,