2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1988, 1991, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * @(#)rtsock.c 8.5 (Berkeley) 11/2/94
64 #include <sys/param.h>
65 #include <sys/systm.h>
66 #include <sys/kernel.h>
67 #include <sys/sysctl.h>
69 #include <sys/malloc.h>
71 #include <sys/socket.h>
72 #include <sys/socketvar.h>
73 #include <sys/domain.h>
74 #include <sys/protosw.h>
75 #include <sys/syslog.h>
76 #include <sys/mcache.h>
77 #include <kern/lock.h>
80 #include <net/route.h>
82 #include <net/raw_cb.h>
83 #include <netinet/in.h>
84 #include <netinet/in_var.h>
85 #include <netinet/in_arp.h>
86 #include <netinet6/nd6.h>
88 #include <machine/spl.h>
90 extern struct rtstat rtstat
;
91 extern int check_routeselfref
;
92 extern struct domain routedomain
;
94 MALLOC_DEFINE(M_RTABLE
, "routetbl", "routing tables");
96 static struct sockaddr route_dst
= { 2, PF_ROUTE
, { 0, } };
97 static struct sockaddr route_src
= { 2, PF_ROUTE
, { 0, } };
98 static struct sockaddr sa_zero
= { sizeof(sa_zero
), AF_INET
, { 0, } };
104 struct sysctl_req
*w_req
;
107 static struct mbuf
*rt_msg1(int, struct rt_addrinfo
*);
108 static int rt_msg2(int, struct rt_addrinfo
*, caddr_t
, struct walkarg
*);
109 static int rt_xaddrs(caddr_t
, caddr_t
, struct rt_addrinfo
*);
110 static int sysctl_dumpentry(struct radix_node
*rn
, void *vw
);
111 static int sysctl_dumpentry_ext(struct radix_node
*rn
, void *vw
);
112 static int sysctl_iflist(int af
, struct walkarg
*w
);
113 static int sysctl_iflist2(int af
, struct walkarg
*w
);
114 static int route_output(struct mbuf
*, struct socket
*);
115 static void rt_setmetrics(u_int32_t
, struct rt_metrics
*, struct rtentry
*);
116 static void rt_getmetrics(struct rtentry
*, struct rt_metrics
*);
117 static void rt_setif(struct rtentry
*, struct sockaddr
*, struct sockaddr
*,
118 struct sockaddr
*, unsigned int);
119 static void rt_drainall(void);
121 #define SIN(sa) ((struct sockaddr_in *)(size_t)(sa))
124 SYSCTL_NODE(_net
, OID_AUTO
, idle
, CTLFLAG_RW
, 0, "idle network monitoring");
126 static struct timeval last_ts
;
128 SYSCTL_NODE(_net_idle
, OID_AUTO
, route
, CTLFLAG_RW
, 0, "idle route monitoring");
130 static int rt_if_idle_drain_interval
= RT_IF_IDLE_DRAIN_INTERVAL
;
131 SYSCTL_INT(_net_idle_route
, OID_AUTO
, drain_interval
, CTLFLAG_RW
,
132 &rt_if_idle_drain_interval
, 0, "Default interval for draining "
133 "routes when doing interface idle reference counting.");
136 * This macro calculates skew in wall clock, just in case the user changes the
137 * system time. This skew adjustment is required because we now keep the route
138 * expiration times in uptime terms in the kernel, but the userland still
139 * expects expiration times in terms of calendar times.
141 #define CALCULATE_CLOCKSKEW(cc, ic, cu, iu)\
142 ((cc.tv_sec - ic) - (cu - iu))
145 * It really doesn't make any sense at all for this code to share much
146 * with raw_usrreq.c, since its functionality is so restricted. XXX
149 rts_abort(struct socket
*so
)
153 error
= raw_usrreqs
.pru_abort(so
);
157 /* pru_accept is EOPNOTSUPP */
160 rts_attach(struct socket
*so
, int proto
, __unused
struct proc
*p
)
165 if (sotorawcb(so
) != 0)
166 return EISCONN
; /* XXX panic? */
167 MALLOC(rp
, struct rawcb
*, sizeof *rp
, M_PCB
, M_WAITOK
); /* XXX */
170 bzero(rp
, sizeof *rp
);
173 * The splnet() is necessary to block protocols from sending
174 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
175 * this PCB is extant but incompletely initialized.
176 * Probably we should try to do more of this work beforehand and
179 so
->so_pcb
= (caddr_t
)rp
;
180 error
= raw_attach(so
, proto
); /* don't use raw_usrreqs.pru_attach, it checks for SS_PRIV */
185 so
->so_flags
|= SOF_PCBCLEARING
;
189 switch(rp
->rcb_proto
.sp_protocol
) {
190 //####LD route_cb needs looking
195 route_cb
.ip6_count
++;
198 route_cb
.ipx_count
++;
204 rp
->rcb_faddr
= &route_src
;
205 route_cb
.any_count
++;
206 /* the socket is already locked when we enter rts_attach */
208 so
->so_options
|= SO_USELOOPBACK
;
213 rts_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
216 error
= raw_usrreqs
.pru_bind(so
, nam
, p
); /* xxx just EINVAL */
221 rts_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
224 error
= raw_usrreqs
.pru_connect(so
, nam
, p
); /* XXX just EINVAL */
228 /* pru_connect2 is EOPNOTSUPP */
229 /* pru_control is EOPNOTSUPP */
232 rts_detach(struct socket
*so
)
234 struct rawcb
*rp
= sotorawcb(so
);
238 switch(rp
->rcb_proto
.sp_protocol
) {
243 route_cb
.ip6_count
--;
246 route_cb
.ipx_count
--;
252 route_cb
.any_count
--;
254 error
= raw_usrreqs
.pru_detach(so
);
259 rts_disconnect(struct socket
*so
)
262 error
= raw_usrreqs
.pru_disconnect(so
);
266 /* pru_listen is EOPNOTSUPP */
269 rts_peeraddr(struct socket
*so
, struct sockaddr
**nam
)
272 error
= raw_usrreqs
.pru_peeraddr(so
, nam
);
276 /* pru_rcvd is EOPNOTSUPP */
277 /* pru_rcvoob is EOPNOTSUPP */
280 rts_send(struct socket
*so
, int flags
, struct mbuf
*m
, struct sockaddr
*nam
,
281 struct mbuf
*control
, struct proc
*p
)
284 error
= raw_usrreqs
.pru_send(so
, flags
, m
, nam
, control
, p
);
288 /* pru_sense is null */
291 rts_shutdown(struct socket
*so
)
294 error
= raw_usrreqs
.pru_shutdown(so
);
299 rts_sockaddr(struct socket
*so
, struct sockaddr
**nam
)
302 error
= raw_usrreqs
.pru_sockaddr(so
, nam
);
306 static struct pr_usrreqs route_usrreqs
= {
307 rts_abort
, pru_accept_notsupp
, rts_attach
, rts_bind
,
308 rts_connect
, pru_connect2_notsupp
, pru_control_notsupp
,
309 rts_detach
, rts_disconnect
, pru_listen_notsupp
, rts_peeraddr
,
310 pru_rcvd_notsupp
, pru_rcvoob_notsupp
, rts_send
, pru_sense_null
,
311 rts_shutdown
, rts_sockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
316 route_output(struct mbuf
*m
, struct socket
*so
)
318 struct rt_msghdr
*rtm
= NULL
;
319 struct rtentry
*rt
= NULL
;
320 struct rtentry
*saved_nrt
= NULL
;
321 struct radix_node_head
*rnh
;
322 struct rt_addrinfo info
;
324 sa_family_t dst_sa_family
= 0;
325 struct ifnet
*ifp
= NULL
;
327 struct proc
*curproc
= current_proc();
329 struct sockaddr_in dst_in
, gate_in
;
330 int sendonlytoself
= 0;
331 unsigned int ifscope
= IFSCOPE_NONE
;
333 #define senderr(e) { error = e; goto flush;}
335 ((m
->m_len
< sizeof(intptr_t)) && (m
= m_pullup(m
, sizeof(intptr_t))) == 0))
337 if ((m
->m_flags
& M_PKTHDR
) == 0)
338 panic("route_output");
340 /* unlock the socket (but keep a reference) it won't be accessed until raw_input appends to it. */
341 socket_unlock(so
, 0);
342 lck_mtx_lock(rnh_lock
);
344 len
= m
->m_pkthdr
.len
;
345 if (len
< sizeof(*rtm
) ||
346 len
!= mtod(m
, struct rt_msghdr
*)->rtm_msglen
) {
347 info
.rti_info
[RTAX_DST
] = NULL
;
350 R_Malloc(rtm
, struct rt_msghdr
*, len
);
352 info
.rti_info
[RTAX_DST
] = NULL
;
355 m_copydata(m
, 0, len
, (caddr_t
)rtm
);
356 if (rtm
->rtm_version
!= RTM_VERSION
) {
357 info
.rti_info
[RTAX_DST
] = NULL
;
358 senderr(EPROTONOSUPPORT
);
362 * Silent version of RTM_GET for Reachabiltiy APIs. We may change
363 * all RTM_GETs to be silent in the future, so this is private for now.
365 if (rtm
->rtm_type
== RTM_GET_SILENT
) {
366 if ((so
->so_options
& SO_USELOOPBACK
) == 0)
369 rtm
->rtm_type
= RTM_GET
;
373 * Perform permission checking, only privileged sockets
374 * may perform operations other than RTM_GET
376 if (rtm
->rtm_type
!= RTM_GET
&& (so
->so_state
& SS_PRIV
) == 0) {
377 info
.rti_info
[RTAX_DST
] = NULL
;
381 rtm
->rtm_pid
= proc_selfpid();
382 info
.rti_addrs
= rtm
->rtm_addrs
;
383 if (rt_xaddrs((caddr_t
)(rtm
+ 1), len
+ (caddr_t
)rtm
, &info
)) {
384 info
.rti_info
[RTAX_DST
] = NULL
;
387 if (info
.rti_info
[RTAX_DST
] == NULL
|| (info
.rti_info
[RTAX_DST
]->sa_family
>= AF_MAX
) ||
388 (info
.rti_info
[RTAX_GATEWAY
] != NULL
&& (info
.rti_info
[RTAX_GATEWAY
]->sa_family
>= AF_MAX
))) {
392 if (info
.rti_info
[RTAX_DST
]->sa_family
== AF_INET
&& info
.rti_info
[RTAX_DST
]->sa_len
!= sizeof (dst_in
)) {
393 /* At minimum, we need up to sin_addr */
394 if (info
.rti_info
[RTAX_DST
]->sa_len
< offsetof(struct sockaddr_in
, sin_zero
))
396 bzero(&dst_in
, sizeof (dst_in
));
397 dst_in
.sin_len
= sizeof (dst_in
);
398 dst_in
.sin_family
= AF_INET
;
399 dst_in
.sin_port
= SIN(info
.rti_info
[RTAX_DST
])->sin_port
;
400 dst_in
.sin_addr
= SIN(info
.rti_info
[RTAX_DST
])->sin_addr
;
401 info
.rti_info
[RTAX_DST
] = (struct sockaddr
*)&dst_in
;
402 dst_sa_family
= info
.rti_info
[RTAX_DST
]->sa_family
;
405 if (info
.rti_info
[RTAX_GATEWAY
] != NULL
&&
406 info
.rti_info
[RTAX_GATEWAY
]->sa_family
== AF_INET
&& info
.rti_info
[RTAX_GATEWAY
]->sa_len
!= sizeof (gate_in
)) {
407 /* At minimum, we need up to sin_addr */
408 if (info
.rti_info
[RTAX_GATEWAY
]->sa_len
< offsetof(struct sockaddr_in
, sin_zero
))
410 bzero(&gate_in
, sizeof (gate_in
));
411 gate_in
.sin_len
= sizeof (gate_in
);
412 gate_in
.sin_family
= AF_INET
;
413 gate_in
.sin_port
= SIN(info
.rti_info
[RTAX_GATEWAY
])->sin_port
;
414 gate_in
.sin_addr
= SIN(info
.rti_info
[RTAX_GATEWAY
])->sin_addr
;
415 info
.rti_info
[RTAX_GATEWAY
] = (struct sockaddr
*)&gate_in
;
418 if (info
.rti_info
[RTAX_GENMASK
]) {
419 struct radix_node
*t
;
420 t
= rn_addmask((caddr_t
)info
.rti_info
[RTAX_GENMASK
], 0, 1);
421 if (t
&& Bcmp(info
.rti_info
[RTAX_GENMASK
], t
->rn_key
, *(u_char
*)info
.rti_info
[RTAX_GENMASK
]) == 0)
422 info
.rti_info
[RTAX_GENMASK
] = (struct sockaddr
*)(t
->rn_key
);
428 * If RTF_IFSCOPE flag is set, then rtm_index specifies the scope.
430 if (rtm
->rtm_flags
& RTF_IFSCOPE
) {
431 if (info
.rti_info
[RTAX_DST
]->sa_family
!= AF_INET
&& info
.rti_info
[RTAX_DST
]->sa_family
!= AF_INET6
)
433 ifscope
= rtm
->rtm_index
;
437 * For AF_INET, always zero out the embedded scope ID. If this is
438 * a scoped request, it must be done explicitly by setting RTF_IFSCOPE
439 * flag and the corresponding rtm_index value. This is to prevent
440 * false interpretation of the scope ID because it's using the sin_zero
441 * field, which might not be properly cleared by the requestor.
443 if (info
.rti_info
[RTAX_DST
]->sa_family
== AF_INET
)
444 sin_set_ifscope(info
.rti_info
[RTAX_DST
], IFSCOPE_NONE
);
445 if (info
.rti_info
[RTAX_GATEWAY
] != NULL
&& info
.rti_info
[RTAX_GATEWAY
]->sa_family
== AF_INET
)
446 sin_set_ifscope(info
.rti_info
[RTAX_GATEWAY
], IFSCOPE_NONE
);
448 switch (rtm
->rtm_type
) {
451 if (info
.rti_info
[RTAX_GATEWAY
] == NULL
)
455 /* XXX LD11JUL02 Special case for AOL 5.1.2 connectivity issue to AirPort BS (Radar 2969954)
456 * AOL is adding a circular route ("10.0.1.1/32 10.0.1.1") when establishing its ppp tunnel
457 * to the AP BaseStation by removing the default gateway and replacing it with their tunnel entry point.
458 * There is no apparent reason to add this route as there is a valid 10.0.1.1/24 route to the BS.
459 * That circular route was ignored on previous version of MacOS X because of a routing bug
460 * corrected with the merge to FreeBSD4.4 (a route generated from an RTF_CLONING route had the RTF_WASCLONED
461 * flag set but did not have a reference to the parent route) and that entry was left in the RT. This workaround is
462 * made in order to provide binary compatibility with AOL.
463 * If we catch a process adding a circular route with a /32 from the routing socket, we error it out instead of
464 * confusing the routing table with a wrong route to the previous default gateway
467 #define satosinaddr(sa) (((struct sockaddr_in *)sa)->sin_addr.s_addr)
469 if (check_routeselfref
&& (info
.rti_info
[RTAX_DST
] && info
.rti_info
[RTAX_DST
]->sa_family
== AF_INET
) &&
470 (info
.rti_info
[RTAX_NETMASK
] && satosinaddr(info
.rti_info
[RTAX_NETMASK
]) == INADDR_BROADCAST
) &&
471 (info
.rti_info
[RTAX_GATEWAY
] && satosinaddr(info
.rti_info
[RTAX_DST
]) == satosinaddr(info
.rti_info
[RTAX_GATEWAY
]))) {
472 log(LOG_WARNING
, "route_output: circular route %ld.%ld.%ld.%ld/32 ignored\n",
473 (ntohl(satosinaddr(info
.rti_info
[RTAX_GATEWAY
])>>24))&0xff,
474 (ntohl(satosinaddr(info
.rti_info
[RTAX_GATEWAY
])>>16))&0xff,
475 (ntohl(satosinaddr(info
.rti_info
[RTAX_GATEWAY
])>>8))&0xff,
476 (ntohl(satosinaddr(info
.rti_info
[RTAX_GATEWAY
])))&0xff);
482 error
= rtrequest_scoped_locked(RTM_ADD
, info
.rti_info
[RTAX_DST
], info
.rti_info
[RTAX_GATEWAY
],
483 info
.rti_info
[RTAX_NETMASK
], rtm
->rtm_flags
, &saved_nrt
, ifscope
);
484 if (error
== 0 && saved_nrt
) {
488 * If the route request specified an interface with
489 * IFA and/or IFP, we set the requested interface on
490 * the route with rt_setif. It would be much better
491 * to do this inside rtrequest, but that would
492 * require passing the desired interface, in some
493 * form, to rtrequest. Since rtrequest is called in
494 * so many places (roughly 40 in our source), adding
495 * a parameter is to much for us to swallow; this is
496 * something for the FreeBSD developers to tackle.
497 * Instead, we let rtrequest compute whatever
498 * interface it wants, then come in behind it and
499 * stick in the interface that we really want. This
500 * works reasonably well except when rtrequest can't
501 * figure out what interface to use (with
502 * ifa_withroute) and returns ENETUNREACH. Ideally
503 * it shouldn't matter if rtrequest can't figure out
504 * the interface if we're going to explicitly set it
505 * ourselves anyway. But practically we can't
506 * recover here because rtrequest will not do any of
507 * the work necessary to add the route if it can't
508 * find an interface. As long as there is a default
509 * route that leads to some interface, rtrequest will
510 * find an interface, so this problem should be
511 * rarely encountered.
515 rt_setif(saved_nrt
, info
.rti_info
[RTAX_IFP
], info
.rti_info
[RTAX_IFA
], info
.rti_info
[RTAX_GATEWAY
],
518 rt_setmetrics(rtm
->rtm_inits
,
519 &rtm
->rtm_rmx
, saved_nrt
);
520 saved_nrt
->rt_rmx
.rmx_locks
&= ~(rtm
->rtm_inits
);
521 saved_nrt
->rt_rmx
.rmx_locks
|=
522 (rtm
->rtm_inits
& rtm
->rtm_rmx
.rmx_locks
);
523 saved_nrt
->rt_genmask
= info
.rti_info
[RTAX_GENMASK
];
524 RT_REMREF_LOCKED(saved_nrt
);
525 RT_UNLOCK(saved_nrt
);
530 error
= rtrequest_scoped_locked(RTM_DELETE
, info
.rti_info
[RTAX_DST
],
531 info
.rti_info
[RTAX_GATEWAY
], info
.rti_info
[RTAX_NETMASK
], rtm
->rtm_flags
, &saved_nrt
, ifscope
);
542 if ((rnh
= rt_tables
[info
.rti_info
[RTAX_DST
]->sa_family
]) == NULL
)
543 senderr(EAFNOSUPPORT
);
546 * Lookup the best match based on the key-mask pair;
547 * callee adds a reference and checks for root node.
549 rt
= rt_lookup(TRUE
, info
.rti_info
[RTAX_DST
], info
.rti_info
[RTAX_NETMASK
], rnh
, ifscope
);
555 * Holding rnh_lock here prevents the possibility of
556 * ifa from changing (e.g. in_ifinit), so it is safe
557 * to access its ifa_addr (down below) without locking.
559 switch(rtm
->rtm_type
) {
565 RT_LOCK_ASSERT_HELD(rt
);
566 info
.rti_info
[RTAX_DST
] = rt_key(rt
);
567 dst_sa_family
= info
.rti_info
[RTAX_DST
]->sa_family
;
568 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
569 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
570 info
.rti_info
[RTAX_GENMASK
] = rt
->rt_genmask
;
571 if (rtm
->rtm_addrs
& (RTA_IFP
| RTA_IFA
)) {
574 ifnet_lock_shared(ifp
);
575 ifa2
= ifp
->if_lladdr
;
576 info
.rti_info
[RTAX_IFP
] = ifa2
->ifa_addr
;
578 ifnet_lock_done(ifp
);
579 info
.rti_info
[RTAX_IFA
] = rt
->rt_ifa
->ifa_addr
;
580 rtm
->rtm_index
= ifp
->if_index
;
582 info
.rti_info
[RTAX_IFP
] = NULL
;
583 info
.rti_info
[RTAX_IFA
] = NULL
;
585 } else if ((ifp
= rt
->rt_ifp
) != NULL
) {
586 rtm
->rtm_index
= ifp
->if_index
;
590 len
= rt_msg2(rtm
->rtm_type
, &info
, (caddr_t
)0,
591 (struct walkarg
*)0);
594 if (len
> rtm
->rtm_msglen
) {
595 struct rt_msghdr
*new_rtm
;
596 R_Malloc(new_rtm
, struct rt_msghdr
*, len
);
603 Bcopy(rtm
, new_rtm
, rtm
->rtm_msglen
);
604 R_Free(rtm
); rtm
= new_rtm
;
608 (void)rt_msg2(rtm
->rtm_type
, &info
, (caddr_t
)rtm
,
609 (struct walkarg
*)0);
612 rtm
->rtm_flags
= rt
->rt_flags
;
613 rt_getmetrics(rt
, &rtm
->rtm_rmx
);
614 rtm
->rtm_addrs
= info
.rti_addrs
;
621 if (info
.rti_info
[RTAX_GATEWAY
] && (error
= rt_setgate(rt
,
622 rt_key(rt
), info
.rti_info
[RTAX_GATEWAY
]))) {
627 * If they tried to change things but didn't specify
628 * the required gateway, then just use the old one.
629 * This can happen if the user tries to change the
630 * flags on the default route without changing the
631 * default gateway. Changing flags still doesn't work.
633 if ((rt
->rt_flags
& RTF_GATEWAY
) && !info
.rti_info
[RTAX_GATEWAY
])
634 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
638 * On Darwin, we call rt_setif which contains the
639 * equivalent to the code found at this very spot
642 rt_setif(rt
, info
.rti_info
[RTAX_IFP
], info
.rti_info
[RTAX_IFA
], info
.rti_info
[RTAX_GATEWAY
],
646 rt_setmetrics(rtm
->rtm_inits
, &rtm
->rtm_rmx
,
649 /* rt_setif, called above does this for us on darwin */
650 if (rt
->rt_ifa
&& rt
->rt_ifa
->ifa_rtrequest
)
651 rt
->rt_ifa
->ifa_rtrequest(RTM_ADD
, rt
, info
.rti_info
[RTAX_GATEWAY
]);
653 if (info
.rti_info
[RTAX_GENMASK
])
654 rt
->rt_genmask
= info
.rti_info
[RTAX_GENMASK
];
659 rt
->rt_rmx
.rmx_locks
&= ~(rtm
->rtm_inits
);
660 rt
->rt_rmx
.rmx_locks
|=
661 (rtm
->rtm_inits
& rtm
->rtm_rmx
.rmx_locks
);
673 rtm
->rtm_errno
= error
;
675 rtm
->rtm_flags
|= RTF_DONE
;
678 RT_LOCK_ASSERT_NOTHELD(rt
);
681 lck_mtx_unlock(rnh_lock
);
682 socket_lock(so
, 0); /* relock the socket now */
684 struct rawcb
*rp
= 0;
686 * Check to see if we don't want our own messages.
688 if ((so
->so_options
& SO_USELOOPBACK
) == 0) {
689 if (route_cb
.any_count
<= 1) {
695 /* There is another listener, so construct message */
699 m_copyback(m
, 0, rtm
->rtm_msglen
, (caddr_t
)rtm
);
700 if (m
->m_pkthdr
.len
< rtm
->rtm_msglen
) {
703 } else if (m
->m_pkthdr
.len
> rtm
->rtm_msglen
)
704 m_adj(m
, rtm
->rtm_msglen
- m
->m_pkthdr
.len
);
707 if (sendonlytoself
&& m
) {
709 if (sbappendaddr(&so
->so_rcv
, &route_src
, m
, (struct mbuf
*)0, &error
) != 0) {
715 struct sockproto route_proto
= {PF_ROUTE
, 0};
717 rp
->rcb_proto
.sp_family
= 0; /* Avoid us */
718 if (dst_sa_family
!= 0)
719 route_proto
.sp_protocol
= dst_sa_family
;
721 socket_unlock(so
, 0);
722 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
726 rp
->rcb_proto
.sp_family
= PF_ROUTE
;
733 rt_setexpire(struct rtentry
*rt
, uint64_t expiry
)
735 /* set both rt_expire and rmx_expire */
736 rt
->rt_expire
= expiry
;
738 rt
->rt_rmx
.rmx_expire
= expiry
+ rt
->base_calendartime
-
741 rt
->rt_rmx
.rmx_expire
= 0;
745 rt_setmetrics(u_int32_t which
, struct rt_metrics
*in
, struct rtentry
*out
)
747 struct timeval curr_calendar_time
;
748 uint64_t curr_uptime
;
750 getmicrotime(&curr_calendar_time
);
751 curr_uptime
= net_uptime();
753 #define metric(f, e) if (which & (f)) out->rt_rmx.e = in->e;
754 metric(RTV_RPIPE
, rmx_recvpipe
);
755 metric(RTV_SPIPE
, rmx_sendpipe
);
756 metric(RTV_SSTHRESH
, rmx_ssthresh
);
757 metric(RTV_RTT
, rmx_rtt
);
758 metric(RTV_RTTVAR
, rmx_rttvar
);
759 metric(RTV_HOPCOUNT
, rmx_hopcount
);
760 metric(RTV_MTU
, rmx_mtu
);
761 metric(RTV_EXPIRE
, rmx_expire
);
764 if (out
->rt_rmx
.rmx_expire
> 0) {
765 /* account for system time change */
766 curr_uptime
= net_uptime();
767 getmicrotime(&curr_calendar_time
);
768 out
->base_calendartime
+=
769 CALCULATE_CLOCKSKEW(curr_calendar_time
,
770 out
->base_calendartime
,
771 curr_uptime
, out
->base_uptime
);
773 out
->rt_rmx
.rmx_expire
-
774 out
->base_calendartime
+
777 rt_setexpire(out
, 0);
780 VERIFY(out
->rt_expire
== 0 || out
->rt_rmx
.rmx_expire
!= 0);
781 VERIFY(out
->rt_expire
!= 0 || out
->rt_rmx
.rmx_expire
== 0);
785 rt_getmetrics(struct rtentry
*in
, struct rt_metrics
*out
)
787 struct timeval curr_calendar_time
;
788 uint64_t curr_uptime
;
790 VERIFY(in
->rt_expire
== 0 || in
->rt_rmx
.rmx_expire
!= 0);
791 VERIFY(in
->rt_expire
!= 0 || in
->rt_rmx
.rmx_expire
== 0);
796 /* account for system time change */
797 getmicrotime(&curr_calendar_time
);
798 curr_uptime
= net_uptime();
800 in
->base_calendartime
+=
801 CALCULATE_CLOCKSKEW(curr_calendar_time
,
802 in
->base_calendartime
,
803 curr_uptime
, in
->base_uptime
);
805 out
->rmx_expire
= in
->base_calendartime
+
806 in
->rt_expire
- in
->base_uptime
;
812 * Set route's interface given info.rti_info[RTAX_IFP], info.rti_info[RTAX_IFA], and gateway.
815 rt_setif(struct rtentry
*rt
, struct sockaddr
*Ifpaddr
, struct sockaddr
*Ifaaddr
,
816 struct sockaddr
*Gate
, unsigned int ifscope
)
818 struct ifaddr
*ifa
= NULL
;
819 struct ifnet
*ifp
= NULL
;
820 void (*ifa_rtrequest
)
821 (int, struct rtentry
*, struct sockaddr
*);
823 lck_mtx_assert(rnh_lock
, LCK_MTX_ASSERT_OWNED
);
825 RT_LOCK_ASSERT_HELD(rt
);
827 /* trigger route cache reevaluation */
831 /* Don't update a defunct route */
832 if (rt
->rt_flags
& RTF_CONDEMNED
)
835 /* Add an extra ref for ourselves */
836 RT_ADDREF_LOCKED(rt
);
838 /* Become a regular mutex, just in case */
842 * New gateway could require new ifaddr, ifp; flags may also
843 * be different; ifp may be specified by ll sockaddr when
844 * protocol address is ambiguous.
846 if (Ifpaddr
&& (ifa
= ifa_ifwithnet_scoped(Ifpaddr
, ifscope
)) &&
847 (ifp
= ifa
->ifa_ifp
) && (Ifaaddr
|| Gate
)) {
849 ifa
= ifaof_ifpforaddr(Ifaaddr
? Ifaaddr
: Gate
, ifp
);
855 if (Ifpaddr
&& (ifp
= if_withname(Ifpaddr
)) ) {
857 ifa
= ifaof_ifpforaddr(Gate
, ifp
);
859 ifnet_lock_shared(ifp
);
860 ifa
= TAILQ_FIRST(&ifp
->if_addrhead
);
863 ifnet_lock_done(ifp
);
865 } else if (Ifaaddr
&&
866 (ifa
= ifa_ifwithaddr_scoped(Ifaaddr
, ifscope
))) {
868 } else if (Gate
!= NULL
) {
870 * Safe to drop rt_lock and use rt_key, since holding
871 * rnh_lock here prevents another thread from calling
872 * rt_setgate() on this route. We cannot hold the
873 * lock across ifa_ifwithroute since the lookup done
874 * by that routine may point to the same route.
877 if ((ifa
= ifa_ifwithroute_scoped_locked(rt
->rt_flags
,
878 rt_key(rt
), Gate
, ifscope
)) != NULL
)
881 /* Don't update a defunct route */
882 if (rt
->rt_flags
& RTF_CONDEMNED
) {
885 /* Release extra ref */
886 RT_REMREF_LOCKED(rt
);
892 struct ifaddr
*oifa
= rt
->rt_ifa
;
896 ifa_rtrequest
= oifa
->ifa_rtrequest
;
898 if (ifa_rtrequest
!= NULL
)
899 ifa_rtrequest(RTM_DELETE
, rt
, Gate
);
903 if (rt
->rt_ifp
!= ifp
) {
905 * Purge any link-layer info caching.
907 if (rt
->rt_llinfo_purge
!= NULL
)
908 rt
->rt_llinfo_purge(rt
);
911 * Adjust route ref count for the interfaces.
913 if (rt
->rt_if_ref_fn
!= NULL
) {
914 rt
->rt_if_ref_fn(ifp
, 1);
915 rt
->rt_if_ref_fn(rt
->rt_ifp
, -1);
920 * If this is the (non-scoped) default route, record
921 * the interface index used for the primary ifscope.
923 if (rt_primary_default(rt
, rt_key(rt
))) {
924 set_primary_ifscope(rt_key(rt
)->sa_family
,
925 rt
->rt_ifp
->if_index
);
927 rt
->rt_rmx
.rmx_mtu
= ifp
->if_mtu
;
928 if (rt
->rt_ifa
!= NULL
) {
929 IFA_LOCK_SPIN(rt
->rt_ifa
);
930 ifa_rtrequest
= rt
->rt_ifa
->ifa_rtrequest
;
931 IFA_UNLOCK(rt
->rt_ifa
);
932 if (ifa_rtrequest
!= NULL
)
933 ifa_rtrequest(RTM_ADD
, rt
, Gate
);
936 /* Release extra ref */
937 RT_REMREF_LOCKED(rt
);
943 /* XXX: to reset gateway to correct value, at RTM_CHANGE */
944 if (rt
->rt_ifa
!= NULL
) {
945 IFA_LOCK_SPIN(rt
->rt_ifa
);
946 ifa_rtrequest
= rt
->rt_ifa
->ifa_rtrequest
;
947 IFA_UNLOCK(rt
->rt_ifa
);
948 if (ifa_rtrequest
!= NULL
)
949 ifa_rtrequest(RTM_ADD
, rt
, Gate
);
952 /* Release extra ref */
953 RT_REMREF_LOCKED(rt
);
956 #define ROUNDUP32(a) \
957 ((a) > 0 ? (1 + (((a) - 1) | (sizeof(uint32_t) - 1))) : sizeof(uint32_t))
958 #define ADVANCE32(x, n) (x += ROUNDUP32((n)->sa_len))
962 * Extract the addresses of the passed sockaddrs.
963 * Do a little sanity checking so as to avoid bad memory references.
964 * This data is derived straight from userland.
967 rt_xaddrs(caddr_t cp
, caddr_t cplim
, struct rt_addrinfo
*rtinfo
)
972 bzero(rtinfo
->rti_info
, sizeof(rtinfo
->rti_info
));
973 for (i
= 0; (i
< RTAX_MAX
) && (cp
< cplim
); i
++) {
974 if ((rtinfo
->rti_addrs
& (1 << i
)) == 0)
976 sa
= (struct sockaddr
*)cp
;
980 if ( (cp
+ sa
->sa_len
) > cplim
) {
985 * there are no more.. quit now
986 * If there are more bits, they are in error.
987 * I've seen this. route(1) can evidently generate these.
988 * This causes kernel to core dump.
989 * for compatibility, If we see this, point to a safe address.
991 if (sa
->sa_len
== 0) {
992 rtinfo
->rti_info
[i
] = &sa_zero
;
993 return (0); /* should be EINVAL but for compat */
997 rtinfo
->rti_info
[i
] = sa
;
1003 static struct mbuf
*
1004 rt_msg1(int type
, struct rt_addrinfo
*rtinfo
)
1006 struct rt_msghdr
*rtm
;
1015 len
= sizeof(struct ifa_msghdr
);
1020 len
= sizeof(struct ifma_msghdr
);
1024 len
= sizeof(struct if_msghdr
);
1028 len
= sizeof(struct rt_msghdr
);
1032 m
= m_gethdr(M_DONTWAIT
, MT_DATA
);
1033 if (m
&& len
> MHLEN
) {
1034 MCLGET(m
, M_DONTWAIT
);
1035 if ((m
->m_flags
& M_EXT
) == 0) {
1042 m
->m_pkthdr
.len
= m
->m_len
= len
;
1043 m
->m_pkthdr
.rcvif
= 0;
1044 rtm
= mtod(m
, struct rt_msghdr
*);
1045 bzero((caddr_t
)rtm
, len
);
1046 for (i
= 0; i
< RTAX_MAX
; i
++) {
1047 struct sockaddr
*sa
, *hint
;
1048 struct sockaddr_storage ss
;
1050 if ((sa
= rtinfo
->rti_info
[i
]) == NULL
)
1056 if ((hint
= rtinfo
->rti_info
[RTAX_DST
]) == NULL
)
1057 hint
= rtinfo
->rti_info
[RTAX_IFA
];
1059 /* Scrub away any trace of embedded interface scope */
1060 sa
= rtm_scrub_ifscope(type
, i
, hint
, sa
, &ss
);
1067 rtinfo
->rti_addrs
|= (1 << i
);
1068 dlen
= ROUNDUP32(sa
->sa_len
);
1069 m_copyback(m
, len
, dlen
, (caddr_t
)sa
);
1072 if (m
->m_pkthdr
.len
!= len
) {
1076 rtm
->rtm_msglen
= len
;
1077 rtm
->rtm_version
= RTM_VERSION
;
1078 rtm
->rtm_type
= type
;
1083 rt_msg2(int type
, struct rt_addrinfo
*rtinfo
, caddr_t cp
, struct walkarg
*w
)
1086 int len
, dlen
, second_time
= 0;
1089 rtinfo
->rti_addrs
= 0;
1095 len
= sizeof(struct ifa_msghdr
);
1100 len
= sizeof(struct ifma_msghdr
);
1104 len
= sizeof(struct if_msghdr
);
1108 len
= sizeof(struct if_msghdr2
);
1112 len
= sizeof(struct ifma_msghdr2
);
1116 len
= sizeof (struct rt_msghdr_ext
);
1120 len
= sizeof(struct rt_msghdr2
);
1124 len
= sizeof(struct rt_msghdr
);
1129 for (i
= 0; i
< RTAX_MAX
; i
++) {
1130 struct sockaddr
*sa
, *hint
;
1131 struct sockaddr_storage ss
;
1133 if ((sa
= rtinfo
->rti_info
[i
]) == 0)
1139 if ((hint
= rtinfo
->rti_info
[RTAX_DST
]) == NULL
)
1140 hint
= rtinfo
->rti_info
[RTAX_IFA
];
1142 /* Scrub away any trace of embedded interface scope */
1143 sa
= rtm_scrub_ifscope(type
, i
, hint
, sa
, &ss
);
1150 rtinfo
->rti_addrs
|= (1 << i
);
1151 dlen
= ROUNDUP32(sa
->sa_len
);
1153 bcopy((caddr_t
)sa
, cp
, (unsigned)dlen
);
1158 if (cp
== 0 && w
!= NULL
&& !second_time
) {
1159 struct walkarg
*rw
= w
;
1162 if (rw
->w_tmemsize
< len
) {
1164 FREE(rw
->w_tmem
, M_RTABLE
);
1165 rw
->w_tmem
= (caddr_t
)
1166 _MALLOC(len
, M_RTABLE
, M_WAITOK
); /*###LD0412 was NOWAIT */
1168 rw
->w_tmemsize
= len
;
1178 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)cp0
;
1180 rtm
->rtm_version
= RTM_VERSION
;
1181 rtm
->rtm_type
= type
;
1182 rtm
->rtm_msglen
= len
;
1188 * This routine is called to generate a message from the routing
1189 * socket indicating that a redirect has occurred, a routing lookup
1190 * has failed, or that a protocol has detected timeouts to a particular
1194 rt_missmsg(int type
, struct rt_addrinfo
*rtinfo
, int flags
, int error
)
1196 struct rt_msghdr
*rtm
;
1198 struct sockaddr
*sa
= rtinfo
->rti_info
[RTAX_DST
];
1199 struct sockproto route_proto
= {PF_ROUTE
, 0};
1201 if (route_cb
.any_count
== 0)
1203 m
= rt_msg1(type
, rtinfo
);
1206 rtm
= mtod(m
, struct rt_msghdr
*);
1207 rtm
->rtm_flags
= RTF_DONE
| flags
;
1208 rtm
->rtm_errno
= error
;
1209 rtm
->rtm_addrs
= rtinfo
->rti_addrs
;
1210 route_proto
.sp_family
= sa
? sa
->sa_family
: 0;
1211 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1215 * This routine is called to generate a message from the routing
1216 * socket indicating that the status of a network interface has changed.
1222 struct if_msghdr
*ifm
;
1224 struct rt_addrinfo info
;
1225 struct sockproto route_proto
= {PF_ROUTE
, 0};
1227 if (route_cb
.any_count
== 0)
1229 bzero((caddr_t
)&info
, sizeof(info
));
1230 m
= rt_msg1(RTM_IFINFO
, &info
);
1233 ifm
= mtod(m
, struct if_msghdr
*);
1234 ifm
->ifm_index
= ifp
->if_index
;
1235 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1236 if_data_internal_to_if_data(ifp
, &ifp
->if_data
, &ifm
->ifm_data
);
1238 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1242 * This is called to generate messages from the routing socket
1243 * indicating a network interface has had addresses associated with it.
1244 * if we ever reverse the logic and replace messages TO the routing
1245 * socket indicate a request to configure interfaces, then it will
1246 * be unnecessary as the routing socket will automatically generate
1249 * Since this is coming from the interface, it is expected that the
1250 * interface will be locked. Caller must hold rnh_lock and rt_lock.
1253 rt_newaddrmsg(int cmd
, struct ifaddr
*ifa
, int error
, struct rtentry
*rt
)
1255 struct rt_addrinfo info
;
1256 struct sockaddr
*sa
= 0;
1259 struct ifnet
*ifp
= ifa
->ifa_ifp
;
1260 struct sockproto route_proto
= {PF_ROUTE
, 0};
1262 lck_mtx_assert(rnh_lock
, LCK_MTX_ASSERT_OWNED
);
1263 RT_LOCK_ASSERT_HELD(rt
);
1265 if (route_cb
.any_count
== 0)
1268 /* Become a regular mutex, just in case */
1269 RT_CONVERT_LOCK(rt
);
1270 for (pass
= 1; pass
< 3; pass
++) {
1271 bzero((caddr_t
)&info
, sizeof(info
));
1272 if ((cmd
== RTM_ADD
&& pass
== 1) ||
1273 (cmd
== RTM_DELETE
&& pass
== 2)) {
1274 struct ifa_msghdr
*ifam
;
1275 int ncmd
= cmd
== RTM_ADD
? RTM_NEWADDR
: RTM_DELADDR
;
1277 /* Lock ifp for if_lladdr */
1278 ifnet_lock_shared(ifp
);
1280 info
.rti_info
[RTAX_IFA
] = sa
= ifa
->ifa_addr
;
1282 * Holding ifnet lock here prevents the link address
1283 * from changing contents, so no need to hold its
1284 * lock. The link address is always present; it's
1287 info
.rti_info
[RTAX_IFP
] = ifp
->if_lladdr
->ifa_addr
;
1288 info
.rti_info
[RTAX_NETMASK
] = ifa
->ifa_netmask
;
1289 info
.rti_info
[RTAX_BRD
] = ifa
->ifa_dstaddr
;
1290 if ((m
= rt_msg1(ncmd
, &info
)) == NULL
) {
1292 ifnet_lock_done(ifp
);
1296 ifnet_lock_done(ifp
);
1297 ifam
= mtod(m
, struct ifa_msghdr
*);
1298 ifam
->ifam_index
= ifp
->if_index
;
1300 ifam
->ifam_metric
= ifa
->ifa_metric
;
1301 ifam
->ifam_flags
= ifa
->ifa_flags
;
1303 ifam
->ifam_addrs
= info
.rti_addrs
;
1305 if ((cmd
== RTM_ADD
&& pass
== 2) ||
1306 (cmd
== RTM_DELETE
&& pass
== 1)) {
1307 struct rt_msghdr
*rtm
;
1311 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1312 info
.rti_info
[RTAX_DST
] = sa
= rt_key(rt
);
1313 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1314 if ((m
= rt_msg1(cmd
, &info
)) == NULL
)
1316 rtm
= mtod(m
, struct rt_msghdr
*);
1317 rtm
->rtm_index
= ifp
->if_index
;
1318 rtm
->rtm_flags
|= rt
->rt_flags
;
1319 rtm
->rtm_errno
= error
;
1320 rtm
->rtm_addrs
= info
.rti_addrs
;
1322 route_proto
.sp_protocol
= sa
? sa
->sa_family
: 0;
1323 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1328 * This is the analogue to the rt_newaddrmsg which performs the same
1329 * function but for multicast group memberhips. This is easier since
1330 * there is no route state to worry about.
1333 rt_newmaddrmsg(int cmd
, struct ifmultiaddr
*ifma
)
1335 struct rt_addrinfo info
;
1337 struct ifnet
*ifp
= ifma
->ifma_ifp
;
1338 struct ifma_msghdr
*ifmam
;
1339 struct sockproto route_proto
= {PF_ROUTE
, 0};
1341 if (route_cb
.any_count
== 0)
1344 /* Lock ifp for if_lladdr */
1345 ifnet_lock_shared(ifp
);
1346 bzero((caddr_t
)&info
, sizeof(info
));
1348 info
.rti_info
[RTAX_IFA
] = ifma
->ifma_addr
;
1349 info
.rti_info
[RTAX_IFP
] = ifp
->if_lladdr
->ifa_addr
; /* lladdr doesn't need lock */
1352 * If a link-layer address is present, present it as a ``gateway''
1353 * (similarly to how ARP entries, e.g., are presented).
1355 info
.rti_info
[RTAX_GATEWAY
] = (ifma
->ifma_ll
!= NULL
) ? ifma
->ifma_ll
->ifma_addr
: NULL
;
1356 if ((m
= rt_msg1(cmd
, &info
)) == NULL
) {
1358 ifnet_lock_done(ifp
);
1361 ifmam
= mtod(m
, struct ifma_msghdr
*);
1362 ifmam
->ifmam_index
= ifp
->if_index
;
1363 ifmam
->ifmam_addrs
= info
.rti_addrs
;
1364 route_proto
.sp_protocol
= ifma
->ifma_addr
->sa_family
;
1366 ifnet_lock_done(ifp
);
1367 raw_input(m
, &route_proto
, &route_src
, &route_dst
);
1371 * This is used in dumping the kernel table via sysctl().
1374 sysctl_dumpentry(struct radix_node
*rn
, void *vw
)
1376 struct walkarg
*w
= vw
;
1377 struct rtentry
*rt
= (struct rtentry
*)rn
;
1378 int error
= 0, size
;
1379 struct rt_addrinfo info
;
1382 if (w
->w_op
== NET_RT_FLAGS
&& !(rt
->rt_flags
& w
->w_arg
)) {
1386 bzero((caddr_t
)&info
, sizeof(info
));
1387 info
.rti_info
[RTAX_DST
] = rt_key(rt
);
1388 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1389 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1390 info
.rti_info
[RTAX_GENMASK
] = rt
->rt_genmask
;
1392 if (w
->w_op
!= NET_RT_DUMP2
) {
1393 size
= rt_msg2(RTM_GET
, &info
, 0, w
);
1394 if (w
->w_req
&& w
->w_tmem
) {
1395 struct rt_msghdr
*rtm
= (struct rt_msghdr
*)w
->w_tmem
;
1397 rtm
->rtm_flags
= rt
->rt_flags
;
1398 rtm
->rtm_use
= rt
->rt_use
;
1399 rt_getmetrics(rt
, &rtm
->rtm_rmx
);
1400 rtm
->rtm_index
= rt
->rt_ifp
->if_index
;
1404 rtm
->rtm_addrs
= info
.rti_addrs
;
1405 error
= SYSCTL_OUT(w
->w_req
, (caddr_t
)rtm
, size
);
1410 size
= rt_msg2(RTM_GET2
, &info
, 0, w
);
1411 if (w
->w_req
&& w
->w_tmem
) {
1412 struct rt_msghdr2
*rtm
= (struct rt_msghdr2
*)w
->w_tmem
;
1414 rtm
->rtm_flags
= rt
->rt_flags
;
1415 rtm
->rtm_use
= rt
->rt_use
;
1416 rt_getmetrics(rt
, &rtm
->rtm_rmx
);
1417 rtm
->rtm_index
= rt
->rt_ifp
->if_index
;
1418 rtm
->rtm_refcnt
= rt
->rt_refcnt
;
1420 rtm
->rtm_parentflags
= rt
->rt_parent
->rt_flags
;
1422 rtm
->rtm_parentflags
= 0;
1423 rtm
->rtm_reserved
= 0;
1424 rtm
->rtm_addrs
= info
.rti_addrs
;
1425 error
= SYSCTL_OUT(w
->w_req
, (caddr_t
)rtm
, size
);
1435 * This is used for dumping extended information from route entries.
1438 sysctl_dumpentry_ext(struct radix_node
*rn
, void *vw
)
1440 struct walkarg
*w
= vw
;
1441 struct rtentry
*rt
= (struct rtentry
*)rn
;
1442 int error
= 0, size
;
1443 struct rt_addrinfo info
;
1446 if (w
->w_op
== NET_RT_DUMPX_FLAGS
&& !(rt
->rt_flags
& w
->w_arg
)) {
1450 bzero(&info
, sizeof (info
));
1451 info
.rti_info
[RTAX_DST
] = rt_key(rt
);
1452 info
.rti_info
[RTAX_GATEWAY
] = rt
->rt_gateway
;
1453 info
.rti_info
[RTAX_NETMASK
] = rt_mask(rt
);
1454 info
.rti_info
[RTAX_GENMASK
] = rt
->rt_genmask
;
1456 size
= rt_msg2(RTM_GET_EXT
, &info
, 0, w
);
1457 if (w
->w_req
&& w
->w_tmem
) {
1458 struct rt_msghdr_ext
*ertm
= (struct rt_msghdr_ext
*)w
->w_tmem
;
1460 ertm
->rtm_flags
= rt
->rt_flags
;
1461 ertm
->rtm_use
= rt
->rt_use
;
1462 rt_getmetrics(rt
, &ertm
->rtm_rmx
);
1463 ertm
->rtm_index
= rt
->rt_ifp
->if_index
;
1466 ertm
->rtm_errno
= 0;
1467 ertm
->rtm_addrs
= info
.rti_addrs
;
1468 if (rt
->rt_llinfo_get_ri
== NULL
)
1469 bzero(&ertm
->rtm_ri
, sizeof (ertm
->rtm_ri
));
1471 rt
->rt_llinfo_get_ri(rt
, &ertm
->rtm_ri
);
1473 error
= SYSCTL_OUT(w
->w_req
, (caddr_t
)ertm
, size
);
1483 * To avoid to call copyout() while holding locks and to cause problems
1484 * in the paging path, sysctl_iflist() and sysctl_iflist2() contstruct
1485 * the list in two passes. In the first pass we compute the total
1486 * length of the data we are going to copyout, then we release
1487 * all locks to allocate a temporary buffer that gets filled
1488 * in the second pass.
1490 * Note that we are verifying the assumption that _MALLOC returns a buffer
1491 * that is at least 32 bits aligned and that the messages and addresses are
1496 sysctl_iflist(int af
, struct walkarg
*w
)
1500 struct rt_addrinfo info
;
1503 int total_len
= 0, current_len
= 0;
1504 char *total_buffer
= NULL
, *cp
= NULL
;
1506 bzero((caddr_t
)&info
, sizeof(info
));
1508 for (pass
= 0; pass
< 2; pass
++) {
1509 ifnet_head_lock_shared();
1511 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
1514 if (w
->w_arg
&& w
->w_arg
!= ifp
->if_index
)
1516 ifnet_lock_shared(ifp
);
1518 * Holding ifnet lock here prevents the link address from
1519 * changing contents, so no need to hold the ifa lock.
1520 * The link address is always present; it's never freed.
1522 ifa
= ifp
->if_lladdr
;
1523 info
.rti_info
[RTAX_IFP
] = ifa
->ifa_addr
;
1524 len
= rt_msg2(RTM_IFINFO
, &info
, (caddr_t
)0, NULL
);
1528 struct if_msghdr
*ifm
;
1530 if (current_len
+ len
> total_len
) {
1531 ifnet_lock_done(ifp
);
1532 printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n",
1533 current_len
, len
, total_len
);
1537 info
.rti_info
[RTAX_IFP
] = ifa
->ifa_addr
;
1538 len
= rt_msg2(RTM_IFINFO
, &info
, (caddr_t
)cp
, NULL
);
1539 info
.rti_info
[RTAX_IFP
] = NULL
;
1541 ifm
= (struct if_msghdr
*)cp
;
1542 ifm
->ifm_index
= ifp
->if_index
;
1543 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1544 if_data_internal_to_if_data(ifp
, &ifp
->if_data
,
1546 ifm
->ifm_addrs
= info
.rti_addrs
;
1549 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1552 while ((ifa
= ifa
->ifa_link
.tqe_next
) != 0) {
1554 if (af
&& af
!= ifa
->ifa_addr
->sa_family
) {
1558 info
.rti_info
[RTAX_IFA
] = ifa
->ifa_addr
;
1559 info
.rti_info
[RTAX_NETMASK
] = ifa
->ifa_netmask
;
1560 info
.rti_info
[RTAX_BRD
] = ifa
->ifa_dstaddr
;
1561 len
= rt_msg2(RTM_NEWADDR
, &info
, 0, 0);
1565 struct ifa_msghdr
*ifam
;
1567 if (current_len
+ len
> total_len
) {
1569 printf("sysctl_iflist: current_len (%d) + len (%d) > total_len (%d)\n",
1570 current_len
, len
, total_len
);
1574 len
= rt_msg2(RTM_NEWADDR
, &info
, (caddr_t
)cp
, NULL
);
1576 ifam
= (struct ifa_msghdr
*)cp
;
1577 ifam
->ifam_index
= ifa
->ifa_ifp
->if_index
;
1578 ifam
->ifam_flags
= ifa
->ifa_flags
;
1579 ifam
->ifam_metric
= ifa
->ifa_metric
;
1580 ifam
->ifam_addrs
= info
.rti_addrs
;
1583 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1588 ifnet_lock_done(ifp
);
1589 info
.rti_info
[RTAX_IFA
] = info
.rti_info
[RTAX_NETMASK
] =
1590 info
.rti_info
[RTAX_BRD
] = NULL
;
1599 /* Better to return zero length buffer than ENOBUFS */
1602 total_len
+= total_len
>> 3;
1603 total_buffer
= _MALLOC(total_len
, M_RTABLE
, M_ZERO
| M_WAITOK
);
1604 if (total_buffer
== NULL
) {
1605 printf("sysctl_iflist: _MALLOC(%d) failed\n", total_len
);
1610 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1612 error
= SYSCTL_OUT(w
->w_req
, total_buffer
, current_len
);
1618 if (total_buffer
!= NULL
)
1619 _FREE(total_buffer
, M_RTABLE
);
1625 sysctl_iflist2(int af
, struct walkarg
*w
)
1629 struct rt_addrinfo info
;
1632 int total_len
= 0, current_len
= 0;
1633 char *total_buffer
= NULL
, *cp
= NULL
;
1635 bzero((caddr_t
)&info
, sizeof(info
));
1637 for (pass
= 0; pass
< 2; pass
++) {
1638 ifnet_head_lock_shared();
1640 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) {
1643 if (w
->w_arg
&& w
->w_arg
!= ifp
->if_index
)
1645 ifnet_lock_shared(ifp
);
1647 * Holding ifnet lock here prevents the link address from
1648 * changing contents, so no need to hold the ifa lock.
1649 * The link address is always present; it's never freed.
1651 ifa
= ifp
->if_lladdr
;
1652 info
.rti_info
[RTAX_IFP
] = ifa
->ifa_addr
;
1653 len
= rt_msg2(RTM_IFINFO2
, &info
, (caddr_t
)0, NULL
);
1657 struct if_msghdr2
*ifm
;
1659 if (current_len
+ len
> total_len
) {
1660 ifnet_lock_done(ifp
);
1661 printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
1662 current_len
, len
, total_len
);
1666 info
.rti_info
[RTAX_IFP
] = ifa
->ifa_addr
;
1667 len
= rt_msg2(RTM_IFINFO2
, &info
, (caddr_t
)cp
, NULL
);
1668 info
.rti_info
[RTAX_IFP
] = NULL
;
1670 ifm
= (struct if_msghdr2
*)cp
;
1671 ifm
->ifm_addrs
= info
.rti_addrs
;
1672 ifm
->ifm_flags
= (u_short
)ifp
->if_flags
;
1673 ifm
->ifm_index
= ifp
->if_index
;
1674 ifm
->ifm_snd_len
= ifp
->if_snd
.ifq_len
;
1675 ifm
->ifm_snd_maxlen
= ifp
->if_snd
.ifq_maxlen
;
1676 ifm
->ifm_snd_drops
= ifp
->if_snd
.ifq_drops
;
1677 ifm
->ifm_timer
= ifp
->if_timer
;
1678 if_data_internal_to_if_data64(ifp
, &ifp
->if_data
,
1682 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1685 while ((ifa
= ifa
->ifa_link
.tqe_next
) != 0) {
1687 if (af
&& af
!= ifa
->ifa_addr
->sa_family
) {
1691 info
.rti_info
[RTAX_IFA
] = ifa
->ifa_addr
;
1692 info
.rti_info
[RTAX_NETMASK
] = ifa
->ifa_netmask
;
1693 info
.rti_info
[RTAX_BRD
] = ifa
->ifa_dstaddr
;
1694 len
= rt_msg2(RTM_NEWADDR
, &info
, 0, 0);
1698 struct ifa_msghdr
*ifam
;
1700 if (current_len
+ len
> total_len
) {
1702 printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
1703 current_len
, len
, total_len
);
1707 len
= rt_msg2(RTM_NEWADDR
, &info
, (caddr_t
)cp
, 0);
1709 ifam
= (struct ifa_msghdr
*)cp
;
1710 ifam
->ifam_index
= ifa
->ifa_ifp
->if_index
;
1711 ifam
->ifam_flags
= ifa
->ifa_flags
;
1712 ifam
->ifam_metric
= ifa
->ifa_metric
;
1713 ifam
->ifam_addrs
= info
.rti_addrs
;
1716 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1722 ifnet_lock_done(ifp
);
1726 struct ifmultiaddr
*ifma
;
1728 for (ifma
= LIST_FIRST(&ifp
->if_multiaddrs
);
1729 ifma
!= NULL
; ifma
= LIST_NEXT(ifma
, ifma_link
)) {
1730 struct ifaddr
*ifa0
;
1733 if (af
&& af
!= ifma
->ifma_addr
->sa_family
) {
1737 bzero((caddr_t
)&info
, sizeof(info
));
1738 info
.rti_info
[RTAX_IFA
] = ifma
->ifma_addr
;
1740 * Holding ifnet lock here prevents the link
1741 * address from changing contents, so no need
1742 * to hold the ifa0 lock. The link address is
1743 * always present; it's never freed.
1745 ifa0
= ifp
->if_lladdr
;
1746 info
.rti_info
[RTAX_IFP
] = ifa0
->ifa_addr
;
1747 if (ifma
->ifma_ll
!= NULL
)
1748 info
.rti_info
[RTAX_GATEWAY
] = ifma
->ifma_ll
->ifma_addr
;
1749 len
= rt_msg2(RTM_NEWMADDR2
, &info
, 0, 0);
1753 struct ifma_msghdr2
*ifmam
;
1755 if (current_len
+ len
> total_len
) {
1757 printf("sysctl_iflist2: current_len (%d) + len (%d) > total_len (%d)\n",
1758 current_len
, len
, total_len
);
1762 len
= rt_msg2(RTM_NEWMADDR2
, &info
, (caddr_t
)cp
, 0);
1764 ifmam
= (struct ifma_msghdr2
*)cp
;
1765 ifmam
->ifmam_addrs
= info
.rti_addrs
;
1766 ifmam
->ifmam_flags
= 0;
1767 ifmam
->ifmam_index
=
1768 ifma
->ifma_ifp
->if_index
;
1769 ifmam
->ifmam_refcount
=
1773 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1779 ifnet_lock_done(ifp
);
1780 info
.rti_info
[RTAX_IFA
] = info
.rti_info
[RTAX_NETMASK
] =
1781 info
.rti_info
[RTAX_BRD
] = NULL
;
1789 /* Better to return zero length buffer than ENOBUFS */
1792 total_len
+= total_len
>> 3;
1793 total_buffer
= _MALLOC(total_len
, M_RTABLE
, M_ZERO
| M_WAITOK
);
1794 if (total_buffer
== NULL
) {
1795 printf("sysctl_iflist2: _MALLOC(%d) failed\n", total_len
);
1800 VERIFY(IS_P2ALIGNED(cp
, sizeof(u_int32_t
)));
1802 error
= SYSCTL_OUT(w
->w_req
, total_buffer
, current_len
);
1808 if (total_buffer
!= NULL
)
1809 _FREE(total_buffer
, M_RTABLE
);
1816 sysctl_rtstat(struct sysctl_req
*req
)
1820 error
= SYSCTL_OUT(req
, &rtstat
, sizeof(struct rtstat
));
1828 sysctl_rttrash(struct sysctl_req
*req
)
1832 error
= SYSCTL_OUT(req
, &rttrash
, sizeof(rttrash
));
1840 * Called from pfslowtimo(), protected by domain_proto_mtx
1845 struct timeval delta_ts
, current_ts
;
1848 * This test is done without holding rnh_lock; in the even that
1849 * we read stale value, it will only cause an extra (or miss)
1850 * drain and is therefore harmless.
1852 if (ifnet_aggressive_drainers
== 0) {
1853 if (timerisset(&last_ts
))
1854 timerclear(&last_ts
);
1858 microuptime(¤t_ts
);
1859 timersub(¤t_ts
, &last_ts
, &delta_ts
);
1861 if (delta_ts
.tv_sec
>= rt_if_idle_drain_interval
) {
1862 timerclear(&last_ts
);
1864 in_rtqdrain(); /* protocol cloned routes: INET */
1865 in_arpdrain(NULL
); /* cloned routes: ARP */
1867 in6_rtqdrain(); /* protocol cloned routes: INET6 */
1868 nd6_drain(NULL
); /* cloned routes: ND6 */
1871 last_ts
.tv_sec
= current_ts
.tv_sec
;
1872 last_ts
.tv_usec
= current_ts
.tv_usec
;
1879 lck_mtx_assert(rnh_lock
, LCK_MTX_ASSERT_OWNED
);
1882 routedomain
.dom_protosw
->pr_flags
|= PR_AGGDRAIN
;
1884 routedomain
.dom_protosw
->pr_flags
&= ~PR_AGGDRAIN
;
1888 sysctl_rtsock SYSCTL_HANDLER_ARGS
1890 #pragma unused(oidp)
1891 int *name
= (int *)arg1
;
1892 u_int namelen
= arg2
;
1893 struct radix_node_head
*rnh
;
1894 int i
, error
= EINVAL
;
1905 Bzero(&w
, sizeof(w
));
1915 lck_mtx_lock(rnh_lock
);
1916 for (i
= 1; i
<= AF_MAX
; i
++)
1917 if ((rnh
= rt_tables
[i
]) && (af
== 0 || af
== i
) &&
1918 (error
= rnh
->rnh_walktree(rnh
,
1919 sysctl_dumpentry
, &w
)))
1921 lck_mtx_unlock(rnh_lock
);
1924 case NET_RT_DUMPX_FLAGS
:
1925 lck_mtx_lock(rnh_lock
);
1926 for (i
= 1; i
<= AF_MAX
; i
++)
1927 if ((rnh
= rt_tables
[i
]) && (af
== 0 || af
== i
) &&
1928 (error
= rnh
->rnh_walktree(rnh
,
1929 sysctl_dumpentry_ext
, &w
)))
1931 lck_mtx_unlock(rnh_lock
);
1934 error
= sysctl_iflist(af
, &w
);
1936 case NET_RT_IFLIST2
:
1937 error
= sysctl_iflist2(af
, &w
);
1940 error
= sysctl_rtstat(req
);
1943 error
= sysctl_rttrash(req
);
1947 FREE(w
.w_tmem
, M_RTABLE
);
1951 SYSCTL_NODE(_net
, PF_ROUTE
, routetable
, CTLFLAG_RD
| CTLFLAG_LOCKED
, sysctl_rtsock
, "");
1954 * Definitions of protocols supported in the ROUTE domain.
1956 static struct protosw routesw
[] = {
1957 { SOCK_RAW
, &routedomain
, 0, PR_ATOMIC
|PR_ADDR
,
1958 0, route_output
, raw_ctlinput
, 0,
1960 raw_init
, 0, 0, rt_drainall
,
1968 struct domain routedomain
=
1969 { PF_ROUTE
, "route", route_init
, 0, 0,
1971 NULL
, NULL
, 0, 0, 0, 0, NULL
, 0,