5 // Copyright (c) 2011-2017 Apple Inc. All rights reserved.
9 #include "si_compare.h"
11 #include <dispatch/dispatch.h>
12 #include <sys/ioctl.h>
13 #include <sys/kern_control.h>
14 #include <sys/socketvar.h>
15 #include <sys/sys_domain.h>
16 #include <netinet/in.h>
17 #include <net/netsrc.h>
25 #include <mach/mach_time.h>
26 #include <net/ntstat.h>
34 kPrefer_Dest1_Slightly
= 2,
35 kPrefer_Dest2_Slightly
= -2,
41 kLookupSuccess_Found
= 1,
42 kLookupSuccess_Created
= 2,
45 typedef struct RFC6724Address
47 union sockaddr_in_4_6 addr
;
53 typedef struct Destination
58 uint32_t d_resolved_ifindex
;
60 uint32_t d_connection_attempts
;
61 uint32_t d_connection_successes
;
62 unsigned d_routable
: 1;
63 unsigned d_direct
: 1;
67 typedef struct DestCacheEntry
70 struct DestCacheEntry
*dce_next
;
71 Destination dce_entry
;
74 typedef struct DestCompareSettings
{
76 uint32_t rtt_leeway_small
;
77 int64_t rtt_failure_threshold
;
79 } DestCompareSettings
;
// Head of the singly linked destination cache; entries chain through dce_next.
81 static DestCacheEntry
*cache
= NULL
;
// Taken by si_destination_lookup() around the cache find/create calls.
82 static os_unfair_lock cache_lock
= OS_UNFAIR_LOCK_INIT
;
// Cache entry lifetime; si_destination_compare_init_once() sets it to one second
// expressed in mach_absolute_time() units.
83 static uint64_t cache_timeout
= 0ULL;
// Log handle; replaced by os_log_create() during one-time init and reset to the
// default in the fork child handler.
84 static os_log_t si_destination_log
= OS_LOG_DEFAULT
;
// Comparison tunables; filled with the kDefault* values during one-time init.
85 static DestCompareSettings si_compare_settings
= {};
88 #define TCP_RTT_SCALE 32 // see netinet/tcp_var.h
89 #endif // defined(TCP_RTT_SCALE)
91 #define SI_DESTINATION_COMPARE_UNLIKELY_BOOL(b) (__builtin_expect(!!((long)(b)), 0L))
92 #define SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(arg, toReturn) \
94 if (SI_DESTINATION_COMPARE_UNLIKELY_BOOL(!arg)) { \
95 os_log_fault(si_destination_log, "%{public}s " #arg " is NULL", __FUNCTION__); \
// Default for si_compare_settings.rtt_failure_threshold: a destination with no
// successful connections is only penalised once its attempts exceed this count.
101 static const uint32_t kFailureAttemptsTolerated
= 10;
// Default RTT difference needed for a firm preference, in TCP_RTT_SCALE units
// (presumably 100 ms; the log messages divide by TCP_RTT_SCALE before printing).
102 static const uint32_t kDefaultRTTLeeway
= 100 * TCP_RTT_SCALE
;
// Default RTT difference needed for a "slight" preference (presumably 10 ms).
103 static const uint32_t kDefaultRTTLeewaySmall
= 10 * TCP_RTT_SCALE
;
106 si_destination_compare_child_has_forked(void)
108 cache_lock
= OS_UNFAIR_LOCK_INIT
;
109 // Cannot use os_log_t object from parent process in child process.
110 si_destination_log
= OS_LOG_DEFAULT
;
114 si_destination_compare_init_once(void)
116 mach_timebase_info_data_t time_base
;
117 mach_timebase_info(&time_base
);
119 cache_timeout
= NSEC_PER_SEC
;
120 cache_timeout
*= time_base
.denom
;
121 cache_timeout
/= time_base
.numer
;
123 si_destination_log
= os_log_create("com.apple.network.libinfo", "si_destination_compare");
125 si_compare_settings
= (DestCompareSettings
){
126 .rtt_leeway
= kDefaultRTTLeeway
,
127 .rtt_leeway_small
= kDefaultRTTLeewaySmall
,
128 .rtt_failure_threshold
= kFailureAttemptsTolerated
,
129 .bypass_stats
= false
132 (void)pthread_atfork(NULL
, NULL
, si_destination_compare_child_has_forked
);
136 si_destination_compare_init(void)
138 static pthread_once_t cache_init
= PTHREAD_ONCE_INIT
;
139 pthread_once(&cache_init
, si_destination_compare_init_once
);
142 #pragma mark -- Netsrc --
// Opens a PF_SYSTEM / SYSPROTO_CONTROL datagram socket for the kernel control
// named `control_name`, then configures it: SO_NOSIGPIPE, control lookup via
// CTLIOCGINFO, connect(), a 128 KiB receive buffer and O_NONBLOCK.
// Every failing step is reported through si_destination_log.
// NOTE(review): the return statements and any cleanup on the error paths are not
// visible in this view; the caller (si_destination_fill_netsrc) treats a negative
// result as failure.
145 si_destination_create_control_socket(const char *control_name
)
148 const int fd
= socket(PF_SYSTEM
, SOCK_DGRAM
, SYSPROTO_CONTROL
);
151 os_log_error(si_destination_log
, "socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL) failed: %m");
155 // Disable SIGPIPE <rdar://problem/9049030>
157 if (setsockopt(fd
, SOL_SOCKET
, SO_NOSIGPIPE
, &on
, sizeof(on
)) != 0)
159 os_log_error(si_destination_log
, "setsockopt(%d, SOL_SOCKET, SO_NOSIGPIPE, ...) failed: %m", fd
);
164 // Get the control ID for statistics
165 struct ctl_info ctl
= {
// strlcpy bounds the copy to the size of ctl.ctl_name.
169 strlcpy(ctl
.ctl_name
, control_name
, sizeof(ctl
.ctl_name
));
// CTLIOCGINFO fills in `ctl`; ctl.ctl_id is reported in the connect() failure log below.
170 if (ioctl(fd
, CTLIOCGINFO
, &ctl
) == -1)
172 os_log_error(si_destination_log
, "ioctl(%d, CTLIOCGINFO, ...) failed: %m", fd
);
177 // Connect to the statistics control
178 struct sockaddr_ctl sc
= {
179 .sc_len
= sizeof(sc
),
180 .sc_family
= AF_SYSTEM
,
181 .ss_sysaddr
= SYSPROTO_CONTROL
,
184 .sc_reserved
= { 0, 0, 0, 0, 0 }
186 if (connect(fd
, (struct sockaddr
*)&sc
, sc
.sc_len
) != 0)
188 os_log_error(si_destination_log
, "connect(%d, ...) ctl_id=%u, failed: %m", fd
, ctl
.ctl_id
);
193 // increase the receive socket buffer size
194 int rcvbuf
= 128 * 1024;
195 if (0 != setsockopt(fd
, SOL_SOCKET
, SO_RCVBUF
, &rcvbuf
, sizeof(rcvbuf
))) {
196 os_log_error(si_destination_log
, "setsockopt(%d, SOL_SOCKET, SO_RCVBUF, %d) failed: %m",
200 // Set socket to non-blocking operation
// Reads the current file status flags, then writes them back with O_NONBLOCK added.
201 const int flags
= fcntl(fd
, F_GETFL
, 0);
203 if (-1 == fcntl(fd
, F_SETFL
, flags
| O_NONBLOCK
)) {
204 os_log_error(si_destination_log
, "fcntl(%d, F_SETFL, %d) failed: %m", fd
, flags
| O_NONBLOCK
);
// Logged when the F_GETFL query itself failed.
207 os_log_error(si_destination_log
, "fcntl(%d, F_GETFL, 0) failed: %m", fd
);
// Asks the kernel netsrc control for the source address, label and precedence
// matching d's destination address and interface scope, and stores the reply in *d.
// A version 2 reply additionally supplies the resolved interface index, minimum
// RTT, connection attempt/success counters and the routable/direct/AWDL flags.
// The control socket and the protocol version are function-level statics, so
// they persist across calls.
// NOTE(review): the early-return statements are not visible in this view.
214 si_destination_fill_netsrc(Destination
*d
)
216 static int netsrc_sockfd
= -1;
217 static int version
= NETSRC_CURVERS
;
// Create the control socket on first use; a negative descriptor means creation failed.
219 if (netsrc_sockfd
< 0) {
220 netsrc_sockfd
= si_destination_create_control_socket(NETSRC_CTLNAME
);
222 if (netsrc_sockfd
< 0) {
226 // Make sure we have a socket, create one if necessary
227 struct netsrc_req request
= {
229 .nrq_ifscope
= d
->d_ifindex
,
// Refuse destination sockaddrs that do not fit in the request's address storage.
232 if (d
->d_dst
.addr
.sa
.sa_len
> sizeof(request
._usa
)) {
233 os_log_debug(si_destination_log
, "sockaddr is too big");
// NOTE(review): copies from &d->d_dst (the RFC6724Address), which relies on
// `addr` being its first member — confirm against the struct definition.
236 memcpy(&request
._usa
, &d
->d_dst
, d
->d_dst
.addr
.sa
.sa_len
);
237 if (send(netsrc_sockfd
, &request
, sizeof(request
), 0) != sizeof(request
))
// EINVAL while using a newer version is taken to mean the kernel only speaks
// version 1: remember that and retry through a recursive call.
239 if (errno
== EINVAL
&& version
!= NETSRC_VERSION1
) {
240 // fall back to version 1
241 version
= NETSRC_VERSION1
;
242 si_destination_fill_netsrc(d
);
244 // Expected when there is no route to host
245 os_log_error(si_destination_log
, "send failed: %m");
// Version 1 reply: labels, precedences, source address and source flags only.
250 if (version
== NETSRC_VERSION1
) {
251 #ifdef NETSRC_VERSION2
252 struct netsrc_repv1 v1
;
253 #else // NETSRC_VERSION2
254 struct netsrc_rep v1
;
255 #endif // NETSRC_VERSION2
// A short or failed read is treated as failure; only a full-size reply is used.
256 if (recv(netsrc_sockfd
, &v1
, sizeof(v1
), 0) != sizeof(v1
)) {
258 os_log_debug(si_destination_log
, "recv failed: %m");
261 d
->d_dst
.label
= v1
.nrp_dstlabel
;
262 d
->d_dst
.precedence
= v1
.nrp_dstprecedence
;
// Copy the source address only when its length fits the destination storage.
263 if (v1
.nrq_sin
.sin_len
<= sizeof(d
->d_src
.addr
)) {
264 memcpy( &d
->d_src
.addr
, &v1
._usa
, v1
.nrq_sin
.sin_len
);
266 d
->d_src
.label
= v1
.nrp_label
;
267 d
->d_src
.precedence
= v1
.nrp_precedence
;
268 d
->d_src
.flags
= v1
.nrp_flags
;
271 #ifdef NETSRC_VERSION2
// Version 2 reply: everything from version 1 plus route and connection statistics.
272 else if (version
== NETSRC_VERSION2
) {
273 struct netsrc_repv2 v2
;
274 if (recv(netsrc_sockfd
, &v2
, sizeof(v2
), 0) != sizeof(v2
)) {
276 os_log_debug(si_destination_log
, "recv failed: %m");
279 d
->d_dst
.label
= v2
.nrp_dstlabel
;
280 d
->d_dst
.precedence
= v2
.nrp_dstprecedence
;
281 d
->d_src
.addr
= v2
.nrp_src
;
282 d
->d_src
.label
= v2
.nrp_label
;
283 d
->d_src
.precedence
= v2
.nrp_precedence
;
284 d
->d_src
.flags
= v2
.nrp_flags
;
285 d
->d_resolved_ifindex
= v2
.nrp_ifindex
;
286 d
->d_min_rtt
= v2
.nrp_min_rtt
;
287 d
->d_connection_attempts
= v2
.nrp_connection_attempts
;
288 d
->d_connection_successes
= v2
.nrp_connection_successes
;
// Collapse the individual reply flag bits into the 1-bit fields of Destination.
289 d
->d_routable
= (v2
.nrp_flags
& NETSRC_FLAG_ROUTEABLE
) ?
1 : 0;
290 d
->d_direct
= (v2
.nrp_flags
& NETSRC_FLAG_DIRECT
) ?
1 : 0;
291 d
->d_awdl
= (v2
.nrp_flags
& NETSRC_FLAG_AWDL
) ?
1 : 0;
294 #endif // NETSRC_VERSION2
299 #pragma mark -- Statistics --
// Interface index treated as loopback by si_destination_compare_statistics().
// Starts at 1 and is overwritten by set_loopback_ifindex() when a loopback
// link-layer entry is found.
301 static uint32_t kLoopbackIndex
= 1;
303 // Only update kLoopbackIndex from the default value of 1 if an entry with the IFF_LOOPBACK flag set is found.
// Scans the getifaddrs() list for an AF_LINK entry flagged IFF_LOOPBACK and
// records its link-layer index (sdl_index) in kLoopbackIndex. Run through
// pthread_once() from si_destination_compare_statistics().
// NOTE(review): the first half of the loop condition is not visible here;
// ifa_addr is dereferenced, so confirm it is NULL-checked.
305 set_loopback_ifindex()
307 struct ifaddrs
*ifaddrs
, *ifa
;
// A negative return from getifaddrs() means no list was obtained.
309 if (getifaddrs(&ifaddrs
) < 0)
312 for (ifa
= ifaddrs
; ifa
!= NULL
; ifa
= ifa
->ifa_next
) {
314 ifa
->ifa_addr
->sa_family
== AF_LINK
&&
315 (ifa
->ifa_flags
& IFF_LOOPBACK
) != 0) {
316 kLoopbackIndex
= (unsigned int)((struct sockaddr_dl
*)ifa
->ifa_addr
)->sdl_index
;
// Release the list allocated by getifaddrs().
321 freeifaddrs(ifaddrs
);
326 si_destination_compare_statistics(
330 int slightPreference
= kPrefer_Equal
;
331 // Initialize kLoopbackIndex value
332 static pthread_once_t once
= PTHREAD_ONCE_INIT
;
333 pthread_once(&once
, set_loopback_ifindex
);
335 // If we have min round trip times for both, use that
336 if (dst1
->d_min_rtt
&& dst2
->d_min_rtt
)
338 #define RTT_MSEC_DOUBLE_FROM_DST( dst ) (((double)(dst->d_min_rtt)) / ((double)(TCP_RTT_SCALE)))
339 if (dst1
->d_min_rtt
< dst2
->d_min_rtt
)
341 if (dst1
->d_min_rtt
+ si_compare_settings
.rtt_leeway
< dst2
->d_min_rtt
)
343 os_log_debug(si_destination_log
,
344 "prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
345 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
346 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
347 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
348 return kPrefer_Dest1
;
350 else if (dst1
->d_min_rtt
+ si_compare_settings
.rtt_leeway_small
< dst2
->d_min_rtt
)
352 slightPreference
= kPrefer_Dest1_Slightly
;
353 os_log_debug(si_destination_log
,
354 "prefering 1 slightly, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
355 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
356 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
357 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
361 os_log_debug(si_destination_log
,
362 "not prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
363 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
364 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
365 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
368 else if (dst1
->d_min_rtt
> dst2
->d_min_rtt
)
370 if (dst1
->d_min_rtt
> si_compare_settings
.rtt_leeway
+ dst2
->d_min_rtt
)
372 os_log_debug(si_destination_log
,
373 "prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
374 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
375 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
376 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
377 return kPrefer_Dest2
;
379 else if (dst1
->d_min_rtt
> si_compare_settings
.rtt_leeway_small
+ dst2
->d_min_rtt
)
381 slightPreference
= kPrefer_Dest2_Slightly
;
382 os_log_debug(si_destination_log
,
383 "prefering 2 slightly, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
384 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
385 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
386 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
390 os_log_debug(si_destination_log
,
391 "not prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
392 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
393 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
394 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
399 os_log_debug(si_destination_log
, "no preference, equal min rtt %f", RTT_MSEC_DOUBLE_FROM_DST(dst1
));
401 // we hold on to slightPreference and only use it if no other part of the evaluation had a stronger preference
402 #undef RTT_MSEC_DOUBLE_FROM_DST
404 else if (dst1
->d_min_rtt
|| dst2
->d_min_rtt
)
406 // If only dest1 is missing round trip make sure it doesn't have zero successful attempts
407 if (dst1
->d_min_rtt
== 0 &&
408 dst1
->d_connection_successes
== 0 &&
409 dst1
->d_connection_attempts
> 0)
411 if (dst1
->d_connection_attempts
> si_compare_settings
.rtt_failure_threshold
)
413 os_log_debug(si_destination_log
, "prefer 2, dst 1 has no successful connections and %u attempts",
414 dst1
->d_connection_attempts
);
415 return kPrefer_Dest2
;
418 // If only dest2 is missing round trip make sure it doesn't have zero successful attempts
419 else if (dst2
->d_min_rtt
== 0 &&
420 dst2
->d_connection_successes
== 0 &&
421 dst2
->d_connection_attempts
> 0)
423 if (dst2
->d_connection_attempts
> si_compare_settings
.rtt_failure_threshold
)
425 os_log_debug(si_destination_log
, "prefer 1, dst 2 has no successful connections and %u attempts",
426 dst2
->d_connection_attempts
);
427 return kPrefer_Dest1
;
432 // If we have routes for both...
433 if (dst1
->d_routable
!= 0 && dst2
->d_routable
!= 0)
435 // <rdar://problem/9070784> Prefer interface that are not peer to peer
436 if (dst1
->d_awdl
!= dst2
->d_awdl
)
440 os_log_debug(si_destination_log
, "prefer dst 1, dst 2 is p2p, dst 1 is not");
441 return kPrefer_Dest1
;
443 else if (dst1
->d_awdl
)
445 os_log_debug(si_destination_log
, "prefer dst 2, dst 1 is p2p, dst 2 is not");
446 return kPrefer_Dest2
;
450 // Prefer loopback routes
451 bool dst1loopback
= dst1
->d_resolved_ifindex
== kLoopbackIndex
||
452 (dst1
->d_dst
.addr
.sa
.sa_family
== AF_INET6
&&
453 IN6_IS_ADDR_LINKLOCAL(&dst1
->d_dst
.addr
.sin6
.sin6_addr
) &&
454 dst1
->d_dst
.addr
.sin6
.sin6_scope_id
== kLoopbackIndex
);
455 bool dst2loopback
= dst2
->d_resolved_ifindex
== kLoopbackIndex
||
456 (dst2
->d_dst
.addr
.sa
.sa_family
== AF_INET6
&&
457 IN6_IS_ADDR_LINKLOCAL(&dst2
->d_dst
.addr
.sin6
.sin6_addr
) &&
458 dst2
->d_dst
.addr
.sin6
.sin6_scope_id
== kLoopbackIndex
);
460 if (dst1loopback
&& !dst2loopback
)
462 os_log_debug(si_destination_log
, "prefer 1, dst 1 is loopback, dst 2 is not");
463 return kPrefer_Dest1
;
465 else if (dst2loopback
&& !dst1loopback
)
467 os_log_debug(si_destination_log
, "prefer 2, dst 2 is loopback, dst 1 is not");
468 return kPrefer_Dest2
;
471 // Prefer direct routes
472 if (dst1
->d_direct
&& !dst2
->d_direct
)
474 os_log_debug(si_destination_log
, "prefer 1, dst 1 is local, dst 2 is not");
475 return kPrefer_Dest1
;
477 else if (dst2
->d_direct
&& !dst1
->d_direct
)
479 os_log_debug(si_destination_log
, "prefer 2, dst 2 is local, dst 1 is not");
480 return kPrefer_Dest2
;
483 else if (dst1
->d_routable
!= 0)
485 // prefer destination we have a route to
486 os_log_debug(si_destination_log
, "prefer 1, dst 2 has no route");
487 return kPrefer_Dest1
;
489 else if (dst2
->d_routable
!= 0)
491 // prefer destination we have a route to
492 os_log_debug(si_destination_log
, "prefer 2, dst 1 has no route");
493 return kPrefer_Dest2
;
495 return slightPreference
;
498 #pragma mark -- Cache --
// Address equality test for two sockaddrs, ignoring the port number.
// NOTE(review): the function header is not visible in this view; the name
// sa_equal is taken from the call in si_destination_cache_find(). The value
// returned by each early-exit branch is likewise not visible.
//   - different families: compared unequal
//   - AF_INET:  compares sin_addr only
//   - AF_INET6: compares sin6_addr and sin6_scope_id
//   - anything else: lengths must match, then a byte-wise compare of sa_len bytes
502 const struct sockaddr
*sa1
,
503 const struct sockaddr
*sa2
)
505 if (sa1
->sa_family
!= sa2
->sa_family
)
508 /* We don't use bcmp because we don't care about the port number */
509 if (sa1
->sa_family
== AF_INET
)
511 const struct sockaddr_in
*sin1
= (const struct sockaddr_in
*)sa1
;
512 const struct sockaddr_in
*sin2
= (const struct sockaddr_in
*)sa2
;
514 if (sin1
->sin_addr
.s_addr
!= sin2
->sin_addr
.s_addr
)
519 if (sa1
->sa_family
== AF_INET6
)
521 const struct sockaddr_in6
*sin61
= (const struct sockaddr_in6
*)sa1
;
522 const struct sockaddr_in6
*sin62
= (const struct sockaddr_in6
*)sa2
;
524 if (memcmp(&sin61
->sin6_addr
, &sin62
->sin6_addr
, sizeof(sin61
->sin6_addr
)) != 0) {
528 // TBD: Is the flow info relevant?
// The scope id takes part in the comparison; flow info currently does not.
530 if (sin61
->sin6_scope_id
!= sin62
->sin6_scope_id
) {
// Unknown family: fall back to comparing the raw bytes, after a length check.
537 if (sa1
->sa_len
!= sa2
->sa_len
) {
541 return (memcmp(sa1
, sa2
, sa1
->sa_len
) == 0);
// Searches the destination cache for an entry matching `dst_sa` and the
// interface scope, copying it to *out_dst when found. While walking the list it
// also unlinks entries whose age has reached cache_timeout.
// si_destination_lookup() calls this with cache_lock held.
// NOTE(review): the `ifscope` parameter, the `result` variable, the release of
// expired entries and the return statement are not visible in this view.
545 si_destination_cache_find(
546 const struct sockaddr
*dst_sa
,
548 Destination
*out_dst
)
550 // Loop through the entries looking for:
// pprev points at the link that references the current entry, so an entry can
// be unlinked without tracking a separate "previous" node.
553 DestCacheEntry
**pprev
= &cache
;
554 uint64_t now
= mach_absolute_time();
557 while (*pprev
!= NULL
)
559 // If the item has expired, pull it out of the list
560 if ((now
- (*pprev
)->dce_time
) >= cache_timeout
)
562 DestCacheEntry
*expired
= *pprev
;
563 *pprev
= expired
->dce_next
;
// Scrub the unlinked entry's contents.
564 memset(expired
, 0, sizeof(*expired
));
569 // If the item matches, copy the entry
// Only the first match is copied (guarded by !result); the walk continues so
// that later expired entries are still removed.
570 if (!result
&& (*pprev
)->dce_entry
.d_ifindex
== ifscope
&&
571 sa_equal(dst_sa
, &(*pprev
)->dce_entry
.d_dst
.addr
.sa
))
573 *out_dst
= (*pprev
)->dce_entry
;
577 pprev
= &(*pprev
)->dce_next
;
// Builds a new cache entry for `dst_sa` / interface scope: allocates it, copies
// the destination address, queries the kernel through
// si_destination_fill_netsrc(), timestamps it and links it in front of the
// current cache head.
// si_destination_lookup() calls this with cache_lock held.
// NOTE(review): the `ifscope` parameter, the declaration of `d`, the update of
// `cache`, the copy to *out_dst and the return statements are not visible here.
585 si_destination_cache_create(
586 const struct sockaddr
*dst_sa
,
588 Destination
*out_dst
)
590 DestCacheEntry
*cache_entry
;
// Reject sockaddrs larger than the entry's address storage before any copy.
// sizeof does not evaluate its operand, so the uninitialised pointer is not read.
593 if (dst_sa
->sa_len
> sizeof(cache_entry
->dce_entry
.d_dst
.addr
)) {
594 os_log_error(si_destination_log
, "(dst_sa->sa_len %u > sizeof(cache_entry->dce_entry.d_dst.addr))",
// calloc zero-fills the entry, so fields the kernel reply does not set stay 0.
600 cache_entry
= calloc(1, sizeof(*cache_entry
));
601 if (NULL
== cache_entry
) {
602 os_log_error(si_destination_log
, "calloc(%zu) failed: %m", sizeof(*cache_entry
));
605 d
= &cache_entry
->dce_entry
;
607 // Copy the destination sockaddr
608 memcpy(&d
->d_dst
.addr
, dst_sa
, dst_sa
->sa_len
);
609 d
->d_ifindex
= ifscope
;
611 // Query the kernel for the matching source, precedence and label
612 si_destination_fill_netsrc(d
);
// Timestamp used by si_destination_cache_find() to expire the entry.
615 cache_entry
->dce_time
= mach_absolute_time();
616 cache_entry
->dce_next
= cache
;
623 si_destination_lookup(
624 const struct sockaddr
*dst_sa
,
626 Destination
*out_dst
)
628 int lookupResult
= kLookupFailure
;
630 si_destination_compare_init();
631 os_unfair_lock_lock(&cache_lock
);
632 if (si_destination_cache_find(dst_sa
, ifscope
, out_dst
)) {
633 lookupResult
= kLookupSuccess_Found
;
634 } else if (si_destination_cache_create(dst_sa
, ifscope
, out_dst
)) {
635 lookupResult
= kLookupSuccess_Created
;
637 os_unfair_lock_unlock(&cache_lock
);
642 #pragma mark -- RFC 6724 --
644 // https://tools.ietf.org/html/rfc6724
// Teredo test: first 16-bit word is 0x2001 and the second is zero (2001:0000::/32).
// Only defined when the system headers do not already provide it.
646 #ifndef IN6_IS_ADDR_TEREDO
647 #define IN6_IS_ADDR_TEREDO(x) ((((x)->__u6_addr.__u6_addr16[0]) == htons(0x2001)) && \
648 (((x)->__u6_addr.__u6_addr16[1]) == 0x0000))
649 #endif // IN6_IS_ADDR_TEREDO
// 6to4 test: first 16-bit word is 0x2002 (2002::/16). Any existing definition is
// dropped first so that this one is always used.
652 #undef IN6_IS_ADDR_6TO4
653 #define IN6_IS_ADDR_6TO4(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x2002))
// 6bone test: first 16-bit word is 0x3ffe (3ffe::/16).
656 #ifndef IN6_IS_ADDR_6BONE
657 # define IN6_IS_ADDR_6BONE(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x3ffe))
658 #endif // IN6_IS_ADDR_6BONE
// Maps an IPv6 address to one of the __IPV6_ADDR_SCOPE_* values.
//   - fe80::/10-style unicast prefixes: decided by the top two bits of byte 1
//   - ff00::/8 multicast: decided by the low four bits of byte 1
//   - ::1 yields link-local scope, :: yields global scope
//   - everything else: global
// NOTE(review): the switch statements and their case labels for the unicast
// branch are not visible in this view.
661 rfc6724_scope_ip6(const struct in6_addr
*addr
)
// Unicast addresses starting with 0xfe: the 0xc0 mask keeps the two bits that
// separate the link-local and site-local prefixes.
665 if (addr
->s6_addr
[0] == 0xfe) {
666 scope
= addr
->s6_addr
[1] & 0xc0;
670 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
672 return __IPV6_ADDR_SCOPE_SITELOCAL
;
674 return __IPV6_ADDR_SCOPE_GLOBAL
; /* just in case */
// Multicast: the low nibble of the second byte carries the scope.
679 if (addr
->s6_addr
[0] == 0xff) {
680 scope
= addr
->s6_addr
[1] & 0x0f;
683 * due to other scope such as reserved,
684 * return scope doesn't work.
687 case __IPV6_ADDR_SCOPE_NODELOCAL
:
688 return __IPV6_ADDR_SCOPE_NODELOCAL
;
689 case __IPV6_ADDR_SCOPE_LINKLOCAL
:
690 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
691 case __IPV6_ADDR_SCOPE_SITELOCAL
:
692 return __IPV6_ADDR_SCOPE_SITELOCAL
;
694 return __IPV6_ADDR_SCOPE_GLOBAL
;
699 * Regard loopback and unspecified addresses as global, since
700 * they have no ambiguity.
// NOTE(review): the comment text above says "global", but the loopback branch
// below returns link-local scope — confirm which is intended.
// Comparing all but the last byte against ::1 matches both ::1 and ::, which
// differ only in byte 15.
702 static const struct in6_addr in6addr_lo
= IN6ADDR_LOOPBACK_INIT
;
703 if (memcmp(&in6addr_lo
, addr
, sizeof(*addr
) - 1) == 0) {
704 if (addr
->s6_addr
[15] == 1) { /* loopback */
705 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
707 if (addr
->s6_addr
[15] == 0) { /* unspecified */
708 return __IPV6_ADDR_SCOPE_GLOBAL
; /* XXX: correct? */
712 return __IPV6_ADDR_SCOPE_GLOBAL
;
716 rfc6724_scope_ip(const struct in_addr
*addr
)
718 uint32_t hostbyteaddr
= ntohl(addr
->s_addr
);
719 if (IN_LOOPBACK(hostbyteaddr
) || IN_LINKLOCAL(hostbyteaddr
)) {
720 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
722 return __IPV6_ADDR_SCOPE_GLOBAL
;
// Returns the RFC 6724 scope of a sockaddr by dispatching on its family:
// AF_INET6 goes to rfc6724_scope_ip6(), AF_INET to rfc6724_scope_ip().
// NOTE(review): the value returned for any other family is not visible here.
726 rfc6724_scope_sa(const struct sockaddr
*sa
)
728 if (sa
->sa_family
== AF_INET6
) {
729 const struct sockaddr_in6
*sin6
= (const struct sockaddr_in6
*)sa
;
730 return rfc6724_scope_ip6(&sin6
->sin6_addr
);
731 } else if (sa
->sa_family
== AF_INET
) {
732 const struct sockaddr_in
*sin
= (const struct sockaddr_in
*)sa
;
733 return rfc6724_scope_ip(&sin
->sin_addr
);
739 rfc6724_scope(RFC6724Address
*addr
)
741 return rfc6724_scope_sa(&addr
->addr
.sa
);
744 // RFC 6724 Section 2.1
745 // https://tools.ietf.org/html/rfc6724#section-2.1
747 // Prefix Precedence Label
750 // ::ffff:0:0/96 35 4
// Looks up the RFC 6724 section 2.1 policy-table precedence for a sockaddr.
// A NULL `sa` is reported through the argument-check macro, which is given 0 as
// the value to return.
// IPv6 addresses are tested against the well-known prefixes in the order below;
// IPv4 addresses are treated as v4-mapped.
// NOTE(review): the precedence value returned by each branch is not visible in
// this view — check them against the table in RFC 6724 section 2.1.
759 rfc6724_precedence(const struct sockaddr
*sa
)
765 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(sa
, 0);
766 if (sa
->sa_family
== AF_INET6
) {
767 const struct sockaddr_in6
*sin6
= (const struct sockaddr_in6
*)sa
;
// ::1/128
770 if (IN6_IS_ADDR_LOOPBACK(&sin6
->sin6_addr
)) {
// ::ffff:0:0/96
775 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
// 2002::/16
780 if (IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
)) {
// 2001::/32
785 if (IN6_IS_ADDR_TEREDO(&sin6
->sin6_addr
)) {
// Unique local addresses (presumably fc00::/7).
790 if (IN6_IS_ADDR_UNIQUE_LOCAL(&sin6
->sin6_addr
)) {
// IPv4-compatible addresses (presumably ::/96).
795 if (IN6_IS_ADDR_V4COMPAT(&sin6
->sin6_addr
)) {
// Site-local addresses (presumably fec0::/10).
800 if (IN6_IS_ADDR_SITELOCAL(&sin6
->sin6_addr
)) {
// 3ffe::/16
805 if (IN6_IS_ADDR_6BONE(&sin6
->sin6_addr
)) {
811 } else if (sa
->sa_family
== AF_INET
) {
813 // ::ffff:0:0/96 (IPv4 is treated as a v4-mapped v6 address)
820 rfc6724_native(const RFC6724Address
*addr
)
822 return !(addr
->addr
.sa
.sa_family
== AF_INET6
&&
823 (IN6_IS_ADDR_6TO4(&addr
->addr
.sin6
.sin6_addr
) ||
824 IN6_IS_ADDR_TEREDO(&addr
->addr
.sin6
.sin6_addr
)));
// Computes how many leading bits two addresses have in common, for RFC 6724
// Rule 9. The visible code only handles the case where both are AF_INET6.
// NOTE(review): the loop, the bit counting and the return statement are not
// visible in this view.
828 common_prefix_length(const union sockaddr_in_4_6
*addr1
, const union sockaddr_in_4_6
*addr2
)
831 if (addr1
->sa
.sa_family
== AF_INET6
&& addr2
->sa
.sa_family
== AF_INET6
)
833 const unsigned char *s
= (const unsigned char *)&addr1
->sin6
.sin6_addr
;
834 const unsigned char *d
= (const unsigned char *)&addr2
->sin6
.sin6_addr
;
// The limit is 8 bytes past the start, i.e. only the first 64 bits are compared.
835 const unsigned char *lim
= s
+ 8;
// XOR of the two bytes is non-zero exactly when they differ; r then holds the
// differing bits of the first mismatching byte.
839 if ((r
= (*d
++ ^ *s
++)) != 0) {
854 si_destination_compare_rfc6724(Destination
*d1
, Destination
*d2
, int statResult
)
856 // Rule 1: Avoid unusable destinations (no source means unusable dest)
857 if (d1
->d_src
.addr
.sa
.sa_family
== AF_UNSPEC
|| d2
->d_src
.addr
.sa
.sa_family
== AF_UNSPEC
)
859 if (d1
->d_src
.addr
.sa
.sa_family
!= AF_UNSPEC
)
861 os_log_debug(si_destination_log
, "Rule 1, prefer d1, d2 is not routable");
862 return kPrefer_Dest1
;
864 else if (d2
->d_src
.addr
.sa
.sa_family
!= AF_UNSPEC
)
866 os_log_debug(si_destination_log
, "Rule 1, prefer d2, d1 is not routable");
867 return kPrefer_Dest2
;
871 // Rule 2: Prefer matching scope
872 if (rfc6724_scope(&d1
->d_dst
) != rfc6724_scope(&d1
->d_src
) || rfc6724_scope(&d2
->d_dst
) != rfc6724_scope(&d2
->d_src
))
874 if (rfc6724_scope(&d1
->d_dst
) == rfc6724_scope(&d1
->d_src
))
876 os_log_debug(si_destination_log
, "Rule 2, prefer d1, d2 dst scope does not match src scope");
877 return kPrefer_Dest1
;
879 if (rfc6724_scope(&d2
->d_dst
) == rfc6724_scope(&d2
->d_src
))
881 os_log_debug(si_destination_log
, "Rule 2, prefer d2, d1 dst scope does not match src scope");
882 return kPrefer_Dest2
;
886 // Rule 3: Avoid deprecated addresses
887 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != (d2
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
))
889 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != 0)
891 os_log_debug(si_destination_log
, "Rule 3, prefer d2, d1 source is deprecated");
892 return kPrefer_Dest1
;
894 if ((d2
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != 0)
896 os_log_debug(si_destination_log
, "Rule 3, prefer d1, d2 source is deprecated");
897 return kPrefer_Dest2
;
901 // Rule 3bis: Avoid optimistic addresses, c.f. RFC 4429 which defines them as conceptually similar to deprecated
902 // Note that this rule is not part of RFC 6724
903 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != (d2
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
))
905 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != 0)
907 os_log_debug(si_destination_log
, "Rule 3a, prefer d2, d1 source is optimistic");
908 return kPrefer_Dest1
;
910 if ((d2
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != 0)
912 os_log_debug(si_destination_log
, "Rule 3a, prefer d1, d2 source is optimistic");
913 return kPrefer_Dest2
;
917 // Rule 4: Prefer home addresses
918 // TODO: requires Mobile IPv6 support
920 // Rule 5: Prefer matching label
921 if (d1
->d_dst
.label
!= d1
->d_src
.label
|| d2
->d_dst
.label
!= d2
->d_src
.label
)
923 if (d1
->d_dst
.label
== d1
->d_src
.label
)
925 os_log_debug(si_destination_log
, "Rule 5, prefer d1, d2 dst label does not match src label");
926 return kPrefer_Dest1
;
928 if (d2
->d_dst
.label
== d2
->d_src
.label
)
930 os_log_debug(si_destination_log
, "Rule 5, prefer d2, d1 dst label does not match src label");
931 return kPrefer_Dest2
;
935 // Rule 6: Prefer higher precedence
936 if (d1
->d_dst
.precedence
> d2
->d_dst
.precedence
)
938 os_log_debug(si_destination_log
, "Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
939 d1
->d_dst
.precedence
, d2
->d_dst
.precedence
);
940 return kPrefer_Dest1
;
942 else if (d2
->d_dst
.precedence
> d1
->d_dst
.precedence
)
944 os_log_debug(si_destination_log
, "Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
945 d2
->d_dst
.precedence
, d1
->d_dst
.precedence
);
946 return kPrefer_Dest2
;
949 // Rule 7: Prefer native transport
950 const bool d1_native
= rfc6724_native(&d1
->d_src
);
951 const bool d2_native
= rfc6724_native(&d2
->d_src
);
952 if (d1_native
&& !d2_native
)
954 os_log_debug(si_destination_log
, "Rule 7, prefer d1, d2 src is not native");
955 return kPrefer_Dest1
;
957 else if (d2_native
&& !d1_native
)
959 os_log_debug(si_destination_log
, "Rule 7, prefer d2, d1 src is not native");
960 return kPrefer_Dest2
;
963 // Rule 8: Prefer smaller scope
964 const int scope1
= rfc6724_scope(&d1
->d_dst
);
965 const int scope2
= rfc6724_scope(&d2
->d_dst
);
968 os_log_debug(si_destination_log
, "Rule 8, prefer d1, d1 scope %d < d2 scope %d", scope1
, scope2
);
969 return kPrefer_Dest1
;
971 else if (scope2
< scope1
)
973 os_log_debug(si_destination_log
, "Rule 8, prefer d2, d2 scope %d < d1 scope %d", scope2
, scope1
);
974 return kPrefer_Dest2
;
977 // RFC6724: Rules 9 and 10 MAY be superseded if the implementation has other means of sorting destination addresses.
978 if ((kPrefer_Dest1
== statResult
) || (kPrefer_Dest1_Slightly
== statResult
)) {
979 return kPrefer_Dest1
;
980 } else if ((kPrefer_Dest2
== statResult
) || (kPrefer_Dest2_Slightly
== statResult
)) {
981 return kPrefer_Dest2
;
984 // Rule 9: Use longest matching prefix
985 int matchlen1
= common_prefix_length(&d1
->d_dst
.addr
, &d1
->d_src
.addr
);
986 int matchlen2
= common_prefix_length(&d2
->d_dst
.addr
, &d2
->d_src
.addr
);
987 if (matchlen1
&& matchlen2
)
989 if (matchlen1
> matchlen2
)
991 os_log_debug(si_destination_log
, "Rule 9, prefer d1, d1 shares more common prefix");
992 return kPrefer_Dest1
;
994 else if (matchlen2
> matchlen1
)
996 os_log_debug(si_destination_log
, "Rule 9, prefer d2, d2 shares more common prefix");
997 return kPrefer_Dest2
;
1001 // Rule 10: Otherwise, leave the order unchanged
1002 return kPrefer_Equal
;
1005 #pragma mark -- Internal Helper --
// Core comparison used by si_destination_compare().
//   1. A destination that is not AF_INET/AF_INET6 loses to one that is.
//   2. Both destinations are looked up (or created) in the cache; if either
//      lookup fails the result is kPrefer_Equal.
//   3. When statistics are requested and not bypassed, the statistics
//      comparison runs first; its result is then passed to the RFC 6724
//      comparison.
//   4. If everything is still equal, an entry that was already cached is
//      preferred over one that was just created.
// NOTE(review): the `statistics` parameter, the d1/d2 declarations, the body of
// the firm-preference branch after the statistics comparison and the final
// return are not visible in this view.
1008 si_destination_compare_internal(
1009 const struct sockaddr
*dst1
,
1010 uint32_t dst1ifindex
,
1011 const struct sockaddr
*dst2
,
1012 uint32_t dst2ifindex
,
1015 // If either of the destinations is not AF_INET/AF_INET6
1016 if ((dst1
->sa_family
!= AF_INET
&& dst1
->sa_family
!= AF_INET6
) ||
1017 (dst2
->sa_family
!= AF_INET
&& dst2
->sa_family
!= AF_INET6
))
// Prefer whichever one is an IP address; if neither is, they compare equal.
1019 if (dst1
->sa_family
== AF_INET
|| dst1
->sa_family
== AF_INET6
) {
1020 return kPrefer_Dest1
;
1021 } else if (dst2
->sa_family
== AF_INET
|| dst2
->sa_family
== AF_INET6
) {
1022 return kPrefer_Dest2
;
1024 return kPrefer_Equal
;
1031 // Lookup d1 and d2 in the cache
// Both lookups run before either result is checked.
1032 int lookupResultD1
= si_destination_lookup(dst1
, dst1ifindex
, &d1
);
1033 int lookupResultD2
= si_destination_lookup(dst2
, dst2ifindex
, &d2
);
1034 if (lookupResultD1
== kLookupFailure
)
1036 os_log_debug(si_destination_log
, "si_destination_lookup for dst1 failed");
1037 return kPrefer_Equal
;
1039 if (lookupResultD2
== kLookupFailure
)
1041 os_log_debug(si_destination_log
, "si_destination_lookup for dst2 failed");
1042 return kPrefer_Equal
;
1045 int statResult
= kPrefer_Equal
;
// Statistics are consulted only when the caller asked for them and the
// bypass_stats setting is off.
1046 if (statistics
&& !si_compare_settings
.bypass_stats
)
1048 statResult
= si_destination_compare_statistics(&d1
, &d2
);
// Tests for a firm (non-"slight") statistics verdict.
1049 if ((kPrefer_Dest1
== statResult
) || (kPrefer_Dest2
== statResult
))
// The statistics verdict is handed to the RFC 6724 rules, which use it in place
// of Rules 9 and 10.
1055 statResult
= si_destination_compare_rfc6724(&d1
, &d2
, statResult
);
1057 if (statResult
== kPrefer_Equal
) {
1058 // Only if all other comparisons are equal, prefer entries that were already in the cache over
1059 // ones that are new and we just created.
1062 if (lookupResultD1
== kLookupSuccess_Found
&& lookupResultD2
== kLookupSuccess_Created
) {
1063 os_log_debug(si_destination_log
, "prefer d1, known while d2 not known");
1064 statResult
= kPrefer_Dest1
;
1065 } else if (lookupResultD2
== kLookupSuccess_Found
&& lookupResultD1
== kLookupSuccess_Created
) {
1066 os_log_debug(si_destination_log
, "prefer d2, known while d1 not known");
1067 statResult
= kPrefer_Dest2
;
1074 #pragma mark -- SPI --
// SPI entry point: compares two destination addresses, each with an interface
// index, and logs the outcome.
// NULL destinations are reported through the argument-check macro, which is
// given kPrefer_Equal as the value to return.
// NOTE(review): the interface-index and `statistics` parameters and the return
// statement are not visible in this view.
1077 si_destination_compare(
1078 const struct sockaddr
*dst1
,
1080 const struct sockaddr
*dst2
,
1084 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1
, kPrefer_Equal
);
1085 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2
, kPrefer_Equal
);
// Make sure the log handle and settings exist before comparing.
1087 si_destination_compare_init();
1089 const int result
= si_destination_compare_internal(dst1
, dst1ifindex
, dst2
, dst2ifindex
, statistics
);
// Logs "<dst1>@<ifindex> <op> <dst2>@<ifindex>", where <op> is '=' for a zero
// result, '<' for a negative one and '>' for a positive one.
1091 os_log_debug(si_destination_log
, "%{network:sockaddr}.*P@%u %c %{network:sockaddr}.*P@%u",
1092 dst1
->sa_len
, dst1
, dst1ifindex
, result
== 0 ?
'=' : result
< 0 ?
'<' : '>',
1093 dst2
->sa_len
, dst2
, dst2ifindex
);
1099 si_destination_compare_no_dependencies(const struct sockaddr
*dst1
,
1100 const struct sockaddr
*dst2
)
1102 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1
, kPrefer_Equal
);
1103 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2
, kPrefer_Equal
);
1105 // Skip rule 1 (requires route to destination address)
1106 // Skip rule 2, 3, 5, 7, 9 (requires corresponding source address)
1107 // Skip rule 4 (not supported by si_destination_compare() today)
1109 // Rule 6: Prefer higher precedence
1110 const int precedence1
= rfc6724_precedence(dst1
);
1111 const int precedence2
= rfc6724_precedence(dst2
);
1112 if (precedence1
> precedence2
)
1114 os_log_debug(si_destination_log
, "ND Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
1115 precedence1
, precedence2
);
1116 return kPrefer_Dest1
;
1118 else if (precedence2
> precedence1
)
1120 os_log_debug(si_destination_log
, "ND Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
1121 precedence2
, precedence1
);
1122 return kPrefer_Dest2
;
1125 // Rule 8: Prefer smaller scope
1126 const int scope1
= rfc6724_scope_sa(dst1
);
1127 const int scope2
= rfc6724_scope_sa(dst2
);
1128 if (scope1
< scope2
)
1130 os_log_debug(si_destination_log
, "ND Rule 8, prefer d1, d1 scope %d < d2 scope %d",
1132 return kPrefer_Dest1
;
1134 else if (scope2
< scope1
)
1136 os_log_debug(si_destination_log
, "ND Rule 8, prefer d2, d2 scope %d < d1 scope %d",
1138 return kPrefer_Dest2
;
1141 // Rule 10: Otherwise, leave the order unchanged
1143 return kPrefer_Equal
;