5 // Copyright (c) 2011-2017 Apple Inc. All rights reserved.
9 #include "si_compare.h"
11 #include <dispatch/dispatch.h>
12 #include <sys/ioctl.h>
13 #include <sys/kern_control.h>
14 #include <sys/socketvar.h>
15 #include <sys/sys_domain.h>
16 #include <netinet/in.h>
17 #include <net/netsrc.h>
25 #include <mach/mach_time.h>
26 #include <net/ntstat.h>
33 kPrefer_Dest1_Slightly
= 2,
34 kPrefer_Dest2_Slightly
= -2,
40 kLookupSuccess_Found
= 1,
41 kLookupSuccess_Created
= 2,
44 typedef struct RFC6724Address
46 union sockaddr_in_4_6 addr
;
52 typedef struct Destination
57 uint32_t d_resolved_ifindex
;
59 uint32_t d_connection_attempts
;
60 uint32_t d_connection_successes
;
61 unsigned d_routable
: 1;
62 unsigned d_direct
: 1;
66 typedef struct DestCacheEntry
69 struct DestCacheEntry
*dce_next
;
70 Destination dce_entry
;
73 typedef struct DestCompareSettings
{
75 uint32_t rtt_leeway_small
;
76 int64_t rtt_failure_threshold
;
78 } DestCompareSettings
;
80 static DestCacheEntry
*cache
= NULL
;
81 static os_unfair_lock cache_lock
= OS_UNFAIR_LOCK_INIT
;
82 static uint64_t cache_timeout
= 0ULL;
83 static os_log_t si_destination_log
= OS_LOG_DEFAULT
;
84 static DestCompareSettings si_compare_settings
= {};
86 static const uint32_t kLoopbackIndex
= 1;
89 #define TCP_RTT_SCALE 32 // see netinet/tcp_var.h
90 #endif // defined(TCP_RTT_SCALE)
92 #define SI_DESTINATION_COMPARE_UNLIKELY_BOOL(b) (__builtin_expect(!!((long)(b)), 0L))
93 #define SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(arg, toReturn) \
95 if (SI_DESTINATION_COMPARE_UNLIKELY_BOOL(!arg)) { \
96 os_log_fault(si_destination_log, "%{public}s " #arg " is NULL", __FUNCTION__); \
102 static const uint32_t kFailureAttemptsTolerated
= 10;
103 static const uint32_t kDefaultRTTLeeway
= 100 * TCP_RTT_SCALE
;
104 static const uint32_t kDefaultRTTLeewaySmall
= 10 * TCP_RTT_SCALE
;
107 si_destination_compare_child_has_forked(void)
109 cache_lock
= OS_UNFAIR_LOCK_INIT
;
113 si_destination_compare_init_once(void)
115 mach_timebase_info_data_t time_base
;
116 mach_timebase_info(&time_base
);
118 cache_timeout
= NSEC_PER_SEC
;
119 cache_timeout
*= time_base
.denom
;
120 cache_timeout
/= time_base
.numer
;
122 si_destination_log
= os_log_create("com.apple.network.libinfo", "si_destination_compare");
124 si_compare_settings
= (DestCompareSettings
){
125 .rtt_leeway
= kDefaultRTTLeeway
,
126 .rtt_leeway_small
= kDefaultRTTLeewaySmall
,
127 .rtt_failure_threshold
= kFailureAttemptsTolerated
,
128 .bypass_stats
= false
131 (void)pthread_atfork(NULL
, NULL
, si_destination_compare_child_has_forked
);
135 si_destination_compare_init(void)
137 static pthread_once_t cache_init
= PTHREAD_ONCE_INIT
;
138 pthread_once(&cache_init
, si_destination_compare_init_once
);
141 #pragma mark -- Netsrc --
144 si_destination_create_control_socket(const char *control_name
)
147 const int fd
= socket(PF_SYSTEM
, SOCK_DGRAM
, SYSPROTO_CONTROL
);
150 os_log_error(si_destination_log
, "socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL) failed: %m");
154 // Disable SIGPIPE <rdar://problem/9049030>
156 if (setsockopt(fd
, SOL_SOCKET
, SO_NOSIGPIPE
, &on
, sizeof(on
)) != 0)
158 os_log_error(si_destination_log
, "setsockopt(%d, SOL_SOCKET, SO_NOSIGPIPE, ...) failed: %m", fd
);
163 // Get the control ID for statistics
164 struct ctl_info ctl
= {
168 strlcpy(ctl
.ctl_name
, control_name
, sizeof(ctl
.ctl_name
));
169 if (ioctl(fd
, CTLIOCGINFO
, &ctl
) == -1)
171 os_log_error(si_destination_log
, "ioctl(%d, CTLIOCGINFO, ...) failed: %m", fd
);
176 // Connect to the statistics control
177 struct sockaddr_ctl sc
= {
178 .sc_len
= sizeof(sc
),
179 .sc_family
= AF_SYSTEM
,
180 .ss_sysaddr
= SYSPROTO_CONTROL
,
183 .sc_reserved
= { 0, 0, 0, 0, 0 }
185 if (connect(fd
, (struct sockaddr
*)&sc
, sc
.sc_len
) != 0)
187 os_log_error(si_destination_log
, "connect(%d, ...) ctl_id=%u, failed: %m", fd
, ctl
.ctl_id
);
192 // increase the receive socket buffer size
193 int rcvbuf
= 128 * 1024;
194 if (0 != setsockopt(fd
, SOL_SOCKET
, SO_RCVBUF
, &rcvbuf
, sizeof(rcvbuf
))) {
195 os_log_error(si_destination_log
, "setsockopt(%d, SOL_SOCKET, SO_RCVBUF, %d) failed: %m",
199 // Set socket to non-blocking operation
200 const int flags
= fcntl(fd
, F_GETFL
, 0);
202 if (-1 == fcntl(fd
, F_SETFL
, flags
| O_NONBLOCK
)) {
203 os_log_error(si_destination_log
, "fcntl(%d, F_SETFL, %d) failed: %m", fd
, flags
| O_NONBLOCK
);
206 os_log_error(si_destination_log
, "fcntl(%d, F_GETFL, 0) failed: %m", fd
);
213 si_destination_fill_netsrc(Destination
*d
)
215 static int netsrc_sockfd
= -1;
216 static int version
= NETSRC_CURVERS
;
218 if (netsrc_sockfd
< 0) {
219 netsrc_sockfd
= si_destination_create_control_socket(NETSRC_CTLNAME
);
221 if (netsrc_sockfd
< 0) {
225 // Make sure we have a socket, create one if necessary
226 struct netsrc_req request
= {
228 .nrq_ifscope
= d
->d_ifindex
,
231 if (d
->d_dst
.addr
.sa
.sa_len
> sizeof(request
._usa
)) {
232 os_log_debug(si_destination_log
, "sockaddr is too big");
235 memcpy(&request
._usa
, &d
->d_dst
, d
->d_dst
.addr
.sa
.sa_len
);
236 if (send(netsrc_sockfd
, &request
, sizeof(request
), 0) != sizeof(request
))
238 if (errno
== EINVAL
&& version
!= NETSRC_VERSION1
) {
239 // fall back to version 1
240 version
= NETSRC_VERSION1
;
241 si_destination_fill_netsrc(d
);
243 // Expected when there is no route to host
244 os_log_error(si_destination_log
, "send failed: %m");
249 if (version
== NETSRC_VERSION1
) {
250 #ifdef NETSRC_VERSION2
251 struct netsrc_repv1 v1
;
252 #else // NETSRC_VERSION2
253 struct netsrc_rep v1
;
254 #endif // NETSRC_VERSION2
255 if (recv(netsrc_sockfd
, &v1
, sizeof(v1
), 0) != sizeof(v1
)) {
257 os_log_debug(si_destination_log
, "recv failed: %m");
260 d
->d_dst
.label
= v1
.nrp_dstlabel
;
261 d
->d_dst
.precedence
= v1
.nrp_dstprecedence
;
262 if (v1
.nrq_sin
.sin_len
<= sizeof(d
->d_src
.addr
)) {
263 memcpy( &d
->d_src
.addr
, &v1
._usa
, v1
.nrq_sin
.sin_len
);
265 d
->d_src
.label
= v1
.nrp_label
;
266 d
->d_src
.precedence
= v1
.nrp_precedence
;
267 d
->d_src
.flags
= v1
.nrp_flags
;
270 #ifdef NETSRC_VERSION2
271 else if (version
== NETSRC_VERSION2
) {
272 struct netsrc_repv2 v2
;
273 if (recv(netsrc_sockfd
, &v2
, sizeof(v2
), 0) != sizeof(v2
)) {
275 os_log_debug(si_destination_log
, "recv failed: %m");
278 d
->d_dst
.label
= v2
.nrp_dstlabel
;
279 d
->d_dst
.precedence
= v2
.nrp_dstprecedence
;
280 d
->d_src
.addr
= v2
.nrp_src
;
281 d
->d_src
.label
= v2
.nrp_label
;
282 d
->d_src
.precedence
= v2
.nrp_precedence
;
283 d
->d_src
.flags
= v2
.nrp_flags
;
284 d
->d_resolved_ifindex
= v2
.nrp_ifindex
;
285 d
->d_min_rtt
= v2
.nrp_min_rtt
;
286 d
->d_connection_attempts
= v2
.nrp_connection_attempts
;
287 d
->d_connection_successes
= v2
.nrp_connection_successes
;
288 d
->d_routable
= (v2
.nrp_flags
& NETSRC_FLAG_ROUTEABLE
) ?
1 : 0;
289 d
->d_direct
= (v2
.nrp_flags
& NETSRC_FLAG_DIRECT
) ?
1 : 0;
290 d
->d_awdl
= (v2
.nrp_flags
& NETSRC_FLAG_AWDL
) ?
1 : 0;
293 #endif // NETSRC_VERSION2
298 #pragma mark -- Statistics --
301 si_destination_compare_statistics(
305 int slightPreference
= kPrefer_Equal
;
306 // If we have min round trip times for both, use that
307 if (dst1
->d_min_rtt
&& dst2
->d_min_rtt
)
309 #define RTT_MSEC_DOUBLE_FROM_DST( dst ) (((double)(dst->d_min_rtt)) / ((double)(TCP_RTT_SCALE)))
310 if (dst1
->d_min_rtt
< dst2
->d_min_rtt
)
312 if (dst1
->d_min_rtt
+ si_compare_settings
.rtt_leeway
< dst2
->d_min_rtt
)
314 os_log_debug(si_destination_log
,
315 "prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
316 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
317 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
318 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
319 return kPrefer_Dest1
;
321 else if (dst1
->d_min_rtt
+ si_compare_settings
.rtt_leeway_small
< dst2
->d_min_rtt
)
323 slightPreference
= kPrefer_Dest1_Slightly
;
324 os_log_debug(si_destination_log
,
325 "prefering 1 slightly, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
326 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
327 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
328 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
332 os_log_debug(si_destination_log
,
333 "not prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
334 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
335 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
336 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
339 else if (dst1
->d_min_rtt
> dst2
->d_min_rtt
)
341 if (dst1
->d_min_rtt
> si_compare_settings
.rtt_leeway
+ dst2
->d_min_rtt
)
343 os_log_debug(si_destination_log
,
344 "prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
345 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
346 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
347 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
348 return kPrefer_Dest2
;
350 else if (dst1
->d_min_rtt
> si_compare_settings
.rtt_leeway_small
+ dst2
->d_min_rtt
)
352 slightPreference
= kPrefer_Dest2_Slightly
;
353 os_log_debug(si_destination_log
,
354 "prefering 2 slightly, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
355 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
356 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
357 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
361 os_log_debug(si_destination_log
,
362 "not prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
363 RTT_MSEC_DOUBLE_FROM_DST(dst1
), RTT_MSEC_DOUBLE_FROM_DST(dst2
),
364 si_compare_settings
.rtt_leeway
/ TCP_RTT_SCALE
,
365 si_compare_settings
.rtt_leeway_small
/ TCP_RTT_SCALE
);
370 os_log_debug(si_destination_log
, "no preference, equal min rtt %f", RTT_MSEC_DOUBLE_FROM_DST(dst1
));
372 // we hold on to slightPreference and only use it if no other part of the evaluation had a stronger preference
373 #undef RTT_MSEC_DOUBLE_FROM_DST
375 else if (dst1
->d_min_rtt
|| dst2
->d_min_rtt
)
377 // If only dest1 is missing round trip make sure it doesn't have zero successful attempts
378 if (dst1
->d_min_rtt
== 0 &&
379 dst1
->d_connection_successes
== 0 &&
380 dst1
->d_connection_attempts
> 0)
382 if (dst1
->d_connection_attempts
> si_compare_settings
.rtt_failure_threshold
)
384 os_log_debug(si_destination_log
, "prefer 2, dst 1 has no successful connections and %u attempts",
385 dst1
->d_connection_attempts
);
386 return kPrefer_Dest2
;
389 // If only dest2 is missing round trip make sure it doesn't have zero successful attempts
390 else if (dst2
->d_min_rtt
== 0 &&
391 dst2
->d_connection_successes
== 0 &&
392 dst2
->d_connection_attempts
> 0)
394 if (dst2
->d_connection_attempts
> si_compare_settings
.rtt_failure_threshold
)
396 os_log_debug(si_destination_log
, "prefer 1, dst 2 has no successful connections and %u attempts",
397 dst2
->d_connection_attempts
);
398 return kPrefer_Dest1
;
403 // If we have routes for both...
404 if (dst1
->d_routable
!= 0 && dst2
->d_routable
!= 0)
406 // <rdar://problem/9070784> Prefer interface that are not peer to peer
407 if (dst1
->d_awdl
!= dst2
->d_awdl
)
411 os_log_debug(si_destination_log
, "prefer dst 1, dst 2 is p2p, dst 1 is not");
412 return kPrefer_Dest1
;
414 else if (dst1
->d_awdl
)
416 os_log_debug(si_destination_log
, "prefer dst 2, dst 1 is p2p, dst 2 is not");
417 return kPrefer_Dest2
;
421 // Prefer loopback routes
422 bool dst1loopback
= dst1
->d_resolved_ifindex
== kLoopbackIndex
||
423 (dst1
->d_dst
.addr
.sa
.sa_family
== AF_INET6
&&
424 IN6_IS_ADDR_LINKLOCAL(&dst1
->d_dst
.addr
.sin6
.sin6_addr
) &&
425 dst1
->d_dst
.addr
.sin6
.sin6_scope_id
== kLoopbackIndex
);
426 bool dst2loopback
= dst2
->d_resolved_ifindex
== kLoopbackIndex
||
427 (dst2
->d_dst
.addr
.sa
.sa_family
== AF_INET6
&&
428 IN6_IS_ADDR_LINKLOCAL(&dst2
->d_dst
.addr
.sin6
.sin6_addr
) &&
429 dst2
->d_dst
.addr
.sin6
.sin6_scope_id
== kLoopbackIndex
);
431 if (dst1loopback
&& !dst2loopback
)
433 os_log_debug(si_destination_log
, "prefer 1, dst 1 is loopback, dst 2 is not");
434 return kPrefer_Dest1
;
436 else if (dst2loopback
&& !dst1loopback
)
438 os_log_debug(si_destination_log
, "prefer 2, dst 2 is loopback, dst 1 is not");
439 return kPrefer_Dest2
;
442 // Prefer direct routes
443 if (dst1
->d_direct
&& !dst2
->d_direct
)
445 os_log_debug(si_destination_log
, "prefer 1, dst 1 is local, dst 2 is not");
446 return kPrefer_Dest1
;
448 else if (dst2
->d_direct
&& !dst1
->d_direct
)
450 os_log_debug(si_destination_log
, "prefer 2, dst 2 is local, dst 1 is not");
451 return kPrefer_Dest2
;
454 else if (dst1
->d_routable
!= 0)
456 // prefer destination we have a route to
457 os_log_debug(si_destination_log
, "prefer 1, dst 2 has no route");
458 return kPrefer_Dest1
;
460 else if (dst2
->d_routable
!= 0)
462 // prefer destination we have a route to
463 os_log_debug(si_destination_log
, "prefer 2, dst 1 has no route");
464 return kPrefer_Dest2
;
466 return slightPreference
;
469 #pragma mark -- Cache --
473 const struct sockaddr
*sa1
,
474 const struct sockaddr
*sa2
)
476 if (sa1
->sa_family
!= sa2
->sa_family
)
479 /* We don't use bcmp because we don't care about the port number */
480 if (sa1
->sa_family
== AF_INET
)
482 const struct sockaddr_in
*sin1
= (const struct sockaddr_in
*)sa1
;
483 const struct sockaddr_in
*sin2
= (const struct sockaddr_in
*)sa2
;
485 if (sin1
->sin_addr
.s_addr
!= sin2
->sin_addr
.s_addr
)
490 if (sa1
->sa_family
== AF_INET6
)
492 const struct sockaddr_in6
*sin61
= (const struct sockaddr_in6
*)sa1
;
493 const struct sockaddr_in6
*sin62
= (const struct sockaddr_in6
*)sa2
;
495 if (memcmp(&sin61
->sin6_addr
, &sin62
->sin6_addr
, sizeof(sin61
->sin6_addr
)) != 0) {
499 // TBD: Is the flow info relevant?
501 if (sin61
->sin6_scope_id
!= sin62
->sin6_scope_id
) {
508 if (sa1
->sa_len
!= sa2
->sa_len
) {
512 return (memcmp(sa1
, sa2
, sa1
->sa_len
) == 0);
516 si_destination_cache_find(
517 const struct sockaddr
*dst_sa
,
519 Destination
*out_dst
)
521 // Loop through the entries looking for:
524 DestCacheEntry
**pprev
= &cache
;
525 uint64_t now
= mach_absolute_time();
528 while (*pprev
!= NULL
)
530 // If the item has expired, pull it out of the list
531 if ((now
- (*pprev
)->dce_time
) >= cache_timeout
)
533 DestCacheEntry
*expired
= *pprev
;
534 *pprev
= expired
->dce_next
;
535 memset(expired
, 0, sizeof(*expired
));
540 // If the item matches, copy the entry
541 if (!result
&& (*pprev
)->dce_entry
.d_ifindex
== ifscope
&&
542 sa_equal(dst_sa
, &(*pprev
)->dce_entry
.d_dst
.addr
.sa
))
544 *out_dst
= (*pprev
)->dce_entry
;
548 pprev
= &(*pprev
)->dce_next
;
556 si_destination_cache_create(
557 const struct sockaddr
*dst_sa
,
559 Destination
*out_dst
)
561 DestCacheEntry
*cache_entry
;
564 if (dst_sa
->sa_len
> sizeof(cache_entry
->dce_entry
.d_dst
.addr
)) {
565 os_log_error(si_destination_log
, "(dst_sa->sa_len %u > sizeof(cache_entry->dce_entry.d_dst.addr))",
571 cache_entry
= calloc(1, sizeof(*cache_entry
));
572 if (NULL
== cache_entry
) {
573 os_log_error(si_destination_log
, "calloc(%zu) failed: %m", sizeof(*cache_entry
));
576 d
= &cache_entry
->dce_entry
;
578 // Copy the destination sockaddr
579 memcpy(&d
->d_dst
.addr
, dst_sa
, dst_sa
->sa_len
);
580 d
->d_ifindex
= ifscope
;
582 // Query the kernel for the matching source, precedence and label
583 si_destination_fill_netsrc(d
);
586 cache_entry
->dce_time
= mach_absolute_time();
587 cache_entry
->dce_next
= cache
;
594 si_destination_lookup(
595 const struct sockaddr
*dst_sa
,
597 Destination
*out_dst
)
599 int lookupResult
= kLookupFailure
;
601 si_destination_compare_init();
602 os_unfair_lock_lock(&cache_lock
);
603 if (si_destination_cache_find(dst_sa
, ifscope
, out_dst
)) {
604 lookupResult
= kLookupSuccess_Found
;
605 } else if (si_destination_cache_create(dst_sa
, ifscope
, out_dst
)) {
606 lookupResult
= kLookupSuccess_Created
;
608 os_unfair_lock_unlock(&cache_lock
);
613 #pragma mark -- RFC 6724 --
615 // https://tools.ietf.org/html/rfc6724
617 #ifndef IN6_IS_ADDR_TEREDO
618 #define IN6_IS_ADDR_TEREDO(x) ((((x)->__u6_addr.__u6_addr16[0]) == htons(0x2001)) && \
619 (((x)->__u6_addr.__u6_addr16[1]) == 0x0000))
620 #endif // IN6_IS_ADDR_TEREDO
623 #undef IN6_IS_ADDR_6TO4
624 #define IN6_IS_ADDR_6TO4(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x2002))
627 #ifndef IN6_IS_ADDR_6BONE
628 # define IN6_IS_ADDR_6BONE(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x3ffe))
629 #endif // IN6_IS_ADDR_6BONE
632 rfc6724_scope_ip6(const struct in6_addr
*addr
)
636 if (addr
->s6_addr
[0] == 0xfe) {
637 scope
= addr
->s6_addr
[1] & 0xc0;
641 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
643 return __IPV6_ADDR_SCOPE_SITELOCAL
;
645 return __IPV6_ADDR_SCOPE_GLOBAL
; /* just in case */
650 if (addr
->s6_addr
[0] == 0xff) {
651 scope
= addr
->s6_addr
[1] & 0x0f;
654 * due to other scope such as reserved,
655 * return scope doesn't work.
658 case __IPV6_ADDR_SCOPE_NODELOCAL
:
659 return __IPV6_ADDR_SCOPE_NODELOCAL
;
660 case __IPV6_ADDR_SCOPE_LINKLOCAL
:
661 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
662 case __IPV6_ADDR_SCOPE_SITELOCAL
:
663 return __IPV6_ADDR_SCOPE_SITELOCAL
;
665 return __IPV6_ADDR_SCOPE_GLOBAL
;
670 * Regard loopback and unspecified addresses as global, since
671 * they have no ambiguity.
673 static const struct in6_addr in6addr_lo
= IN6ADDR_LOOPBACK_INIT
;
674 if (memcmp(&in6addr_lo
, addr
, sizeof(*addr
) - 1) == 0) {
675 if (addr
->s6_addr
[15] == 1) { /* loopback */
676 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
678 if (addr
->s6_addr
[15] == 0) { /* unspecified */
679 return __IPV6_ADDR_SCOPE_GLOBAL
; /* XXX: correct? */
683 return __IPV6_ADDR_SCOPE_GLOBAL
;
687 rfc6724_scope_ip(const struct in_addr
*addr
)
689 uint32_t hostbyteaddr
= ntohl(addr
->s_addr
);
690 if (IN_LOOPBACK(hostbyteaddr
) || IN_LINKLOCAL(hostbyteaddr
)) {
691 return __IPV6_ADDR_SCOPE_LINKLOCAL
;
693 return __IPV6_ADDR_SCOPE_GLOBAL
;
697 rfc6724_scope_sa(const struct sockaddr
*sa
)
699 if (sa
->sa_family
== AF_INET6
) {
700 const struct sockaddr_in6
*sin6
= (const struct sockaddr_in6
*)sa
;
701 return rfc6724_scope_ip6(&sin6
->sin6_addr
);
702 } else if (sa
->sa_family
== AF_INET
) {
703 const struct sockaddr_in
*sin
= (const struct sockaddr_in
*)sa
;
704 return rfc6724_scope_ip(&sin
->sin_addr
);
710 rfc6724_scope(RFC6724Address
*addr
)
712 return rfc6724_scope_sa(&addr
->addr
.sa
);
715 // RFC 6724 Section 2.1
716 // https://tools.ietf.org/html/rfc6724#section-2.1
718 // Prefix Precedence Label
721 // ::ffff:0:0/96 35 4
730 rfc6724_precedence(const struct sockaddr
*sa
)
736 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(sa
, 0);
737 if (sa
->sa_family
== AF_INET6
) {
738 const struct sockaddr_in6
*sin6
= (const struct sockaddr_in6
*)sa
;
741 if (IN6_IS_ADDR_LOOPBACK(&sin6
->sin6_addr
)) {
746 if (IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
751 if (IN6_IS_ADDR_6TO4(&sin6
->sin6_addr
)) {
756 if (IN6_IS_ADDR_TEREDO(&sin6
->sin6_addr
)) {
761 if (IN6_IS_ADDR_UNIQUE_LOCAL(&sin6
->sin6_addr
)) {
766 if (IN6_IS_ADDR_V4COMPAT(&sin6
->sin6_addr
)) {
771 if (IN6_IS_ADDR_SITELOCAL(&sin6
->sin6_addr
)) {
776 if (IN6_IS_ADDR_6BONE(&sin6
->sin6_addr
)) {
782 } else if (sa
->sa_family
== AF_INET
) {
784 // ::ffff:0:0/96 (IPv4 is treated as a v4-mapped v6 address)
791 rfc6724_native(const RFC6724Address
*addr
)
793 return !(addr
->addr
.sa
.sa_family
== AF_INET6
&&
794 (IN6_IS_ADDR_6TO4(&addr
->addr
.sin6
.sin6_addr
) ||
795 IN6_IS_ADDR_TEREDO(&addr
->addr
.sin6
.sin6_addr
)));
799 common_prefix_length(const union sockaddr_in_4_6
*addr1
, const union sockaddr_in_4_6
*addr2
)
802 if (addr1
->sa
.sa_family
== AF_INET6
&& addr2
->sa
.sa_family
== AF_INET6
)
804 const unsigned char *s
= (const unsigned char *)&addr1
->sin6
.sin6_addr
;
805 const unsigned char *d
= (const unsigned char *)&addr2
->sin6
.sin6_addr
;
806 const unsigned char *lim
= s
+ 8;
810 if ((r
= (*d
++ ^ *s
++)) != 0) {
825 si_destination_compare_rfc6724(Destination
*d1
, Destination
*d2
, int statResult
)
827 // Rule 1: Avoid unusable destinations (no source means unusable dest)
828 if (d1
->d_src
.addr
.sa
.sa_family
== AF_UNSPEC
|| d2
->d_src
.addr
.sa
.sa_family
== AF_UNSPEC
)
830 if (d1
->d_src
.addr
.sa
.sa_family
!= AF_UNSPEC
)
832 os_log_debug(si_destination_log
, "Rule 1, prefer d1, d2 is not routable");
833 return kPrefer_Dest1
;
835 else if (d2
->d_src
.addr
.sa
.sa_family
!= AF_UNSPEC
)
837 os_log_debug(si_destination_log
, "Rule 1, prefer d2, d1 is not routable");
838 return kPrefer_Dest2
;
842 // Rule 2: Prefer matching scope
843 if (rfc6724_scope(&d1
->d_dst
) != rfc6724_scope(&d1
->d_src
) || rfc6724_scope(&d2
->d_dst
) != rfc6724_scope(&d2
->d_src
))
845 if (rfc6724_scope(&d1
->d_dst
) == rfc6724_scope(&d1
->d_src
))
847 os_log_debug(si_destination_log
, "Rule 2, prefer d1, d2 dst scope does not match src scope");
848 return kPrefer_Dest1
;
850 if (rfc6724_scope(&d2
->d_dst
) == rfc6724_scope(&d2
->d_src
))
852 os_log_debug(si_destination_log
, "Rule 2, prefer d2, d1 dst scope does not match src scope");
853 return kPrefer_Dest2
;
857 // Rule 3: Avoid deprecated addresses
858 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != (d2
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
))
860 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != 0)
862 os_log_debug(si_destination_log
, "Rule 3, prefer d2, d1 source is deprecated");
863 return kPrefer_Dest1
;
865 if ((d2
->d_src
.flags
& NETSRC_IP6_FLAG_DEPRECATED
) != 0)
867 os_log_debug(si_destination_log
, "Rule 3, prefer d1, d2 source is deprecated");
868 return kPrefer_Dest2
;
872 // Rule 3bis: Avoid optimistic addresses, c.f. RFC 4429 which defines them as conceptually similar to deprecated
873 // Note that this rule is not part of RFC 6724
874 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != (d2
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
))
876 if ((d1
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != 0)
878 os_log_debug(si_destination_log
, "Rule 3a, prefer d2, d1 source is optimistic");
879 return kPrefer_Dest1
;
881 if ((d2
->d_src
.flags
& NETSRC_IP6_FLAG_OPTIMISTIC
) != 0)
883 os_log_debug(si_destination_log
, "Rule 3a, prefer d1, d2 source is optimistic");
884 return kPrefer_Dest2
;
888 // Rule 4: Prefer home addresses
889 // TODO: requires Mobile IPv6 support
891 // Rule 5: Prefer matching label
892 if (d1
->d_dst
.label
!= d1
->d_src
.label
|| d2
->d_dst
.label
!= d2
->d_src
.label
)
894 if (d1
->d_dst
.label
== d1
->d_src
.label
)
896 os_log_debug(si_destination_log
, "Rule 5, prefer d1, d2 dst label does not match src label");
897 return kPrefer_Dest1
;
899 if (d2
->d_dst
.label
== d2
->d_src
.label
)
901 os_log_debug(si_destination_log
, "Rule 5, prefer d2, d1 dst label does not match src label");
902 return kPrefer_Dest2
;
906 // Rule 6: Prefer higher precedence
907 if (d1
->d_dst
.precedence
> d2
->d_dst
.precedence
)
909 os_log_debug(si_destination_log
, "Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
910 d1
->d_dst
.precedence
, d2
->d_dst
.precedence
);
911 return kPrefer_Dest1
;
913 else if (d2
->d_dst
.precedence
> d1
->d_dst
.precedence
)
915 os_log_debug(si_destination_log
, "Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
916 d2
->d_dst
.precedence
, d1
->d_dst
.precedence
);
917 return kPrefer_Dest2
;
920 // Rule 7: Prefer native transport
921 const bool d1_native
= rfc6724_native(&d1
->d_src
);
922 const bool d2_native
= rfc6724_native(&d2
->d_src
);
923 if (d1_native
&& !d2_native
)
925 os_log_debug(si_destination_log
, "Rule 7, prefer d1, d2 src is not native");
926 return kPrefer_Dest1
;
928 else if (d2_native
&& !d1_native
)
930 os_log_debug(si_destination_log
, "Rule 7, prefer d2, d1 src is not native");
931 return kPrefer_Dest2
;
934 // Rule 8: Prefer smaller scope
935 const int scope1
= rfc6724_scope(&d1
->d_dst
);
936 const int scope2
= rfc6724_scope(&d2
->d_dst
);
939 os_log_debug(si_destination_log
, "Rule 8, prefer d1, d1 scope %d < d2 scope %d", scope1
, scope2
);
940 return kPrefer_Dest1
;
942 else if (scope2
< scope1
)
944 os_log_debug(si_destination_log
, "Rule 8, prefer d2, d2 scope %d < d1 scope %d", scope2
, scope1
);
945 return kPrefer_Dest2
;
948 // RFC6724: Rules 9 and 10 MAY be superseded if the implementation has other means of sorting destination addresses.
949 if ((kPrefer_Dest1
== statResult
) || (kPrefer_Dest1_Slightly
== statResult
)) {
950 return kPrefer_Dest1
;
951 } else if ((kPrefer_Dest2
== statResult
) || (kPrefer_Dest2_Slightly
== statResult
)) {
952 return kPrefer_Dest2
;
955 // Rule 9: Use longest matching prefix
956 int matchlen1
= common_prefix_length(&d1
->d_dst
.addr
, &d1
->d_src
.addr
);
957 int matchlen2
= common_prefix_length(&d2
->d_dst
.addr
, &d2
->d_src
.addr
);
958 if (matchlen1
&& matchlen2
)
960 if (matchlen1
> matchlen2
)
962 os_log_debug(si_destination_log
, "Rule 9, prefer d1, d1 shares more common prefix");
963 return kPrefer_Dest1
;
965 else if (matchlen2
> matchlen1
)
967 os_log_debug(si_destination_log
, "Rule 9, prefer d2, d2 shares more common prefix");
968 return kPrefer_Dest2
;
972 // Rule 10: Otherwise, leave the order unchanged
973 return kPrefer_Equal
;
976 #pragma mark -- Internal Helper --
979 si_destination_compare_internal(
980 const struct sockaddr
*dst1
,
981 uint32_t dst1ifindex
,
982 const struct sockaddr
*dst2
,
983 uint32_t dst2ifindex
,
986 // If either of the destinations is not AF_INET/AF_INET6
987 if ((dst1
->sa_family
!= AF_INET
&& dst1
->sa_family
!= AF_INET6
) ||
988 (dst2
->sa_family
!= AF_INET
&& dst2
->sa_family
!= AF_INET6
))
990 if (dst1
->sa_family
== AF_INET
|| dst1
->sa_family
== AF_INET6
) {
991 return kPrefer_Dest1
;
992 } else if (dst2
->sa_family
== AF_INET
|| dst2
->sa_family
== AF_INET6
) {
993 return kPrefer_Dest2
;
995 return kPrefer_Equal
;
1002 // Lookup d1 and d2 in the cache
1003 int lookupResultD1
= si_destination_lookup(dst1
, dst1ifindex
, &d1
);
1004 int lookupResultD2
= si_destination_lookup(dst2
, dst2ifindex
, &d2
);
1005 if (lookupResultD1
== kLookupFailure
)
1007 os_log_debug(si_destination_log
, "si_destination_lookup for dst1 failed");
1008 return kPrefer_Equal
;
1010 if (lookupResultD2
== kLookupFailure
)
1012 os_log_debug(si_destination_log
, "si_destination_lookup for dst2 failed");
1013 return kPrefer_Equal
;
1016 int statResult
= kPrefer_Equal
;
1017 if (statistics
&& !si_compare_settings
.bypass_stats
)
1019 statResult
= si_destination_compare_statistics(&d1
, &d2
);
1020 if ((kPrefer_Dest1
== statResult
) || (kPrefer_Dest2
== statResult
))
1026 statResult
= si_destination_compare_rfc6724(&d1
, &d2
, statResult
);
1028 if (statResult
== kPrefer_Equal
) {
1029 // Only if all other comparisons are equal, prefer entries that were already in the cache over
1030 // ones that are new and we just created.
1033 if (lookupResultD1
== kLookupSuccess_Found
&& lookupResultD2
== kLookupSuccess_Created
) {
1034 os_log_debug(si_destination_log
, "prefer d1, known while d2 not known");
1035 statResult
= kPrefer_Dest1
;
1036 } else if (lookupResultD2
== kLookupSuccess_Found
&& lookupResultD1
== kLookupSuccess_Created
) {
1037 os_log_debug(si_destination_log
, "prefer d2, known while d1 not known");
1038 statResult
= kPrefer_Dest2
;
1045 #pragma mark -- SPI --
1048 si_destination_compare(
1049 const struct sockaddr
*dst1
,
1051 const struct sockaddr
*dst2
,
1055 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1
, kPrefer_Equal
);
1056 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2
, kPrefer_Equal
);
1058 si_destination_compare_init();
1060 const int result
= si_destination_compare_internal(dst1
, dst1ifindex
, dst2
, dst2ifindex
, statistics
);
1062 os_log_debug(si_destination_log
, "%{network:sockaddr}.*P@%u %c %{network:sockaddr}.*P@%u",
1063 dst1
->sa_len
, dst1
, dst1ifindex
, result
== 0 ?
'=' : result
< 0 ?
'<' : '>',
1064 dst2
->sa_len
, dst2
, dst2ifindex
);
1070 si_destination_compare_no_dependencies(const struct sockaddr
*dst1
,
1071 const struct sockaddr
*dst2
)
1073 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1
, kPrefer_Equal
);
1074 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2
, kPrefer_Equal
);
1076 // Skip rule 1 (requires route to destination address)
1077 // Skip rule 2, 3, 5, 7, 9 (requires corresponding source address)
1078 // Skip rule 4 (not supported by si_destination_compare() today)
1080 // Rule 6: Prefer higher precedence
1081 const int precedence1
= rfc6724_precedence(dst1
);
1082 const int precedence2
= rfc6724_precedence(dst2
);
1083 if (precedence1
> precedence2
)
1085 os_log_debug(si_destination_log
, "ND Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
1086 precedence1
, precedence2
);
1087 return kPrefer_Dest1
;
1089 else if (precedence2
> precedence1
)
1091 os_log_debug(si_destination_log
, "ND Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
1092 precedence2
, precedence1
);
1093 return kPrefer_Dest2
;
1096 // Rule 8: Prefer smaller scope
1097 const int scope1
= rfc6724_scope_sa(dst1
);
1098 const int scope2
= rfc6724_scope_sa(dst2
);
1099 if (scope1
< scope2
)
1101 os_log_debug(si_destination_log
, "ND Rule 8, prefer d1, d1 scope %d < d2 scope %d",
1103 return kPrefer_Dest1
;
1105 else if (scope2
< scope1
)
1107 os_log_debug(si_destination_log
, "ND Rule 8, prefer d2, d2 scope %d < d1 scope %d",
1109 return kPrefer_Dest2
;
1112 // Rule 10: Otherwise, leave the order unchanged
1114 return kPrefer_Equal
;