Libinfo-538.tar.gz
[apple/libinfo.git] / lookup.subproj / si_compare.c
1 //
2 // si_compare.c
3 // Libinfo
4 //
5 // Copyright (c) 2011-2017 Apple Inc. All rights reserved.
6 //
7 //
8
9 #include "si_compare.h"
10
11 #include <dispatch/dispatch.h>
12 #include <sys/ioctl.h>
13 #include <sys/kern_control.h>
14 #include <sys/socketvar.h>
15 #include <sys/sys_domain.h>
16 #include <netinet/in.h>
17 #include <net/netsrc.h>
18 #include <net/if.h>
19 #include <errno.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <pthread.h>
23 #include <os/lock.h>
24 #include <os/log.h>
25 #include <mach/mach_time.h>
26 #include <net/ntstat.h>
27 #include <ifaddrs.h>
28
// Verdicts returned by the destination comparators in this file.
// A positive value favours the first destination, a negative value the
// second. The "slightly" variants are weak hints produced by the statistics
// comparison; they are only honoured when no stronger rule decides first.
enum {
	kPrefer_Dest2_Slightly = -2,
	kPrefer_Dest2 = -1,
	kPrefer_Equal = 0,
	kPrefer_Dest1 = 1,
	kPrefer_Dest1_Slightly = 2,
};
37
// Outcome of looking a destination up in the cache.
enum {
	kLookupFailure = 0,         // no entry found and none could be created
	kLookupSuccess_Found = 1,   // an unexpired entry already existed
	kLookupSuccess_Created = 2, // a new entry was created for this lookup
};
44
45 typedef struct RFC6724Address
46 {
47 union sockaddr_in_4_6 addr;
48 uint32_t flags;
49 uint16_t label;
50 uint16_t precedence;
51 } RFC6724Address;
52
53 typedef struct Destination
54 {
55 RFC6724Address d_dst;
56 RFC6724Address d_src;
57 uint32_t d_ifindex;
58 uint32_t d_resolved_ifindex;
59 uint32_t d_min_rtt;
60 uint32_t d_connection_attempts;
61 uint32_t d_connection_successes;
62 unsigned d_routable : 1;
63 unsigned d_direct : 1;
64 unsigned d_awdl : 1;
65 } Destination;
66
67 typedef struct DestCacheEntry
68 {
69 uint64_t dce_time;
70 struct DestCacheEntry *dce_next;
71 Destination dce_entry;
72 } DestCacheEntry;
73
// Tunables consulted by the statistics-based comparison.
typedef struct DestCompareSettings
{
	// RTT gap (same units as Destination.d_min_rtt) beyond which the faster
	// destination is preferred outright.
	uint32_t rtt_leeway;
	// Smaller RTT gap beyond which the faster destination is only slightly
	// preferred.
	uint32_t rtt_leeway_small;
	// A destination with no successes is rejected once its attempt count
	// exceeds this value.
	int64_t rtt_failure_threshold;
	// When true, the statistics comparison is skipped entirely.
	bool bypass_stats;
} DestCompareSettings;
80
81 static DestCacheEntry *cache = NULL;
82 static os_unfair_lock cache_lock = OS_UNFAIR_LOCK_INIT;
83 static uint64_t cache_timeout = 0ULL;
84 static os_log_t si_destination_log = OS_LOG_DEFAULT;
85 static DestCompareSettings si_compare_settings = {};
86
// Scale factor applied to the RTT values handled in this file; provided here
// only when the system headers did not already define it.
#ifndef TCP_RTT_SCALE
#define TCP_RTT_SCALE 32 // see netinet/tcp_var.h
#endif // !defined(TCP_RTT_SCALE)

// Evaluates to the truth value of `b` while hinting to the compiler that the
// condition is expected to be false.
#define SI_DESTINATION_COMPARE_UNLIKELY_BOOL(b) (__builtin_expect(!!((long)(b)), 0L))

// Returns `toReturn` from the enclosing function, after logging a fault that
// names the argument, when `arg` is NULL. Both macro parameters are
// parenthesized so that compound expressions expand with the intended
// precedence.
#define SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(arg, toReturn) \
	do { \
		if (SI_DESTINATION_COMPARE_UNLIKELY_BOOL(!(arg))) { \
			os_log_fault(si_destination_log, "%{public}s " #arg " is NULL", __FUNCTION__); \
			return (toReturn); \
		} \
	} \
	while (0)
100
101 static const uint32_t kFailureAttemptsTolerated = 10;
102 static const uint32_t kDefaultRTTLeeway = 100 * TCP_RTT_SCALE;
103 static const uint32_t kDefaultRTTLeewaySmall = 10 * TCP_RTT_SCALE;
104
105 static void
106 si_destination_compare_child_has_forked(void)
107 {
108 cache_lock = OS_UNFAIR_LOCK_INIT;
109 // Cannot use os_log_t object from parent process in child process.
110 si_destination_log = OS_LOG_DEFAULT;
111 }
112
113 static void
114 si_destination_compare_init_once(void)
115 {
116 mach_timebase_info_data_t time_base;
117 mach_timebase_info(&time_base);
118
119 cache_timeout = NSEC_PER_SEC;
120 cache_timeout *= time_base.denom;
121 cache_timeout /= time_base.numer;
122
123 si_destination_log = os_log_create("com.apple.network.libinfo", "si_destination_compare");
124
125 si_compare_settings = (DestCompareSettings){
126 .rtt_leeway = kDefaultRTTLeeway,
127 .rtt_leeway_small = kDefaultRTTLeewaySmall,
128 .rtt_failure_threshold = kFailureAttemptsTolerated,
129 .bypass_stats = false
130 };
131
132 (void)pthread_atfork(NULL, NULL, si_destination_compare_child_has_forked);
133 }
134
135 static void
136 si_destination_compare_init(void)
137 {
138 static pthread_once_t cache_init = PTHREAD_ONCE_INIT;
139 pthread_once(&cache_init, si_destination_compare_init_once);
140 }
141
142 #pragma mark -- Netsrc --
143
144 int
145 si_destination_create_control_socket(const char *control_name)
146 {
147 // Creat a socket
148 const int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
149 if (fd == -1)
150 {
151 os_log_error(si_destination_log, "socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL) failed: %m");
152 return fd;
153 }
154
155 // Disable SIGPIPE <rdar://problem/9049030>
156 int on = 1;
157 if (setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof(on)) != 0)
158 {
159 os_log_error(si_destination_log, "setsockopt(%d, SOL_SOCKET, SO_NOSIGPIPE, ...) failed: %m", fd);
160 close(fd);
161 return -1;
162 }
163
164 // Get the control ID for statistics
165 struct ctl_info ctl = {
166 .ctl_id = 0,
167 .ctl_name = { 0 }
168 };
169 strlcpy(ctl.ctl_name, control_name, sizeof(ctl.ctl_name));
170 if (ioctl(fd, CTLIOCGINFO, &ctl) == -1)
171 {
172 os_log_error(si_destination_log, "ioctl(%d, CTLIOCGINFO, ...) failed: %m", fd);
173 close(fd);
174 return -1;
175 }
176
177 // Connect to the statistics control
178 struct sockaddr_ctl sc = {
179 .sc_len = sizeof(sc),
180 .sc_family = AF_SYSTEM,
181 .ss_sysaddr = SYSPROTO_CONTROL,
182 .sc_id = ctl.ctl_id,
183 .sc_unit = 0,
184 .sc_reserved = { 0, 0, 0, 0, 0 }
185 };
186 if (connect(fd, (struct sockaddr *)&sc, sc.sc_len) != 0)
187 {
188 os_log_error(si_destination_log, "connect(%d, ...) ctl_id=%u, failed: %m", fd, ctl.ctl_id);
189 close(fd);
190 return -1;
191 }
192
193 // increase the receive socket buffer size
194 int rcvbuf = 128 * 1024;
195 if (0 != setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf))) {
196 os_log_error(si_destination_log, "setsockopt(%d, SOL_SOCKET, SO_RCVBUF, %d) failed: %m",
197 fd, rcvbuf);
198 }
199
200 // Set socket to non-blocking operation
201 const int flags = fcntl(fd, F_GETFL, 0);
202 if (-1 != flags) {
203 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
204 os_log_error(si_destination_log, "fcntl(%d, F_SETFL, %d) failed: %m", fd, flags | O_NONBLOCK);
205 }
206 } else {
207 os_log_error(si_destination_log, "fcntl(%d, F_GETFL, 0) failed: %m", fd);
208 }
209
210 return fd;
211 }
212
213 static void
214 si_destination_fill_netsrc(Destination *d)
215 {
216 static int netsrc_sockfd = -1;
217 static int version = NETSRC_CURVERS;
218
219 if (netsrc_sockfd < 0) {
220 netsrc_sockfd = si_destination_create_control_socket(NETSRC_CTLNAME);
221 }
222 if (netsrc_sockfd < 0) {
223 return;
224 }
225
226 // Make sure we have a socket, create one if necessary
227 struct netsrc_req request = {
228 .nrq_ver = version,
229 .nrq_ifscope = d->d_ifindex,
230 };
231
232 if (d->d_dst.addr.sa.sa_len > sizeof(request._usa)) {
233 os_log_debug(si_destination_log, "sockaddr is too big");
234 return;
235 }
236 memcpy(&request._usa, &d->d_dst, d->d_dst.addr.sa.sa_len);
237 if (send(netsrc_sockfd, &request, sizeof(request), 0) != sizeof(request))
238 {
239 if (errno == EINVAL && version != NETSRC_VERSION1) {
240 // fall back to version 1
241 version = NETSRC_VERSION1;
242 si_destination_fill_netsrc(d);
243 }
244 // Expected when there is no route to host
245 os_log_error(si_destination_log, "send failed: %m");
246 return;
247 }
248
249 // Read the response
250 if (version == NETSRC_VERSION1) {
251 #ifdef NETSRC_VERSION2
252 struct netsrc_repv1 v1;
253 #else // NETSRC_VERSION2
254 struct netsrc_rep v1;
255 #endif // NETSRC_VERSION2
256 if (recv(netsrc_sockfd, &v1, sizeof(v1), 0) != sizeof(v1)) {
257 // Not expected
258 os_log_debug(si_destination_log, "recv failed: %m");
259 return;
260 }
261 d->d_dst.label = v1.nrp_dstlabel;
262 d->d_dst.precedence = v1.nrp_dstprecedence;
263 if (v1.nrq_sin.sin_len <= sizeof(d->d_src.addr)) {
264 memcpy( &d->d_src.addr, &v1._usa, v1.nrq_sin.sin_len);
265 }
266 d->d_src.label = v1.nrp_label;
267 d->d_src.precedence = v1.nrp_precedence;
268 d->d_src.flags = v1.nrp_flags;
269 return;
270 }
271 #ifdef NETSRC_VERSION2
272 else if (version == NETSRC_VERSION2) {
273 struct netsrc_repv2 v2;
274 if (recv(netsrc_sockfd, &v2, sizeof(v2), 0) != sizeof(v2)) {
275 // Not expected
276 os_log_debug(si_destination_log, "recv failed: %m");
277 return;
278 }
279 d->d_dst.label = v2.nrp_dstlabel;
280 d->d_dst.precedence = v2.nrp_dstprecedence;
281 d->d_src.addr = v2.nrp_src;
282 d->d_src.label = v2.nrp_label;
283 d->d_src.precedence = v2.nrp_precedence;
284 d->d_src.flags = v2.nrp_flags;
285 d->d_resolved_ifindex = v2.nrp_ifindex;
286 d->d_min_rtt = v2.nrp_min_rtt;
287 d->d_connection_attempts = v2.nrp_connection_attempts;
288 d->d_connection_successes = v2.nrp_connection_successes;
289 d->d_routable = (v2.nrp_flags & NETSRC_FLAG_ROUTEABLE) ? 1 : 0;
290 d->d_direct = (v2.nrp_flags & NETSRC_FLAG_DIRECT) ? 1 : 0;
291 d->d_awdl = (v2.nrp_flags & NETSRC_FLAG_AWDL) ? 1 : 0;
292 return;
293 }
294 #endif // NETSRC_VERSION2
295
296 return;
297 }
298
299 #pragma mark -- Statistics --
300
301 static uint32_t kLoopbackIndex = 1;
302
303 // Only update kLoopbackIndex from the default value of 1 if an entry with the IFF_LOOPBACK flag set is found.
304 void
305 set_loopback_ifindex()
306 {
307 struct ifaddrs *ifaddrs, *ifa;
308
309 if (getifaddrs(&ifaddrs) < 0)
310 return;
311
312 for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) {
313 if (ifa->ifa_addr &&
314 ifa->ifa_addr->sa_family == AF_LINK &&
315 (ifa->ifa_flags & IFF_LOOPBACK) != 0) {
316 kLoopbackIndex = (unsigned int)((struct sockaddr_dl*)ifa->ifa_addr)->sdl_index;
317 break;
318 }
319 }
320
321 freeifaddrs(ifaddrs);
322 return;
323 }
324
325 static int
326 si_destination_compare_statistics(
327 Destination *dst1,
328 Destination *dst2)
329 {
330 int slightPreference = kPrefer_Equal;
331 // Initialize kLoopbackIndex value
332 static pthread_once_t once = PTHREAD_ONCE_INIT;
333 pthread_once(&once, set_loopback_ifindex);
334
335 // If we have min round trip times for both, use that
336 if (dst1->d_min_rtt && dst2->d_min_rtt)
337 {
338 #define RTT_MSEC_DOUBLE_FROM_DST( dst ) (((double)(dst->d_min_rtt)) / ((double)(TCP_RTT_SCALE)))
339 if (dst1->d_min_rtt < dst2->d_min_rtt)
340 {
341 if (dst1->d_min_rtt + si_compare_settings.rtt_leeway < dst2->d_min_rtt)
342 {
343 os_log_debug(si_destination_log,
344 "prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
345 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
346 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
347 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
348 return kPrefer_Dest1;
349 }
350 else if (dst1->d_min_rtt + si_compare_settings.rtt_leeway_small < dst2->d_min_rtt)
351 {
352 slightPreference = kPrefer_Dest1_Slightly;
353 os_log_debug(si_destination_log,
354 "prefering 1 slightly, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
355 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
356 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
357 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
358 }
359 else
360 {
361 os_log_debug(si_destination_log,
362 "not prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
363 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
364 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
365 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
366 }
367 }
368 else if (dst1->d_min_rtt > dst2->d_min_rtt)
369 {
370 if (dst1->d_min_rtt > si_compare_settings.rtt_leeway + dst2->d_min_rtt)
371 {
372 os_log_debug(si_destination_log,
373 "prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
374 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
375 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
376 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
377 return kPrefer_Dest2;
378 }
379 else if (dst1->d_min_rtt > si_compare_settings.rtt_leeway_small + dst2->d_min_rtt)
380 {
381 slightPreference = kPrefer_Dest2_Slightly;
382 os_log_debug(si_destination_log,
383 "prefering 2 slightly, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
384 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
385 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
386 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
387 }
388 else
389 {
390 os_log_debug(si_destination_log,
391 "not prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
392 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
393 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
394 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
395 }
396 }
397 else
398 {
399 os_log_debug(si_destination_log, "no preference, equal min rtt %f", RTT_MSEC_DOUBLE_FROM_DST(dst1));
400 }
401 // we hold on to slightPreference and only use it if no other part of the evaluation had a stronger preference
402 #undef RTT_MSEC_DOUBLE_FROM_DST
403 }
404 else if (dst1->d_min_rtt || dst2->d_min_rtt)
405 {
406 // If only dest1 is missing round trip make sure it doesn't have zero successful attempts
407 if (dst1->d_min_rtt == 0 &&
408 dst1->d_connection_successes == 0 &&
409 dst1->d_connection_attempts > 0)
410 {
411 if (dst1->d_connection_attempts > si_compare_settings.rtt_failure_threshold)
412 {
413 os_log_debug(si_destination_log, "prefer 2, dst 1 has no successful connections and %u attempts",
414 dst1->d_connection_attempts);
415 return kPrefer_Dest2;
416 }
417 }
418 // If only dest2 is missing round trip make sure it doesn't have zero successful attempts
419 else if (dst2->d_min_rtt == 0 &&
420 dst2->d_connection_successes == 0 &&
421 dst2->d_connection_attempts > 0)
422 {
423 if (dst2->d_connection_attempts > si_compare_settings.rtt_failure_threshold)
424 {
425 os_log_debug(si_destination_log, "prefer 1, dst 2 has no successful connections and %u attempts",
426 dst2->d_connection_attempts);
427 return kPrefer_Dest1;
428 }
429 }
430 }
431
432 // If we have routes for both...
433 if (dst1->d_routable != 0 && dst2->d_routable != 0)
434 {
435 // <rdar://problem/9070784> Prefer interface that are not peer to peer
436 if (dst1->d_awdl != dst2->d_awdl)
437 {
438 if (dst2->d_awdl)
439 {
440 os_log_debug(si_destination_log, "prefer dst 1, dst 2 is p2p, dst 1 is not");
441 return kPrefer_Dest1;
442 }
443 else if (dst1->d_awdl)
444 {
445 os_log_debug(si_destination_log, "prefer dst 2, dst 1 is p2p, dst 2 is not");
446 return kPrefer_Dest2;
447 }
448 }
449
450 // Prefer loopback routes
451 bool dst1loopback = dst1->d_resolved_ifindex == kLoopbackIndex ||
452 (dst1->d_dst.addr.sa.sa_family == AF_INET6 &&
453 IN6_IS_ADDR_LINKLOCAL(&dst1->d_dst.addr.sin6.sin6_addr) &&
454 dst1->d_dst.addr.sin6.sin6_scope_id == kLoopbackIndex);
455 bool dst2loopback = dst2->d_resolved_ifindex == kLoopbackIndex ||
456 (dst2->d_dst.addr.sa.sa_family == AF_INET6 &&
457 IN6_IS_ADDR_LINKLOCAL(&dst2->d_dst.addr.sin6.sin6_addr) &&
458 dst2->d_dst.addr.sin6.sin6_scope_id == kLoopbackIndex);
459
460 if (dst1loopback && !dst2loopback)
461 {
462 os_log_debug(si_destination_log, "prefer 1, dst 1 is loopback, dst 2 is not");
463 return kPrefer_Dest1;
464 }
465 else if (dst2loopback && !dst1loopback)
466 {
467 os_log_debug(si_destination_log, "prefer 2, dst 2 is loopback, dst 1 is not");
468 return kPrefer_Dest2;
469 }
470
471 // Prefer direct routes
472 if (dst1->d_direct && !dst2->d_direct)
473 {
474 os_log_debug(si_destination_log, "prefer 1, dst 1 is local, dst 2 is not");
475 return kPrefer_Dest1;
476 }
477 else if (dst2->d_direct && !dst1->d_direct)
478 {
479 os_log_debug(si_destination_log, "prefer 2, dst 2 is local, dst 1 is not");
480 return kPrefer_Dest2;
481 }
482 }
483 else if (dst1->d_routable != 0)
484 {
485 // prefer destination we have a route to
486 os_log_debug(si_destination_log, "prefer 1, dst 2 has no route");
487 return kPrefer_Dest1;
488 }
489 else if (dst2->d_routable != 0)
490 {
491 // prefer destination we have a route to
492 os_log_debug(si_destination_log, "prefer 2, dst 1 has no route");
493 return kPrefer_Dest2;
494 }
495 return slightPreference;
496 }
497
498 #pragma mark -- Cache --
499
500 static bool
501 sa_equal(
502 const struct sockaddr *sa1,
503 const struct sockaddr *sa2)
504 {
505 if (sa1->sa_family != sa2->sa_family)
506 return false;
507
508 /* We don't use bcmp because we don't care about the port number */
509 if (sa1->sa_family == AF_INET)
510 {
511 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
512 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
513
514 if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
515 return false;
516 return true;
517 }
518
519 if (sa1->sa_family == AF_INET6)
520 {
521 const struct sockaddr_in6 *sin61 = (const struct sockaddr_in6 *)sa1;
522 const struct sockaddr_in6 *sin62 = (const struct sockaddr_in6 *)sa2;
523
524 if (memcmp(&sin61->sin6_addr, &sin62->sin6_addr, sizeof(sin61->sin6_addr)) != 0) {
525 return false;
526 }
527
528 // TBD: Is the flow info relevant?
529
530 if (sin61->sin6_scope_id != sin62->sin6_scope_id) {
531 return false;
532 }
533
534 return true;
535 }
536
537 if (sa1->sa_len != sa2->sa_len) {
538 return false;
539 }
540
541 return (memcmp(sa1, sa2, sa1->sa_len) == 0);
542 }
543
544 static bool
545 si_destination_cache_find(
546 const struct sockaddr *dst_sa,
547 uint32_t ifscope,
548 Destination *out_dst)
549 {
550 // Loop through the entries looking for:
551 // - expired items
552 // - macthing item
553 DestCacheEntry **pprev = &cache;
554 uint64_t now = mach_absolute_time();
555 bool result = false;
556
557 while (*pprev != NULL)
558 {
559 // If the item has expired, pull it out of the list
560 if ((now - (*pprev)->dce_time) >= cache_timeout)
561 {
562 DestCacheEntry *expired = *pprev;
563 *pprev = expired->dce_next;
564 memset(expired, 0, sizeof(*expired));
565 free(expired);
566 }
567 else
568 {
569 // If the item matches, copy the entry
570 if (!result && (*pprev)->dce_entry.d_ifindex == ifscope &&
571 sa_equal(dst_sa, &(*pprev)->dce_entry.d_dst.addr.sa))
572 {
573 *out_dst = (*pprev)->dce_entry;
574 result = true;
575 }
576
577 pprev = &(*pprev)->dce_next;
578 }
579 }
580
581 return result;
582 }
583
584 static bool
585 si_destination_cache_create(
586 const struct sockaddr *dst_sa,
587 uint32_t ifscope,
588 Destination *out_dst)
589 {
590 DestCacheEntry *cache_entry;
591 Destination *d;
592
593 if (dst_sa->sa_len > sizeof(cache_entry->dce_entry.d_dst.addr)) {
594 os_log_error(si_destination_log, "(dst_sa->sa_len %u > sizeof(cache_entry->dce_entry.d_dst.addr))",
595 dst_sa->sa_len);
596 return false;
597 }
598
599 // Allocate entry
600 cache_entry = calloc(1, sizeof(*cache_entry));
601 if (NULL == cache_entry) {
602 os_log_error(si_destination_log, "calloc(%zu) failed: %m", sizeof(*cache_entry));
603 return false;
604 }
605 d = &cache_entry->dce_entry;
606
607 // Copy the destination sockaddr
608 memcpy(&d->d_dst.addr, dst_sa, dst_sa->sa_len);
609 d->d_ifindex = ifscope;
610
611 // Query the kernel for the matching source, precedence and label
612 si_destination_fill_netsrc(d);
613 *out_dst = *d;
614
615 cache_entry->dce_time = mach_absolute_time();
616 cache_entry->dce_next = cache;
617 cache = cache_entry;
618
619 return true;
620 }
621
622 static int
623 si_destination_lookup(
624 const struct sockaddr *dst_sa,
625 uint32_t ifscope,
626 Destination *out_dst)
627 {
628 int lookupResult = kLookupFailure;
629
630 si_destination_compare_init();
631 os_unfair_lock_lock(&cache_lock);
632 if (si_destination_cache_find(dst_sa, ifscope, out_dst)) {
633 lookupResult = kLookupSuccess_Found;
634 } else if (si_destination_cache_create(dst_sa, ifscope, out_dst)) {
635 lookupResult = kLookupSuccess_Created;
636 }
637 os_unfair_lock_unlock(&cache_lock);
638
639 return lookupResult;
640 }
641
642 #pragma mark -- RFC 6724 --
643
644 // https://tools.ietf.org/html/rfc6724
645
646 #ifndef IN6_IS_ADDR_TEREDO
647 #define IN6_IS_ADDR_TEREDO(x) ((((x)->__u6_addr.__u6_addr16[0]) == htons(0x2001)) && \
648 (((x)->__u6_addr.__u6_addr16[1]) == 0x0000))
649 #endif // IN6_IS_ADDR_TEREDO
650
651 #ifndef s6_addr16
652 #undef IN6_IS_ADDR_6TO4
653 #define IN6_IS_ADDR_6TO4(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x2002))
654 #endif // s6_addr16
655
656 #ifndef IN6_IS_ADDR_6BONE
657 # define IN6_IS_ADDR_6BONE(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x3ffe))
658 #endif // IN6_IS_ADDR_6BONE
659
660 static int
661 rfc6724_scope_ip6(const struct in6_addr *addr)
662 {
663 int scope;
664
665 if (addr->s6_addr[0] == 0xfe) {
666 scope = addr->s6_addr[1] & 0xc0;
667
668 switch (scope) {
669 case 0x80:
670 return __IPV6_ADDR_SCOPE_LINKLOCAL;
671 case 0xc0:
672 return __IPV6_ADDR_SCOPE_SITELOCAL;
673 default:
674 return __IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
675 }
676 }
677
678
679 if (addr->s6_addr[0] == 0xff) {
680 scope = addr->s6_addr[1] & 0x0f;
681
682 /*
683 * due to other scope such as reserved,
684 * return scope doesn't work.
685 */
686 switch (scope) {
687 case __IPV6_ADDR_SCOPE_NODELOCAL:
688 return __IPV6_ADDR_SCOPE_NODELOCAL;
689 case __IPV6_ADDR_SCOPE_LINKLOCAL:
690 return __IPV6_ADDR_SCOPE_LINKLOCAL;
691 case __IPV6_ADDR_SCOPE_SITELOCAL:
692 return __IPV6_ADDR_SCOPE_SITELOCAL;
693 default:
694 return __IPV6_ADDR_SCOPE_GLOBAL;
695 }
696 }
697
698 /*
699 * Regard loopback and unspecified addresses as global, since
700 * they have no ambiguity.
701 */
702 static const struct in6_addr in6addr_lo = IN6ADDR_LOOPBACK_INIT;
703 if (memcmp(&in6addr_lo, addr, sizeof(*addr) - 1) == 0) {
704 if (addr->s6_addr[15] == 1) { /* loopback */
705 return __IPV6_ADDR_SCOPE_LINKLOCAL;
706 }
707 if (addr->s6_addr[15] == 0) { /* unspecified */
708 return __IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */
709 }
710 }
711
712 return __IPV6_ADDR_SCOPE_GLOBAL;
713 }
714
715 static int
716 rfc6724_scope_ip(const struct in_addr *addr)
717 {
718 uint32_t hostbyteaddr = ntohl(addr->s_addr);
719 if (IN_LOOPBACK(hostbyteaddr) || IN_LINKLOCAL(hostbyteaddr)) {
720 return __IPV6_ADDR_SCOPE_LINKLOCAL;
721 }
722 return __IPV6_ADDR_SCOPE_GLOBAL;
723 }
724
// Returns the scope of a socket address by dispatching on its family.
// Families other than AF_INET and AF_INET6 yield 0.
static int
rfc6724_scope_sa(const struct sockaddr *sa)
{
	switch (sa->sa_family) {
	case AF_INET6:
		return rfc6724_scope_ip6(&((const struct sockaddr_in6 *)sa)->sin6_addr);
	case AF_INET:
		return rfc6724_scope_ip(&((const struct sockaddr_in *)sa)->sin_addr);
	default:
		return 0;
	}
}
737
738 static int
739 rfc6724_scope(RFC6724Address *addr)
740 {
741 return rfc6724_scope_sa(&addr->addr.sa);
742 }
743
744 // RFC 6724 Section 2.1
745 // https://tools.ietf.org/html/rfc6724#section-2.1
746
747 // Prefix Precedence Label
748 // ::1/128 50 0
749 // ::/0 40 1
750 // ::ffff:0:0/96 35 4
751 // 2002::/16 30 2
752 // 2001::/32 5 5
753 // fc00::/7 3 13
754 // ::/96 1 3
755 // fec0::/10 1 11
756 // 3ffe::/16 1 12
757
// Returns the default policy-table precedence for `sa`.
//
// IPv6 addresses are matched against the specific prefixes first and fall
// back to the ::/0 entry (40). An IPv4 address is treated as its v4-mapped
// IPv6 equivalent (35). A NULL pointer or any other address family yields 0.
//
// The NULL case returns silently. The argument-check macro that used to
// follow the NULL guard could never fire and has been removed.
static int
rfc6724_precedence(const struct sockaddr *sa)
{
	if (sa == NULL) {
		return 0;
	}

	if (sa->sa_family == AF_INET) {
		// ::ffff:0:0/96 (IPv4 is treated as a v4-mapped v6 address)
		return 35;
	}

	if (sa->sa_family != AF_INET6) {
		return 0;
	}

	const struct in6_addr *ip6 = &((const struct sockaddr_in6 *)sa)->sin6_addr;

	// ::1/128
	if (IN6_IS_ADDR_LOOPBACK(ip6)) {
		return 50;
	}

	// ::ffff:0:0/96
	if (IN6_IS_ADDR_V4MAPPED(ip6)) {
		return 35;
	}

	// 2002::/16
	if (IN6_IS_ADDR_6TO4(ip6)) {
		return 30;
	}

	// 2001::/32
	if (IN6_IS_ADDR_TEREDO(ip6)) {
		return 5;
	}

	// fc00::/7
	if (IN6_IS_ADDR_UNIQUE_LOCAL(ip6)) {
		return 3;
	}

	// ::/96, fec0::/10 and 3ffe::/16 all share precedence 1
	if (IN6_IS_ADDR_V4COMPAT(ip6) ||
		IN6_IS_ADDR_SITELOCAL(ip6) ||
		IN6_IS_ADDR_6BONE(ip6)) {
		return 1;
	}

	// ::/0
	return 40;
}
818
819 static bool
820 rfc6724_native(const RFC6724Address *addr)
821 {
822 return !(addr->addr.sa.sa_family == AF_INET6 &&
823 (IN6_IS_ADDR_6TO4(&addr->addr.sin6.sin6_addr) ||
824 IN6_IS_ADDR_TEREDO(&addr->addr.sin6.sin6_addr)));
825 }
826
827 static int
828 common_prefix_length(const union sockaddr_in_4_6 *addr1, const union sockaddr_in_4_6 *addr2)
829 {
830 int match = 0;
831 if (addr1->sa.sa_family == AF_INET6 && addr2->sa.sa_family == AF_INET6)
832 {
833 const unsigned char *s = (const unsigned char *)&addr1->sin6.sin6_addr;
834 const unsigned char *d = (const unsigned char *)&addr2->sin6.sin6_addr;
835 const unsigned char *lim = s + 8;
836 unsigned char r;
837
838 while (s < lim) {
839 if ((r = (*d++ ^ *s++)) != 0) {
840 while (r < 128) {
841 match++;
842 r <<= 1;
843 }
844 break;
845 } else {
846 match += 8;
847 }
848 }
849 }
850 return match;
851 }
852
853 static int
854 si_destination_compare_rfc6724(Destination *d1, Destination *d2, int statResult)
855 {
856 // Rule 1: Avoid unusable destinations (no source means unusable dest)
857 if (d1->d_src.addr.sa.sa_family == AF_UNSPEC || d2->d_src.addr.sa.sa_family == AF_UNSPEC)
858 {
859 if (d1->d_src.addr.sa.sa_family != AF_UNSPEC)
860 {
861 os_log_debug(si_destination_log, "Rule 1, prefer d1, d2 is not routable");
862 return kPrefer_Dest1;
863 }
864 else if (d2->d_src.addr.sa.sa_family != AF_UNSPEC)
865 {
866 os_log_debug(si_destination_log, "Rule 1, prefer d2, d1 is not routable");
867 return kPrefer_Dest2;
868 }
869 }
870
871 // Rule 2: Prefer matching scope
872 if (rfc6724_scope(&d1->d_dst) != rfc6724_scope(&d1->d_src) || rfc6724_scope(&d2->d_dst) != rfc6724_scope(&d2->d_src))
873 {
874 if (rfc6724_scope(&d1->d_dst) == rfc6724_scope(&d1->d_src))
875 {
876 os_log_debug(si_destination_log, "Rule 2, prefer d1, d2 dst scope does not match src scope");
877 return kPrefer_Dest1;
878 }
879 if (rfc6724_scope(&d2->d_dst) == rfc6724_scope(&d2->d_src))
880 {
881 os_log_debug(si_destination_log, "Rule 2, prefer d2, d1 dst scope does not match src scope");
882 return kPrefer_Dest2;
883 }
884 }
885
886 // Rule 3: Avoid deprecated addresses
887 if ((d1->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != (d2->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED))
888 {
889 if ((d1->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != 0)
890 {
891 os_log_debug(si_destination_log, "Rule 3, prefer d2, d1 source is deprecated");
892 return kPrefer_Dest1;
893 }
894 if ((d2->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != 0)
895 {
896 os_log_debug(si_destination_log, "Rule 3, prefer d1, d2 source is deprecated");
897 return kPrefer_Dest2;
898 }
899 }
900
901 // Rule 3bis: Avoid optimistic addresses, c.f. RFC 4429 which defines them as conceptually similar to deprecated
902 // Note that this rule is not part of RFC 6724
903 if ((d1->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != (d2->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC))
904 {
905 if ((d1->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != 0)
906 {
907 os_log_debug(si_destination_log, "Rule 3a, prefer d2, d1 source is optimistic");
908 return kPrefer_Dest1;
909 }
910 if ((d2->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != 0)
911 {
912 os_log_debug(si_destination_log, "Rule 3a, prefer d1, d2 source is optimistic");
913 return kPrefer_Dest2;
914 }
915 }
916
917 // Rule 4: Prefer home addresses
918 // TODO: requires Mobile IPv6 support
919
920 // Rule 5: Prefer matching label
921 if (d1->d_dst.label != d1->d_src.label || d2->d_dst.label != d2->d_src.label)
922 {
923 if (d1->d_dst.label == d1->d_src.label)
924 {
925 os_log_debug(si_destination_log, "Rule 5, prefer d1, d2 dst label does not match src label");
926 return kPrefer_Dest1;
927 }
928 if (d2->d_dst.label == d2->d_src.label)
929 {
930 os_log_debug(si_destination_log, "Rule 5, prefer d2, d1 dst label does not match src label");
931 return kPrefer_Dest2;
932 }
933 }
934
935 // Rule 6: Prefer higher precedence
936 if (d1->d_dst.precedence > d2->d_dst.precedence)
937 {
938 os_log_debug(si_destination_log, "Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
939 d1->d_dst.precedence, d2->d_dst.precedence);
940 return kPrefer_Dest1;
941 }
942 else if (d2->d_dst.precedence > d1->d_dst.precedence)
943 {
944 os_log_debug(si_destination_log, "Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
945 d2->d_dst.precedence, d1->d_dst.precedence);
946 return kPrefer_Dest2;
947 }
948
949 // Rule 7: Prefer native transport
950 const bool d1_native = rfc6724_native(&d1->d_src);
951 const bool d2_native = rfc6724_native(&d2->d_src);
952 if (d1_native && !d2_native)
953 {
954 os_log_debug(si_destination_log, "Rule 7, prefer d1, d2 src is not native");
955 return kPrefer_Dest1;
956 }
957 else if (d2_native && !d1_native)
958 {
959 os_log_debug(si_destination_log, "Rule 7, prefer d2, d1 src is not native");
960 return kPrefer_Dest2;
961 }
962
963 // Rule 8: Prefer smaller scope
964 const int scope1 = rfc6724_scope(&d1->d_dst);
965 const int scope2 = rfc6724_scope(&d2->d_dst);
966 if (scope1 < scope2)
967 {
968 os_log_debug(si_destination_log, "Rule 8, prefer d1, d1 scope %d < d2 scope %d", scope1, scope2);
969 return kPrefer_Dest1;
970 }
971 else if (scope2 < scope1)
972 {
973 os_log_debug(si_destination_log, "Rule 8, prefer d2, d2 scope %d < d1 scope %d", scope2, scope1);
974 return kPrefer_Dest2;
975 }
976
977 // RFC6724: Rules 9 and 10 MAY be superseded if the implementation has other means of sorting destination addresses.
978 if ((kPrefer_Dest1 == statResult) || (kPrefer_Dest1_Slightly == statResult)) {
979 return kPrefer_Dest1;
980 } else if ((kPrefer_Dest2 == statResult) || (kPrefer_Dest2_Slightly == statResult)) {
981 return kPrefer_Dest2;
982 }
983
984 // Rule 9: Use longest matching prefix
985 int matchlen1 = common_prefix_length(&d1->d_dst.addr, &d1->d_src.addr);
986 int matchlen2 = common_prefix_length(&d2->d_dst.addr, &d2->d_src.addr);
987 if (matchlen1 && matchlen2)
988 {
989 if (matchlen1 > matchlen2)
990 {
991 os_log_debug(si_destination_log, "Rule 9, prefer d1, d1 shares more common prefix");
992 return kPrefer_Dest1;
993 }
994 else if (matchlen2 > matchlen1)
995 {
996 os_log_debug(si_destination_log, "Rule 9, prefer d2, d2 shares more common prefix");
997 return kPrefer_Dest2;
998 }
999 }
1000
1001 // Rule 10: Otherwise, leave the order unchanged
1002 return kPrefer_Equal;
1003 }
1004
1005 #pragma mark -- Internal Helper --
1006
1007 static int
1008 si_destination_compare_internal(
1009 const struct sockaddr *dst1,
1010 uint32_t dst1ifindex,
1011 const struct sockaddr *dst2,
1012 uint32_t dst2ifindex,
1013 bool statistics)
1014 {
1015 // If either of the destinations is not AF_INET/AF_INET6
1016 if ((dst1->sa_family != AF_INET && dst1->sa_family != AF_INET6) ||
1017 (dst2->sa_family != AF_INET && dst2->sa_family != AF_INET6))
1018 {
1019 if (dst1->sa_family == AF_INET || dst1->sa_family == AF_INET6) {
1020 return kPrefer_Dest1;
1021 } else if (dst2->sa_family == AF_INET || dst2->sa_family == AF_INET6) {
1022 return kPrefer_Dest2;
1023 } else {
1024 return kPrefer_Equal;
1025 }
1026 }
1027
1028 Destination d1;
1029 Destination d2;
1030
1031 // Lookup d1 and d2 in the cache
1032 int lookupResultD1 = si_destination_lookup(dst1, dst1ifindex, &d1);
1033 int lookupResultD2 = si_destination_lookup(dst2, dst2ifindex, &d2);
1034 if (lookupResultD1 == kLookupFailure)
1035 {
1036 os_log_debug(si_destination_log, "si_destination_lookup for dst1 failed");
1037 return kPrefer_Equal;
1038 }
1039 if (lookupResultD2 == kLookupFailure)
1040 {
1041 os_log_debug(si_destination_log, "si_destination_lookup for dst2 failed");
1042 return kPrefer_Equal;
1043 }
1044
1045 int statResult = kPrefer_Equal;
1046 if (statistics && !si_compare_settings.bypass_stats)
1047 {
1048 statResult = si_destination_compare_statistics(&d1, &d2);
1049 if ((kPrefer_Dest1 == statResult) || (kPrefer_Dest2 == statResult))
1050 {
1051 return statResult;
1052 }
1053 }
1054
1055 statResult = si_destination_compare_rfc6724(&d1, &d2, statResult);
1056
1057 if (statResult == kPrefer_Equal) {
1058 // Only if all other comparisons are equal, prefer entries that were already in the cache over
1059 // ones that are new and we just created.
1060
1061 // Found < Created
1062 if (lookupResultD1 == kLookupSuccess_Found && lookupResultD2 == kLookupSuccess_Created) {
1063 os_log_debug(si_destination_log, "prefer d1, known while d2 not known");
1064 statResult = kPrefer_Dest1;
1065 } else if (lookupResultD2 == kLookupSuccess_Found && lookupResultD1 == kLookupSuccess_Created) {
1066 os_log_debug(si_destination_log, "prefer d2, known while d1 not known");
1067 statResult = kPrefer_Dest2;
1068 }
1069 }
1070
1071 return statResult;
1072 }
1073
1074 #pragma mark -- SPI --
1075
1076 int
1077 si_destination_compare(
1078 const struct sockaddr *dst1,
1079 int dst1ifindex,
1080 const struct sockaddr *dst2,
1081 int dst2ifindex,
1082 bool statistics)
1083 {
1084 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1, kPrefer_Equal);
1085 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2, kPrefer_Equal);
1086
1087 si_destination_compare_init();
1088
1089 const int result = si_destination_compare_internal(dst1, dst1ifindex, dst2, dst2ifindex, statistics);
1090
1091 os_log_debug(si_destination_log, "%{network:sockaddr}.*P@%u %c %{network:sockaddr}.*P@%u",
1092 dst1->sa_len, dst1, dst1ifindex, result == 0 ? '=' : result < 0 ? '<' : '>',
1093 dst2->sa_len, dst2, dst2ifindex);
1094
1095 return result;
1096 }
1097
1098 int
1099 si_destination_compare_no_dependencies(const struct sockaddr *dst1,
1100 const struct sockaddr *dst2)
1101 {
1102 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1, kPrefer_Equal);
1103 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2, kPrefer_Equal);
1104
1105 // Skip rule 1 (requires route to destination address)
1106 // Skip rule 2, 3, 5, 7, 9 (requires corresponding source address)
1107 // Skip rule 4 (not supported by si_destination_compare() today)
1108
1109 // Rule 6: Prefer higher precedence
1110 const int precedence1 = rfc6724_precedence(dst1);
1111 const int precedence2 = rfc6724_precedence(dst2);
1112 if (precedence1 > precedence2)
1113 {
1114 os_log_debug(si_destination_log, "ND Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
1115 precedence1, precedence2);
1116 return kPrefer_Dest1;
1117 }
1118 else if (precedence2 > precedence1)
1119 {
1120 os_log_debug(si_destination_log, "ND Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
1121 precedence2, precedence1);
1122 return kPrefer_Dest2;
1123 }
1124
1125 // Rule 8: Prefer smaller scope
1126 const int scope1 = rfc6724_scope_sa(dst1);
1127 const int scope2 = rfc6724_scope_sa(dst2);
1128 if (scope1 < scope2)
1129 {
1130 os_log_debug(si_destination_log, "ND Rule 8, prefer d1, d1 scope %d < d2 scope %d",
1131 scope1, scope2);
1132 return kPrefer_Dest1;
1133 }
1134 else if (scope2 < scope1)
1135 {
1136 os_log_debug(si_destination_log, "ND Rule 8, prefer d2, d2 scope %d < d1 scope %d",
1137 scope2, scope1);
1138 return kPrefer_Dest2;
1139 }
1140
1141 // Rule 10: Otherwise, leave the order unchanged
1142
1143 return kPrefer_Equal;
1144 }