Libinfo-517.200.9.tar.gz
[apple/libinfo.git] / lookup.subproj / si_compare.c
1 //
2 // si_compare.c
3 // Libinfo
4 //
5 // Copyright (c) 2011-2017 Apple Inc. All rights reserved.
6 //
7 //
8
9 #include "si_compare.h"
10
11 #include <dispatch/dispatch.h>
12 #include <sys/ioctl.h>
13 #include <sys/kern_control.h>
14 #include <sys/socketvar.h>
15 #include <sys/sys_domain.h>
16 #include <netinet/in.h>
17 #include <net/netsrc.h>
18 #include <net/if.h>
19 #include <errno.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <pthread.h>
23 #include <os/lock.h>
24 #include <os/log.h>
25 #include <mach/mach_time.h>
26 #include <net/ntstat.h>
27
// Comparison verdicts. Positive values favour the first destination, negative
// values the second. The "_Slightly" values are produced only by
// si_destination_compare_statistics() as a weak hint;
// si_destination_compare_rfc6724() folds them into kPrefer_Dest1/kPrefer_Dest2
// when none of its earlier rules decides.
enum
{
	kPrefer_Dest2_Slightly = -2,
	kPrefer_Dest2 = -1,
	kPrefer_Equal = 0,
	kPrefer_Dest1 = 1,
	kPrefer_Dest1_Slightly = 2,
};
36
// Outcome of si_destination_lookup().
enum
{
	kLookupFailure = 0,			// neither found in the cache nor created
	kLookupSuccess_Found = 1,	// an unexpired cache entry matched
	kLookupSuccess_Created = 2,	// a new cache entry was created for the destination
};
43
44 typedef struct RFC6724Address
45 {
46 union sockaddr_in_4_6 addr;
47 uint32_t flags;
48 uint16_t label;
49 uint16_t precedence;
50 } RFC6724Address;
51
52 typedef struct Destination
53 {
54 RFC6724Address d_dst;
55 RFC6724Address d_src;
56 uint32_t d_ifindex;
57 uint32_t d_resolved_ifindex;
58 uint32_t d_min_rtt;
59 uint32_t d_connection_attempts;
60 uint32_t d_connection_successes;
61 unsigned d_routable : 1;
62 unsigned d_direct : 1;
63 unsigned d_awdl : 1;
64 } Destination;
65
66 typedef struct DestCacheEntry
67 {
68 uint64_t dce_time;
69 struct DestCacheEntry *dce_next;
70 Destination dce_entry;
71 } DestCacheEntry;
72
// Tunables consulted by the statistics-based comparison.
typedef struct DestCompareSettings {
	uint32_t rtt_leeway;			// RTT gap above which one destination is strongly preferred
	uint32_t rtt_leeway_small;		// RTT gap above which one destination is slightly preferred
	int64_t rtt_failure_threshold;	// attempts without any success tolerated before avoiding a destination
	bool bypass_stats;				// when true, skip the statistics comparison entirely
} DestCompareSettings;
79
80 static DestCacheEntry *cache = NULL;
81 static os_unfair_lock cache_lock = OS_UNFAIR_LOCK_INIT;
82 static uint64_t cache_timeout = 0ULL;
83 static os_log_t si_destination_log = OS_LOG_DEFAULT;
84 static DestCompareSettings si_compare_settings = {};
85
86 static const uint32_t kLoopbackIndex = 1;
87
#ifndef TCP_RTT_SCALE
#define TCP_RTT_SCALE 32 // see netinet/tcp_var.h
#endif // !defined(TCP_RTT_SCALE)

// Branch-prediction hint: evaluates to the truth value of (b), marked unlikely.
#define SI_DESTINATION_COMPARE_UNLIKELY_BOOL(b) (__builtin_expect(!!((long)(b)), 0L))

// Logs a fault naming the argument and returns `toReturn` from the enclosing
// function when `arg` is NULL/zero. The argument is parenthesized before being
// negated so that compound expressions are tested as a whole.
#define SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(arg, toReturn) \
	do { \
		if (SI_DESTINATION_COMPARE_UNLIKELY_BOOL(!(arg))) { \
			os_log_fault(si_destination_log, "%{public}s " #arg " is NULL", __FUNCTION__); \
			return toReturn; \
		} \
	} \
	while (0)

// Default for DestCompareSettings.rtt_failure_threshold.
static const uint32_t kFailureAttemptsTolerated = 10;
// Default RTT leeways, expressed in the same scaled units as d_min_rtt.
static const uint32_t kDefaultRTTLeeway = 100 * TCP_RTT_SCALE;
static const uint32_t kDefaultRTTLeewaySmall = 10 * TCP_RTT_SCALE;
105
106 static void
107 si_destination_compare_child_has_forked(void)
108 {
109 cache_lock = OS_UNFAIR_LOCK_INIT;
110 }
111
112 static void
113 si_destination_compare_init_once(void)
114 {
115 mach_timebase_info_data_t time_base;
116 mach_timebase_info(&time_base);
117
118 cache_timeout = NSEC_PER_SEC;
119 cache_timeout *= time_base.denom;
120 cache_timeout /= time_base.numer;
121
122 si_destination_log = os_log_create("com.apple.network.libinfo", "si_destination_compare");
123
124 si_compare_settings = (DestCompareSettings){
125 .rtt_leeway = kDefaultRTTLeeway,
126 .rtt_leeway_small = kDefaultRTTLeewaySmall,
127 .rtt_failure_threshold = kFailureAttemptsTolerated,
128 .bypass_stats = false
129 };
130
131 (void)pthread_atfork(NULL, NULL, si_destination_compare_child_has_forked);
132 }
133
134 static void
135 si_destination_compare_init(void)
136 {
137 static pthread_once_t cache_init = PTHREAD_ONCE_INIT;
138 pthread_once(&cache_init, si_destination_compare_init_once);
139 }
140
141 #pragma mark -- Netsrc --
142
143 int
144 si_destination_create_control_socket(const char *control_name)
145 {
146 // Creat a socket
147 const int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
148 if (fd == -1)
149 {
150 os_log_error(si_destination_log, "socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL) failed: %m");
151 return fd;
152 }
153
154 // Disable SIGPIPE <rdar://problem/9049030>
155 int on = 1;
156 if (setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &on, sizeof(on)) != 0)
157 {
158 os_log_error(si_destination_log, "setsockopt(%d, SOL_SOCKET, SO_NOSIGPIPE, ...) failed: %m", fd);
159 close(fd);
160 return -1;
161 }
162
163 // Get the control ID for statistics
164 struct ctl_info ctl = {
165 .ctl_id = 0,
166 .ctl_name = { 0 }
167 };
168 strlcpy(ctl.ctl_name, control_name, sizeof(ctl.ctl_name));
169 if (ioctl(fd, CTLIOCGINFO, &ctl) == -1)
170 {
171 os_log_error(si_destination_log, "ioctl(%d, CTLIOCGINFO, ...) failed: %m", fd);
172 close(fd);
173 return -1;
174 }
175
176 // Connect to the statistics control
177 struct sockaddr_ctl sc = {
178 .sc_len = sizeof(sc),
179 .sc_family = AF_SYSTEM,
180 .ss_sysaddr = SYSPROTO_CONTROL,
181 .sc_id = ctl.ctl_id,
182 .sc_unit = 0,
183 .sc_reserved = { 0, 0, 0, 0, 0 }
184 };
185 if (connect(fd, (struct sockaddr *)&sc, sc.sc_len) != 0)
186 {
187 os_log_error(si_destination_log, "connect(%d, ...) ctl_id=%u, failed: %m", fd, ctl.ctl_id);
188 close(fd);
189 return -1;
190 }
191
192 // increase the receive socket buffer size
193 int rcvbuf = 128 * 1024;
194 if (0 != setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf))) {
195 os_log_error(si_destination_log, "setsockopt(%d, SOL_SOCKET, SO_RCVBUF, %d) failed: %m",
196 fd, rcvbuf);
197 }
198
199 // Set socket to non-blocking operation
200 const int flags = fcntl(fd, F_GETFL, 0);
201 if (-1 != flags) {
202 if (-1 == fcntl(fd, F_SETFL, flags | O_NONBLOCK)) {
203 os_log_error(si_destination_log, "fcntl(%d, F_SETFL, %d) failed: %m", fd, flags | O_NONBLOCK);
204 }
205 } else {
206 os_log_error(si_destination_log, "fcntl(%d, F_GETFL, 0) failed: %m", fd);
207 }
208
209 return fd;
210 }
211
212 static void
213 si_destination_fill_netsrc(Destination *d)
214 {
215 static int netsrc_sockfd = -1;
216 static int version = NETSRC_CURVERS;
217
218 if (netsrc_sockfd < 0) {
219 netsrc_sockfd = si_destination_create_control_socket(NETSRC_CTLNAME);
220 }
221 if (netsrc_sockfd < 0) {
222 return;
223 }
224
225 // Make sure we have a socket, create one if necessary
226 struct netsrc_req request = {
227 .nrq_ver = version,
228 .nrq_ifscope = d->d_ifindex,
229 };
230
231 if (d->d_dst.addr.sa.sa_len > sizeof(request._usa)) {
232 os_log_debug(si_destination_log, "sockaddr is too big");
233 return;
234 }
235 memcpy(&request._usa, &d->d_dst, d->d_dst.addr.sa.sa_len);
236 if (send(netsrc_sockfd, &request, sizeof(request), 0) != sizeof(request))
237 {
238 if (errno == EINVAL && version != NETSRC_VERSION1) {
239 // fall back to version 1
240 version = NETSRC_VERSION1;
241 si_destination_fill_netsrc(d);
242 }
243 // Expected when there is no route to host
244 os_log_error(si_destination_log, "send failed: %m");
245 return;
246 }
247
248 // Read the response
249 if (version == NETSRC_VERSION1) {
250 #ifdef NETSRC_VERSION2
251 struct netsrc_repv1 v1;
252 #else // NETSRC_VERSION2
253 struct netsrc_rep v1;
254 #endif // NETSRC_VERSION2
255 if (recv(netsrc_sockfd, &v1, sizeof(v1), 0) != sizeof(v1)) {
256 // Not expected
257 os_log_debug(si_destination_log, "recv failed: %m");
258 return;
259 }
260 d->d_dst.label = v1.nrp_dstlabel;
261 d->d_dst.precedence = v1.nrp_dstprecedence;
262 if (v1.nrq_sin.sin_len <= sizeof(d->d_src.addr)) {
263 memcpy( &d->d_src.addr, &v1._usa, v1.nrq_sin.sin_len);
264 }
265 d->d_src.label = v1.nrp_label;
266 d->d_src.precedence = v1.nrp_precedence;
267 d->d_src.flags = v1.nrp_flags;
268 return;
269 }
270 #ifdef NETSRC_VERSION2
271 else if (version == NETSRC_VERSION2) {
272 struct netsrc_repv2 v2;
273 if (recv(netsrc_sockfd, &v2, sizeof(v2), 0) != sizeof(v2)) {
274 // Not expected
275 os_log_debug(si_destination_log, "recv failed: %m");
276 return;
277 }
278 d->d_dst.label = v2.nrp_dstlabel;
279 d->d_dst.precedence = v2.nrp_dstprecedence;
280 d->d_src.addr = v2.nrp_src;
281 d->d_src.label = v2.nrp_label;
282 d->d_src.precedence = v2.nrp_precedence;
283 d->d_src.flags = v2.nrp_flags;
284 d->d_resolved_ifindex = v2.nrp_ifindex;
285 d->d_min_rtt = v2.nrp_min_rtt;
286 d->d_connection_attempts = v2.nrp_connection_attempts;
287 d->d_connection_successes = v2.nrp_connection_successes;
288 d->d_routable = (v2.nrp_flags & NETSRC_FLAG_ROUTEABLE) ? 1 : 0;
289 d->d_direct = (v2.nrp_flags & NETSRC_FLAG_DIRECT) ? 1 : 0;
290 d->d_awdl = (v2.nrp_flags & NETSRC_FLAG_AWDL) ? 1 : 0;
291 return;
292 }
293 #endif // NETSRC_VERSION2
294
295 return;
296 }
297
298 #pragma mark -- Statistics --
299
300 static int
301 si_destination_compare_statistics(
302 Destination *dst1,
303 Destination *dst2)
304 {
305 int slightPreference = kPrefer_Equal;
306 // If we have min round trip times for both, use that
307 if (dst1->d_min_rtt && dst2->d_min_rtt)
308 {
309 #define RTT_MSEC_DOUBLE_FROM_DST( dst ) (((double)(dst->d_min_rtt)) / ((double)(TCP_RTT_SCALE)))
310 if (dst1->d_min_rtt < dst2->d_min_rtt)
311 {
312 if (dst1->d_min_rtt + si_compare_settings.rtt_leeway < dst2->d_min_rtt)
313 {
314 os_log_debug(si_destination_log,
315 "prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
316 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
317 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
318 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
319 return kPrefer_Dest1;
320 }
321 else if (dst1->d_min_rtt + si_compare_settings.rtt_leeway_small < dst2->d_min_rtt)
322 {
323 slightPreference = kPrefer_Dest1_Slightly;
324 os_log_debug(si_destination_log,
325 "prefering 1 slightly, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
326 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
327 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
328 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
329 }
330 else
331 {
332 os_log_debug(si_destination_log,
333 "not prefering 1, dst1 min rtt %f < dst2 min rtt %f, leeway %u small leeway %u",
334 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
335 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
336 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
337 }
338 }
339 else if (dst1->d_min_rtt > dst2->d_min_rtt)
340 {
341 if (dst1->d_min_rtt > si_compare_settings.rtt_leeway + dst2->d_min_rtt)
342 {
343 os_log_debug(si_destination_log,
344 "prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
345 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
346 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
347 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
348 return kPrefer_Dest2;
349 }
350 else if (dst1->d_min_rtt > si_compare_settings.rtt_leeway_small + dst2->d_min_rtt)
351 {
352 slightPreference = kPrefer_Dest2_Slightly;
353 os_log_debug(si_destination_log,
354 "prefering 2 slightly, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
355 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
356 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
357 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
358 }
359 else
360 {
361 os_log_debug(si_destination_log,
362 "not prefering 2, dst1 min rtt %f > dst2 min rtt %f, leeway %u small leeway %u",
363 RTT_MSEC_DOUBLE_FROM_DST(dst1), RTT_MSEC_DOUBLE_FROM_DST(dst2),
364 si_compare_settings.rtt_leeway / TCP_RTT_SCALE,
365 si_compare_settings.rtt_leeway_small / TCP_RTT_SCALE);
366 }
367 }
368 else
369 {
370 os_log_debug(si_destination_log, "no preference, equal min rtt %f", RTT_MSEC_DOUBLE_FROM_DST(dst1));
371 }
372 // we hold on to slightPreference and only use it if no other part of the evaluation had a stronger preference
373 #undef RTT_MSEC_DOUBLE_FROM_DST
374 }
375 else if (dst1->d_min_rtt || dst2->d_min_rtt)
376 {
377 // If only dest1 is missing round trip make sure it doesn't have zero successful attempts
378 if (dst1->d_min_rtt == 0 &&
379 dst1->d_connection_successes == 0 &&
380 dst1->d_connection_attempts > 0)
381 {
382 if (dst1->d_connection_attempts > si_compare_settings.rtt_failure_threshold)
383 {
384 os_log_debug(si_destination_log, "prefer 2, dst 1 has no successful connections and %u attempts",
385 dst1->d_connection_attempts);
386 return kPrefer_Dest2;
387 }
388 }
389 // If only dest2 is missing round trip make sure it doesn't have zero successful attempts
390 else if (dst2->d_min_rtt == 0 &&
391 dst2->d_connection_successes == 0 &&
392 dst2->d_connection_attempts > 0)
393 {
394 if (dst2->d_connection_attempts > si_compare_settings.rtt_failure_threshold)
395 {
396 os_log_debug(si_destination_log, "prefer 1, dst 2 has no successful connections and %u attempts",
397 dst2->d_connection_attempts);
398 return kPrefer_Dest1;
399 }
400 }
401 }
402
403 // If we have routes for both...
404 if (dst1->d_routable != 0 && dst2->d_routable != 0)
405 {
406 // <rdar://problem/9070784> Prefer interface that are not peer to peer
407 if (dst1->d_awdl != dst2->d_awdl)
408 {
409 if (dst2->d_awdl)
410 {
411 os_log_debug(si_destination_log, "prefer dst 1, dst 2 is p2p, dst 1 is not");
412 return kPrefer_Dest1;
413 }
414 else if (dst1->d_awdl)
415 {
416 os_log_debug(si_destination_log, "prefer dst 2, dst 1 is p2p, dst 2 is not");
417 return kPrefer_Dest2;
418 }
419 }
420
421 // Prefer loopback routes
422 bool dst1loopback = dst1->d_resolved_ifindex == kLoopbackIndex ||
423 (dst1->d_dst.addr.sa.sa_family == AF_INET6 &&
424 IN6_IS_ADDR_LINKLOCAL(&dst1->d_dst.addr.sin6.sin6_addr) &&
425 dst1->d_dst.addr.sin6.sin6_scope_id == kLoopbackIndex);
426 bool dst2loopback = dst2->d_resolved_ifindex == kLoopbackIndex ||
427 (dst2->d_dst.addr.sa.sa_family == AF_INET6 &&
428 IN6_IS_ADDR_LINKLOCAL(&dst2->d_dst.addr.sin6.sin6_addr) &&
429 dst2->d_dst.addr.sin6.sin6_scope_id == kLoopbackIndex);
430
431 if (dst1loopback && !dst2loopback)
432 {
433 os_log_debug(si_destination_log, "prefer 1, dst 1 is loopback, dst 2 is not");
434 return kPrefer_Dest1;
435 }
436 else if (dst2loopback && !dst1loopback)
437 {
438 os_log_debug(si_destination_log, "prefer 2, dst 2 is loopback, dst 1 is not");
439 return kPrefer_Dest2;
440 }
441
442 // Prefer direct routes
443 if (dst1->d_direct && !dst2->d_direct)
444 {
445 os_log_debug(si_destination_log, "prefer 1, dst 1 is local, dst 2 is not");
446 return kPrefer_Dest1;
447 }
448 else if (dst2->d_direct && !dst1->d_direct)
449 {
450 os_log_debug(si_destination_log, "prefer 2, dst 2 is local, dst 1 is not");
451 return kPrefer_Dest2;
452 }
453 }
454 else if (dst1->d_routable != 0)
455 {
456 // prefer destination we have a route to
457 os_log_debug(si_destination_log, "prefer 1, dst 2 has no route");
458 return kPrefer_Dest1;
459 }
460 else if (dst2->d_routable != 0)
461 {
462 // prefer destination we have a route to
463 os_log_debug(si_destination_log, "prefer 2, dst 1 has no route");
464 return kPrefer_Dest2;
465 }
466 return slightPreference;
467 }
468
469 #pragma mark -- Cache --
470
471 static bool
472 sa_equal(
473 const struct sockaddr *sa1,
474 const struct sockaddr *sa2)
475 {
476 if (sa1->sa_family != sa2->sa_family)
477 return false;
478
479 /* We don't use bcmp because we don't care about the port number */
480 if (sa1->sa_family == AF_INET)
481 {
482 const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
483 const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
484
485 if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
486 return false;
487 return true;
488 }
489
490 if (sa1->sa_family == AF_INET6)
491 {
492 const struct sockaddr_in6 *sin61 = (const struct sockaddr_in6 *)sa1;
493 const struct sockaddr_in6 *sin62 = (const struct sockaddr_in6 *)sa2;
494
495 if (memcmp(&sin61->sin6_addr, &sin62->sin6_addr, sizeof(sin61->sin6_addr)) != 0) {
496 return false;
497 }
498
499 // TBD: Is the flow info relevant?
500
501 if (sin61->sin6_scope_id != sin62->sin6_scope_id) {
502 return false;
503 }
504
505 return true;
506 }
507
508 if (sa1->sa_len != sa2->sa_len) {
509 return false;
510 }
511
512 return (memcmp(sa1, sa2, sa1->sa_len) == 0);
513 }
514
515 static bool
516 si_destination_cache_find(
517 const struct sockaddr *dst_sa,
518 uint32_t ifscope,
519 Destination *out_dst)
520 {
521 // Loop through the entries looking for:
522 // - expired items
523 // - macthing item
524 DestCacheEntry **pprev = &cache;
525 uint64_t now = mach_absolute_time();
526 bool result = false;
527
528 while (*pprev != NULL)
529 {
530 // If the item has expired, pull it out of the list
531 if ((now - (*pprev)->dce_time) >= cache_timeout)
532 {
533 DestCacheEntry *expired = *pprev;
534 *pprev = expired->dce_next;
535 memset(expired, 0, sizeof(*expired));
536 free(expired);
537 }
538 else
539 {
540 // If the item matches, copy the entry
541 if (!result && (*pprev)->dce_entry.d_ifindex == ifscope &&
542 sa_equal(dst_sa, &(*pprev)->dce_entry.d_dst.addr.sa))
543 {
544 *out_dst = (*pprev)->dce_entry;
545 result = true;
546 }
547
548 pprev = &(*pprev)->dce_next;
549 }
550 }
551
552 return result;
553 }
554
555 static bool
556 si_destination_cache_create(
557 const struct sockaddr *dst_sa,
558 uint32_t ifscope,
559 Destination *out_dst)
560 {
561 DestCacheEntry *cache_entry;
562 Destination *d;
563
564 if (dst_sa->sa_len > sizeof(cache_entry->dce_entry.d_dst.addr)) {
565 os_log_error(si_destination_log, "(dst_sa->sa_len %u > sizeof(cache_entry->dce_entry.d_dst.addr))",
566 dst_sa->sa_len);
567 return false;
568 }
569
570 // Allocate entry
571 cache_entry = calloc(1, sizeof(*cache_entry));
572 if (NULL == cache_entry) {
573 os_log_error(si_destination_log, "calloc(%zu) failed: %m", sizeof(*cache_entry));
574 return false;
575 }
576 d = &cache_entry->dce_entry;
577
578 // Copy the destination sockaddr
579 memcpy(&d->d_dst.addr, dst_sa, dst_sa->sa_len);
580 d->d_ifindex = ifscope;
581
582 // Query the kernel for the matching source, precedence and label
583 si_destination_fill_netsrc(d);
584 *out_dst = *d;
585
586 cache_entry->dce_time = mach_absolute_time();
587 cache_entry->dce_next = cache;
588 cache = cache_entry;
589
590 return true;
591 }
592
593 static int
594 si_destination_lookup(
595 const struct sockaddr *dst_sa,
596 uint32_t ifscope,
597 Destination *out_dst)
598 {
599 int lookupResult = kLookupFailure;
600
601 si_destination_compare_init();
602 os_unfair_lock_lock(&cache_lock);
603 if (si_destination_cache_find(dst_sa, ifscope, out_dst)) {
604 lookupResult = kLookupSuccess_Found;
605 } else if (si_destination_cache_create(dst_sa, ifscope, out_dst)) {
606 lookupResult = kLookupSuccess_Created;
607 }
608 os_unfair_lock_unlock(&cache_lock);
609
610 return lookupResult;
611 }
612
613 #pragma mark -- RFC 6724 --
614
615 // https://tools.ietf.org/html/rfc6724
616
// Teredo prefix 2001::/32: first 16-bit word is 0x2001, second is zero.
#ifndef IN6_IS_ADDR_TEREDO
#define IN6_IS_ADDR_TEREDO(x) \
	((((x)->__u6_addr.__u6_addr16[0]) == htons(0x2001)) && \
	 (((x)->__u6_addr.__u6_addr16[1]) == 0x0000))
#endif // !defined(IN6_IS_ADDR_TEREDO)

// 6to4 prefix 2002::/16. When s6_addr16 is not defined, any existing
// definition is replaced with one that uses __u6_addr16 directly.
#ifndef s6_addr16
#undef IN6_IS_ADDR_6TO4
#define IN6_IS_ADDR_6TO4(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x2002))
#endif // !defined(s6_addr16)

// 6bone prefix 3ffe::/16.
#ifndef IN6_IS_ADDR_6BONE
#define IN6_IS_ADDR_6BONE(x) (((x)->__u6_addr.__u6_addr16[0]) == htons(0x3ffe))
#endif // !defined(IN6_IS_ADDR_6BONE)
630
631 static int
632 rfc6724_scope_ip6(const struct in6_addr *addr)
633 {
634 int scope;
635
636 if (addr->s6_addr[0] == 0xfe) {
637 scope = addr->s6_addr[1] & 0xc0;
638
639 switch (scope) {
640 case 0x80:
641 return __IPV6_ADDR_SCOPE_LINKLOCAL;
642 case 0xc0:
643 return __IPV6_ADDR_SCOPE_SITELOCAL;
644 default:
645 return __IPV6_ADDR_SCOPE_GLOBAL; /* just in case */
646 }
647 }
648
649
650 if (addr->s6_addr[0] == 0xff) {
651 scope = addr->s6_addr[1] & 0x0f;
652
653 /*
654 * due to other scope such as reserved,
655 * return scope doesn't work.
656 */
657 switch (scope) {
658 case __IPV6_ADDR_SCOPE_NODELOCAL:
659 return __IPV6_ADDR_SCOPE_NODELOCAL;
660 case __IPV6_ADDR_SCOPE_LINKLOCAL:
661 return __IPV6_ADDR_SCOPE_LINKLOCAL;
662 case __IPV6_ADDR_SCOPE_SITELOCAL:
663 return __IPV6_ADDR_SCOPE_SITELOCAL;
664 default:
665 return __IPV6_ADDR_SCOPE_GLOBAL;
666 }
667 }
668
669 /*
670 * Regard loopback and unspecified addresses as global, since
671 * they have no ambiguity.
672 */
673 static const struct in6_addr in6addr_lo = IN6ADDR_LOOPBACK_INIT;
674 if (memcmp(&in6addr_lo, addr, sizeof(*addr) - 1) == 0) {
675 if (addr->s6_addr[15] == 1) { /* loopback */
676 return __IPV6_ADDR_SCOPE_LINKLOCAL;
677 }
678 if (addr->s6_addr[15] == 0) { /* unspecified */
679 return __IPV6_ADDR_SCOPE_GLOBAL; /* XXX: correct? */
680 }
681 }
682
683 return __IPV6_ADDR_SCOPE_GLOBAL;
684 }
685
686 static int
687 rfc6724_scope_ip(const struct in_addr *addr)
688 {
689 uint32_t hostbyteaddr = ntohl(addr->s_addr);
690 if (IN_LOOPBACK(hostbyteaddr) || IN_LINKLOCAL(hostbyteaddr)) {
691 return __IPV6_ADDR_SCOPE_LINKLOCAL;
692 }
693 return __IPV6_ADDR_SCOPE_GLOBAL;
694 }
695
// Scope of a socket address: dispatches on the address family and returns 0
// for families other than AF_INET and AF_INET6.
static int
rfc6724_scope_sa(const struct sockaddr *sa)
{
	switch (sa->sa_family) {
	case AF_INET6:
		return rfc6724_scope_ip6(&((const struct sockaddr_in6 *)sa)->sin6_addr);
	case AF_INET:
		return rfc6724_scope_ip(&((const struct sockaddr_in *)sa)->sin_addr);
	default:
		return 0;
	}
}
708
709 static int
710 rfc6724_scope(RFC6724Address *addr)
711 {
712 return rfc6724_scope_sa(&addr->addr.sa);
713 }
714
715 // RFC 6724 Section 2.1
716 // https://tools.ietf.org/html/rfc6724#section-2.1
717
718 // Prefix Precedence Label
719 // ::1/128 50 0
720 // ::/0 40 1
721 // ::ffff:0:0/96 35 4
722 // 2002::/16 30 2
723 // 2001::/32 5 5
724 // fc00::/7 3 13
725 // ::/96 1 3
726 // fec0::/10 1 11
727 // 3ffe::/16 1 12
728
// Returns the precedence of `sa` according to the default policy table of
// RFC 6724 section 2.1, evaluated locally (no kernel query).
// IPv4 addresses are treated as IPv4-mapped IPv6 addresses (precedence 35).
// Returns 0 for a NULL pointer or an address family other than
// AF_INET/AF_INET6.
static int
rfc6724_precedence(const struct sockaddr *sa)
{
	// A NULL address silently gets the lowest precedence. (The original also
	// had an argument-check macro right after this test; it could never fire
	// and has been dropped.)
	if (sa == NULL) {
		return 0;
	}

	if (sa->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;

		// ::1/128
		if (IN6_IS_ADDR_LOOPBACK(&sin6->sin6_addr)) {
			return 50;
		}

		// ::ffff:0:0/96
		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			return 35;
		}

		// 2002::/16
		if (IN6_IS_ADDR_6TO4(&sin6->sin6_addr)) {
			return 30;
		}

		// 2001::/32
		if (IN6_IS_ADDR_TEREDO(&sin6->sin6_addr)) {
			return 5;
		}

		// fc00::/7
		if (IN6_IS_ADDR_UNIQUE_LOCAL(&sin6->sin6_addr)) {
			return 3;
		}

		// ::/96
		if (IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr)) {
			return 1;
		}

		// fec0::/10
		if (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)) {
			return 1;
		}

		// 3ffe::/16
		if (IN6_IS_ADDR_6BONE(&sin6->sin6_addr)) {
			return 1;
		}

		// ::/0
		return 40;
	} else if (sa->sa_family == AF_INET) {
		// ::ffff:0:0/96 (IPv4 is treated as a v4-mapped v6 address)
		return 35;
	}
	return 0;
}
789
790 static bool
791 rfc6724_native(const RFC6724Address *addr)
792 {
793 return !(addr->addr.sa.sa_family == AF_INET6 &&
794 (IN6_IS_ADDR_6TO4(&addr->addr.sin6.sin6_addr) ||
795 IN6_IS_ADDR_TEREDO(&addr->addr.sin6.sin6_addr)));
796 }
797
798 static int
799 common_prefix_length(const union sockaddr_in_4_6 *addr1, const union sockaddr_in_4_6 *addr2)
800 {
801 int match = 0;
802 if (addr1->sa.sa_family == AF_INET6 && addr2->sa.sa_family == AF_INET6)
803 {
804 const unsigned char *s = (const unsigned char *)&addr1->sin6.sin6_addr;
805 const unsigned char *d = (const unsigned char *)&addr2->sin6.sin6_addr;
806 const unsigned char *lim = s + 8;
807 unsigned char r;
808
809 while (s < lim) {
810 if ((r = (*d++ ^ *s++)) != 0) {
811 while (r < 128) {
812 match++;
813 r <<= 1;
814 }
815 break;
816 } else {
817 match += 8;
818 }
819 }
820 }
821 return match;
822 }
823
824 static int
825 si_destination_compare_rfc6724(Destination *d1, Destination *d2, int statResult)
826 {
827 // Rule 1: Avoid unusable destinations (no source means unusable dest)
828 if (d1->d_src.addr.sa.sa_family == AF_UNSPEC || d2->d_src.addr.sa.sa_family == AF_UNSPEC)
829 {
830 if (d1->d_src.addr.sa.sa_family != AF_UNSPEC)
831 {
832 os_log_debug(si_destination_log, "Rule 1, prefer d1, d2 is not routable");
833 return kPrefer_Dest1;
834 }
835 else if (d2->d_src.addr.sa.sa_family != AF_UNSPEC)
836 {
837 os_log_debug(si_destination_log, "Rule 1, prefer d2, d1 is not routable");
838 return kPrefer_Dest2;
839 }
840 }
841
842 // Rule 2: Prefer matching scope
843 if (rfc6724_scope(&d1->d_dst) != rfc6724_scope(&d1->d_src) || rfc6724_scope(&d2->d_dst) != rfc6724_scope(&d2->d_src))
844 {
845 if (rfc6724_scope(&d1->d_dst) == rfc6724_scope(&d1->d_src))
846 {
847 os_log_debug(si_destination_log, "Rule 2, prefer d1, d2 dst scope does not match src scope");
848 return kPrefer_Dest1;
849 }
850 if (rfc6724_scope(&d2->d_dst) == rfc6724_scope(&d2->d_src))
851 {
852 os_log_debug(si_destination_log, "Rule 2, prefer d2, d1 dst scope does not match src scope");
853 return kPrefer_Dest2;
854 }
855 }
856
857 // Rule 3: Avoid deprecated addresses
858 if ((d1->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != (d2->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED))
859 {
860 if ((d1->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != 0)
861 {
862 os_log_debug(si_destination_log, "Rule 3, prefer d2, d1 source is deprecated");
863 return kPrefer_Dest1;
864 }
865 if ((d2->d_src.flags & NETSRC_IP6_FLAG_DEPRECATED) != 0)
866 {
867 os_log_debug(si_destination_log, "Rule 3, prefer d1, d2 source is deprecated");
868 return kPrefer_Dest2;
869 }
870 }
871
872 // Rule 3bis: Avoid optimistic addresses, c.f. RFC 4429 which defines them as conceptually similar to deprecated
873 // Note that this rule is not part of RFC 6724
874 if ((d1->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != (d2->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC))
875 {
876 if ((d1->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != 0)
877 {
878 os_log_debug(si_destination_log, "Rule 3a, prefer d2, d1 source is optimistic");
879 return kPrefer_Dest1;
880 }
881 if ((d2->d_src.flags & NETSRC_IP6_FLAG_OPTIMISTIC) != 0)
882 {
883 os_log_debug(si_destination_log, "Rule 3a, prefer d1, d2 source is optimistic");
884 return kPrefer_Dest2;
885 }
886 }
887
888 // Rule 4: Prefer home addresses
889 // TODO: requires Mobile IPv6 support
890
891 // Rule 5: Prefer matching label
892 if (d1->d_dst.label != d1->d_src.label || d2->d_dst.label != d2->d_src.label)
893 {
894 if (d1->d_dst.label == d1->d_src.label)
895 {
896 os_log_debug(si_destination_log, "Rule 5, prefer d1, d2 dst label does not match src label");
897 return kPrefer_Dest1;
898 }
899 if (d2->d_dst.label == d2->d_src.label)
900 {
901 os_log_debug(si_destination_log, "Rule 5, prefer d2, d1 dst label does not match src label");
902 return kPrefer_Dest2;
903 }
904 }
905
906 // Rule 6: Prefer higher precedence
907 if (d1->d_dst.precedence > d2->d_dst.precedence)
908 {
909 os_log_debug(si_destination_log, "Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
910 d1->d_dst.precedence, d2->d_dst.precedence);
911 return kPrefer_Dest1;
912 }
913 else if (d2->d_dst.precedence > d1->d_dst.precedence)
914 {
915 os_log_debug(si_destination_log, "Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
916 d2->d_dst.precedence, d1->d_dst.precedence);
917 return kPrefer_Dest2;
918 }
919
920 // Rule 7: Prefer native transport
921 const bool d1_native = rfc6724_native(&d1->d_src);
922 const bool d2_native = rfc6724_native(&d2->d_src);
923 if (d1_native && !d2_native)
924 {
925 os_log_debug(si_destination_log, "Rule 7, prefer d1, d2 src is not native");
926 return kPrefer_Dest1;
927 }
928 else if (d2_native && !d1_native)
929 {
930 os_log_debug(si_destination_log, "Rule 7, prefer d2, d1 src is not native");
931 return kPrefer_Dest2;
932 }
933
934 // Rule 8: Prefer smaller scope
935 const int scope1 = rfc6724_scope(&d1->d_dst);
936 const int scope2 = rfc6724_scope(&d2->d_dst);
937 if (scope1 < scope2)
938 {
939 os_log_debug(si_destination_log, "Rule 8, prefer d1, d1 scope %d < d2 scope %d", scope1, scope2);
940 return kPrefer_Dest1;
941 }
942 else if (scope2 < scope1)
943 {
944 os_log_debug(si_destination_log, "Rule 8, prefer d2, d2 scope %d < d1 scope %d", scope2, scope1);
945 return kPrefer_Dest2;
946 }
947
948 // RFC6724: Rules 9 and 10 MAY be superseded if the implementation has other means of sorting destination addresses.
949 if ((kPrefer_Dest1 == statResult) || (kPrefer_Dest1_Slightly == statResult)) {
950 return kPrefer_Dest1;
951 } else if ((kPrefer_Dest2 == statResult) || (kPrefer_Dest2_Slightly == statResult)) {
952 return kPrefer_Dest2;
953 }
954
955 // Rule 9: Use longest matching prefix
956 int matchlen1 = common_prefix_length(&d1->d_dst.addr, &d1->d_src.addr);
957 int matchlen2 = common_prefix_length(&d2->d_dst.addr, &d2->d_src.addr);
958 if (matchlen1 && matchlen2)
959 {
960 if (matchlen1 > matchlen2)
961 {
962 os_log_debug(si_destination_log, "Rule 9, prefer d1, d1 shares more common prefix");
963 return kPrefer_Dest1;
964 }
965 else if (matchlen2 > matchlen1)
966 {
967 os_log_debug(si_destination_log, "Rule 9, prefer d2, d2 shares more common prefix");
968 return kPrefer_Dest2;
969 }
970 }
971
972 // Rule 10: Otherwise, leave the order unchanged
973 return kPrefer_Equal;
974 }
975
976 #pragma mark -- Internal Helper --
977
978 static int
979 si_destination_compare_internal(
980 const struct sockaddr *dst1,
981 uint32_t dst1ifindex,
982 const struct sockaddr *dst2,
983 uint32_t dst2ifindex,
984 bool statistics)
985 {
986 // If either of the destinations is not AF_INET/AF_INET6
987 if ((dst1->sa_family != AF_INET && dst1->sa_family != AF_INET6) ||
988 (dst2->sa_family != AF_INET && dst2->sa_family != AF_INET6))
989 {
990 if (dst1->sa_family == AF_INET || dst1->sa_family == AF_INET6) {
991 return kPrefer_Dest1;
992 } else if (dst2->sa_family == AF_INET || dst2->sa_family == AF_INET6) {
993 return kPrefer_Dest2;
994 } else {
995 return kPrefer_Equal;
996 }
997 }
998
999 Destination d1;
1000 Destination d2;
1001
1002 // Lookup d1 and d2 in the cache
1003 int lookupResultD1 = si_destination_lookup(dst1, dst1ifindex, &d1);
1004 int lookupResultD2 = si_destination_lookup(dst2, dst2ifindex, &d2);
1005 if (lookupResultD1 == kLookupFailure)
1006 {
1007 os_log_debug(si_destination_log, "si_destination_lookup for dst1 failed");
1008 return kPrefer_Equal;
1009 }
1010 if (lookupResultD2 == kLookupFailure)
1011 {
1012 os_log_debug(si_destination_log, "si_destination_lookup for dst2 failed");
1013 return kPrefer_Equal;
1014 }
1015
1016 int statResult = kPrefer_Equal;
1017 if (statistics && !si_compare_settings.bypass_stats)
1018 {
1019 statResult = si_destination_compare_statistics(&d1, &d2);
1020 if ((kPrefer_Dest1 == statResult) || (kPrefer_Dest2 == statResult))
1021 {
1022 return statResult;
1023 }
1024 }
1025
1026 statResult = si_destination_compare_rfc6724(&d1, &d2, statResult);
1027
1028 if (statResult == kPrefer_Equal) {
1029 // Only if all other comparisons are equal, prefer entries that were already in the cache over
1030 // ones that are new and we just created.
1031
1032 // Found < Created
1033 if (lookupResultD1 == kLookupSuccess_Found && lookupResultD2 == kLookupSuccess_Created) {
1034 os_log_debug(si_destination_log, "prefer d1, known while d2 not known");
1035 statResult = kPrefer_Dest1;
1036 } else if (lookupResultD2 == kLookupSuccess_Found && lookupResultD1 == kLookupSuccess_Created) {
1037 os_log_debug(si_destination_log, "prefer d2, known while d1 not known");
1038 statResult = kPrefer_Dest2;
1039 }
1040 }
1041
1042 return statResult;
1043 }
1044
1045 #pragma mark -- SPI --
1046
1047 int
1048 si_destination_compare(
1049 const struct sockaddr *dst1,
1050 int dst1ifindex,
1051 const struct sockaddr *dst2,
1052 int dst2ifindex,
1053 bool statistics)
1054 {
1055 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1, kPrefer_Equal);
1056 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2, kPrefer_Equal);
1057
1058 si_destination_compare_init();
1059
1060 const int result = si_destination_compare_internal(dst1, dst1ifindex, dst2, dst2ifindex, statistics);
1061
1062 os_log_debug(si_destination_log, "%{network:sockaddr}.*P@%u %c %{network:sockaddr}.*P@%u",
1063 dst1->sa_len, dst1, dst1ifindex, result == 0 ? '=' : result < 0 ? '<' : '>',
1064 dst2->sa_len, dst2, dst2ifindex);
1065
1066 return result;
1067 }
1068
1069 int
1070 si_destination_compare_no_dependencies(const struct sockaddr *dst1,
1071 const struct sockaddr *dst2)
1072 {
1073 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst1, kPrefer_Equal);
1074 SI_DESTINATION_COMPARE_CHECK_ARG_RETURN(dst2, kPrefer_Equal);
1075
1076 // Skip rule 1 (requires route to destination address)
1077 // Skip rule 2, 3, 5, 7, 9 (requires corresponding source address)
1078 // Skip rule 4 (not supported by si_destination_compare() today)
1079
1080 // Rule 6: Prefer higher precedence
1081 const int precedence1 = rfc6724_precedence(dst1);
1082 const int precedence2 = rfc6724_precedence(dst2);
1083 if (precedence1 > precedence2)
1084 {
1085 os_log_debug(si_destination_log, "ND Rule 6, prefer d1, d1 precedence %d > d2 precedence %d",
1086 precedence1, precedence2);
1087 return kPrefer_Dest1;
1088 }
1089 else if (precedence2 > precedence1)
1090 {
1091 os_log_debug(si_destination_log, "ND Rule 6, prefer d2, d2 precedence %d > d1 precedence %d",
1092 precedence2, precedence1);
1093 return kPrefer_Dest2;
1094 }
1095
1096 // Rule 8: Prefer smaller scope
1097 const int scope1 = rfc6724_scope_sa(dst1);
1098 const int scope2 = rfc6724_scope_sa(dst2);
1099 if (scope1 < scope2)
1100 {
1101 os_log_debug(si_destination_log, "ND Rule 8, prefer d1, d1 scope %d < d2 scope %d",
1102 scope1, scope2);
1103 return kPrefer_Dest1;
1104 }
1105 else if (scope2 < scope1)
1106 {
1107 os_log_debug(si_destination_log, "ND Rule 8, prefer d2, d2 scope %d < d1 scope %d",
1108 scope2, scope1);
1109 return kPrefer_Dest2;
1110 }
1111
1112 // Rule 10: Otherwise, leave the order unchanged
1113
1114 return kPrefer_Equal;
1115 }