]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/tcp_cache.c
xnu-6153.81.5.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_cache.c
CommitLineData
3e170ce0 1/*
5ba3f43e 2 * Copyright (c) 2015-2017 Apple Inc. All rights reserved.
3e170ce0
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* TCP-cache to store and retrieve TCP-related information */
30
31#include <net/flowhash.h>
32#include <net/route.h>
5ba3f43e 33#include <net/necp.h>
3e170ce0 34#include <netinet/in_pcb.h>
5ba3f43e 35#include <netinet/mptcp_var.h>
3e170ce0
A
36#include <netinet/tcp_cache.h>
37#include <netinet/tcp_seq.h>
38#include <netinet/tcp_var.h>
39#include <kern/locks.h>
40#include <sys/queue.h>
41#include <dev/random/randomdev.h>
42
5ba3f43e
A
43typedef union {
44 struct in_addr addr;
45 struct in6_addr addr6;
46} in_4_6_addr;
47
3e170ce0
A
48struct tcp_heuristic_key {
49 union {
50 uint8_t thk_net_signature[IFNET_SIGNATURELEN];
5ba3f43e 51 in_4_6_addr thk_ip;
3e170ce0 52 };
0a7de745 53 sa_family_t thk_family;
3e170ce0
A
54};
55
56struct tcp_heuristic {
57 SLIST_ENTRY(tcp_heuristic) list;
58
0a7de745 59 uint32_t th_last_access;
3e170ce0 60
0a7de745 61 struct tcp_heuristic_key th_key;
3e170ce0 62
0a7de745 63 char th_val_start[0]; /* Marker for memsetting to 0 */
4bd07ac2 64
0a7de745
A
65 uint8_t th_tfo_data_loss; /* The number of times a SYN+data has been lost */
66 uint8_t th_tfo_req_loss; /* The number of times a SYN+cookie-req has been lost */
67 uint8_t th_tfo_data_rst; /* The number of times a SYN+data has received a RST */
68 uint8_t th_tfo_req_rst; /* The number of times a SYN+cookie-req has received a RST */
69 uint8_t th_mptcp_loss; /* The number of times a SYN+MP_CAPABLE has been lost */
cb323159 70 uint8_t th_mptcp_success; /* The number of times MPTCP-negotiation has been successful */
0a7de745
A
71 uint8_t th_ecn_loss; /* The number of times a SYN+ecn has been lost */
72 uint8_t th_ecn_aggressive; /* The number of times we did an aggressive fallback */
73 uint8_t th_ecn_droprst; /* The number of times ECN connections received a RST after first data pkt */
74 uint8_t th_ecn_droprxmt; /* The number of times ECN connection is dropped after multiple retransmits */
75 uint8_t th_ecn_synrst; /* number of times RST was received in response to an ECN enabled SYN */
76 uint32_t th_tfo_enabled_time; /* The moment when we reenabled TFO after backing off */
77 uint32_t th_tfo_backoff_until; /* Time until when we should not try out TFO */
78 uint32_t th_tfo_backoff; /* Current backoff timer */
79 uint32_t th_mptcp_backoff; /* Time until when we should not try out MPTCP */
80 uint32_t th_ecn_backoff; /* Time until when we should not try out ECN */
5ba3f43e 81
0a7de745 82 uint8_t th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
cb323159
A
83 th_mptcp_in_backoff:1, /* Are we avoiding MPTCP due to the backoff timer? */
84 th_mptcp_heuristic_disabled:1; /* Are heuristics disabled? */
4bd07ac2 85
0a7de745 86 char th_val_end[0]; /* Marker for memsetting to 0 */
3e170ce0
A
87};
88
89struct tcp_heuristics_head {
90 SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;
91
92 /* Per-hashbucket lock to avoid lock-contention */
0a7de745 93 lck_mtx_t thh_mtx;
3e170ce0
A
94};
95
96struct tcp_cache_key {
0a7de745 97 sa_family_t tck_family;
3e170ce0
A
98
99 struct tcp_heuristic_key tck_src;
5ba3f43e 100 in_4_6_addr tck_dst;
3e170ce0
A
101};
102
103struct tcp_cache {
104 SLIST_ENTRY(tcp_cache) list;
105
0a7de745 106 u_int32_t tc_last_access;
3e170ce0
A
107
108 struct tcp_cache_key tc_key;
109
0a7de745
A
110 u_int8_t tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
111 u_int8_t tc_tfo_cookie_len;
3e170ce0
A
112};
113
114struct tcp_cache_head {
115 SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;
116
117 /* Per-hashbucket lock to avoid lock-contention */
0a7de745 118 lck_mtx_t tch_mtx;
3e170ce0
A
119};
120
5ba3f43e
A
121struct tcp_cache_key_src {
122 struct ifnet *ifp;
123 in_4_6_addr laddr;
124 in_4_6_addr faddr;
125 int af;
126};
127
3e170ce0
A
128static u_int32_t tcp_cache_hash_seed;
129
130size_t tcp_cache_size;
131
132/*
133 * The maximum depth of the hash-bucket. This way we limit the tcp_cache to
134 * TCP_CACHE_BUCKET_SIZE * tcp_cache_size and have "natural" garbage collection
135 */
0a7de745 136#define TCP_CACHE_BUCKET_SIZE 5
3e170ce0
A
137
138static struct tcp_cache_head *tcp_cache;
139
140decl_lck_mtx_data(, tcp_cache_mtx);
141
0a7de745
A
142static lck_attr_t *tcp_cache_mtx_attr;
143static lck_grp_t *tcp_cache_mtx_grp;
144static lck_grp_attr_t *tcp_cache_mtx_grp_attr;
3e170ce0
A
145
146static struct tcp_heuristics_head *tcp_heuristics;
147
148decl_lck_mtx_data(, tcp_heuristics_mtx);
149
0a7de745
A
150static lck_attr_t *tcp_heuristic_mtx_attr;
151static lck_grp_t *tcp_heuristic_mtx_grp;
152static lck_grp_attr_t *tcp_heuristic_mtx_grp_attr;
3e170ce0 153
5ba3f43e
A
154static uint32_t tcp_backoff_maximum = 65536;
155
156SYSCTL_UINT(_net_inet_tcp, OID_AUTO, backoff_maximum, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 157 &tcp_backoff_maximum, 0, "Maximum time for which we won't try TFO");
5ba3f43e
A
158
159SYSCTL_SKMEM_TCP_INT(OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 160 static int, tcp_ecn_timeout, 60, "Initial minutes to wait before re-trying ECN");
5ba3f43e
A
161
162SYSCTL_SKMEM_TCP_INT(OID_AUTO, disable_tcp_heuristics, CTLFLAG_RW | CTLFLAG_LOCKED,
163 static int, disable_tcp_heuristics, 0, "Set to 1, to disable all TCP heuristics (TFO, ECN, MPTCP)");
3e170ce0 164
0a7de745
A
165static uint32_t
166tcp_min_to_hz(uint32_t minutes)
5ba3f43e 167{
0a7de745
A
168 if (minutes > 65536) {
169 return (uint32_t)65536 * 60 * TCP_RETRANSHZ;
170 }
5ba3f43e 171
0a7de745 172 return minutes * 60 * TCP_RETRANSHZ;
5ba3f43e 173}
39037602
A
174
/*
 * This number is coupled with tcp_ecn_timeout, because we want to prevent
 * integer overflow. Need to find an inexpensive way to prevent integer
 * overflow while still allowing a dynamic sysctl.
 */
#define TCP_CACHE_OVERFLOW_PROTECT      9

/* Number of SYN-losses we accept */
#define TFO_MAX_COOKIE_LOSS     2
#define ECN_MAX_SYN_LOSS        2
#define MPTCP_MAX_SYN_LOSS      2
#define MPTCP_SUCCESS_TRIGGER   10
#define ECN_MAX_DROPRST         1
#define ECN_MAX_DROPRXMT        4
#define ECN_MAX_SYNRST          4

/* Flags for setting/unsetting loss-heuristics, limited to 4 bytes */
#define TCPCACHE_F_TFO_REQ      0x01
#define TCPCACHE_F_TFO_DATA     0x02
#define TCPCACHE_F_ECN          0x04
#define TCPCACHE_F_MPTCP        0x08
#define TCPCACHE_F_ECN_DROPRST  0x10
#define TCPCACHE_F_ECN_DROPRXMT 0x20
#define TCPCACHE_F_TFO_REQ_RST  0x40
#define TCPCACHE_F_TFO_DATA_RST 0x80
#define TCPCACHE_F_ECN_SYNRST   0x100

/* Always retry ECN after backing off to this level for some heuristics */
#define ECN_RETRY_LIMIT 9
39037602 204
5ba3f43e
A
/*
 * Increment the per-interface statistic _stat_ on _ifp_: the IPv6 counter
 * block when _af_ is AF_INET6, the IPv4 one otherwise. Does nothing when
 * _ifp_ is NULL.
 *
 * The body is wrapped in do { } while (0) so that the expansion followed by
 * a semicolon is exactly one statement; a bare { } block breaks when the
 * macro is the sole body of an if that has an else branch.
 *
 * Note: _ifp_ is expanded more than once, so pass side-effect-free
 * expressions only.
 */
#define TCP_CACHE_INC_IFNET_STAT(_ifp_, _af_, _stat_) do { \
	if ((_ifp_) != NULL) { \
	        if ((_af_) == AF_INET6) { \
	                (_ifp_)->if_ipv6_stat->_stat_++;\
	        } else { \
	                (_ifp_)->if_ipv4_stat->_stat_++;\
	        }\
	}\
} while (0)
214
3e170ce0
A
/*
 * Round up to the next higher power of 2 (values that already are a power
 * of 2 are returned unchanged). See "Bit Twiddling Hacks".
 *
 * The highest set bit of (a - 1) is smeared into every lower bit position,
 * then 1 is added. Both 0 and values above 2^31 wrap around to 0.
 *
 * Might be worth moving this to a library so that others
 * (e.g., scale_to_powerof2()) can use this as well instead of a while-loop.
 */
static u_int32_t
tcp_cache_roundup2(u_int32_t a)
{
	u_int32_t v = a - 1;
	unsigned int shift;

	for (shift = 1; shift <= 16; shift <<= 1) {
		v |= v >> shift;
	}

	return v + 1;
}
234
0a7de745
A
235static void
236tcp_cache_hash_src(struct tcp_cache_key_src *tcks, struct tcp_heuristic_key *key)
3e170ce0 237{
5ba3f43e 238 struct ifnet *ifp = tcks->ifp;
3e170ce0
A
239 uint8_t len = sizeof(key->thk_net_signature);
240 uint16_t flags;
241
5ba3f43e 242 if (tcks->af == AF_INET6) {
3e170ce0
A
243 int ret;
244
245 key->thk_family = AF_INET6;
5ba3f43e 246 ret = ifnet_get_netsignature(ifp, AF_INET6, &len, &flags,
3e170ce0
A
247 key->thk_net_signature);
248
249 /*
250 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
251 * (we made sure that in the other cases it does not). So,
252 * in this case we should take the connection's address.
253 */
0a7de745 254 if (ret == ENOENT || ret == EINVAL) {
5ba3f43e 255 memcpy(&key->thk_ip.addr6, &tcks->laddr.addr6, sizeof(struct in6_addr));
0a7de745 256 }
3e170ce0
A
257 } else {
258 int ret;
259
260 key->thk_family = AF_INET;
5ba3f43e 261 ret = ifnet_get_netsignature(ifp, AF_INET, &len, &flags,
0a7de745 262 key->thk_net_signature);
3e170ce0
A
263
264 /*
265 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
266 * (we made sure that in the other cases it does not). So,
267 * in this case we should take the connection's address.
268 */
0a7de745 269 if (ret == ENOENT || ret == EINVAL) {
5ba3f43e 270 memcpy(&key->thk_ip.addr, &tcks->laddr.addr, sizeof(struct in_addr));
0a7de745 271 }
3e170ce0
A
272 }
273}
274
0a7de745
A
275static u_int16_t
276tcp_cache_hash(struct tcp_cache_key_src *tcks, struct tcp_cache_key *key)
3e170ce0
A
277{
278 u_int32_t hash;
279
280 bzero(key, sizeof(struct tcp_cache_key));
281
5ba3f43e 282 tcp_cache_hash_src(tcks, &key->tck_src);
3e170ce0 283
5ba3f43e 284 if (tcks->af == AF_INET6) {
3e170ce0 285 key->tck_family = AF_INET6;
5ba3f43e 286 memcpy(&key->tck_dst.addr6, &tcks->faddr.addr6,
3e170ce0
A
287 sizeof(struct in6_addr));
288 } else {
289 key->tck_family = AF_INET;
5ba3f43e 290 memcpy(&key->tck_dst.addr, &tcks->faddr.addr,
3e170ce0
A
291 sizeof(struct in_addr));
292 }
293
294 hash = net_flowhash(key, sizeof(struct tcp_cache_key),
295 tcp_cache_hash_seed);
296
0a7de745 297 return hash & (tcp_cache_size - 1);
3e170ce0
A
298}
299
0a7de745
A
300static void
301tcp_cache_unlock(struct tcp_cache_head *head)
3e170ce0
A
302{
303 lck_mtx_unlock(&head->tch_mtx);
304}
305
306/*
307 * Make sure that everything that happens after tcp_getcache_with_lock()
308 * is short enough to justify that you hold the per-bucket lock!!!
309 *
310 * Otherwise, better build another lookup-function that does not hold the
311 * lock and you copy out the bits and bytes.
312 *
313 * That's why we provide the head as a "return"-pointer so that the caller
314 * can give it back to use for tcp_cache_unlock().
315 */
0a7de745
A
316static struct tcp_cache *
317tcp_getcache_with_lock(struct tcp_cache_key_src *tcks,
5ba3f43e 318 int create, struct tcp_cache_head **headarg)
3e170ce0 319{
3e170ce0
A
320 struct tcp_cache *tpcache = NULL;
321 struct tcp_cache_head *head;
322 struct tcp_cache_key key;
323 u_int16_t hash;
324 int i = 0;
325
5ba3f43e 326 hash = tcp_cache_hash(tcks, &key);
3e170ce0
A
327 head = &tcp_cache[hash];
328
329 lck_mtx_lock(&head->tch_mtx);
330
331 /*** First step: Look for the tcp_cache in our bucket ***/
332 SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
0a7de745 333 if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0) {
3e170ce0 334 break;
0a7de745 335 }
3e170ce0
A
336
337 i++;
338 }
339
340 /*** Second step: If it's not there, create/recycle it ***/
341 if ((tpcache == NULL) && create) {
342 if (i >= TCP_CACHE_BUCKET_SIZE) {
343 struct tcp_cache *oldest_cache = NULL;
344 u_int32_t max_age = 0;
345
346 /* Look for the oldest tcp_cache in the bucket */
347 SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
348 u_int32_t age = tcp_now - tpcache->tc_last_access;
349 if (age > max_age) {
350 max_age = age;
351 oldest_cache = tpcache;
352 }
353 }
354 VERIFY(oldest_cache != NULL);
355
356 tpcache = oldest_cache;
357
358 /* We recycle, thus let's indicate that there is no cookie */
359 tpcache->tc_tfo_cookie_len = 0;
360 } else {
361 /* Create a new cache and add it to the list */
362 tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP,
363 M_NOWAIT | M_ZERO);
0a7de745 364 if (tpcache == NULL) {
3e170ce0 365 goto out_null;
0a7de745 366 }
3e170ce0
A
367
368 SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
369 }
370
371 memcpy(&tpcache->tc_key, &key, sizeof(key));
372 }
373
0a7de745 374 if (tpcache == NULL) {
3e170ce0 375 goto out_null;
0a7de745 376 }
3e170ce0
A
377
378 /* Update timestamp for garbage collection purposes */
379 tpcache->tc_last_access = tcp_now;
380 *headarg = head;
381
0a7de745 382 return tpcache;
3e170ce0
A
383
384out_null:
385 tcp_cache_unlock(head);
0a7de745 386 return NULL;
3e170ce0
A
387}
388
0a7de745
A
389static void
390tcp_cache_key_src_create(struct tcpcb *tp, struct tcp_cache_key_src *tcks)
5ba3f43e
A
391{
392 struct inpcb *inp = tp->t_inpcb;
393 memset(tcks, 0, sizeof(*tcks));
394
395 tcks->ifp = inp->inp_last_outifp;
396
397 if (inp->inp_vflag & INP_IPV6) {
398 memcpy(&tcks->laddr.addr6, &inp->in6p_laddr, sizeof(struct in6_addr));
399 memcpy(&tcks->faddr.addr6, &inp->in6p_faddr, sizeof(struct in6_addr));
400 tcks->af = AF_INET6;
401 } else {
402 memcpy(&tcks->laddr.addr, &inp->inp_laddr, sizeof(struct in_addr));
403 memcpy(&tcks->faddr.addr, &inp->inp_faddr, sizeof(struct in_addr));
404 tcks->af = AF_INET;
405 }
406
407 return;
408}
409
0a7de745
A
410static void
411tcp_cache_set_cookie_common(struct tcp_cache_key_src *tcks, u_char *cookie, u_int8_t len)
3e170ce0
A
412{
413 struct tcp_cache_head *head;
414 struct tcp_cache *tpcache;
415
416 /* Call lookup/create function */
5ba3f43e 417 tpcache = tcp_getcache_with_lock(tcks, 1, &head);
0a7de745 418 if (tpcache == NULL) {
3e170ce0 419 return;
0a7de745 420 }
3e170ce0 421
a39ff7e2 422 tpcache->tc_tfo_cookie_len = len > TFO_COOKIE_LEN_MAX ?
0a7de745 423 TFO_COOKIE_LEN_MAX : len;
a39ff7e2 424 memcpy(tpcache->tc_tfo_cookie, cookie, tpcache->tc_tfo_cookie_len);
3e170ce0
A
425
426 tcp_cache_unlock(head);
427}
428
0a7de745
A
429void
430tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len)
5ba3f43e
A
431{
432 struct tcp_cache_key_src tcks;
433
434 tcp_cache_key_src_create(tp, &tcks);
435 tcp_cache_set_cookie_common(&tcks, cookie, len);
436}
437
0a7de745
A
438static int
439tcp_cache_get_cookie_common(struct tcp_cache_key_src *tcks, u_char *cookie, u_int8_t *len)
3e170ce0
A
440{
441 struct tcp_cache_head *head;
442 struct tcp_cache *tpcache;
443
444 /* Call lookup/create function */
5ba3f43e
A
445 tpcache = tcp_getcache_with_lock(tcks, 1, &head);
446 if (tpcache == NULL) {
0a7de745 447 return 0;
5ba3f43e 448 }
3e170ce0
A
449
450 if (tpcache->tc_tfo_cookie_len == 0) {
451 tcp_cache_unlock(head);
0a7de745 452 return 0;
3e170ce0
A
453 }
454
455 /*
456 * Not enough space - this should never happen as it has been checked
457 * in tcp_tfo_check. So, fail here!
458 */
459 VERIFY(tpcache->tc_tfo_cookie_len <= *len);
460
461 memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len);
462 *len = tpcache->tc_tfo_cookie_len;
463
464 tcp_cache_unlock(head);
465
0a7de745 466 return 1;
3e170ce0
A
467}
468
5ba3f43e
A
469/*
470 * Get the cookie related to 'tp', and copy it into 'cookie', provided that len
471 * is big enough (len designates the available memory.
472 * Upon return, 'len' is set to the cookie's length.
473 *
474 * Returns 0 if we should request a cookie.
475 * Returns 1 if the cookie has been found and written.
476 */
0a7de745
A
477int
478tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len)
5ba3f43e
A
479{
480 struct tcp_cache_key_src tcks;
481
482 tcp_cache_key_src_create(tp, &tcks);
483 return tcp_cache_get_cookie_common(&tcks, cookie, len);
484}
485
0a7de745
A
486static unsigned int
487tcp_cache_get_cookie_len_common(struct tcp_cache_key_src *tcks)
3e170ce0
A
488{
489 struct tcp_cache_head *head;
490 struct tcp_cache *tpcache;
491 unsigned int cookie_len;
492
493 /* Call lookup/create function */
5ba3f43e 494 tpcache = tcp_getcache_with_lock(tcks, 1, &head);
0a7de745
A
495 if (tpcache == NULL) {
496 return 0;
497 }
3e170ce0
A
498
499 cookie_len = tpcache->tc_tfo_cookie_len;
500
501 tcp_cache_unlock(head);
502
503 return cookie_len;
504}
505
0a7de745
A
506unsigned int
507tcp_cache_get_cookie_len(struct tcpcb *tp)
5ba3f43e
A
508{
509 struct tcp_cache_key_src tcks;
510
511 tcp_cache_key_src_create(tp, &tcks);
512 return tcp_cache_get_cookie_len_common(&tcks);
513}
514
0a7de745
A
515static u_int16_t
516tcp_heuristics_hash(struct tcp_cache_key_src *tcks, struct tcp_heuristic_key *key)
3e170ce0
A
517{
518 u_int32_t hash;
519
520 bzero(key, sizeof(struct tcp_heuristic_key));
521
5ba3f43e 522 tcp_cache_hash_src(tcks, key);
3e170ce0
A
523
524 hash = net_flowhash(key, sizeof(struct tcp_heuristic_key),
525 tcp_cache_hash_seed);
526
0a7de745 527 return hash & (tcp_cache_size - 1);
3e170ce0
A
528}
529
0a7de745
A
530static void
531tcp_heuristic_unlock(struct tcp_heuristics_head *head)
3e170ce0
A
532{
533 lck_mtx_unlock(&head->thh_mtx);
534}
535
536/*
537 * Make sure that everything that happens after tcp_getheuristic_with_lock()
538 * is short enough to justify that you hold the per-bucket lock!!!
539 *
540 * Otherwise, better build another lookup-function that does not hold the
541 * lock and you copy out the bits and bytes.
542 *
543 * That's why we provide the head as a "return"-pointer so that the caller
544 * can give it back to use for tcp_heur_unlock().
545 *
546 *
547 * ToDo - way too much code-duplication. We should create an interface to handle
548 * bucketized hashtables with recycling of the oldest element.
549 */
0a7de745
A
550static struct tcp_heuristic *
551tcp_getheuristic_with_lock(struct tcp_cache_key_src *tcks,
3e170ce0
A
552 int create, struct tcp_heuristics_head **headarg)
553{
3e170ce0
A
554 struct tcp_heuristic *tpheur = NULL;
555 struct tcp_heuristics_head *head;
556 struct tcp_heuristic_key key;
557 u_int16_t hash;
558 int i = 0;
559
5ba3f43e 560 hash = tcp_heuristics_hash(tcks, &key);
3e170ce0
A
561 head = &tcp_heuristics[hash];
562
563 lck_mtx_lock(&head->thh_mtx);
564
565 /*** First step: Look for the tcp_heur in our bucket ***/
566 SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
0a7de745 567 if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0) {
3e170ce0 568 break;
0a7de745 569 }
3e170ce0
A
570
571 i++;
572 }
573
574 /*** Second step: If it's not there, create/recycle it ***/
575 if ((tpheur == NULL) && create) {
576 if (i >= TCP_CACHE_BUCKET_SIZE) {
577 struct tcp_heuristic *oldest_heur = NULL;
578 u_int32_t max_age = 0;
579
580 /* Look for the oldest tcp_heur in the bucket */
581 SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
582 u_int32_t age = tcp_now - tpheur->th_last_access;
583 if (age > max_age) {
584 max_age = age;
585 oldest_heur = tpheur;
586 }
587 }
588 VERIFY(oldest_heur != NULL);
589
590 tpheur = oldest_heur;
591
592 /* We recycle - set everything to 0 */
4bd07ac2 593 bzero(tpheur->th_val_start,
0a7de745 594 tpheur->th_val_end - tpheur->th_val_start);
3e170ce0
A
595 } else {
596 /* Create a new heuristic and add it to the list */
597 tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
598 M_NOWAIT | M_ZERO);
0a7de745 599 if (tpheur == NULL) {
3e170ce0 600 goto out_null;
0a7de745 601 }
3e170ce0
A
602
603 SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
604 }
605
4bd07ac2
A
606 /*
607 * Set to tcp_now, to make sure it won't be > than tcp_now in the
608 * near future.
609 */
610 tpheur->th_ecn_backoff = tcp_now;
5ba3f43e 611 tpheur->th_tfo_backoff_until = tcp_now;
39037602 612 tpheur->th_mptcp_backoff = tcp_now;
5ba3f43e 613 tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);
4bd07ac2 614
3e170ce0
A
615 memcpy(&tpheur->th_key, &key, sizeof(key));
616 }
617
0a7de745 618 if (tpheur == NULL) {
3e170ce0 619 goto out_null;
0a7de745 620 }
3e170ce0
A
621
622 /* Update timestamp for garbage collection purposes */
623 tpheur->th_last_access = tcp_now;
624 *headarg = head;
625
0a7de745 626 return tpheur;
3e170ce0
A
627
628out_null:
629 tcp_heuristic_unlock(head);
0a7de745 630 return NULL;
3e170ce0
A
631}
632
0a7de745
A
633static void
634tcp_heuristic_reset_counters(struct tcp_cache_key_src *tcks, u_int8_t flags)
3e170ce0
A
635{
636 struct tcp_heuristics_head *head;
39037602 637 struct tcp_heuristic *tpheur;
3e170ce0 638
39037602 639 /*
cb323159
A
640 * Always create heuristics here because MPTCP needs to write success
641 * into it. Thus, we always end up creating them.
39037602 642 */
cb323159 643 tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
0a7de745 644 if (tpheur == NULL) {
3e170ce0 645 return;
0a7de745 646 }
3e170ce0 647
5ba3f43e 648 if (flags & TCPCACHE_F_TFO_DATA) {
cb323159
A
649 if (tpheur->th_tfo_data_loss >= TFO_MAX_COOKIE_LOSS) {
650 os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-data loss to 0 from %u on heur %lx\n",
651 __func__, tpheur->th_tfo_data_loss, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
652 }
5ba3f43e
A
653 tpheur->th_tfo_data_loss = 0;
654 }
655
656 if (flags & TCPCACHE_F_TFO_REQ) {
cb323159
A
657 if (tpheur->th_tfo_req_loss >= TFO_MAX_COOKIE_LOSS) {
658 os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-req loss to 0 from %u on heur %lx\n",
659 __func__, tpheur->th_tfo_req_loss, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
660 }
5ba3f43e
A
661 tpheur->th_tfo_req_loss = 0;
662 }
663
664 if (flags & TCPCACHE_F_TFO_DATA_RST) {
cb323159
A
665 if (tpheur->th_tfo_data_rst >= TFO_MAX_COOKIE_LOSS) {
666 os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-data RST to 0 from %u on heur %lx\n",
667 __func__, tpheur->th_tfo_data_rst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
668 }
5ba3f43e
A
669 tpheur->th_tfo_data_rst = 0;
670 }
671
672 if (flags & TCPCACHE_F_TFO_REQ_RST) {
cb323159
A
673 if (tpheur->th_tfo_req_rst >= TFO_MAX_COOKIE_LOSS) {
674 os_log(OS_LOG_DEFAULT, "%s: Resetting TFO-req RST to 0 from %u on heur %lx\n",
675 __func__, tpheur->th_tfo_req_rst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
676 }
5ba3f43e
A
677 tpheur->th_tfo_req_rst = 0;
678 }
39037602 679
5ba3f43e 680 if (flags & TCPCACHE_F_ECN) {
cb323159
A
681 if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS || tpheur->th_ecn_synrst >= ECN_MAX_SYNRST) {
682 os_log(OS_LOG_DEFAULT, "%s: Resetting ECN-loss to 0 from %u and synrst from %u on heur %lx\n",
683 __func__, tpheur->th_ecn_loss, tpheur->th_ecn_synrst, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
684 }
39037602 685 tpheur->th_ecn_loss = 0;
5ba3f43e
A
686 tpheur->th_ecn_synrst = 0;
687 }
39037602 688
0a7de745 689 if (flags & TCPCACHE_F_MPTCP) {
39037602 690 tpheur->th_mptcp_loss = 0;
cb323159
A
691 if (tpheur->th_mptcp_success < MPTCP_SUCCESS_TRIGGER) {
692 tpheur->th_mptcp_success++;
693
694 if (tpheur->th_mptcp_success == MPTCP_SUCCESS_TRIGGER) {
695 os_log(mptcp_log_handle, "%s disabling heuristics for 12 hours", __func__);
696 tpheur->th_mptcp_heuristic_disabled = 1;
697 /* Disable heuristics for 12 hours */
698 tpheur->th_mptcp_backoff = tcp_now + tcp_min_to_hz(tcp_ecn_timeout * 12);
699 }
700 }
0a7de745 701 }
3e170ce0
A
702
703 tcp_heuristic_unlock(head);
704}
705
0a7de745
A
706void
707tcp_heuristic_tfo_success(struct tcpcb *tp)
39037602 708{
5ba3f43e
A
709 struct tcp_cache_key_src tcks;
710 uint8_t flag = 0;
711
712 tcp_cache_key_src_create(tp, &tcks);
713
0a7de745 714 if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
5ba3f43e 715 flag = (TCPCACHE_F_TFO_DATA | TCPCACHE_F_TFO_REQ |
0a7de745
A
716 TCPCACHE_F_TFO_DATA_RST | TCPCACHE_F_TFO_REQ_RST);
717 }
718 if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
5ba3f43e 719 flag = (TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_REQ_RST);
0a7de745 720 }
5ba3f43e
A
721
722 tcp_heuristic_reset_counters(&tcks, flag);
39037602
A
723}
724
0a7de745
A
725void
726tcp_heuristic_mptcp_success(struct tcpcb *tp)
39037602 727{
5ba3f43e
A
728 struct tcp_cache_key_src tcks;
729
730 tcp_cache_key_src_create(tp, &tcks);
731 tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_MPTCP);
39037602
A
732}
733
0a7de745
A
734void
735tcp_heuristic_ecn_success(struct tcpcb *tp)
39037602 736{
5ba3f43e
A
737 struct tcp_cache_key_src tcks;
738
739 tcp_cache_key_src_create(tp, &tcks);
740 tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_ECN);
39037602
A
741}
742
0a7de745
A
743static void
744__tcp_heuristic_tfo_middlebox_common(struct tcp_heuristic *tpheur)
3e170ce0 745{
0a7de745 746 if (tpheur->th_tfo_in_backoff) {
3e170ce0 747 return;
0a7de745 748 }
3e170ce0 749
5ba3f43e 750 tpheur->th_tfo_in_backoff = 1;
3e170ce0 751
5ba3f43e
A
752 if (tpheur->th_tfo_enabled_time) {
753 uint32_t old_backoff = tpheur->th_tfo_backoff;
754
755 tpheur->th_tfo_backoff -= (tcp_now - tpheur->th_tfo_enabled_time);
0a7de745 756 if (tpheur->th_tfo_backoff > old_backoff) {
5ba3f43e 757 tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);
0a7de745 758 }
5ba3f43e 759 }
3e170ce0 760
5ba3f43e
A
761 tpheur->th_tfo_backoff_until = tcp_now + tpheur->th_tfo_backoff;
762
763 /* Then, increase the backoff time */
764 tpheur->th_tfo_backoff *= 2;
765
0a7de745 766 if (tpheur->th_tfo_backoff > tcp_min_to_hz(tcp_backoff_maximum)) {
5ba3f43e 767 tpheur->th_tfo_backoff = tcp_min_to_hz(tcp_ecn_timeout);
0a7de745 768 }
cb323159
A
769
770 os_log(OS_LOG_DEFAULT, "%s disable TFO until %u now %u on %lx\n", __func__,
771 tpheur->th_tfo_backoff_until, tcp_now, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
3e170ce0
A
772}
773
0a7de745
A
774static void
775tcp_heuristic_tfo_middlebox_common(struct tcp_cache_key_src *tcks)
3e170ce0
A
776{
777 struct tcp_heuristics_head *head;
5ba3f43e 778 struct tcp_heuristic *tpheur;
3e170ce0 779
5ba3f43e 780 tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
0a7de745 781 if (tpheur == NULL) {
3e170ce0 782 return;
0a7de745 783 }
3e170ce0 784
5ba3f43e 785 __tcp_heuristic_tfo_middlebox_common(tpheur);
3e170ce0
A
786
787 tcp_heuristic_unlock(head);
3e170ce0
A
788}
789
0a7de745
A
790static void
791tcp_heuristic_inc_counters(struct tcp_cache_key_src *tcks,
5ba3f43e 792 u_int32_t flags)
3e170ce0
A
793{
794 struct tcp_heuristics_head *head;
795 struct tcp_heuristic *tpheur;
796
5ba3f43e 797 tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
0a7de745 798 if (tpheur == NULL) {
3e170ce0 799 return;
0a7de745 800 }
3e170ce0 801
39037602 802 /* Limit to prevent integer-overflow during exponential backoff */
5ba3f43e
A
803 if ((flags & TCPCACHE_F_TFO_DATA) && tpheur->th_tfo_data_loss < TCP_CACHE_OVERFLOW_PROTECT) {
804 tpheur->th_tfo_data_loss++;
805
0a7de745 806 if (tpheur->th_tfo_data_loss >= TFO_MAX_COOKIE_LOSS) {
5ba3f43e 807 __tcp_heuristic_tfo_middlebox_common(tpheur);
0a7de745 808 }
5ba3f43e
A
809 }
810
811 if ((flags & TCPCACHE_F_TFO_REQ) && tpheur->th_tfo_req_loss < TCP_CACHE_OVERFLOW_PROTECT) {
812 tpheur->th_tfo_req_loss++;
813
0a7de745 814 if (tpheur->th_tfo_req_loss >= TFO_MAX_COOKIE_LOSS) {
5ba3f43e 815 __tcp_heuristic_tfo_middlebox_common(tpheur);
0a7de745 816 }
5ba3f43e
A
817 }
818
819 if ((flags & TCPCACHE_F_TFO_DATA_RST) && tpheur->th_tfo_data_rst < TCP_CACHE_OVERFLOW_PROTECT) {
820 tpheur->th_tfo_data_rst++;
821
0a7de745 822 if (tpheur->th_tfo_data_rst >= TFO_MAX_COOKIE_LOSS) {
5ba3f43e 823 __tcp_heuristic_tfo_middlebox_common(tpheur);
0a7de745 824 }
5ba3f43e
A
825 }
826
827 if ((flags & TCPCACHE_F_TFO_REQ_RST) && tpheur->th_tfo_req_rst < TCP_CACHE_OVERFLOW_PROTECT) {
828 tpheur->th_tfo_req_rst++;
829
0a7de745 830 if (tpheur->th_tfo_req_rst >= TFO_MAX_COOKIE_LOSS) {
5ba3f43e 831 __tcp_heuristic_tfo_middlebox_common(tpheur);
0a7de745 832 }
5ba3f43e 833 }
4bd07ac2 834
cb323159
A
835 if ((flags & TCPCACHE_F_ECN) &&
836 tpheur->th_ecn_loss < TCP_CACHE_OVERFLOW_PROTECT &&
837 TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
4bd07ac2
A
838 tpheur->th_ecn_loss++;
839 if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
840 tcpstat.tcps_ecn_fallback_synloss++;
5ba3f43e 841 TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af, ecn_fallback_synloss);
4bd07ac2 842 tpheur->th_ecn_backoff = tcp_now +
5ba3f43e 843 (tcp_min_to_hz(tcp_ecn_timeout) <<
39037602 844 (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
cb323159
A
845
846 os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for SYN-loss\n",
847 __func__, tpheur->th_ecn_backoff, tcp_now,
848 (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
39037602
A
849 }
850 }
851
852 if ((flags & TCPCACHE_F_MPTCP) &&
cb323159
A
853 tpheur->th_mptcp_loss < TCP_CACHE_OVERFLOW_PROTECT &&
854 tpheur->th_mptcp_heuristic_disabled == 0) {
39037602
A
855 tpheur->th_mptcp_loss++;
856 if (tpheur->th_mptcp_loss >= MPTCP_MAX_SYN_LOSS) {
857 /*
858 * Yes, we take tcp_ecn_timeout, to avoid adding yet
859 * another sysctl that is just used for testing.
860 */
861 tpheur->th_mptcp_backoff = tcp_now +
5ba3f43e 862 (tcp_min_to_hz(tcp_ecn_timeout) <<
39037602 863 (tpheur->th_mptcp_loss - MPTCP_MAX_SYN_LOSS));
cb323159
A
864 tpheur->th_mptcp_in_backoff = 1;
865
866 os_log(OS_LOG_DEFAULT, "%s disable MPTCP until %u now %u on %lx\n",
867 __func__, tpheur->th_mptcp_backoff, tcp_now,
868 (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
39037602
A
869 }
870 }
871
872 if ((flags & TCPCACHE_F_ECN_DROPRST) &&
cb323159
A
873 tpheur->th_ecn_droprst < TCP_CACHE_OVERFLOW_PROTECT &&
874 TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
39037602
A
875 tpheur->th_ecn_droprst++;
876 if (tpheur->th_ecn_droprst >= ECN_MAX_DROPRST) {
877 tcpstat.tcps_ecn_fallback_droprst++;
5ba3f43e
A
878 TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
879 ecn_fallback_droprst);
39037602 880 tpheur->th_ecn_backoff = tcp_now +
5ba3f43e 881 (tcp_min_to_hz(tcp_ecn_timeout) <<
39037602 882 (tpheur->th_ecn_droprst - ECN_MAX_DROPRST));
cb323159
A
883
884 os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for drop-RST\n",
885 __func__, tpheur->th_ecn_backoff, tcp_now,
886 (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
4bd07ac2
A
887 }
888 }
3e170ce0 889
39037602 890 if ((flags & TCPCACHE_F_ECN_DROPRXMT) &&
cb323159
A
891 tpheur->th_ecn_droprxmt < TCP_CACHE_OVERFLOW_PROTECT &&
892 TSTMP_LEQ(tpheur->th_ecn_backoff, tcp_now)) {
39037602
A
893 tpheur->th_ecn_droprxmt++;
894 if (tpheur->th_ecn_droprxmt >= ECN_MAX_DROPRXMT) {
895 tcpstat.tcps_ecn_fallback_droprxmt++;
5ba3f43e
A
896 TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
897 ecn_fallback_droprxmt);
39037602 898 tpheur->th_ecn_backoff = tcp_now +
5ba3f43e 899 (tcp_min_to_hz(tcp_ecn_timeout) <<
39037602 900 (tpheur->th_ecn_droprxmt - ECN_MAX_DROPRXMT));
cb323159
A
901
902 os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for drop-Rxmit\n",
903 __func__, tpheur->th_ecn_backoff, tcp_now,
904 (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
39037602
A
905 }
906 }
5ba3f43e
A
907 if ((flags & TCPCACHE_F_ECN_SYNRST) &&
908 tpheur->th_ecn_synrst < TCP_CACHE_OVERFLOW_PROTECT) {
909 tpheur->th_ecn_synrst++;
910 if (tpheur->th_ecn_synrst >= ECN_MAX_SYNRST) {
911 tcpstat.tcps_ecn_fallback_synrst++;
912 TCP_CACHE_INC_IFNET_STAT(tcks->ifp, tcks->af,
913 ecn_fallback_synrst);
914 tpheur->th_ecn_backoff = tcp_now +
915 (tcp_min_to_hz(tcp_ecn_timeout) <<
916 (tpheur->th_ecn_synrst - ECN_MAX_SYNRST));
cb323159
A
917
918 os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx for SYN-RST\n",
919 __func__, tpheur->th_ecn_backoff, tcp_now,
920 (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
5ba3f43e
A
921 }
922 }
3e170ce0
A
923 tcp_heuristic_unlock(head);
924}
925
0a7de745
A
926void
927tcp_heuristic_tfo_loss(struct tcpcb *tp)
39037602 928{
5ba3f43e
A
929 struct tcp_cache_key_src tcks;
930 uint32_t flag = 0;
931
cb323159
A
932 if (symptoms_is_wifi_lossy() &&
933 IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
934 return;
935 }
936
5ba3f43e
A
937 tcp_cache_key_src_create(tp, &tcks);
938
0a7de745 939 if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
5ba3f43e 940 flag = (TCPCACHE_F_TFO_DATA | TCPCACHE_F_TFO_REQ);
0a7de745
A
941 }
942 if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
5ba3f43e 943 flag = TCPCACHE_F_TFO_REQ;
0a7de745 944 }
5ba3f43e
A
945
946 tcp_heuristic_inc_counters(&tcks, flag);
947}
948
0a7de745
A
949void
950tcp_heuristic_tfo_rst(struct tcpcb *tp)
5ba3f43e
A
951{
952 struct tcp_cache_key_src tcks;
953 uint32_t flag = 0;
954
955 tcp_cache_key_src_create(tp, &tcks);
956
0a7de745 957 if (tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) {
5ba3f43e 958 flag = (TCPCACHE_F_TFO_DATA_RST | TCPCACHE_F_TFO_REQ_RST);
0a7de745
A
959 }
960 if (tp->t_tfo_stats & TFO_S_COOKIE_REQ) {
5ba3f43e 961 flag = TCPCACHE_F_TFO_REQ_RST;
0a7de745 962 }
5ba3f43e
A
963
964 tcp_heuristic_inc_counters(&tcks, flag);
39037602
A
965}
966
0a7de745
A
967void
968tcp_heuristic_mptcp_loss(struct tcpcb *tp)
39037602 969{
5ba3f43e
A
970 struct tcp_cache_key_src tcks;
971
cb323159
A
972 if (symptoms_is_wifi_lossy() &&
973 IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
974 return;
975 }
976
5ba3f43e
A
977 tcp_cache_key_src_create(tp, &tcks);
978
979 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_MPTCP);
39037602
A
980}
981
0a7de745
A
982void
983tcp_heuristic_ecn_loss(struct tcpcb *tp)
39037602 984{
5ba3f43e
A
985 struct tcp_cache_key_src tcks;
986
cb323159
A
987 if (symptoms_is_wifi_lossy() &&
988 IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
989 return;
990 }
991
5ba3f43e
A
992 tcp_cache_key_src_create(tp, &tcks);
993
994 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN);
39037602
A
995}
996
0a7de745
A
997void
998tcp_heuristic_ecn_droprst(struct tcpcb *tp)
39037602 999{
5ba3f43e
A
1000 struct tcp_cache_key_src tcks;
1001
1002 tcp_cache_key_src_create(tp, &tcks);
1003
1004 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRST);
39037602
A
1005}
1006
0a7de745
A
1007void
1008tcp_heuristic_ecn_droprxmt(struct tcpcb *tp)
39037602 1009{
5ba3f43e
A
1010 struct tcp_cache_key_src tcks;
1011
1012 tcp_cache_key_src_create(tp, &tcks);
1013
1014 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRXMT);
39037602
A
1015}
1016
0a7de745
A
1017void
1018tcp_heuristic_ecn_synrst(struct tcpcb *tp)
3e170ce0 1019{
5ba3f43e 1020 struct tcp_cache_key_src tcks;
3e170ce0 1021
5ba3f43e 1022 tcp_cache_key_src_create(tp, &tcks);
3e170ce0 1023
5ba3f43e
A
1024 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_SYNRST);
1025}
3e170ce0 1026
0a7de745
A
1027void
1028tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
5ba3f43e
A
1029{
1030 struct tcp_cache_key_src tcks;
1031
1032 tp->t_tfo_flags |= TFO_F_HEURISTIC_DONE;
1033
1034 tcp_cache_key_src_create(tp, &tcks);
1035 tcp_heuristic_tfo_middlebox_common(&tcks);
3e170ce0
A
1036}
1037
0a7de745
A
1038static void
1039tcp_heuristic_ecn_aggressive_common(struct tcp_cache_key_src *tcks)
4bd07ac2
A
1040{
1041 struct tcp_heuristics_head *head;
1042 struct tcp_heuristic *tpheur;
1043
5ba3f43e 1044 tpheur = tcp_getheuristic_with_lock(tcks, 1, &head);
0a7de745 1045 if (tpheur == NULL) {
4bd07ac2 1046 return;
0a7de745 1047 }
4bd07ac2 1048
cb323159
A
1049 if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
1050 /* We are already in aggressive mode */
1051 tcp_heuristic_unlock(head);
1052 return;
1053 }
1054
4bd07ac2
A
1055 /* Must be done before, otherwise we will start off with expo-backoff */
1056 tpheur->th_ecn_backoff = tcp_now +
0a7de745 1057 (tcp_min_to_hz(tcp_ecn_timeout) << (tpheur->th_ecn_aggressive));
4bd07ac2
A
1058
1059 /*
39037602 1060 * Ugly way to prevent integer overflow... limit to prevent in
4bd07ac2
A
1061 * overflow during exp. backoff.
1062 */
0a7de745 1063 if (tpheur->th_ecn_aggressive < TCP_CACHE_OVERFLOW_PROTECT) {
4bd07ac2 1064 tpheur->th_ecn_aggressive++;
0a7de745 1065 }
4bd07ac2
A
1066
1067 tcp_heuristic_unlock(head);
cb323159
A
1068
1069 os_log(OS_LOG_DEFAULT, "%s disable ECN until %u now %u on %lx\n", __func__,
1070 tpheur->th_ecn_backoff, tcp_now, (unsigned long)VM_KERNEL_ADDRPERM(tpheur));
4bd07ac2
A
1071}
1072
0a7de745
A
1073void
1074tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
5ba3f43e
A
1075{
1076 struct tcp_cache_key_src tcks;
1077
1078 tcp_cache_key_src_create(tp, &tcks);
1079 tcp_heuristic_ecn_aggressive_common(&tcks);
1080}
1081
0a7de745
A
1082static boolean_t
1083tcp_heuristic_do_tfo_common(struct tcp_cache_key_src *tcks)
3e170ce0
A
1084{
1085 struct tcp_heuristics_head *head;
1086 struct tcp_heuristic *tpheur;
1087
0a7de745
A
1088 if (disable_tcp_heuristics) {
1089 return TRUE;
1090 }
39037602 1091
3e170ce0 1092 /* Get the tcp-heuristic. */
5ba3f43e 1093 tpheur = tcp_getheuristic_with_lock(tcks, 0, &head);
0a7de745
A
1094 if (tpheur == NULL) {
1095 return TRUE;
1096 }
3e170ce0 1097
0a7de745 1098 if (tpheur->th_tfo_in_backoff == 0) {
5ba3f43e 1099 goto tfo_ok;
0a7de745 1100 }
3e170ce0 1101
5ba3f43e
A
1102 if (TSTMP_GT(tcp_now, tpheur->th_tfo_backoff_until)) {
1103 tpheur->th_tfo_in_backoff = 0;
1104 tpheur->th_tfo_enabled_time = tcp_now;
3e170ce0 1105
5ba3f43e 1106 goto tfo_ok;
3e170ce0
A
1107 }
1108
3e170ce0 1109 tcp_heuristic_unlock(head);
0a7de745 1110 return FALSE;
3e170ce0 1111
5ba3f43e
A
1112tfo_ok:
1113 tcp_heuristic_unlock(head);
0a7de745 1114 return TRUE;
39037602
A
1115}
1116
0a7de745
A
1117boolean_t
1118tcp_heuristic_do_tfo(struct tcpcb *tp)
5ba3f43e
A
1119{
1120 struct tcp_cache_key_src tcks;
1121
1122 tcp_cache_key_src_create(tp, &tcks);
0a7de745
A
1123 if (tcp_heuristic_do_tfo_common(&tcks)) {
1124 return TRUE;
1125 }
5ba3f43e 1126
0a7de745 1127 return FALSE;
5ba3f43e 1128}
cb323159
A
1129/*
1130 * @return:
1131 * 0 Enable MPTCP (we are still discovering middleboxes)
1132 * -1 Enable MPTCP (heuristics have been temporarily disabled)
1133 * 1 Disable MPTCP
1134 */
1135int
0a7de745 1136tcp_heuristic_do_mptcp(struct tcpcb *tp)
39037602 1137{
5ba3f43e
A
1138 struct tcp_cache_key_src tcks;
1139 struct tcp_heuristics_head *head = NULL;
39037602 1140 struct tcp_heuristic *tpheur;
cb323159 1141 int ret = 0;
39037602 1142
cb323159
A
1143 if (disable_tcp_heuristics ||
1144 (tptomptp(tp)->mpt_mpte->mpte_flags & MPTE_FORCE_ENABLE)) {
1145 return 0;
0a7de745 1146 }
39037602 1147
5ba3f43e
A
1148 tcp_cache_key_src_create(tp, &tcks);
1149
39037602 1150 /* Get the tcp-heuristic. */
5ba3f43e 1151 tpheur = tcp_getheuristic_with_lock(&tcks, 0, &head);
0a7de745 1152 if (tpheur == NULL) {
cb323159
A
1153 return 0;
1154 }
1155
1156 if (tpheur->th_mptcp_in_backoff == 0 ||
1157 tpheur->th_mptcp_heuristic_disabled == 1) {
1158 goto mptcp_ok;
0a7de745 1159 }
39037602 1160
0a7de745 1161 if (TSTMP_GT(tpheur->th_mptcp_backoff, tcp_now)) {
5ba3f43e 1162 goto fallback;
0a7de745 1163 }
39037602 1164
cb323159 1165 tpheur->th_mptcp_in_backoff = 0;
39037602 1166
cb323159
A
1167mptcp_ok:
1168 if (tpheur->th_mptcp_heuristic_disabled) {
1169 ret = -1;
1170
1171 if (TSTMP_GT(tcp_now, tpheur->th_mptcp_backoff)) {
1172 tpheur->th_mptcp_heuristic_disabled = 0;
1173 tpheur->th_mptcp_success = 0;
1174 }
1175 }
1176
1177 tcp_heuristic_unlock(head);
1178 return ret;
5ba3f43e
A
1179
1180fallback:
0a7de745 1181 if (head) {
5ba3f43e 1182 tcp_heuristic_unlock(head);
0a7de745 1183 }
5ba3f43e 1184
0a7de745 1185 if (tptomptp(tp)->mpt_mpte->mpte_flags & MPTE_FIRSTPARTY) {
5ba3f43e 1186 tcpstat.tcps_mptcp_fp_heuristic_fallback++;
0a7de745 1187 } else {
5ba3f43e 1188 tcpstat.tcps_mptcp_heuristic_fallback++;
0a7de745 1189 }
5ba3f43e 1190
cb323159 1191 return 1;
3e170ce0
A
1192}
1193
0a7de745
A
1194static boolean_t
1195tcp_heuristic_do_ecn_common(struct tcp_cache_key_src *tcks)
4bd07ac2
A
1196{
1197 struct tcp_heuristics_head *head;
1198 struct tcp_heuristic *tpheur;
39037602
A
1199 boolean_t ret = TRUE;
1200
0a7de745
A
1201 if (disable_tcp_heuristics) {
1202 return TRUE;
1203 }
4bd07ac2
A
1204
1205 /* Get the tcp-heuristic. */
5ba3f43e 1206 tpheur = tcp_getheuristic_with_lock(tcks, 0, &head);
0a7de745 1207 if (tpheur == NULL) {
4bd07ac2 1208 return ret;
0a7de745 1209 }
4bd07ac2 1210
39037602
A
1211 if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
1212 ret = FALSE;
1213 } else {
1214 /* Reset the following counters to start re-evaluating */
0a7de745 1215 if (tpheur->th_ecn_droprst >= ECN_RETRY_LIMIT) {
39037602 1216 tpheur->th_ecn_droprst = 0;
0a7de745
A
1217 }
1218 if (tpheur->th_ecn_droprxmt >= ECN_RETRY_LIMIT) {
39037602 1219 tpheur->th_ecn_droprxmt = 0;
0a7de745
A
1220 }
1221 if (tpheur->th_ecn_synrst >= ECN_RETRY_LIMIT) {
5ba3f43e 1222 tpheur->th_ecn_synrst = 0;
0a7de745 1223 }
cb323159
A
1224
1225 /* Make sure it follows along */
1226 tpheur->th_ecn_backoff = tcp_now;
39037602 1227 }
4bd07ac2
A
1228
1229 tcp_heuristic_unlock(head);
1230
0a7de745 1231 return ret;
4bd07ac2
A
1232}
1233
0a7de745
A
1234boolean_t
1235tcp_heuristic_do_ecn(struct tcpcb *tp)
5ba3f43e
A
1236{
1237 struct tcp_cache_key_src tcks;
1238
1239 tcp_cache_key_src_create(tp, &tcks);
1240 return tcp_heuristic_do_ecn_common(&tcks);
1241}
1242
0a7de745
A
1243boolean_t
1244tcp_heuristic_do_ecn_with_address(struct ifnet *ifp,
5ba3f43e
A
1245 union sockaddr_in_4_6 *local_address)
1246{
1247 struct tcp_cache_key_src tcks;
1248
1249 memset(&tcks, 0, sizeof(tcks));
1250 tcks.ifp = ifp;
1251
1252 calculate_tcp_clock();
1253
1254 if (local_address->sa.sa_family == AF_INET6) {
1255 memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
1256 tcks.af = AF_INET6;
1257 } else if (local_address->sa.sa_family == AF_INET) {
1258 memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
1259 tcks.af = AF_INET;
1260 }
1261
1262 return tcp_heuristic_do_ecn_common(&tcks);
1263}
1264
0a7de745
A
1265void
1266tcp_heuristics_ecn_update(struct necp_tcp_ecn_cache *necp_buffer,
5ba3f43e
A
1267 struct ifnet *ifp, union sockaddr_in_4_6 *local_address)
1268{
1269 struct tcp_cache_key_src tcks;
1270
1271 memset(&tcks, 0, sizeof(tcks));
1272 tcks.ifp = ifp;
1273
1274 calculate_tcp_clock();
1275
1276 if (local_address->sa.sa_family == AF_INET6) {
1277 memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
1278 tcks.af = AF_INET6;
1279 } else if (local_address->sa.sa_family == AF_INET) {
1280 memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
1281 tcks.af = AF_INET;
1282 }
1283
1284 if (necp_buffer->necp_tcp_ecn_heuristics_success) {
1285 tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_ECN);
1286 } else if (necp_buffer->necp_tcp_ecn_heuristics_loss) {
1287 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN);
1288 } else if (necp_buffer->necp_tcp_ecn_heuristics_drop_rst) {
1289 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRST);
1290 } else if (necp_buffer->necp_tcp_ecn_heuristics_drop_rxmt) {
1291 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_DROPRXMT);
1292 } else if (necp_buffer->necp_tcp_ecn_heuristics_syn_rst) {
1293 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_ECN_SYNRST);
1294 } else if (necp_buffer->necp_tcp_ecn_heuristics_aggressive) {
1295 tcp_heuristic_ecn_aggressive_common(&tcks);
1296 }
1297
1298 return;
1299}
1300
0a7de745
A
1301boolean_t
1302tcp_heuristic_do_tfo_with_address(struct ifnet *ifp,
5ba3f43e
A
1303 union sockaddr_in_4_6 *local_address, union sockaddr_in_4_6 *remote_address,
1304 u_int8_t *cookie, u_int8_t *cookie_len)
1305{
1306 struct tcp_cache_key_src tcks;
1307
1308 memset(&tcks, 0, sizeof(tcks));
1309 tcks.ifp = ifp;
1310
1311 calculate_tcp_clock();
1312
1313 if (remote_address->sa.sa_family == AF_INET6) {
1314 memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
1315 memcpy(&tcks.faddr.addr6, &remote_address->sin6.sin6_addr, sizeof(struct in6_addr));
1316 tcks.af = AF_INET6;
1317 } else if (remote_address->sa.sa_family == AF_INET) {
1318 memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
1319 memcpy(&tcks.faddr.addr, &remote_address->sin.sin_addr, sizeof(struct in_addr));
1320 tcks.af = AF_INET;
1321 }
1322
1323 if (tcp_heuristic_do_tfo_common(&tcks)) {
1324 if (!tcp_cache_get_cookie_common(&tcks, cookie, cookie_len)) {
0a7de745 1325 *cookie_len = 0;
5ba3f43e
A
1326 }
1327 return TRUE;
1328 }
1329
1330 return FALSE;
1331}
1332
0a7de745
A
1333void
1334tcp_heuristics_tfo_update(struct necp_tcp_tfo_cache *necp_buffer,
5ba3f43e
A
1335 struct ifnet *ifp, union sockaddr_in_4_6 *local_address,
1336 union sockaddr_in_4_6 *remote_address)
1337{
1338 struct tcp_cache_key_src tcks;
1339
1340 memset(&tcks, 0, sizeof(tcks));
1341 tcks.ifp = ifp;
1342
1343 calculate_tcp_clock();
1344
1345 if (remote_address->sa.sa_family == AF_INET6) {
1346 memcpy(&tcks.laddr.addr6, &local_address->sin6.sin6_addr, sizeof(struct in6_addr));
1347 memcpy(&tcks.faddr.addr6, &remote_address->sin6.sin6_addr, sizeof(struct in6_addr));
1348 tcks.af = AF_INET6;
1349 } else if (remote_address->sa.sa_family == AF_INET) {
1350 memcpy(&tcks.laddr.addr, &local_address->sin.sin_addr, sizeof(struct in_addr));
1351 memcpy(&tcks.faddr.addr, &remote_address->sin.sin_addr, sizeof(struct in_addr));
1352 tcks.af = AF_INET;
1353 }
1354
0a7de745 1355 if (necp_buffer->necp_tcp_tfo_heuristics_success) {
5ba3f43e 1356 tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_DATA |
0a7de745
A
1357 TCPCACHE_F_TFO_REQ_RST | TCPCACHE_F_TFO_DATA_RST);
1358 }
5ba3f43e 1359
0a7de745 1360 if (necp_buffer->necp_tcp_tfo_heuristics_success_req) {
5ba3f43e 1361 tcp_heuristic_reset_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_REQ_RST);
0a7de745 1362 }
5ba3f43e 1363
0a7de745 1364 if (necp_buffer->necp_tcp_tfo_heuristics_loss) {
5ba3f43e 1365 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ | TCPCACHE_F_TFO_DATA);
0a7de745 1366 }
5ba3f43e 1367
0a7de745 1368 if (necp_buffer->necp_tcp_tfo_heuristics_loss_req) {
5ba3f43e 1369 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ);
0a7de745 1370 }
5ba3f43e 1371
0a7de745 1372 if (necp_buffer->necp_tcp_tfo_heuristics_rst_data) {
5ba3f43e 1373 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ_RST | TCPCACHE_F_TFO_DATA_RST);
0a7de745 1374 }
5ba3f43e 1375
0a7de745 1376 if (necp_buffer->necp_tcp_tfo_heuristics_rst_req) {
5ba3f43e 1377 tcp_heuristic_inc_counters(&tcks, TCPCACHE_F_TFO_REQ_RST);
0a7de745 1378 }
5ba3f43e 1379
0a7de745 1380 if (necp_buffer->necp_tcp_tfo_heuristics_middlebox) {
5ba3f43e 1381 tcp_heuristic_tfo_middlebox_common(&tcks);
0a7de745 1382 }
5ba3f43e
A
1383
1384 if (necp_buffer->necp_tcp_tfo_cookie_len != 0) {
1385 tcp_cache_set_cookie_common(&tcks,
0a7de745 1386 necp_buffer->necp_tcp_tfo_cookie, necp_buffer->necp_tcp_tfo_cookie_len);
5ba3f43e
A
1387 }
1388
1389 return;
1390}
1391
0a7de745
A
1392static void
1393sysctl_cleartfocache(void)
3e170ce0
A
1394{
1395 int i;
1396
1397 for (i = 0; i < tcp_cache_size; i++) {
1398 struct tcp_cache_head *head = &tcp_cache[i];
1399 struct tcp_cache *tpcache, *tmp;
1400 struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
1401 struct tcp_heuristic *tpheur, *htmp;
1402
1403 lck_mtx_lock(&head->tch_mtx);
1404 SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
1405 SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
1406 _FREE(tpcache, M_TEMP);
1407 }
1408 lck_mtx_unlock(&head->tch_mtx);
1409
1410 lck_mtx_lock(&hhead->thh_mtx);
1411 SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
1412 SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
1413 _FREE(tpheur, M_TEMP);
1414 }
1415 lck_mtx_unlock(&hhead->thh_mtx);
1416 }
1417}
1418
1419/* This sysctl is useful for testing purposes only */
1420static int tcpcleartfo = 0;
1421
1422static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
1423{
1424#pragma unused(arg1, arg2)
1425 int error = 0, val, oldval = tcpcleartfo;
1426
1427 val = oldval;
1428 error = sysctl_handle_int(oidp, &val, 0, req);
0a7de745
A
1429 if (error || !req->newptr) {
1430 return error;
1431 }
3e170ce0
A
1432
1433 /*
1434 * The actual value does not matter. If the value is set, it triggers
1435 * the clearing of the TFO cache. If a future implementation does not
1436 * use the route entry to hold the TFO cache, replace the route sysctl.
1437 */
1438
0a7de745 1439 if (val != oldval) {
3e170ce0 1440 sysctl_cleartfocache();
0a7de745 1441 }
3e170ce0
A
1442
1443 tcpcleartfo = val;
1444
0a7de745 1445 return error;
3e170ce0
A
1446}
1447
1448SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
0a7de745
A
1449 CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
1450 "Toggle to clear the TFO destination based heuristic cache");
3e170ce0 1451
0a7de745
A
1452void
1453tcp_cache_init(void)
3e170ce0
A
1454{
1455 uint64_t sane_size_meg = sane_size / 1024 / 1024;
1456 int i;
1457
1458 /*
1459 * On machines with <100MB of memory this will result in a (full) cache-size
1460 * of 32 entries, thus 32 * 5 * 64bytes = 10KB. (about 0.01 %)
1461 * On machines with > 4GB of memory, we have a cache-size of 1024 entries,
1462 * thus about 327KB.
1463 *
1464 * Side-note: we convert to u_int32_t. If sane_size is more than
1465 * 16000 TB, we loose precision. But, who cares? :)
1466 */
1467 tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2));
0a7de745 1468 if (tcp_cache_size < 32) {
3e170ce0 1469 tcp_cache_size = 32;
0a7de745 1470 } else if (tcp_cache_size > 1024) {
3e170ce0 1471 tcp_cache_size = 1024;
0a7de745 1472 }
3e170ce0
A
1473
1474 tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size,
1475 M_TEMP, M_ZERO);
0a7de745 1476 if (tcp_cache == NULL) {
3e170ce0 1477 panic("Allocating tcp_cache failed at boot-time!");
0a7de745 1478 }
3e170ce0
A
1479
1480 tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
1481 tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr);
1482 tcp_cache_mtx_attr = lck_attr_alloc_init();
1483
1484 tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
1485 M_TEMP, M_ZERO);
0a7de745 1486 if (tcp_heuristics == NULL) {
3e170ce0 1487 panic("Allocating tcp_heuristic failed at boot-time!");
0a7de745 1488 }
3e170ce0
A
1489
1490 tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init();
1491 tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr);
1492 tcp_heuristic_mtx_attr = lck_attr_alloc_init();
1493
1494 for (i = 0; i < tcp_cache_size; i++) {
1495 lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp,
1496 tcp_cache_mtx_attr);
1497 SLIST_INIT(&tcp_cache[i].tcp_caches);
1498
1499 lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp,
1500 tcp_heuristic_mtx_attr);
1501 SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
1502 }
1503
1504 tcp_cache_hash_seed = RandomULong();
1505}