]> git.saurik.com Git - apple/xnu.git/blame_incremental - bsd/netinet/tcp_cache.c
xnu-3789.31.2.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_cache.c
... / ...
CommitLineData
1/*
2 * Copyright (c) 2015-2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* TCP-cache to store and retrieve TCP-related information */
30
31#include <net/flowhash.h>
32#include <net/route.h>
33#include <netinet/in_pcb.h>
34#include <netinet/tcp_cache.h>
35#include <netinet/tcp_seq.h>
36#include <netinet/tcp_var.h>
37#include <kern/locks.h>
38#include <sys/queue.h>
39#include <dev/random/randomdev.h>
40
41struct tcp_heuristic_key {
42 union {
43 uint8_t thk_net_signature[IFNET_SIGNATURELEN];
44 union {
45 struct in_addr addr;
46 struct in6_addr addr6;
47 } thk_ip;
48 };
49 sa_family_t thk_family;
50};
51
52struct tcp_heuristic {
53 SLIST_ENTRY(tcp_heuristic) list;
54
55 u_int32_t th_last_access;
56
57 struct tcp_heuristic_key th_key;
58
59 char th_val_start[0]; /* Marker for memsetting to 0 */
60
61 u_int8_t th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
62 u_int8_t th_mptcp_loss; /* The number of times a SYN+MP_CAPABLE has been lost */
63 u_int8_t th_ecn_loss; /* The number of times a SYN+ecn has been lost */
64 u_int8_t th_ecn_aggressive; /* The number of times we did an aggressive fallback */
65 u_int8_t th_ecn_droprst; /* The number of times ECN connections received a RST after first data pkt */
66 u_int8_t th_ecn_droprxmt; /* The number of times ECN connection is dropped after multiple retransmits */
67 u_int32_t th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */
68 u_int32_t th_tfo_cookie_backoff; /* Time until when we should not try out TFO */
69 u_int32_t th_mptcp_backoff; /* Time until when we should not try out MPTCP */
70 u_int32_t th_ecn_backoff; /* Time until when we should not try out ECN */
71
72 u_int8_t th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
73 th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */
74 th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */
75 th_tfo_rcv_middlebox_supp:1, /* We are sure that the network supports TFO in downstream direction*/
76 th_mptcp_in_backoff:1; /* Are we avoiding MPTCP due to the backoff timer? */
77
78 char th_val_end[0]; /* Marker for memsetting to 0 */
79};
80
81struct tcp_heuristics_head {
82 SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;
83
84 /* Per-hashbucket lock to avoid lock-contention */
85 lck_mtx_t thh_mtx;
86};
87
88struct tcp_cache_key {
89 sa_family_t tck_family;
90
91 struct tcp_heuristic_key tck_src;
92 union {
93 struct in_addr addr;
94 struct in6_addr addr6;
95 } tck_dst;
96};
97
98struct tcp_cache {
99 SLIST_ENTRY(tcp_cache) list;
100
101 u_int32_t tc_last_access;
102
103 struct tcp_cache_key tc_key;
104
105 u_int8_t tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
106 u_int8_t tc_tfo_cookie_len;
107};
108
109struct tcp_cache_head {
110 SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;
111
112 /* Per-hashbucket lock to avoid lock-contention */
113 lck_mtx_t tch_mtx;
114};
115
116static u_int32_t tcp_cache_hash_seed;
117
118size_t tcp_cache_size;
119
120/*
121 * The maximum depth of the hash-bucket. This way we limit the tcp_cache to
122 * TCP_CACHE_BUCKET_SIZE * tcp_cache_size and have "natural" garbage collection
123 */
124#define TCP_CACHE_BUCKET_SIZE 5
125
126static struct tcp_cache_head *tcp_cache;
127
128decl_lck_mtx_data(, tcp_cache_mtx);
129
130static lck_attr_t *tcp_cache_mtx_attr;
131static lck_grp_t *tcp_cache_mtx_grp;
132static lck_grp_attr_t *tcp_cache_mtx_grp_attr;
133
134static struct tcp_heuristics_head *tcp_heuristics;
135
136decl_lck_mtx_data(, tcp_heuristics_mtx);
137
138static lck_attr_t *tcp_heuristic_mtx_attr;
139static lck_grp_t *tcp_heuristic_mtx_grp;
140static lck_grp_attr_t *tcp_heuristic_mtx_grp_attr;
141
142static int tcp_ecn_timeout = 60;
143SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
144 &tcp_ecn_timeout, 0, "Initial minutes to wait before re-trying ECN");
145
146static int disable_tcp_heuristics = 0;
147SYSCTL_INT(_net_inet_tcp, OID_AUTO, disable_tcp_heuristics, CTLFLAG_RW | CTLFLAG_LOCKED,
148 &disable_tcp_heuristics, 0, "Set to 1, to disable all TCP heuristics (TFO, ECN, MPTCP)");
149
150/*
151 * This number is coupled with tcp_ecn_timeout, because we want to prevent
152 * integer overflow. Need to find an unexpensive way to prevent integer overflow
153 * while still allowing a dynamic sysctl.
154 */
155#define TCP_CACHE_OVERFLOW_PROTECT 9
156
157/* Number of SYN-losses we accept */
158#define TFO_MAX_COOKIE_LOSS 2
159#define ECN_MAX_SYN_LOSS 2
160#define MPTCP_MAX_SYN_LOSS 2
161#define ECN_MAX_DROPRST 2
162#define ECN_MAX_DROPRXMT 4
163
164/* Flags for setting/unsetting loss-heuristics, limited to 1 byte */
165#define TCPCACHE_F_TFO 0x01
166#define TCPCACHE_F_ECN 0x02
167#define TCPCACHE_F_MPTCP 0x04
168#define TCPCACHE_F_ECN_DROPRST 0x08
169#define TCPCACHE_F_ECN_DROPRXMT 0x10
170
171/* Always retry ECN after backing off to this level for some heuristics */
172#define ECN_RETRY_LIMIT 9
173
/*
 * Round 'a' up to the next power of two (values that already are a power of
 * two are returned unchanged). Technique from "Bit Twiddling Hacks": smear the
 * highest set bit of (a - 1) into every lower bit position, then add one.
 *
 * With 32-bit wrap-around, an input of 0 yields 0.
 *
 * Might be worth moving this to a library so that others
 * (e.g., scale_to_powerof2()) can use this as well instead of a while-loop.
 */
static u_int32_t tcp_cache_roundup2(u_int32_t a)
{
	u_int32_t v = a - 1;
	unsigned int shift;

	/* Shifts of 1, 2, 4, 8 and 16 cover all 32 bits */
	for (shift = 1; shift <= 16; shift <<= 1)
		v |= v >> shift;

	return (v + 1);
}
192
193static void tcp_cache_hash_src(struct inpcb *inp, struct tcp_heuristic_key *key)
194{
195 struct ifnet *ifn = inp->inp_last_outifp;
196 uint8_t len = sizeof(key->thk_net_signature);
197 uint16_t flags;
198
199 if (inp->inp_vflag & INP_IPV6) {
200 int ret;
201
202 key->thk_family = AF_INET6;
203 ret = ifnet_get_netsignature(ifn, AF_INET6, &len, &flags,
204 key->thk_net_signature);
205
206 /*
207 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
208 * (we made sure that in the other cases it does not). So,
209 * in this case we should take the connection's address.
210 */
211 if (ret == ENOENT || ret == EINVAL)
212 memcpy(&key->thk_ip.addr6, &inp->in6p_laddr, sizeof(struct in6_addr));
213 } else {
214 int ret;
215
216 key->thk_family = AF_INET;
217 ret = ifnet_get_netsignature(ifn, AF_INET, &len, &flags,
218 key->thk_net_signature);
219
220 /*
221 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
222 * (we made sure that in the other cases it does not). So,
223 * in this case we should take the connection's address.
224 */
225 if (ret == ENOENT || ret == EINVAL)
226 memcpy(&key->thk_ip.addr, &inp->inp_laddr, sizeof(struct in_addr));
227 }
228}
229
230static u_int16_t tcp_cache_hash(struct inpcb *inp, struct tcp_cache_key *key)
231{
232 u_int32_t hash;
233
234 bzero(key, sizeof(struct tcp_cache_key));
235
236 tcp_cache_hash_src(inp, &key->tck_src);
237
238 if (inp->inp_vflag & INP_IPV6) {
239 key->tck_family = AF_INET6;
240 memcpy(&key->tck_dst.addr6, &inp->in6p_faddr,
241 sizeof(struct in6_addr));
242 } else {
243 key->tck_family = AF_INET;
244 memcpy(&key->tck_dst.addr, &inp->inp_faddr,
245 sizeof(struct in_addr));
246 }
247
248 hash = net_flowhash(key, sizeof(struct tcp_cache_key),
249 tcp_cache_hash_seed);
250
251 return (hash & (tcp_cache_size - 1));
252}
253
254static void tcp_cache_unlock(struct tcp_cache_head *head)
255{
256 lck_mtx_unlock(&head->tch_mtx);
257}
258
259/*
260 * Make sure that everything that happens after tcp_getcache_with_lock()
261 * is short enough to justify that you hold the per-bucket lock!!!
262 *
263 * Otherwise, better build another lookup-function that does not hold the
264 * lock and you copy out the bits and bytes.
265 *
266 * That's why we provide the head as a "return"-pointer so that the caller
267 * can give it back to use for tcp_cache_unlock().
268 */
269static struct tcp_cache *tcp_getcache_with_lock(struct tcpcb *tp, int create,
270 struct tcp_cache_head **headarg)
271{
272 struct inpcb *inp = tp->t_inpcb;
273 struct tcp_cache *tpcache = NULL;
274 struct tcp_cache_head *head;
275 struct tcp_cache_key key;
276 u_int16_t hash;
277 int i = 0;
278
279 hash = tcp_cache_hash(inp, &key);
280 head = &tcp_cache[hash];
281
282 lck_mtx_lock(&head->tch_mtx);
283
284 /*** First step: Look for the tcp_cache in our bucket ***/
285 SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
286 if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0)
287 break;
288
289 i++;
290 }
291
292 /*** Second step: If it's not there, create/recycle it ***/
293 if ((tpcache == NULL) && create) {
294 if (i >= TCP_CACHE_BUCKET_SIZE) {
295 struct tcp_cache *oldest_cache = NULL;
296 u_int32_t max_age = 0;
297
298 /* Look for the oldest tcp_cache in the bucket */
299 SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
300 u_int32_t age = tcp_now - tpcache->tc_last_access;
301 if (age > max_age) {
302 max_age = age;
303 oldest_cache = tpcache;
304 }
305 }
306 VERIFY(oldest_cache != NULL);
307
308 tpcache = oldest_cache;
309
310 /* We recycle, thus let's indicate that there is no cookie */
311 tpcache->tc_tfo_cookie_len = 0;
312 } else {
313 /* Create a new cache and add it to the list */
314 tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP,
315 M_NOWAIT | M_ZERO);
316 if (tpcache == NULL)
317 goto out_null;
318
319 SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
320 }
321
322 memcpy(&tpcache->tc_key, &key, sizeof(key));
323 }
324
325 if (tpcache == NULL)
326 goto out_null;
327
328 /* Update timestamp for garbage collection purposes */
329 tpcache->tc_last_access = tcp_now;
330 *headarg = head;
331
332 return (tpcache);
333
334out_null:
335 tcp_cache_unlock(head);
336 return (NULL);
337}
338
339void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len)
340{
341 struct tcp_cache_head *head;
342 struct tcp_cache *tpcache;
343
344 /* Call lookup/create function */
345 tpcache = tcp_getcache_with_lock(tp, 1, &head);
346 if (tpcache == NULL)
347 return;
348
349 tpcache->tc_tfo_cookie_len = len;
350 memcpy(tpcache->tc_tfo_cookie, cookie, len);
351
352 tcp_cache_unlock(head);
353}
354
355/*
356 * Get the cookie related to 'tp', and copy it into 'cookie', provided that len
357 * is big enough (len designates the available memory.
358 * Upon return, 'len' is set to the cookie's length.
359 *
360 * Returns 0 if we should request a cookie.
361 * Returns 1 if the cookie has been found and written.
362 */
363int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len)
364{
365 struct tcp_cache_head *head;
366 struct tcp_cache *tpcache;
367
368 /* Call lookup/create function */
369 tpcache = tcp_getcache_with_lock(tp, 1, &head);
370 if (tpcache == NULL)
371 return (0);
372
373 if (tpcache->tc_tfo_cookie_len == 0) {
374 tcp_cache_unlock(head);
375 return (0);
376 }
377
378 /*
379 * Not enough space - this should never happen as it has been checked
380 * in tcp_tfo_check. So, fail here!
381 */
382 VERIFY(tpcache->tc_tfo_cookie_len <= *len);
383
384 memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len);
385 *len = tpcache->tc_tfo_cookie_len;
386
387 tcp_cache_unlock(head);
388
389 return (1);
390}
391
392unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp)
393{
394 struct tcp_cache_head *head;
395 struct tcp_cache *tpcache;
396 unsigned int cookie_len;
397
398 /* Call lookup/create function */
399 tpcache = tcp_getcache_with_lock(tp, 1, &head);
400 if (tpcache == NULL)
401 return (0);
402
403 cookie_len = tpcache->tc_tfo_cookie_len;
404
405 tcp_cache_unlock(head);
406
407 return cookie_len;
408}
409
410static u_int16_t tcp_heuristics_hash(struct inpcb *inp,
411 struct tcp_heuristic_key *key)
412{
413 u_int32_t hash;
414
415 bzero(key, sizeof(struct tcp_heuristic_key));
416
417 tcp_cache_hash_src(inp, key);
418
419 hash = net_flowhash(key, sizeof(struct tcp_heuristic_key),
420 tcp_cache_hash_seed);
421
422 return (hash & (tcp_cache_size - 1));
423}
424
425static void tcp_heuristic_unlock(struct tcp_heuristics_head *head)
426{
427 lck_mtx_unlock(&head->thh_mtx);
428}
429
430/*
431 * Make sure that everything that happens after tcp_getheuristic_with_lock()
432 * is short enough to justify that you hold the per-bucket lock!!!
433 *
434 * Otherwise, better build another lookup-function that does not hold the
435 * lock and you copy out the bits and bytes.
436 *
437 * That's why we provide the head as a "return"-pointer so that the caller
438 * can give it back to use for tcp_heur_unlock().
439 *
440 *
441 * ToDo - way too much code-duplication. We should create an interface to handle
442 * bucketized hashtables with recycling of the oldest element.
443 */
444static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
445 int create, struct tcp_heuristics_head **headarg)
446{
447 struct inpcb *inp = tp->t_inpcb;
448 struct tcp_heuristic *tpheur = NULL;
449 struct tcp_heuristics_head *head;
450 struct tcp_heuristic_key key;
451 u_int16_t hash;
452 int i = 0;
453
454 hash = tcp_heuristics_hash(inp, &key);
455 head = &tcp_heuristics[hash];
456
457 lck_mtx_lock(&head->thh_mtx);
458
459 /*** First step: Look for the tcp_heur in our bucket ***/
460 SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
461 if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0)
462 break;
463
464 i++;
465 }
466
467 /*** Second step: If it's not there, create/recycle it ***/
468 if ((tpheur == NULL) && create) {
469 if (i >= TCP_CACHE_BUCKET_SIZE) {
470 struct tcp_heuristic *oldest_heur = NULL;
471 u_int32_t max_age = 0;
472
473 /* Look for the oldest tcp_heur in the bucket */
474 SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
475 u_int32_t age = tcp_now - tpheur->th_last_access;
476 if (age > max_age) {
477 max_age = age;
478 oldest_heur = tpheur;
479 }
480 }
481 VERIFY(oldest_heur != NULL);
482
483 tpheur = oldest_heur;
484
485 /* We recycle - set everything to 0 */
486 bzero(tpheur->th_val_start,
487 tpheur->th_val_end - tpheur->th_val_start);
488 } else {
489 /* Create a new heuristic and add it to the list */
490 tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
491 M_NOWAIT | M_ZERO);
492 if (tpheur == NULL)
493 goto out_null;
494
495 SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
496 }
497
498 /*
499 * Set to tcp_now, to make sure it won't be > than tcp_now in the
500 * near future.
501 */
502 tpheur->th_ecn_backoff = tcp_now;
503 tpheur->th_tfo_cookie_backoff = tcp_now;
504 tpheur->th_mptcp_backoff = tcp_now;
505
506 memcpy(&tpheur->th_key, &key, sizeof(key));
507 }
508
509 if (tpheur == NULL)
510 goto out_null;
511
512 /* Update timestamp for garbage collection purposes */
513 tpheur->th_last_access = tcp_now;
514 *headarg = head;
515
516 return (tpheur);
517
518out_null:
519 tcp_heuristic_unlock(head);
520 return (NULL);
521}
522
523static void tcp_heuristic_reset_loss(struct tcpcb *tp, u_int8_t flags)
524{
525 struct tcp_heuristics_head *head;
526 struct tcp_heuristic *tpheur;
527
528 /*
529 * Don't attempt to create it! Keep the heuristics clean if the
530 * server does not support TFO. This reduces the lookup-cost on
531 * our side.
532 */
533 tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
534 if (tpheur == NULL)
535 return;
536
537 if (flags & TCPCACHE_F_TFO)
538 tpheur->th_tfo_cookie_loss = 0;
539
540 if (flags & TCPCACHE_F_ECN)
541 tpheur->th_ecn_loss = 0;
542
543 if (flags & TCPCACHE_F_MPTCP)
544 tpheur->th_mptcp_loss = 0;
545
546 tcp_heuristic_unlock(head);
547}
548
549void tcp_heuristic_tfo_success(struct tcpcb *tp)
550{
551 tcp_heuristic_reset_loss(tp, TCPCACHE_F_TFO);
552}
553
554void tcp_heuristic_mptcp_success(struct tcpcb *tp)
555{
556 tcp_heuristic_reset_loss(tp, TCPCACHE_F_MPTCP);
557}
558
559void tcp_heuristic_ecn_success(struct tcpcb *tp)
560{
561 tcp_heuristic_reset_loss(tp, TCPCACHE_F_ECN);
562}
563
564void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp)
565{
566 struct tcp_heuristics_head *head;
567
568 struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
569 if (tpheur == NULL)
570 return;
571
572 tpheur->th_tfo_rcv_middlebox_supp = 1;
573
574 tcp_heuristic_unlock(head);
575
576 tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
577}
578
579void tcp_heuristic_tfo_snd_good(struct tcpcb *tp)
580{
581 struct tcp_heuristics_head *head;
582
583 struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
584 if (tpheur == NULL)
585 return;
586
587 tpheur->th_tfo_snd_middlebox_supp = 1;
588
589 tcp_heuristic_unlock(head);
590
591 tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
592}
593
594static void tcp_heuristic_inc_loss(struct tcpcb *tp, u_int8_t flags)
595{
596 struct tcp_heuristics_head *head;
597 struct tcp_heuristic *tpheur;
598
599 tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
600 if (tpheur == NULL)
601 return;
602
603 /* Limit to prevent integer-overflow during exponential backoff */
604 if ((flags & TCPCACHE_F_TFO) && tpheur->th_tfo_cookie_loss < TCP_CACHE_OVERFLOW_PROTECT)
605 tpheur->th_tfo_cookie_loss++;
606
607 if ((flags & TCPCACHE_F_ECN) && tpheur->th_ecn_loss < TCP_CACHE_OVERFLOW_PROTECT) {
608 tpheur->th_ecn_loss++;
609 if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
610 tcpstat.tcps_ecn_fallback_synloss++;
611 INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_synloss);
612 tpheur->th_ecn_backoff = tcp_now +
613 ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
614 (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
615 }
616 }
617
618 if ((flags & TCPCACHE_F_MPTCP) &&
619 tpheur->th_mptcp_loss < TCP_CACHE_OVERFLOW_PROTECT) {
620 tpheur->th_mptcp_loss++;
621 if (tpheur->th_mptcp_loss >= MPTCP_MAX_SYN_LOSS) {
622 /*
623 * Yes, we take tcp_ecn_timeout, to avoid adding yet
624 * another sysctl that is just used for testing.
625 */
626 tpheur->th_mptcp_backoff = tcp_now +
627 ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
628 (tpheur->th_mptcp_loss - MPTCP_MAX_SYN_LOSS));
629 }
630 }
631
632 if ((flags & TCPCACHE_F_ECN_DROPRST) &&
633 tpheur->th_ecn_droprst < TCP_CACHE_OVERFLOW_PROTECT) {
634 tpheur->th_ecn_droprst++;
635 if (tpheur->th_ecn_droprst >= ECN_MAX_DROPRST) {
636 tcpstat.tcps_ecn_fallback_droprst++;
637 INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_droprst);
638 tpheur->th_ecn_backoff = tcp_now +
639 ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
640 (tpheur->th_ecn_droprst - ECN_MAX_DROPRST));
641
642 }
643 }
644
645 if ((flags & TCPCACHE_F_ECN_DROPRXMT) &&
646 tpheur->th_ecn_droprst < TCP_CACHE_OVERFLOW_PROTECT) {
647 tpheur->th_ecn_droprxmt++;
648 if (tpheur->th_ecn_droprxmt >= ECN_MAX_DROPRXMT) {
649 tcpstat.tcps_ecn_fallback_droprxmt++;
650 INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_droprxmt);
651 tpheur->th_ecn_backoff = tcp_now +
652 ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
653 (tpheur->th_ecn_droprxmt - ECN_MAX_DROPRXMT));
654 }
655 }
656 tcp_heuristic_unlock(head);
657}
658
659void tcp_heuristic_tfo_loss(struct tcpcb *tp)
660{
661 tcp_heuristic_inc_loss(tp, TCPCACHE_F_TFO);
662}
663
664void tcp_heuristic_mptcp_loss(struct tcpcb *tp)
665{
666 tcp_heuristic_inc_loss(tp, TCPCACHE_F_MPTCP);
667}
668
669void tcp_heuristic_ecn_loss(struct tcpcb *tp)
670{
671 tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN);
672}
673
674void tcp_heuristic_ecn_droprst(struct tcpcb *tp)
675{
676 tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN_DROPRST);
677}
678
679void tcp_heuristic_ecn_droprxmt(struct tcpcb *tp)
680{
681 tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN_DROPRXMT);
682}
683
684void tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
685{
686 struct tcp_heuristics_head *head;
687 struct tcp_heuristic *tpheur;
688
689 tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
690 if (tpheur == NULL)
691 return;
692
693 tpheur->th_tfo_aggressive_fallback = 1;
694
695 tcp_heuristic_unlock(head);
696}
697
698void tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
699{
700 struct tcp_heuristics_head *head;
701 struct tcp_heuristic *tpheur;
702
703 tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
704 if (tpheur == NULL)
705 return;
706
707 /* Must be done before, otherwise we will start off with expo-backoff */
708 tpheur->th_ecn_backoff = tcp_now +
709 ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) << (tpheur->th_ecn_aggressive));
710
711 /*
712 * Ugly way to prevent integer overflow... limit to prevent in
713 * overflow during exp. backoff.
714 */
715 if (tpheur->th_ecn_aggressive < TCP_CACHE_OVERFLOW_PROTECT)
716 tpheur->th_ecn_aggressive++;
717
718 tcp_heuristic_unlock(head);
719}
720
721boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp)
722{
723 struct tcp_heuristics_head *head;
724 struct tcp_heuristic *tpheur;
725
726 if (disable_tcp_heuristics)
727 return (TRUE);
728
729 /* Get the tcp-heuristic. */
730 tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
731 if (tpheur == NULL)
732 return (TRUE);
733
734 if (tpheur->th_tfo_aggressive_fallback) {
735 /* Aggressive fallback - don't do TFO anymore... :'( */
736 tcp_heuristic_unlock(head);
737 return (FALSE);
738 }
739
740 if (tpheur->th_tfo_cookie_loss >= TFO_MAX_COOKIE_LOSS &&
741 (tpheur->th_tfo_fallback_trials < tcp_tfo_fallback_min ||
742 TSTMP_GT(tpheur->th_tfo_cookie_backoff, tcp_now))) {
743 /*
744 * So, when we are in SYN-loss mode we try to stop using TFO
745 * for the next 'tcp_tfo_fallback_min' connections. That way,
746 * we are sure that never more than 1 out of tcp_tfo_fallback_min
747 * connections will suffer from our nice little middelbox.
748 *
749 * After that we first wait for 2 minutes. If we fail again,
750 * we wait for yet another 60 minutes.
751 */
752 tpheur->th_tfo_fallback_trials++;
753 if (tpheur->th_tfo_fallback_trials >= tcp_tfo_fallback_min &&
754 !tpheur->th_tfo_in_backoff) {
755 if (tpheur->th_tfo_cookie_loss == TFO_MAX_COOKIE_LOSS)
756 /* Backoff for 2 minutes */
757 tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 2 * TCP_RETRANSHZ);
758 else
759 /* Backoff for 60 minutes */
760 tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 60 * TCP_RETRANSHZ);
761
762 tpheur->th_tfo_in_backoff = 1;
763 }
764
765 tcp_heuristic_unlock(head);
766 return (FALSE);
767 }
768
769 /*
770 * We give it a new shot, set trials back to 0. This allows to
771 * start counting again from zero in case we get yet another SYN-loss
772 */
773 tpheur->th_tfo_fallback_trials = 0;
774 tpheur->th_tfo_in_backoff = 0;
775
776 if (tpheur->th_tfo_rcv_middlebox_supp)
777 tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
778 if (tpheur->th_tfo_snd_middlebox_supp)
779 tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
780
781 tcp_heuristic_unlock(head);
782
783 return (TRUE);
784}
785
786boolean_t tcp_heuristic_do_mptcp(struct tcpcb *tp)
787{
788 struct tcp_heuristics_head *head;
789 struct tcp_heuristic *tpheur;
790 boolean_t ret = TRUE;
791
792 if (disable_tcp_heuristics)
793 return (TRUE);
794
795 /* Get the tcp-heuristic. */
796 tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
797 if (tpheur == NULL)
798 return ret;
799
800 if (TSTMP_GT(tpheur->th_mptcp_backoff, tcp_now))
801 ret = FALSE;
802
803 tcp_heuristic_unlock(head);
804
805 return (ret);
806}
807
808boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp)
809{
810 struct tcp_heuristics_head *head;
811 struct tcp_heuristic *tpheur;
812 boolean_t ret = TRUE;
813
814 if (disable_tcp_heuristics)
815 return (TRUE);
816
817 /* Get the tcp-heuristic. */
818 tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
819 if (tpheur == NULL)
820 return ret;
821
822 if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
823 ret = FALSE;
824 } else {
825 /* Reset the following counters to start re-evaluating */
826 if (tpheur->th_ecn_droprst >= ECN_RETRY_LIMIT)
827 tpheur->th_ecn_droprst = 0;
828 if (tpheur->th_ecn_droprxmt >= ECN_RETRY_LIMIT)
829 tpheur->th_ecn_droprxmt = 0;
830 }
831
832 tcp_heuristic_unlock(head);
833
834 return (ret);
835}
836
837static void sysctl_cleartfocache(void)
838{
839 int i;
840
841 for (i = 0; i < tcp_cache_size; i++) {
842 struct tcp_cache_head *head = &tcp_cache[i];
843 struct tcp_cache *tpcache, *tmp;
844 struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
845 struct tcp_heuristic *tpheur, *htmp;
846
847 lck_mtx_lock(&head->tch_mtx);
848 SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
849 SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
850 _FREE(tpcache, M_TEMP);
851 }
852 lck_mtx_unlock(&head->tch_mtx);
853
854 lck_mtx_lock(&hhead->thh_mtx);
855 SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
856 SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
857 _FREE(tpheur, M_TEMP);
858 }
859 lck_mtx_unlock(&hhead->thh_mtx);
860 }
861}
862
863/* This sysctl is useful for testing purposes only */
864static int tcpcleartfo = 0;
865
866static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
867{
868#pragma unused(arg1, arg2)
869 int error = 0, val, oldval = tcpcleartfo;
870
871 val = oldval;
872 error = sysctl_handle_int(oidp, &val, 0, req);
873 if (error || !req->newptr)
874 return (error);
875
876 /*
877 * The actual value does not matter. If the value is set, it triggers
878 * the clearing of the TFO cache. If a future implementation does not
879 * use the route entry to hold the TFO cache, replace the route sysctl.
880 */
881
882 if (val != oldval)
883 sysctl_cleartfocache();
884
885 tcpcleartfo = val;
886
887 return (error);
888}
889
890SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
891 CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
892 "Toggle to clear the TFO destination based heuristic cache");
893
894void tcp_cache_init(void)
895{
896 uint64_t sane_size_meg = sane_size / 1024 / 1024;
897 int i;
898
899 /*
900 * On machines with <100MB of memory this will result in a (full) cache-size
901 * of 32 entries, thus 32 * 5 * 64bytes = 10KB. (about 0.01 %)
902 * On machines with > 4GB of memory, we have a cache-size of 1024 entries,
903 * thus about 327KB.
904 *
905 * Side-note: we convert to u_int32_t. If sane_size is more than
906 * 16000 TB, we loose precision. But, who cares? :)
907 */
908 tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2));
909 if (tcp_cache_size < 32)
910 tcp_cache_size = 32;
911 else if (tcp_cache_size > 1024)
912 tcp_cache_size = 1024;
913
914 tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size,
915 M_TEMP, M_ZERO);
916 if (tcp_cache == NULL)
917 panic("Allocating tcp_cache failed at boot-time!");
918
919 tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
920 tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr);
921 tcp_cache_mtx_attr = lck_attr_alloc_init();
922
923 tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
924 M_TEMP, M_ZERO);
925 if (tcp_heuristics == NULL)
926 panic("Allocating tcp_heuristic failed at boot-time!");
927
928 tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init();
929 tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr);
930 tcp_heuristic_mtx_attr = lck_attr_alloc_init();
931
932 for (i = 0; i < tcp_cache_size; i++) {
933 lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp,
934 tcp_cache_mtx_attr);
935 SLIST_INIT(&tcp_cache[i].tcp_caches);
936
937 lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp,
938 tcp_heuristic_mtx_attr);
939 SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
940 }
941
942 tcp_cache_hash_seed = RandomULong();
943}