/*
 * Copyright (c) 2015-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* TCP-cache to store and retrieve TCP-related information */

#include <net/flowhash.h>
#include <net/route.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_cache.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <kern/locks.h>
#include <sys/queue.h>
#include <dev/random/randomdev.h>
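
/*
 * Two bucketized hash tables live in this file: tcp_cache stores the TFO
 * cookie per (source network/address, peer address) pair, while
 * tcp_heuristics keeps per-source backoff state for TFO, ECN and MPTCP.
 * Each bucket has its own lock and is limited to TCP_CACHE_BUCKET_SIZE
 * entries; when a bucket is full, the oldest entry gets recycled.
 */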
struct tcp_heuristic_key {
	union {
		uint8_t thk_net_signature[IFNET_SIGNATURELEN];
		union {
			struct in_addr addr;
			struct in6_addr addr6;
		} thk_ip;
	};
	sa_family_t	thk_family;
};

struct tcp_heuristic {
	SLIST_ENTRY(tcp_heuristic) list;

	u_int32_t	th_last_access;

	struct tcp_heuristic_key	th_key;

	char		th_val_start[0]; /* Marker for memsetting to 0 */

	u_int8_t	th_tfo_cookie_loss; /* The number of times a SYN+cookie has been lost */
	u_int8_t	th_mptcp_loss; /* The number of times a SYN+MP_CAPABLE has been lost */
	u_int8_t	th_ecn_loss; /* The number of times a SYN+ecn has been lost */
	u_int8_t	th_ecn_aggressive; /* The number of times we did an aggressive fallback */
	u_int8_t	th_ecn_droprst; /* The number of times ECN connections received a RST after first data pkt */
	u_int8_t	th_ecn_droprxmt; /* The number of times ECN connection is dropped after multiple retransmits */
	u_int32_t	th_tfo_fallback_trials; /* Number of times we did not try out TFO due to SYN-loss */
	u_int32_t	th_tfo_cookie_backoff; /* Time until when we should not try out TFO */
	u_int32_t	th_mptcp_backoff; /* Time until when we should not try out MPTCP */
	u_int32_t	th_ecn_backoff; /* Time until when we should not try out ECN */

	u_int8_t	th_tfo_in_backoff:1, /* Are we avoiding TFO due to the backoff timer? */
			th_tfo_aggressive_fallback:1, /* Aggressive fallback due to nasty middlebox */
			th_tfo_snd_middlebox_supp:1, /* We are sure that the network supports TFO in upstream direction */
			th_tfo_rcv_middlebox_supp:1, /* We are sure that the network supports TFO in downstream direction */
			th_mptcp_in_backoff:1; /* Are we avoiding MPTCP due to the backoff timer? */

	char		th_val_end[0]; /* Marker for memsetting to 0 */
};

struct tcp_heuristics_head {
	SLIST_HEAD(tcp_heur_bucket, tcp_heuristic) tcp_heuristics;

	/* Per-hashbucket lock to avoid lock-contention */
	lck_mtx_t	thh_mtx;
};

struct tcp_cache_key {
	sa_family_t	tck_family;

	struct tcp_heuristic_key tck_src;
	union {
		struct in_addr addr;
		struct in6_addr addr6;
	} tck_dst;
};

struct tcp_cache {
	SLIST_ENTRY(tcp_cache) list;

	u_int32_t	tc_last_access;

	struct tcp_cache_key	tc_key;

	u_int8_t	tc_tfo_cookie[TFO_COOKIE_LEN_MAX];
	u_int8_t	tc_tfo_cookie_len;
};

struct tcp_cache_head {
	SLIST_HEAD(tcp_cache_bucket, tcp_cache) tcp_caches;

	/* Per-hashbucket lock to avoid lock-contention */
	lck_mtx_t	tch_mtx;
};

static u_int32_t tcp_cache_hash_seed;

size_t tcp_cache_size;

/*
 * The maximum depth of the hash-bucket. This way we limit the tcp_cache to
 * TCP_CACHE_BUCKET_SIZE * tcp_cache_size and have "natural" garbage collection.
 */
#define	TCP_CACHE_BUCKET_SIZE	5

static struct tcp_cache_head *tcp_cache;

decl_lck_mtx_data(, tcp_cache_mtx);

static lck_attr_t	*tcp_cache_mtx_attr;
static lck_grp_t	*tcp_cache_mtx_grp;
static lck_grp_attr_t	*tcp_cache_mtx_grp_attr;

static struct tcp_heuristics_head *tcp_heuristics;

decl_lck_mtx_data(, tcp_heuristics_mtx);

static lck_attr_t	*tcp_heuristic_mtx_attr;
static lck_grp_t	*tcp_heuristic_mtx_grp;
static lck_grp_attr_t	*tcp_heuristic_mtx_grp_attr;

static int tcp_ecn_timeout = 60;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, ecn_timeout, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_ecn_timeout, 0, "Initial minutes to wait before re-trying ECN");

static int disable_tcp_heuristics = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, disable_tcp_heuristics, CTLFLAG_RW | CTLFLAG_LOCKED,
    &disable_tcp_heuristics, 0, "Set to 1, to disable all TCP heuristics (TFO, ECN, MPTCP)");

/*
 * This number is coupled with tcp_ecn_timeout, because we want to prevent
 * integer overflow. Need to find an inexpensive way to prevent integer
 * overflow while still allowing a dynamic sysctl.
 */
#define	TCP_CACHE_OVERFLOW_PROTECT	9

/* Number of SYN-losses we accept */
#define	TFO_MAX_COOKIE_LOSS	2
#define	ECN_MAX_SYN_LOSS	2
#define	MPTCP_MAX_SYN_LOSS	2
#define	ECN_MAX_DROPRST		2
#define	ECN_MAX_DROPRXMT	4

/* Flags for setting/unsetting loss-heuristics, limited to 1 byte */
#define	TCPCACHE_F_TFO		0x01
#define	TCPCACHE_F_ECN		0x02
#define	TCPCACHE_F_MPTCP	0x04
#define	TCPCACHE_F_ECN_DROPRST	0x08
#define	TCPCACHE_F_ECN_DROPRXMT	0x10

/* Always retry ECN after backing off to this level for some heuristics */
#define	ECN_RETRY_LIMIT	9

/*
 * Round up to the next higher power of 2. See "Bit Twiddling Hacks".
 *
 * Might be worth moving this to a library so that others
 * (e.g., scale_to_powerof2()) can use this as well instead of a while-loop.
 */
static u_int32_t tcp_cache_roundup2(u_int32_t a)
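{
	/*
	 * Standard bit-twiddling round-up (see the comment above): smear the
	 * highest set bit of (a - 1) into all lower bits, then add one.
	 * E.g., 24 -> 32, 32 -> 32, 1000 -> 1024.
	 */
	a--;
	a |= a >> 1;
	a |= a >> 2;
	a |= a >> 4;
	a |= a >> 8;
	a |= a >> 16;
	a++;

	return (a);
}

/*
 * Fill in the source part of a cache/heuristic key: prefer the outgoing
 * interface's network signature and fall back to the local IP address when
 * no signature is available.
 */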
static void tcp_cache_hash_src(struct inpcb *inp, struct tcp_heuristic_key *key)
{
	struct ifnet *ifn = inp->inp_last_outifp;
	uint8_t len = sizeof(key->thk_net_signature);
	uint16_t flags;

	if (inp->inp_vflag & INP_IPV6) {
		int ret;

		key->thk_family = AF_INET6;
		ret = ifnet_get_netsignature(ifn, AF_INET6, &len, &flags,
		    key->thk_net_signature);

		/*
		 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
		 * (we made sure that in the other cases it does not). So,
		 * in this case we should take the connection's address.
		 */
		if (ret == ENOENT || ret == EINVAL)
			memcpy(&key->thk_ip.addr6, &inp->in6p_laddr,
			    sizeof(struct in6_addr));
	} else {
		int ret;

		key->thk_family = AF_INET;
		ret = ifnet_get_netsignature(ifn, AF_INET, &len, &flags,
		    key->thk_net_signature);

		/*
		 * ifnet_get_netsignature only returns EINVAL if ifn is NULL
		 * (we made sure that in the other cases it does not). So,
		 * in this case we should take the connection's address.
		 */
		if (ret == ENOENT || ret == EINVAL)
			memcpy(&key->thk_ip.addr, &inp->inp_laddr,
			    sizeof(struct in_addr));
	}
}
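
/*
 * Build the full cache key (source part plus peer address) for this
 * connection and hash it into a bucket index. The mask only works because
 * tcp_cache_size is always a power of 2 (see tcp_cache_init()).
 */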
static u_int16_t tcp_cache_hash(struct inpcb *inp, struct tcp_cache_key *key)
{
	u_int32_t hash;

	bzero(key, sizeof(struct tcp_cache_key));

	tcp_cache_hash_src(inp, &key->tck_src);

	if (inp->inp_vflag & INP_IPV6) {
		key->tck_family = AF_INET6;
		memcpy(&key->tck_dst.addr6, &inp->in6p_faddr,
		    sizeof(struct in6_addr));
	} else {
		key->tck_family = AF_INET;
		memcpy(&key->tck_dst.addr, &inp->inp_faddr,
		    sizeof(struct in_addr));
	}

	hash = net_flowhash(key, sizeof(struct tcp_cache_key),
	    tcp_cache_hash_seed);

	return (hash & (tcp_cache_size - 1));
}

static void tcp_cache_unlock(struct tcp_cache_head *head)
{
	lck_mtx_unlock(&head->tch_mtx);
}

/*
 * Make sure that everything that happens after tcp_getcache_with_lock()
 * is short enough to justify that you hold the per-bucket lock!!!
 *
 * Otherwise, better build another lookup-function that does not hold the
 * lock and copies out the bits and bytes.
 *
 * That's why we provide the head as a "return"-pointer, so that the caller
 * can hand it back to tcp_cache_unlock().
 */
static struct tcp_cache *tcp_getcache_with_lock(struct tcpcb *tp, int create,
    struct tcp_cache_head **headarg)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_cache *tpcache = NULL;
	struct tcp_cache_head *head;
	struct tcp_cache_key key;
	u_int16_t hash;
	int i = 0;

	hash = tcp_cache_hash(inp, &key);
	head = &tcp_cache[hash];

	lck_mtx_lock(&head->tch_mtx);

	/*** First step: Look for the tcp_cache in our bucket ***/
	SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
		if (memcmp(&tpcache->tc_key, &key, sizeof(key)) == 0)
			break;

		i++;
	}

	/*** Second step: If it's not there, create/recycle it ***/
	if ((tpcache == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_cache *oldest_cache = NULL;
			u_int32_t max_age = 0;

			/* Look for the oldest tcp_cache in the bucket */
			SLIST_FOREACH(tpcache, &head->tcp_caches, list) {
				u_int32_t age = tcp_now - tpcache->tc_last_access;
				if (age > max_age) {
					max_age = age;
					oldest_cache = tpcache;
				}
			}
			VERIFY(oldest_cache != NULL);

			tpcache = oldest_cache;

			/* We recycle, thus let's indicate that there is no cookie */
			tpcache->tc_tfo_cookie_len = 0;
		} else {
			/* Create a new cache and add it to the list */
			tpcache = _MALLOC(sizeof(struct tcp_cache), M_TEMP,
			    M_NOWAIT | M_ZERO);
			if (tpcache == NULL)
				goto out_null;

			SLIST_INSERT_HEAD(&head->tcp_caches, tpcache, list);
		}

		memcpy(&tpcache->tc_key, &key, sizeof(key));
	}

	if (tpcache == NULL)
		goto out_null;

	/* Update timestamp for garbage collection purposes */
	tpcache->tc_last_access = tcp_now;
	*headarg = head;

	return (tpcache);

out_null:
	tcp_cache_unlock(head);
	return (NULL);
}
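
/* Remember the TFO cookie the peer handed us, so later connections can reuse it. */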
void tcp_cache_set_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t len)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tp, 1, &head);
	if (tpcache == NULL)
		return;

	tpcache->tc_tfo_cookie_len = len;
	memcpy(tpcache->tc_tfo_cookie, cookie, len);

	tcp_cache_unlock(head);
}

/*
 * Get the cookie related to 'tp', and copy it into 'cookie', provided that
 * len is big enough (len designates the available memory).
 * Upon return, 'len' is set to the cookie's length.
 *
 * Returns 0 if we should request a cookie.
 * Returns 1 if the cookie has been found and written.
 */
int tcp_cache_get_cookie(struct tcpcb *tp, u_char *cookie, u_int8_t *len)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tp, 1, &head);
	if (tpcache == NULL)
		return (0);

	if (tpcache->tc_tfo_cookie_len == 0) {
		tcp_cache_unlock(head);
		return (0);
	}

	/*
	 * Not enough space - this should never happen as it has been checked
	 * in tcp_tfo_check. So, fail here!
	 */
	VERIFY(tpcache->tc_tfo_cookie_len <= *len);

	memcpy(cookie, tpcache->tc_tfo_cookie, tpcache->tc_tfo_cookie_len);
	*len = tpcache->tc_tfo_cookie_len;

	tcp_cache_unlock(head);

	return (1);
}
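
/* Return the length of the cached TFO cookie, or 0 if none is stored. */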
unsigned int tcp_cache_get_cookie_len(struct tcpcb *tp)
{
	struct tcp_cache_head *head;
	struct tcp_cache *tpcache;
	unsigned int cookie_len;

	/* Call lookup/create function */
	tpcache = tcp_getcache_with_lock(tp, 1, &head);
	if (tpcache == NULL)
		return (0);

	cookie_len = tpcache->tc_tfo_cookie_len;

	tcp_cache_unlock(head);

	return (cookie_len);
}

static u_int16_t tcp_heuristics_hash(struct inpcb *inp,
    struct tcp_heuristic_key *key)
{
	u_int32_t hash;

	bzero(key, sizeof(struct tcp_heuristic_key));

	tcp_cache_hash_src(inp, key);

	hash = net_flowhash(key, sizeof(struct tcp_heuristic_key),
	    tcp_cache_hash_seed);

	return (hash & (tcp_cache_size - 1));
}

static void tcp_heuristic_unlock(struct tcp_heuristics_head *head)
{
	lck_mtx_unlock(&head->thh_mtx);
}

/*
 * Make sure that everything that happens after tcp_getheuristic_with_lock()
 * is short enough to justify that you hold the per-bucket lock!!!
 *
 * Otherwise, better build another lookup-function that does not hold the
 * lock and copies out the bits and bytes.
 *
 * That's why we provide the head as a "return"-pointer, so that the caller
 * can hand it back to tcp_heuristic_unlock().
 *
 * ToDo - way too much code-duplication. We should create an interface to handle
 * bucketized hashtables with recycling of the oldest element.
 */
static struct tcp_heuristic *tcp_getheuristic_with_lock(struct tcpcb *tp,
    int create, struct tcp_heuristics_head **headarg)
{
	struct inpcb *inp = tp->t_inpcb;
	struct tcp_heuristic *tpheur = NULL;
	struct tcp_heuristics_head *head;
	struct tcp_heuristic_key key;
	u_int16_t hash;
	int i = 0;

	hash = tcp_heuristics_hash(inp, &key);
	head = &tcp_heuristics[hash];

	lck_mtx_lock(&head->thh_mtx);

	/*** First step: Look for the tcp_heur in our bucket ***/
	SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
		if (memcmp(&tpheur->th_key, &key, sizeof(key)) == 0)
			break;

		i++;
	}

	/*** Second step: If it's not there, create/recycle it ***/
	if ((tpheur == NULL) && create) {
		if (i >= TCP_CACHE_BUCKET_SIZE) {
			struct tcp_heuristic *oldest_heur = NULL;
			u_int32_t max_age = 0;

			/* Look for the oldest tcp_heur in the bucket */
			SLIST_FOREACH(tpheur, &head->tcp_heuristics, list) {
				u_int32_t age = tcp_now - tpheur->th_last_access;
				if (age > max_age) {
					max_age = age;
					oldest_heur = tpheur;
				}
			}
			VERIFY(oldest_heur != NULL);

			tpheur = oldest_heur;

			/* We recycle - set everything to 0 */
			bzero(tpheur->th_val_start,
			    tpheur->th_val_end - tpheur->th_val_start);
		} else {
			/* Create a new heuristic and add it to the list */
			tpheur = _MALLOC(sizeof(struct tcp_heuristic), M_TEMP,
			    M_NOWAIT | M_ZERO);
			if (tpheur == NULL)
				goto out_null;

			SLIST_INSERT_HEAD(&head->tcp_heuristics, tpheur, list);
		}

		/*
		 * Set to tcp_now, to make sure it won't be > than tcp_now in the
		 * near future.
		 */
		tpheur->th_ecn_backoff = tcp_now;
		tpheur->th_tfo_cookie_backoff = tcp_now;
		tpheur->th_mptcp_backoff = tcp_now;

		memcpy(&tpheur->th_key, &key, sizeof(key));
	}

	if (tpheur == NULL)
		goto out_null;

	/* Update timestamp for garbage collection purposes */
	tpheur->th_last_access = tcp_now;
	*headarg = head;

	return (tpheur);

out_null:
	tcp_heuristic_unlock(head);
	return (NULL);
}
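
/* Clear the loss counters selected by 'flags' once the corresponding feature succeeded. */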
static void tcp_heuristic_reset_loss(struct tcpcb *tp, u_int8_t flags)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	/*
	 * Don't attempt to create it! Keep the heuristics clean if the
	 * server does not support TFO. This reduces the lookup-cost on
	 * our side.
	 */
	tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
	if (tpheur == NULL)
		return;

	if (flags & TCPCACHE_F_TFO)
		tpheur->th_tfo_cookie_loss = 0;

	if (flags & TCPCACHE_F_ECN)
		tpheur->th_ecn_loss = 0;

	if (flags & TCPCACHE_F_MPTCP)
		tpheur->th_mptcp_loss = 0;

	tcp_heuristic_unlock(head);
}

void tcp_heuristic_tfo_success(struct tcpcb *tp)
{
	tcp_heuristic_reset_loss(tp, TCPCACHE_F_TFO);
}

void tcp_heuristic_mptcp_success(struct tcpcb *tp)
{
	tcp_heuristic_reset_loss(tp, TCPCACHE_F_MPTCP);
}

void tcp_heuristic_ecn_success(struct tcpcb *tp)
{
	tcp_heuristic_reset_loss(tp, TCPCACHE_F_ECN);
}
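
/*
 * The two functions below record that TFO data made it through in the
 * receive resp. send direction, so we can stop probing the path on later
 * connections to this destination.
 */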
void tcp_heuristic_tfo_rcv_good(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;

	struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	tpheur->th_tfo_rcv_middlebox_supp = 1;

	tcp_heuristic_unlock(head);

	tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
}

void tcp_heuristic_tfo_snd_good(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;

	struct tcp_heuristic *tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	tpheur->th_tfo_snd_middlebox_supp = 1;

	tcp_heuristic_unlock(head);

	tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;
}
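
/*
 * Account a SYN-loss (or ECN-related drop) for the heuristics selected by
 * 'flags'. Once a counter reaches its threshold, the feature is backed off
 * exponentially: tcp_ecn_timeout minutes shifted left by the number of
 * losses beyond the threshold. With the default tcp_ecn_timeout of 60,
 * that is 60 minutes at the threshold, 120 after one more loss, and so on.
 */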
static void tcp_heuristic_inc_loss(struct tcpcb *tp, u_int8_t flags)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	/* Limit to prevent integer-overflow during exponential backoff */
	if ((flags & TCPCACHE_F_TFO) && tpheur->th_tfo_cookie_loss < TCP_CACHE_OVERFLOW_PROTECT)
		tpheur->th_tfo_cookie_loss++;

	if ((flags & TCPCACHE_F_ECN) && tpheur->th_ecn_loss < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_ecn_loss++;
		if (tpheur->th_ecn_loss >= ECN_MAX_SYN_LOSS) {
			tcpstat.tcps_ecn_fallback_synloss++;
			INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_synloss);
			tpheur->th_ecn_backoff = tcp_now +
			    ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
			    (tpheur->th_ecn_loss - ECN_MAX_SYN_LOSS));
		}
	}

	if ((flags & TCPCACHE_F_MPTCP) &&
	    tpheur->th_mptcp_loss < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_mptcp_loss++;
		if (tpheur->th_mptcp_loss >= MPTCP_MAX_SYN_LOSS) {
			/*
			 * Yes, we take tcp_ecn_timeout, to avoid adding yet
			 * another sysctl that is just used for testing.
			 */
			tpheur->th_mptcp_backoff = tcp_now +
			    ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
			    (tpheur->th_mptcp_loss - MPTCP_MAX_SYN_LOSS));
		}
	}

	if ((flags & TCPCACHE_F_ECN_DROPRST) &&
	    tpheur->th_ecn_droprst < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_ecn_droprst++;
		if (tpheur->th_ecn_droprst >= ECN_MAX_DROPRST) {
			tcpstat.tcps_ecn_fallback_droprst++;
			INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_droprst);
			tpheur->th_ecn_backoff = tcp_now +
			    ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
			    (tpheur->th_ecn_droprst - ECN_MAX_DROPRST));
		}
	}

	if ((flags & TCPCACHE_F_ECN_DROPRXMT) &&
	    tpheur->th_ecn_droprxmt < TCP_CACHE_OVERFLOW_PROTECT) {
		tpheur->th_ecn_droprxmt++;
		if (tpheur->th_ecn_droprxmt >= ECN_MAX_DROPRXMT) {
			tcpstat.tcps_ecn_fallback_droprxmt++;
			INP_INC_IFNET_STAT(tp->t_inpcb, ecn_fallback_droprxmt);
			tpheur->th_ecn_backoff = tcp_now +
			    ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) <<
			    (tpheur->th_ecn_droprxmt - ECN_MAX_DROPRXMT));
		}
	}

	tcp_heuristic_unlock(head);
}

void tcp_heuristic_tfo_loss(struct tcpcb *tp)
{
	tcp_heuristic_inc_loss(tp, TCPCACHE_F_TFO);
}

void tcp_heuristic_mptcp_loss(struct tcpcb *tp)
{
	tcp_heuristic_inc_loss(tp, TCPCACHE_F_MPTCP);
}

void tcp_heuristic_ecn_loss(struct tcpcb *tp)
{
	tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN);
}

void tcp_heuristic_ecn_droprst(struct tcpcb *tp)
{
	tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN_DROPRST);
}

void tcp_heuristic_ecn_droprxmt(struct tcpcb *tp)
{
	tcp_heuristic_inc_loss(tp, TCPCACHE_F_ECN_DROPRXMT);
}
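
/* A middlebox mangled our TFO attempt - fall back aggressively and stop using TFO for this destination. */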
void tcp_heuristic_tfo_middlebox(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	tpheur->th_tfo_aggressive_fallback = 1;

	tcp_heuristic_unlock(head);
}

void tcp_heuristic_ecn_aggressive(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	tpheur = tcp_getheuristic_with_lock(tp, 1, &head);
	if (tpheur == NULL)
		return;

	/* Must be done before, otherwise we will start off with expo-backoff */
	tpheur->th_ecn_backoff = tcp_now +
	    ((tcp_ecn_timeout * 60 * TCP_RETRANSHZ) << (tpheur->th_ecn_aggressive));

	/*
	 * Ugly way to prevent integer overflow... limit the counter so that
	 * the shift above cannot overflow during exponential backoff.
	 */
	if (tpheur->th_ecn_aggressive < TCP_CACHE_OVERFLOW_PROTECT)
		tpheur->th_ecn_aggressive++;

	tcp_heuristic_unlock(head);
}
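
/*
 * Decide whether the upcoming connection should attempt TCP Fast Open,
 * based on earlier cookie losses and the current backoff state.
 */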
boolean_t tcp_heuristic_do_tfo(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;

	if (disable_tcp_heuristics)
		return (TRUE);

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
	if (tpheur == NULL)
		return (TRUE);

	if (tpheur->th_tfo_aggressive_fallback) {
		/* Aggressive fallback - don't do TFO anymore... :'( */
		tcp_heuristic_unlock(head);
		return (FALSE);
	}

	if (tpheur->th_tfo_cookie_loss >= TFO_MAX_COOKIE_LOSS &&
	    (tpheur->th_tfo_fallback_trials < tcp_tfo_fallback_min ||
	    TSTMP_GT(tpheur->th_tfo_cookie_backoff, tcp_now))) {
		/*
		 * So, when we are in SYN-loss mode we try to stop using TFO
		 * for the next 'tcp_tfo_fallback_min' connections. That way,
		 * we are sure that never more than 1 out of tcp_tfo_fallback_min
		 * connections will suffer from our nice little middlebox.
		 *
		 * After that we first wait for 2 minutes. If we fail again,
		 * we wait for yet another 60 minutes.
		 */
		tpheur->th_tfo_fallback_trials++;
		if (tpheur->th_tfo_fallback_trials >= tcp_tfo_fallback_min &&
		    !tpheur->th_tfo_in_backoff) {
			if (tpheur->th_tfo_cookie_loss == TFO_MAX_COOKIE_LOSS)
				/* Backoff for 2 minutes */
				tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 2 * TCP_RETRANSHZ);
			else
				/* Backoff for 60 minutes */
				tpheur->th_tfo_cookie_backoff = tcp_now + (60 * 60 * TCP_RETRANSHZ);

			tpheur->th_tfo_in_backoff = 1;
		}

		tcp_heuristic_unlock(head);
		return (FALSE);
	}

	/*
	 * We give it a new shot, set trials back to 0. This allows us to
	 * start counting again from zero in case we get yet another SYN-loss.
	 */
	tpheur->th_tfo_fallback_trials = 0;
	tpheur->th_tfo_in_backoff = 0;

	if (tpheur->th_tfo_rcv_middlebox_supp)
		tp->t_tfo_flags |= TFO_F_NO_RCVPROBING;
	if (tpheur->th_tfo_snd_middlebox_supp)
		tp->t_tfo_flags |= TFO_F_NO_SNDPROBING;

	tcp_heuristic_unlock(head);

	return (TRUE);
}

boolean_t tcp_heuristic_do_mptcp(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;
	boolean_t ret = TRUE;

	if (disable_tcp_heuristics)
		return (TRUE);

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
	if (tpheur == NULL)
		return (ret);

	if (TSTMP_GT(tpheur->th_mptcp_backoff, tcp_now))
		ret = FALSE;

	tcp_heuristic_unlock(head);

	return (ret);
}

boolean_t tcp_heuristic_do_ecn(struct tcpcb *tp)
{
	struct tcp_heuristics_head *head;
	struct tcp_heuristic *tpheur;
	boolean_t ret = TRUE;

	if (disable_tcp_heuristics)
		return (TRUE);

	/* Get the tcp-heuristic. */
	tpheur = tcp_getheuristic_with_lock(tp, 0, &head);
	if (tpheur == NULL)
		return (ret);

	if (TSTMP_GT(tpheur->th_ecn_backoff, tcp_now)) {
		ret = FALSE;
	} else {
		/* Reset the following counters to start re-evaluating */
		if (tpheur->th_ecn_droprst >= ECN_RETRY_LIMIT)
			tpheur->th_ecn_droprst = 0;
		if (tpheur->th_ecn_droprxmt >= ECN_RETRY_LIMIT)
			tpheur->th_ecn_droprxmt = 0;
	}

	tcp_heuristic_unlock(head);

	return (ret);
}
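
/* Drop every entry from both tables; only reachable through the sysctl handler below. */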
static void sysctl_cleartfocache(void)
{
	int i;

	for (i = 0; i < tcp_cache_size; i++) {
		struct tcp_cache_head *head = &tcp_cache[i];
		struct tcp_cache *tpcache, *tmp;
		struct tcp_heuristics_head *hhead = &tcp_heuristics[i];
		struct tcp_heuristic *tpheur, *htmp;

		lck_mtx_lock(&head->tch_mtx);
		SLIST_FOREACH_SAFE(tpcache, &head->tcp_caches, list, tmp) {
			SLIST_REMOVE(&head->tcp_caches, tpcache, tcp_cache, list);
			_FREE(tpcache, M_TEMP);
		}
		lck_mtx_unlock(&head->tch_mtx);

		lck_mtx_lock(&hhead->thh_mtx);
		SLIST_FOREACH_SAFE(tpheur, &hhead->tcp_heuristics, list, htmp) {
			SLIST_REMOVE(&hhead->tcp_heuristics, tpheur, tcp_heuristic, list);
			_FREE(tpheur, M_TEMP);
		}
		lck_mtx_unlock(&hhead->thh_mtx);
	}
}

/* This sysctl is useful for testing purposes only */
static int tcpcleartfo = 0;

static int sysctl_cleartfo SYSCTL_HANDLER_ARGS
{
#pragma unused(arg1, arg2)
	int error = 0, val, oldval = tcpcleartfo;

	val = oldval;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || !req->newptr)
		return (error);

	/*
	 * The actual value does not matter. If the value is set, it triggers
	 * the clearing of the TFO cache. If a future implementation does not
	 * use the route entry to hold the TFO cache, replace the route sysctl.
	 */
	if (val != oldval)
		sysctl_cleartfocache();

	tcpcleartfo = val;

	return (error);
}

SYSCTL_PROC(_net_inet_tcp, OID_AUTO, clear_tfocache, CTLTYPE_INT | CTLFLAG_RW |
	CTLFLAG_LOCKED, &tcpcleartfo, 0, &sysctl_cleartfo, "I",
	"Toggle to clear the TFO destination based heuristic cache");
void tcp_cache_init(void)
{
	uint64_t sane_size_meg = sane_size / 1024 / 1024;
	int i;

	/*
	 * On machines with <100MB of memory this will result in a (full) cache-size
	 * of 32 entries, thus 32 * 5 * 64bytes = 10KB. (about 0.01 %)
	 * On machines with > 4GB of memory, we have a cache-size of 1024 entries,
	 * thus about 327KB.
	 *
	 * Side-note: we convert to u_int32_t. If sane_size is more than
	 * 16000 TB, we lose precision. But, who cares? :)
	 */
	tcp_cache_size = tcp_cache_roundup2((u_int32_t)(sane_size_meg >> 2));
	if (tcp_cache_size < 32)
		tcp_cache_size = 32;
	else if (tcp_cache_size > 1024)
		tcp_cache_size = 1024;

	tcp_cache = _MALLOC(sizeof(struct tcp_cache_head) * tcp_cache_size,
	    M_TEMP, M_ZERO);
	if (tcp_cache == NULL)
		panic("Allocating tcp_cache failed at boot-time!");

	tcp_cache_mtx_grp_attr = lck_grp_attr_alloc_init();
	tcp_cache_mtx_grp = lck_grp_alloc_init("tcpcache", tcp_cache_mtx_grp_attr);
	tcp_cache_mtx_attr = lck_attr_alloc_init();

	tcp_heuristics = _MALLOC(sizeof(struct tcp_heuristics_head) * tcp_cache_size,
	    M_TEMP, M_ZERO);
	if (tcp_heuristics == NULL)
		panic("Allocating tcp_heuristic failed at boot-time!");

	tcp_heuristic_mtx_grp_attr = lck_grp_attr_alloc_init();
	tcp_heuristic_mtx_grp = lck_grp_alloc_init("tcpheuristic", tcp_heuristic_mtx_grp_attr);
	tcp_heuristic_mtx_attr = lck_attr_alloc_init();

	for (i = 0; i < tcp_cache_size; i++) {
		lck_mtx_init(&tcp_cache[i].tch_mtx, tcp_cache_mtx_grp,
		    tcp_cache_mtx_attr);
		SLIST_INIT(&tcp_cache[i].tcp_caches);

		lck_mtx_init(&tcp_heuristics[i].thh_mtx, tcp_heuristic_mtx_grp,
		    tcp_heuristic_mtx_attr);
		SLIST_INIT(&tcp_heuristics[i].tcp_heuristics);
	}

	tcp_cache_hash_seed = RandomULong();
}
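
/*
 * Rough usage sketch (the real call sites live in the TCP input/output and
 * MPTCP paths, not in this file): before sending a SYN, the stack asks
 * tcp_heuristic_do_tfo()/tcp_heuristic_do_ecn()/tcp_heuristic_do_mptcp()
 * whether the feature should be attempted, and afterwards reports the
 * outcome through the corresponding _success()/_loss() functions so that
 * future connections benefit from the recorded history.
 */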