2  * Copyright (c) 2000-2017 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 
  30  *      The Regents of the University of California.  All rights reserved. 
  32  * Redistribution and use in source and binary forms, with or without 
  33  * modification, are permitted provided that the following conditions 
  35  * 1. Redistributions of source code must retain the above copyright 
  36  *    notice, this list of conditions and the following disclaimer. 
  37  * 2. Redistributions in binary form must reproduce the above copyright 
  38  *    notice, this list of conditions and the following disclaimer in the 
  39  *    documentation and/or other materials provided with the distribution. 
  40  * 3. All advertising materials mentioning features or use of this software 
  41  *    must display the following acknowledgement: 
  42  *      This product includes software developed by the University of 
  43  *      California, Berkeley and its contributors. 
  44  * 4. Neither the name of the University nor the names of its contributors 
  45  *    may be used to endorse or promote products derived from this software 
  46  *    without specific prior written permission. 
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  60  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 
  61  * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.11 2001/08/22 00:59:12 silby Exp $ 
  65 #include <sys/param.h> 
  66 #include <sys/systm.h> 
  67 #include <sys/kernel.h> 
  69 #include <sys/sysctl.h> 
  70 #include <sys/socket.h> 
  71 #include <sys/socketvar.h> 
  72 #include <sys/protosw.h> 
  73 #include <sys/domain.h> 
  74 #include <sys/mcache.h> 
  75 #include <sys/queue.h> 
  76 #include <kern/locks.h> 
  77 #include <kern/cpu_number.h>    /* before tcp_seq.h, for tcp_random18() */ 
  78 #include <mach/boolean.h> 
  80 #include <net/route.h> 
  81 #include <net/if_var.h> 
  82 #include <net/ntstat.h> 
  84 #include <netinet/in.h> 
  85 #include <netinet/in_systm.h> 
  86 #include <netinet/in_pcb.h> 
  88 #include <netinet6/in6_pcb.h> 
  90 #include <netinet/ip_var.h> 
  91 #include <netinet/tcp.h> 
  92 #include <netinet/tcp_cache.h> 
  93 #include <netinet/tcp_fsm.h> 
  94 #include <netinet/tcp_seq.h> 
  95 #include <netinet/tcp_timer.h> 
  96 #include <netinet/tcp_var.h> 
  97 #include <netinet/tcp_cc.h> 
  99 #include <netinet6/tcp6_var.h> 
 101 #include <netinet/tcpip.h> 
 103 #include <netinet/tcp_debug.h> 
 105 #include <sys/kdebug.h> 
 106 #include <mach/sdt.h> 
 107 #include <netinet/mptcp_var.h> 
 109 /* Max number of times a stretch ack can be delayed on a connection */ 
 110 #define TCP_STRETCHACK_DELAY_THRESHOLD  5 
 113  * If the host processor has been sleeping for too long, this is the threshold 
 114  * used to avoid sending stale retransmissions. 
 116 #define TCP_SLEEP_TOO_LONG      (10 * 60 * 1000) /* 10 minutes in ms */ 
 119 struct tcptimerlist tcp_timer_list
; 
 121 /* List of pcbs in timewait state, protected by tcbinfo's ipi_lock */ 
 122 struct tcptailq tcp_tw_tailq
; 
 125 sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
 
 131         s 
= tt 
* 1000 / TCP_RETRANSHZ
;; 
 133         error 
= sysctl_handle_int(oidp
, &s
, 0, req
); 
 134         if (error 
|| !req
->newptr
) { 
 138         tt 
= s 
* TCP_RETRANSHZ 
/ 1000; 
 144         SYSCTL_SKMEM_UPDATE_AT_OFFSET(arg2
, *(int*)arg1
); 
 149 int     tcp_keepinit 
= TCPTV_KEEP_INIT
; 
 150 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPINIT
, keepinit
, 
 151     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 152     &tcp_keepinit
, offsetof(skmem_sysctl
, tcp
.keepinit
), 
 153     sysctl_msec_to_ticks
, "I", ""); 
 155 int     tcp_keepidle 
= TCPTV_KEEP_IDLE
; 
 156 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPIDLE
, keepidle
, 
 157     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 158     &tcp_keepidle
, offsetof(skmem_sysctl
, tcp
.keepidle
), 
 159     sysctl_msec_to_ticks
, "I", ""); 
 161 int     tcp_keepintvl 
= TCPTV_KEEPINTVL
; 
 162 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPINTVL
, keepintvl
, 
 163     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 164     &tcp_keepintvl
, offsetof(skmem_sysctl
, tcp
.keepintvl
), 
 165     sysctl_msec_to_ticks
, "I", ""); 
 167 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, keepcnt
, 
 168     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 169     int, tcp_keepcnt
, TCPTV_KEEPCNT
, "number of times to repeat keepalive"); 
 171 int     tcp_msl 
= TCPTV_MSL
; 
 172 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, msl
, 
 173     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 174     &tcp_msl
, offsetof(skmem_sysctl
, tcp
.msl
), 
 175     sysctl_msec_to_ticks
, "I", "Maximum segment lifetime"); 
 176 #else /* SYSCTL_SKMEM */ 
 178 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPINIT
, keepinit
, 
 179     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 180     &tcp_keepinit
, 0, sysctl_msec_to_ticks
, "I", ""); 
 183 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPIDLE
, keepidle
, 
 184     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 185     &tcp_keepidle
, 0, sysctl_msec_to_ticks
, "I", ""); 
 188 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_KEEPINTVL
, keepintvl
, 
 189     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 190     &tcp_keepintvl
, 0, sysctl_msec_to_ticks
, "I", ""); 
 193 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, keepcnt
, 
 194     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 195     &tcp_keepcnt
, 0, "number of times to repeat keepalive"); 
 198 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, msl
, 
 199     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 200     &tcp_msl
, 0, sysctl_msec_to_ticks
, "I", "Maximum segment lifetime"); 
 201 #endif /* SYSCTL_SKMEM */ 
 204  * Avoid DoS via TCP Robustness in Persist Condition 
 205  * (see http://www.ietf.org/id/draft-ananth-tcpm-persist-02.txt) 
 206  * by allowing a system wide maximum persistence timeout value when in 
 207  * Zero Window Probe mode. 
 209  * Expressed in milliseconds to be consistent with other timeout-related 
 210  * values; the TCP socket option is in seconds. 
 213 u_int32_t tcp_max_persist_timeout 
= 0; 
 214 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, max_persist_timeout
, 
 215     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 216     &tcp_max_persist_timeout
, offsetof(skmem_sysctl
, tcp
.max_persist_timeout
), 
 217     sysctl_msec_to_ticks
, "I", "Maximum persistence timeout for ZWP"); 
 218 #else /* SYSCTL_SKMEM */ 
 219 u_int32_t tcp_max_persist_timeout 
= 0; 
 220 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, max_persist_timeout
, 
 221     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 222     &tcp_max_persist_timeout
, 0, sysctl_msec_to_ticks
, "I", 
 223     "Maximum persistence timeout for ZWP"); 
 224 #endif /* SYSCTL_SKMEM */ 
 226 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, always_keepalive
, 
 227     CTLFLAG_RW 
| CTLFLAG_LOCKED
, static int, always_keepalive
, 0, 
 228     "Assume SO_KEEPALIVE on all TCP connections"); 
 231  * This parameter determines how long the timer list will stay in fast or 
 232  * quick mode even though all connections are idle. In this state, the 
 233  * timer will run more frequently anticipating new data. 
 235 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, timer_fastmode_idlemax
, 
 236     CTLFLAG_RW 
| CTLFLAG_LOCKED
, int, timer_fastmode_idlemax
, 
 237     TCP_FASTMODE_IDLERUN_MAX
, "Maximum idle generations in fast mode"); 
 240  * See tcp_syn_backoff[] for interval values between SYN retransmits; 
 241  * the value set below defines the number of retransmits, before we 
 242  * disable the timestamp and window scaling options during subsequent 
 243  * SYN retransmits.  Setting it to 0 disables the dropping off of those 
 246 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, broken_peer_syn_rexmit_thres
, 
 247     CTLFLAG_RW 
| CTLFLAG_LOCKED
, static int, tcp_broken_peer_syn_rxmit_thres
, 
 248     10, "Number of retransmitted SYNs before disabling RFC 1323 " 
 249     "options on local connections"); 
 251 static int tcp_timer_advanced 
= 0; 
 252 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, tcp_timer_advanced
, 
 253     CTLFLAG_RD 
| CTLFLAG_LOCKED
, &tcp_timer_advanced
, 0, 
 254     "Number of times one of the timers was advanced"); 
 256 static int tcp_resched_timerlist 
= 0; 
 257 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, tcp_resched_timerlist
, 
 258     CTLFLAG_RD 
| CTLFLAG_LOCKED
, &tcp_resched_timerlist
, 0, 
 259     "Number of times timer list was rescheduled as part of processing a packet"); 
 261 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, pmtud_blackhole_detection
, 
 262     CTLFLAG_RW 
| CTLFLAG_LOCKED
, int, tcp_pmtud_black_hole_detect
, 1, 
 263     "Path MTU Discovery Black Hole Detection"); 
 265 SYSCTL_SKMEM_TCP_INT(OID_AUTO
, pmtud_blackhole_mss
, 
 266     CTLFLAG_RW 
| CTLFLAG_LOCKED
, int, tcp_pmtud_black_hole_mss
, 1200, 
 267     "Path MTU Discovery Black Hole Detection lowered MSS"); 
 269 static u_int32_t tcp_mss_rec_medium 
= 1200; 
 270 static u_int32_t tcp_mss_rec_low 
= 512; 
 272 #define TCP_REPORT_STATS_INTERVAL       43200 /* 12 hours, in seconds */ 
 273 int tcp_report_stats_interval 
= TCP_REPORT_STATS_INTERVAL
; 
 275 /* performed garbage collection of "used" sockets */ 
 276 static boolean_t tcp_gc_done 
= FALSE
; 
 278 /* max idle probes */ 
 279 int     tcp_maxpersistidle 
= TCPTV_KEEP_IDLE
; 
 282  * TCP delack timer is set to 100 ms. Since the processing of timer list 
 283  * in fast mode will happen no faster than 100 ms, the delayed ack timer 
 284  * will fire somewhere between 100 and 200 ms. 
 286 int     tcp_delack 
= TCP_RETRANSHZ 
/ 10; 
 290  * MP_JOIN retransmission of 3rd ACK will be every 500 msecs without backoff 
 292 int     tcp_jack_rxmt 
= TCP_RETRANSHZ 
/ 2; 
 295 static boolean_t tcp_itimer_done 
= FALSE
; 
 297 static void tcp_remove_timer(struct tcpcb 
*tp
); 
 298 static void tcp_sched_timerlist(uint32_t offset
); 
 299 static u_int32_t 
tcp_run_conn_timer(struct tcpcb 
*tp
, u_int16_t 
*mode
, 
 300     u_int16_t probe_if_index
); 
 301 static void tcp_sched_timers(struct tcpcb 
*tp
); 
 302 static inline void tcp_set_lotimer_index(struct tcpcb 
*); 
 303 __private_extern__ 
void tcp_remove_from_time_wait(struct inpcb 
*inp
); 
 304 static inline void tcp_update_mss_core(struct tcpcb 
*tp
, struct ifnet 
*ifp
); 
 305 __private_extern__ 
void tcp_report_stats(void); 
 307 static  u_int64_t tcp_last_report_time
; 
 310  * Structure to store previously reported stats so that we can send 
 311  * incremental changes in each report interval. 
 313 struct tcp_last_report_stats 
{ 
 314         u_int32_t       tcps_connattempt
; 
 315         u_int32_t       tcps_accepts
; 
 316         u_int32_t       tcps_ecn_client_setup
; 
 317         u_int32_t       tcps_ecn_server_setup
; 
 318         u_int32_t       tcps_ecn_client_success
; 
 319         u_int32_t       tcps_ecn_server_success
; 
 320         u_int32_t       tcps_ecn_not_supported
; 
 321         u_int32_t       tcps_ecn_lost_syn
; 
 322         u_int32_t       tcps_ecn_lost_synack
; 
 323         u_int32_t       tcps_ecn_recv_ce
; 
 324         u_int32_t       tcps_ecn_recv_ece
; 
 325         u_int32_t       tcps_ecn_sent_ece
; 
 326         u_int32_t       tcps_ecn_conn_recv_ce
; 
 327         u_int32_t       tcps_ecn_conn_recv_ece
; 
 328         u_int32_t       tcps_ecn_conn_plnoce
; 
 329         u_int32_t       tcps_ecn_conn_pl_ce
; 
 330         u_int32_t       tcps_ecn_conn_nopl_ce
; 
 331         u_int32_t       tcps_ecn_fallback_synloss
; 
 332         u_int32_t       tcps_ecn_fallback_reorder
; 
 333         u_int32_t       tcps_ecn_fallback_ce
; 
 335         /* TFO-related statistics */ 
 336         u_int32_t       tcps_tfo_syn_data_rcv
; 
 337         u_int32_t       tcps_tfo_cookie_req_rcv
; 
 338         u_int32_t       tcps_tfo_cookie_sent
; 
 339         u_int32_t       tcps_tfo_cookie_invalid
; 
 340         u_int32_t       tcps_tfo_cookie_req
; 
 341         u_int32_t       tcps_tfo_cookie_rcv
; 
 342         u_int32_t       tcps_tfo_syn_data_sent
; 
 343         u_int32_t       tcps_tfo_syn_data_acked
; 
 344         u_int32_t       tcps_tfo_syn_loss
; 
 345         u_int32_t       tcps_tfo_blackhole
; 
 346         u_int32_t       tcps_tfo_cookie_wrong
; 
 347         u_int32_t       tcps_tfo_no_cookie_rcv
; 
 348         u_int32_t       tcps_tfo_heuristics_disable
; 
 349         u_int32_t       tcps_tfo_sndblackhole
; 
 351         /* MPTCP-related statistics */ 
 352         u_int32_t       tcps_mptcp_handover_attempt
; 
 353         u_int32_t       tcps_mptcp_interactive_attempt
; 
 354         u_int32_t       tcps_mptcp_aggregate_attempt
; 
 355         u_int32_t       tcps_mptcp_fp_handover_attempt
; 
 356         u_int32_t       tcps_mptcp_fp_interactive_attempt
; 
 357         u_int32_t       tcps_mptcp_fp_aggregate_attempt
; 
 358         u_int32_t       tcps_mptcp_heuristic_fallback
; 
 359         u_int32_t       tcps_mptcp_fp_heuristic_fallback
; 
 360         u_int32_t       tcps_mptcp_handover_success_wifi
; 
 361         u_int32_t       tcps_mptcp_handover_success_cell
; 
 362         u_int32_t       tcps_mptcp_interactive_success
; 
 363         u_int32_t       tcps_mptcp_aggregate_success
; 
 364         u_int32_t       tcps_mptcp_fp_handover_success_wifi
; 
 365         u_int32_t       tcps_mptcp_fp_handover_success_cell
; 
 366         u_int32_t       tcps_mptcp_fp_interactive_success
; 
 367         u_int32_t       tcps_mptcp_fp_aggregate_success
; 
 368         u_int32_t       tcps_mptcp_handover_cell_from_wifi
; 
 369         u_int32_t       tcps_mptcp_handover_wifi_from_cell
; 
 370         u_int32_t       tcps_mptcp_interactive_cell_from_wifi
; 
 371         u_int64_t       tcps_mptcp_handover_cell_bytes
; 
 372         u_int64_t       tcps_mptcp_interactive_cell_bytes
; 
 373         u_int64_t       tcps_mptcp_aggregate_cell_bytes
; 
 374         u_int64_t       tcps_mptcp_handover_all_bytes
; 
 375         u_int64_t       tcps_mptcp_interactive_all_bytes
; 
 376         u_int64_t       tcps_mptcp_aggregate_all_bytes
; 
 377         u_int32_t       tcps_mptcp_back_to_wifi
; 
 378         u_int32_t       tcps_mptcp_wifi_proxy
; 
 379         u_int32_t       tcps_mptcp_cell_proxy
; 
 380         u_int32_t       tcps_mptcp_triggered_cell
; 
 384 /* Returns true if the timer is on the timer list */ 
 385 #define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST) 
 387 /* Run the TCP timerlist at least once every hour */ 
 388 #define TCP_TIMERLIST_MAX_OFFSET (60 * 60 * TCP_RETRANSHZ) 
 391 static void add_to_time_wait_locked(struct tcpcb 
*tp
, uint32_t delay
); 
 392 static boolean_t 
tcp_garbage_collect(struct inpcb 
*, int); 
 394 #define TIMERENTRY_TO_TP(te) ((struct tcpcb *)((uintptr_t)te - offsetof(struct tcpcb, tentry.le.le_next))) 
 396 #define VERIFY_NEXT_LINK(elm, field) do {       \ 
 397         if (LIST_NEXT((elm),field) != NULL &&   \ 
 398             LIST_NEXT((elm),field)->field.le_prev !=    \ 
 399                 &((elm)->field.le_next))        \ 
 400                 panic("Bad link elm %p next->prev != elm", (elm));      \ 
 403 #define VERIFY_PREV_LINK(elm, field) do {       \ 
 404         if (*(elm)->field.le_prev != (elm))     \ 
 405                 panic("Bad link elm %p prev->next != elm", (elm));      \ 
 408 #define TCP_SET_TIMER_MODE(mode, i) do { \ 
 409         if (IS_TIMER_HZ_10MS(i)) \ 
 410                 (mode) |= TCP_TIMERLIST_10MS_MODE; \ 
 411         else if (IS_TIMER_HZ_100MS(i)) \ 
 412                 (mode) |= TCP_TIMERLIST_100MS_MODE; \ 
 414                 (mode) |= TCP_TIMERLIST_500MS_MODE; \ 
 417 #if (DEVELOPMENT || DEBUG) 
 418 SYSCTL_UINT(_net_inet_tcp
, OID_AUTO
, mss_rec_medium
, 
 419     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_mss_rec_medium
, 0, 
 420     "Medium MSS based on recommendation in link status report"); 
 421 SYSCTL_UINT(_net_inet_tcp
, OID_AUTO
, mss_rec_low
, 
 422     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_mss_rec_low
, 0, 
 423     "Low MSS based on recommendation in link status report"); 
 425 static int32_t tcp_change_mss_recommended 
= 0; 
 427 sysctl_change_mss_recommended SYSCTL_HANDLER_ARGS
 
 429 #pragma unused(oidp, arg1, arg2) 
 430         int i
, err 
= 0, changed 
= 0; 
 432         struct if_link_status ifsr
; 
 433         struct if_cellular_status_v1 
*new_cell_sr
; 
 434         err 
= sysctl_io_number(req
, tcp_change_mss_recommended
, 
 435             sizeof(int32_t), &i
, &changed
); 
 437                 ifnet_head_lock_shared(); 
 438                 TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) { 
 439                         if (IFNET_IS_CELLULAR(ifp
)) { 
 440                                 bzero(&ifsr
, sizeof(ifsr
)); 
 441                                 new_cell_sr 
= &ifsr
.ifsr_u
.ifsr_cell
.if_cell_u
.if_status_v1
; 
 442                                 ifsr
.ifsr_version 
= IF_CELLULAR_STATUS_REPORT_CURRENT_VERSION
; 
 443                                 ifsr
.ifsr_len 
= sizeof(*new_cell_sr
); 
 445                                 /* Set MSS recommended */ 
 446                                 new_cell_sr
->valid_bitmask 
|= IF_CELL_UL_MSS_RECOMMENDED_VALID
; 
 447                                 new_cell_sr
->mss_recommended 
= i
; 
 448                                 err 
= ifnet_link_status_report(ifp
, new_cell_sr
, sizeof(new_cell_sr
)); 
 450                                         tcp_change_mss_recommended 
= i
; 
 461 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, change_mss_recommended
, 
 462     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_change_mss_recommended
, 
 463     0, sysctl_change_mss_recommended
, "IU", "Change MSS recommended"); 
 465 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, report_stats_interval
, 
 466     CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_report_stats_interval
, 0, 
 467     "Report stats interval"); 
 468 #endif /* (DEVELOPMENT || DEBUG) */ 
 471  * Helper to compare two timers. If there is a reset of the sign bit, 
 472  * it is safe to assume that the timer has wrapped around. By doing a 
 473  * signed comparison, we take care of wrap around such that the value 
 474  * with the sign bit reset is actually ahead of the other. 
 477 timer_diff(uint32_t t1
, uint32_t toff1
, uint32_t t2
, uint32_t toff2
) 
 479         return (int32_t)((t1 
+ toff1
) - (t2 
+ toff2
)); 
 483  * Add to tcp timewait list, delay is given in milliseconds. 
 486 add_to_time_wait_locked(struct tcpcb 
*tp
, uint32_t delay
) 
 488         struct inpcbinfo 
*pcbinfo 
= &tcbinfo
; 
 489         struct inpcb 
*inp 
= tp
->t_inpcb
; 
 492         /* pcb list should be locked when we get here */ 
 493         LCK_RW_ASSERT(pcbinfo
->ipi_lock
, LCK_RW_ASSERT_EXCLUSIVE
); 
 495         /* We may get here multiple times, so check */ 
 496         if (!(inp
->inp_flags2 
& INP2_TIMEWAIT
)) { 
 497                 pcbinfo
->ipi_twcount
++; 
 498                 inp
->inp_flags2 
|= INP2_TIMEWAIT
; 
 500                 /* Remove from global inp list */ 
 501                 LIST_REMOVE(inp
, inp_list
); 
 503                 TAILQ_REMOVE(&tcp_tw_tailq
, tp
, t_twentry
); 
 506         /* Compute the time at which this socket can be closed */ 
 507         timer 
= tcp_now 
+ delay
; 
 509         /* We will use the TCPT_2MSL timer for tracking this delay */ 
 511         if (TIMER_IS_ON_LIST(tp
)) { 
 512                 tcp_remove_timer(tp
); 
 514         tp
->t_timer
[TCPT_2MSL
] = timer
; 
 516         TAILQ_INSERT_TAIL(&tcp_tw_tailq
, tp
, t_twentry
); 
 520 add_to_time_wait(struct tcpcb 
*tp
, uint32_t delay
) 
 522         struct inpcbinfo 
*pcbinfo 
= &tcbinfo
; 
 523         if (tp
->t_inpcb
->inp_socket
->so_options 
& SO_NOWAKEFROMSLEEP
) { 
 524                 socket_post_kev_msg_closed(tp
->t_inpcb
->inp_socket
); 
 527         /* 19182803: Notify nstat that connection is closing before waiting. */ 
 528         nstat_pcb_detach(tp
->t_inpcb
); 
 530         if (!lck_rw_try_lock_exclusive(pcbinfo
->ipi_lock
)) { 
 531                 socket_unlock(tp
->t_inpcb
->inp_socket
, 0); 
 532                 lck_rw_lock_exclusive(pcbinfo
->ipi_lock
); 
 533                 socket_lock(tp
->t_inpcb
->inp_socket
, 0); 
 535         add_to_time_wait_locked(tp
, delay
); 
 536         lck_rw_done(pcbinfo
->ipi_lock
); 
 538         inpcb_gc_sched(pcbinfo
, INPCB_TIMER_LAZY
); 
 541 /* If this is on time wait queue, remove it. */ 
 543 tcp_remove_from_time_wait(struct inpcb 
*inp
) 
 545         struct tcpcb 
*tp 
= intotcpcb(inp
); 
 546         if (inp
->inp_flags2 
& INP2_TIMEWAIT
) { 
 547                 TAILQ_REMOVE(&tcp_tw_tailq
, tp
, t_twentry
); 
 552 tcp_garbage_collect(struct inpcb 
*inp
, int istimewait
) 
 554         boolean_t active 
= FALSE
; 
 555         struct socket 
*so
, *mp_so 
= NULL
; 
 558         so 
= inp
->inp_socket
; 
 561         if (so
->so_flags 
& SOF_MP_SUBFLOW
) { 
 562                 mp_so 
= mptetoso(tptomptp(tp
)->mpt_mpte
); 
 563                 if (!socket_try_lock(mp_so
)) { 
 568                 mp_so
->so_usecount
++; 
 572          * Skip if still in use or busy; it would have been more efficient 
 573          * if we were to test so_usecount against 0, but this isn't possible 
 574          * due to the current implementation of tcp_dropdropablreq() where 
 575          * overflow sockets that are eligible for garbage collection have 
 576          * their usecounts set to 1. 
 578         if (!lck_mtx_try_lock_spin(&inp
->inpcb_mtx
)) { 
 583         /* Check again under the lock */ 
 584         if (so
->so_usecount 
> 1) { 
 585                 if (inp
->inp_wantcnt 
== WNT_STOPUSING
) { 
 588                 lck_mtx_unlock(&inp
->inpcb_mtx
); 
 592         if (istimewait 
&& TSTMP_GEQ(tcp_now
, tp
->t_timer
[TCPT_2MSL
]) && 
 593             tp
->t_state 
!= TCPS_CLOSED
) { 
 594                 /* Become a regular mutex */ 
 595                 lck_mtx_convert_spin(&inp
->inpcb_mtx
); 
 600          * Overflowed socket dropped from the listening queue?  Do this 
 601          * only if we are called to clean up the time wait slots, since 
 602          * tcp_dropdropablreq() considers a socket to have been fully 
 603          * dropped after add_to_time_wait() is finished. 
 604          * Also handle the case of connections getting closed by the peer 
 605          * while in the queue as seen with rdar://6422317 
 608         if (so
->so_usecount 
== 1 && 
 609             ((istimewait 
&& (so
->so_flags 
& SOF_OVERFLOW
)) || 
 610             ((tp 
!= NULL
) && (tp
->t_state 
== TCPS_CLOSED
) && 
 611             (so
->so_head 
!= NULL
) && 
 612             ((so
->so_state 
& (SS_INCOMP 
| SS_CANTSENDMORE 
| SS_CANTRCVMORE
)) == 
 613             (SS_INCOMP 
| SS_CANTSENDMORE 
| SS_CANTRCVMORE
))))) { 
 614                 if (inp
->inp_state 
!= INPCB_STATE_DEAD
) { 
 615                         /* Become a regular mutex */ 
 616                         lck_mtx_convert_spin(&inp
->inpcb_mtx
); 
 618                         if (SOCK_CHECK_DOM(so
, PF_INET6
)) { 
 624                 VERIFY(so
->so_usecount 
> 0); 
 626                 if (inp
->inp_wantcnt 
== WNT_STOPUSING
) { 
 629                 lck_mtx_unlock(&inp
->inpcb_mtx
); 
 631         } else if (inp
->inp_wantcnt 
!= WNT_STOPUSING
) { 
 632                 lck_mtx_unlock(&inp
->inpcb_mtx
); 
 638          * We get here because the PCB is no longer searchable 
 639          * (WNT_STOPUSING); detach (if needed) and dispose if it is dead 
 640          * (usecount is 0).  This covers all cases, including overflow 
 641          * sockets and those that are considered as "embryonic", 
 642          * i.e. created by sonewconn() in TCP input path, and have 
 643          * not yet been committed.  For the former, we reduce the usecount 
 644          *  to 0 as done by the code above.  For the latter, the usecount 
 645          * would have reduced to 0 as part of calling soabort() when the 
 646          * socket is dropped at the end of tcp_input(). 
 648         if (so
->so_usecount 
== 0) { 
 649                 DTRACE_TCP4(state__change
, void, NULL
, struct inpcb 
*, inp
, 
 650                     struct tcpcb 
*, tp
, int32_t, TCPS_CLOSED
); 
 651                 /* Become a regular mutex */ 
 652                 lck_mtx_convert_spin(&inp
->inpcb_mtx
); 
 655                  * If this tp still happens to be on the timer list, 
 658                 if (TIMER_IS_ON_LIST(tp
)) { 
 659                         tcp_remove_timer(tp
); 
 662                 if (inp
->inp_state 
!= INPCB_STATE_DEAD
) { 
 664                         if (SOCK_CHECK_DOM(so
, PF_INET6
)) { 
 672                         mptcp_subflow_del(tptomptp(tp
)->mpt_mpte
, tp
->t_mpsub
); 
 674                         /* so is now unlinked from mp_so - let's drop the lock */ 
 675                         socket_unlock(mp_so
, 1); 
 684         lck_mtx_unlock(&inp
->inpcb_mtx
); 
 689                 socket_unlock(mp_so
, 1); 
 696  * TCP garbage collector callback (inpcb_timer_func_t). 
 698  * Returns the number of pcbs that will need to be gc-ed soon, 
 699  * returning > 0 will keep timer active. 
 702 tcp_gc(struct inpcbinfo 
*ipi
) 
 704         struct inpcb 
*inp
, *nxt
; 
 705         struct tcpcb 
*tw_tp
, *tw_ntp
; 
 710         static int tws_checked 
= 0; 
 713         KERNEL_DEBUG(DBG_FNC_TCP_SLOW 
| DBG_FUNC_START
, 0, 0, 0, 0, 0); 
 716          * Update tcp_now here as it may get used while 
 717          * processing the slow timer. 
 719         calculate_tcp_clock(); 
 722          * Garbage collect socket/tcpcb: We need to acquire the list lock 
 723          * exclusively to do this 
 726         if (lck_rw_try_lock_exclusive(ipi
->ipi_lock
) == FALSE
) { 
 727                 /* don't sweat it this time; cleanup was done last time */ 
 728                 if (tcp_gc_done 
== TRUE
) { 
 730                         KERNEL_DEBUG(DBG_FNC_TCP_SLOW 
| DBG_FUNC_END
, 
 731                             tws_checked
, cur_tw_slot
, 0, 0, 0); 
 732                         /* Lock upgrade failed, give up this round */ 
 733                         atomic_add_32(&ipi
->ipi_gc_req
.intimer_fast
, 1); 
 736                 /* Upgrade failed, lost lock now take it again exclusive */ 
 737                 lck_rw_lock_exclusive(ipi
->ipi_lock
); 
 741         LIST_FOREACH_SAFE(inp
, &tcb
, inp_list
, nxt
) { 
 742                 if (tcp_garbage_collect(inp
, 0)) { 
 743                         atomic_add_32(&ipi
->ipi_gc_req
.intimer_fast
, 1); 
 747         /* Now cleanup the time wait ones */ 
 748         TAILQ_FOREACH_SAFE(tw_tp
, &tcp_tw_tailq
, t_twentry
, tw_ntp
) { 
 750                  * We check the timestamp here without holding the 
 751                  * socket lock for better performance. If there are 
 752                  * any pcbs in time-wait, the timer will get rescheduled. 
 753                  * Hence some error in this check can be tolerated. 
 755                  * Sometimes a socket on time-wait queue can be closed if 
 756                  * 2MSL timer expired but the application still has a 
 759                 if (tw_tp
->t_state 
== TCPS_CLOSED 
|| 
 760                     TSTMP_GEQ(tcp_now
, tw_tp
->t_timer
[TCPT_2MSL
])) { 
 761                         if (tcp_garbage_collect(tw_tp
->t_inpcb
, 1)) { 
 762                                 atomic_add_32(&ipi
->ipi_gc_req
.intimer_lazy
, 1); 
 767         /* take into account pcbs that are still in time_wait_slots */ 
 768         atomic_add_32(&ipi
->ipi_gc_req
.intimer_lazy
, ipi
->ipi_twcount
); 
 770         lck_rw_done(ipi
->ipi_lock
); 
 772         /* Clean up the socache while we are here */ 
 773         if (so_cache_timer()) { 
 774                 atomic_add_32(&ipi
->ipi_gc_req
.intimer_lazy
, 1); 
 777         KERNEL_DEBUG(DBG_FNC_TCP_SLOW 
| DBG_FUNC_END
, tws_checked
, 
 778             cur_tw_slot
, 0, 0, 0); 
 784  * Cancel all timers for TCP tp. 
 787 tcp_canceltimers(struct tcpcb 
*tp
) 
 791         tcp_remove_timer(tp
); 
 792         for (i 
= 0; i 
< TCPT_NTIMERS
; i
++) { 
 795         tp
->tentry
.timer_start 
= tcp_now
; 
 796         tp
->tentry
.index 
= TCPT_NONE
; 
 799 int     tcp_syn_backoff
[TCP_MAXRXTSHIFT 
+ 1] = 
 800 { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; 
 802 int     tcp_backoff
[TCP_MAXRXTSHIFT 
+ 1] = 
 803 { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; 
 805 static int tcp_totbackoff 
= 511;        /* sum of tcp_backoff[] */ 
 /*
  * tcp_rexmt_save_state: snapshot congestion and RTT state of tp into the
  * *_prev fields.
  *
  * When TSTMP_SUPPORTED(tp):
  *   snd_ssthresh_prev = max(snd_max - snd_una, snd_ssthresh)
  *   snd_recover_prev  = snd_recover
  * In the other section (timestamps not supported, per the text below):
  *   snd_cwnd_prev, snd_ssthresh_prev and snd_recover_prev are copied
  *   verbatim, and TF_WASFRECOVERY is set or cleared to mirror
  *   IN_FASTRECOVERY(tp).
  * The final three statements store the unscaled srtt (+2) and rttvar and
  * clear TF_RECOMPUTE_RTT; judging by their indentation they run on both
  * paths.
  *
  * NOTE(review): the return type/linkage, the declaration of fsize and the
  * "else" lines are not visible in this listing.
  */
 808 tcp_rexmt_save_state(struct tcpcb 
*tp
) 
 811         if (TSTMP_SUPPORTED(tp
)) { 
 813                  * Since timestamps are supported on the connection, 
 814                  * we can do recovery as described in rfc 4015. 
 /* fsize is the amount of sent-but-unacknowledged sequence space. */
 816                 fsize 
= tp
->snd_max 
- tp
->snd_una
; 
 817                 tp
->snd_ssthresh_prev 
= max(fsize
, tp
->snd_ssthresh
); 
 818                 tp
->snd_recover_prev 
= tp
->snd_recover
; 
 821                  * Timestamp option is not supported on this connection. 
 822                  * Record ssthresh and cwnd so they can 
 823                  * be recovered if this turns out to be a "bad" retransmit. 
 824                  * A retransmit is considered "bad" if an ACK for this 
 825                  * segment is received within RTT/2 interval; the assumption 
 826                  * here is that the ACK was already in flight.  See 
 827                  * "On Estimating End-to-End Network Path Properties" by 
 828                  * Allman and Paxson for more details. 
 830                 tp
->snd_cwnd_prev 
= tp
->snd_cwnd
; 
 831                 tp
->snd_ssthresh_prev 
= tp
->snd_ssthresh
; 
 832                 tp
->snd_recover_prev 
= tp
->snd_recover
; 
 /* Remember whether fast recovery was active at the time of the snapshot. */
 833                 if (IN_FASTRECOVERY(tp
)) { 
 834                         tp
->t_flags 
|= TF_WASFRECOVERY
; 
 836                         tp
->t_flags 
&= ~TF_WASFRECOVERY
; 
 /* Save srtt and rttvar with their fixed-point shifts removed. */
 839         tp
->t_srtt_prev 
= (tp
->t_srtt 
>> TCP_RTT_SHIFT
) + 2; 
 840         tp
->t_rttvar_prev 
= (tp
->t_rttvar 
>> TCP_RTTVAR_SHIFT
); 
 841         tp
->t_flagsext 
&= ~(TF_RECOMPUTE_RTT
); 
 845  * Revert to the older segment size if there is an indication that PMTU 
 846  * blackhole detection was not needed. 
 /*
  * tcp_pmtud_revert_segment_size: restore the segment size that was saved
  * in tp->t_pmtud_saved_maxopd.
  *
  * Steps visible here: VERIFY that the saved value is positive, set
  * TF_PMTUD and clear TF_BLACKHOLE, restore t_maxopd from the saved value
  * and recompute t_maxseg with the same option length as before, call the
  * congestion-control cwnd_init hook when one is provided, zero
  * t_pmtud_start_ts, bump tcpstat.tcps_pmtudbh_reverted, and finally call
  * tcp_update_mss_locked() on the connection's socket.
  *
  * NOTE(review): the return type and the declaration of optlen are not
  * visible in this listing.
  */
 849 tcp_pmtud_revert_segment_size(struct tcpcb 
*tp
) 
 853         VERIFY(tp
->t_pmtud_saved_maxopd 
> 0); 
 854         tp
->t_flags 
|= TF_PMTUD
; 
 855         tp
->t_flags 
&= ~TF_BLACKHOLE
; 
 /* Capture the option length before t_maxopd is overwritten. */
 856         optlen 
= tp
->t_maxopd 
- tp
->t_maxseg
; 
 857         tp
->t_maxopd 
= tp
->t_pmtud_saved_maxopd
; 
 858         tp
->t_maxseg 
= tp
->t_maxopd 
- optlen
; 
 861          * Reset the slow-start flight size as it 
 862          * may depend on the new MSS 
 /* cwnd_init is an optional hook; it is only called when non-NULL. */
 864         if (CC_ALGO(tp
)->cwnd_init 
!= NULL
) { 
 865                 CC_ALGO(tp
)->cwnd_init(tp
); 
 867         tp
->t_pmtud_start_ts 
= 0; 
 868         tcpstat
.tcps_pmtudbh_reverted
++; 
 870         /* change MSS according to recommendation, if there was one */ 
 871         tcp_update_mss_locked(tp
->t_inpcb
->inp_socket
, NULL
); 
 875  * TCP timer processing. 
 /*
  * tcp_timers: process the expiry of one timer on connection tp.
  *
  *   tp    - TCP control block whose timer fired.
  *   timer - identifies which timer fired.
  *
  * The sections below handle, in order: the 2MSL timeout, the retransmit
  * timeout, the persist timer, the keep-alive timer, the delayed-ACK
  * timer, the MPTCP join-ACK retransmit (TCPT_JACK_RXMT), the tail loss
  * probe, and delayed fast recovery.  Several paths replace tp with the
  * result of tcp_drop().
  *
  * NOTE(review): this listing omits the return type, the switch statement,
  * most "case" labels, the "break"/"return"/"else" lines and a few call
  * heads; the mapping of sections to TCPT_* constants above is inferred
  * from the surviving comment text and should be confirmed against the
  * full source.
  */
 878 tcp_timers(struct tcpcb 
*tp
, int timer
) 
 880         int32_t rexmt
, optlen 
= 0, idle_time 
= 0; 
 882         struct tcptemp 
*t_template
; 
 /* The connection is treated as IPv6 when INP_IPV4 is absent from inp_vflag. */
 888         int isipv6 
= (tp
->t_inpcb
->inp_vflag 
& INP_IPV4
) == 0; 
 890         u_int64_t accsleep_ms
; 
 891         u_int32_t last_sleep_ms 
= 0; 
 893         so 
= tp
->t_inpcb
->inp_socket
; 
 /* idle_time is the distance between tcp_now and the last receive time. */
 894         idle_time 
= tcp_now 
- tp
->t_rcvtime
; 
 898          * 2 MSL timeout in shutdown went off.  If we're closed but 
 899          * still waiting for peer to close and connection has been idle 
 900          * too long, or if 2MSL time is up from TIME_WAIT or FIN_WAIT_2, 
 901          * delete connection control block. 
 902          * Otherwise, (this case shouldn't happen) check again in a bit 
 903          * we keep the socket in the main list in that case. 
 906                 tcp_free_sackholes(tp
); 
 907                 if (tp
->t_state 
!= TCPS_TIME_WAIT 
&& 
 908                     tp
->t_state 
!= TCPS_FIN_WAIT_2 
&& 
 909                     ((idle_time 
> 0) && (idle_time 
< TCP_CONN_MAXIDLE(tp
)))) { 
 910                         tp
->t_timer
[TCPT_2MSL
] = OFFSET_FROM_START(tp
, 
 911                             (u_int32_t
)TCP_CONN_KEEPINTVL(tp
)); 
 919          * Retransmission timer went off.  Message has not 
 920          * been acked within retransmit interval.  Back off 
 921          * to a longer retransmit interval and retransmit one segment. 
 924                 absolutetime_to_nanoseconds(mach_absolutetime_asleep
, 
 /*
  * Divide by 1000000: presumably nanoseconds to milliseconds, given the
  * conversion call above and the _ms suffix (the call's output argument is
  * not visible in this listing).
  */
 926                 accsleep_ms 
= accsleep_ms 
/ 1000000UL; 
 /* last_sleep_ms is the sleep time accumulated since the value stored in tp. */
 927                 if (accsleep_ms 
> tp
->t_accsleep_ms
) { 
 928                         last_sleep_ms 
= accsleep_ms 
- tp
->t_accsleep_ms
; 
 931                  * Drop a connection in the retransmit timer 
 932                  * 1. If we have retransmitted more than TCP_MAXRXTSHIFT 
 934                  * 2. If the time spent in this retransmission episode is 
 935                  * more than the time limit set with TCP_RXT_CONNDROPTIME 
 937                  * 3. If TCP_RXT_FINDROP socket option was set and 
 938                  * we have already retransmitted the FIN 3 times without 
 941                 if (++tp
->t_rxtshift 
> TCP_MAXRXTSHIFT 
|| 
 942                     (tp
->t_rxt_conndroptime 
> 0 && tp
->t_rxtstart 
> 0 && 
 943                     (tcp_now 
- tp
->t_rxtstart
) >= tp
->t_rxt_conndroptime
) || 
 944                     ((tp
->t_flagsext 
& TF_RXTFINDROP
) != 0 && 
 945                     (tp
->t_flags 
& TF_SENTFIN
) != 0 && tp
->t_rxtshift 
>= 4) || 
 946                     (tp
->t_rxtshift 
> 4 && last_sleep_ms 
>= TCP_SLEEP_TOO_LONG
)) { 
 947                         if (tp
->t_state 
== TCPS_ESTABLISHED 
&& 
 948                             tp
->t_rxt_minimum_timeout 
> 0) { 
 950                                  * Avoid dropping a connection if minimum 
 951                                  * timeout is set and that time did not 
 952                                  * pass. We will retry sending 
 953                                  * retransmissions at the maximum interval 
 955                                 if (TSTMP_LT(tcp_now
, (tp
->t_rxtstart 
+ 
 956                                     tp
->t_rxt_minimum_timeout
))) { 
 /* Hold the shift just below the maximum and retransmit instead of dropping. */
 957                                         tp
->t_rxtshift 
= TCP_MAXRXTSHIFT 
- 1; 
 958                                         goto retransmit_packet
; 
 /* Account the drop under exactly one of three counters. */
 961                         if ((tp
->t_flagsext 
& TF_RXTFINDROP
) != 0) { 
 962                                 tcpstat
.tcps_rxtfindrop
++; 
 963                         } else if (last_sleep_ms 
>= TCP_SLEEP_TOO_LONG
) { 
 964                                 tcpstat
.tcps_drop_after_sleep
++; 
 966                                 tcpstat
.tcps_timeoutdrop
++; 
 968                         if (tp
->t_rxtshift 
>= TCP_MAXRXTSHIFT
) { 
 969                                 if (TCP_ECN_ENABLED(tp
)) { 
 970                                         INP_INC_IFNET_STAT(tp
->t_inpcb
, 
 973                                         INP_INC_IFNET_STAT(tp
->t_inpcb
, 
 977                         tp
->t_rxtshift 
= TCP_MAXRXTSHIFT
; 
 978                         postevent(so
, 0, EV_TIMEOUT
); 
 980                             (SO_FILT_HINT_LOCKED 
| SO_FILT_HINT_TIMEOUT
)); 
 982                         if (TCP_ECN_ENABLED(tp
) && 
 983                             tp
->t_state 
== TCPS_ESTABLISHED
) { 
 984                                 tcp_heuristic_ecn_droprxmt(tp
); 
 /* Prefer the recorded soft error over ETIMEDOUT as the drop reason. */
 987                         tp 
= tcp_drop(tp
, tp
->t_softerror 
? 
 988                             tp
->t_softerror 
: ETIMEDOUT
); 
 993                 tcpstat
.tcps_rexmttimeo
++; 
 994                 tp
->t_accsleep_ms 
= accsleep_ms
; 
 996                 if (tp
->t_rxtshift 
== 1 && 
 997                     tp
->t_state 
== TCPS_ESTABLISHED
) { 
 998                         /* Set the time at which retransmission started. */ 
 999                         tp
->t_rxtstart 
= tcp_now
; 
1002                          * if this is the first retransmit timeout, save 
1003                          * the state so that we can recover if the timeout 
1006                         tcp_rexmt_save_state(tp
); 
1009                 if ((tp
->t_rxtshift 
>= mptcp_fail_thresh
) && 
1010                     (tp
->t_state 
== TCPS_ESTABLISHED
) && 
1011                     (tp
->t_mpflags 
& TMPF_MPTCP_TRUE
)) { 
1012                         mptcp_act_on_txfail(so
); 
1015                 if (so
->so_flags 
& SOF_MP_SUBFLOW
) { 
1016                         struct mptses 
*mpte 
= tptomptp(tp
)->mpt_mpte
; 
1018                         mptcp_check_subflows_and_add(mpte
); 
1022                 if (tp
->t_adaptive_wtimo 
> 0 && 
1023                     tp
->t_rxtshift 
> tp
->t_adaptive_wtimo 
&& 
1024                     TCPS_HAVEESTABLISHED(tp
->t_state
)) { 
1025                         /* Send an event to the application */ 
1027                             (SO_FILT_HINT_LOCKED 
| 
1028                             SO_FILT_HINT_ADAPTIVE_WTIMO
)); 
1032                  * If this is a retransmit timeout after PTO, the PTO 
1035                 if (tp
->t_flagsext 
& TF_SENT_TLPROBE
) { 
1036                         tp
->t_flagsext 
&= ~(TF_SENT_TLPROBE
); 
1037                         tcpstat
.tcps_rto_after_pto
++; 
1040                 if (tp
->t_flagsext 
& TF_DELAY_RECOVERY
) { 
1042                          * Retransmit timer fired before entering recovery 
1043                          * on a connection with packet re-ordering. This 
1044                          * suggests that the reordering metrics computed 
1047                         tp
->t_reorderwin 
= 0; 
1048                         tp
->t_timer
[TCPT_DELAYFR
] = 0; 
1049                         tp
->t_flagsext 
&= ~(TF_DELAY_RECOVERY
); 
1052                 if (tp
->t_state 
== TCPS_SYN_RECEIVED
) { 
1053                         tcp_disable_tfo(tp
); 
1056                 if (!(tp
->t_tfo_flags 
& TFO_F_HEURISTIC_DONE
) && 
1057                     (tp
->t_tfo_stats 
& TFO_S_SYN_DATA_SENT
) && 
1058                     !(tp
->t_tfo_flags 
& TFO_F_NO_SNDPROBING
) && 
1059                     ((tp
->t_state 
!= TCPS_SYN_SENT 
&& tp
->t_rxtshift 
> 1) || 
1060                     tp
->t_rxtshift 
> 4)) { 
1062                          * For regular retransmissions, a first one is being 
1063                          * done for tail-loss probe. 
1064                          * Thus, if rxtshift > 1, this means we have sent the segment 
1065                          * a total of 3 times. 
1067                          * If we are in SYN-SENT state, then there is no tail-loss 
1068                          * probe thus we have to let rxtshift go up to 3. 
1070                         tcp_heuristic_tfo_middlebox(tp
); 
1072                         so
->so_error 
= ENODATA
; 
1076                         tp
->t_tfo_stats 
|= TFO_S_SEND_BLACKHOLE
; 
1077                         tcpstat
.tcps_tfo_sndblackhole
++; 
1080                 if (!(tp
->t_tfo_flags 
& TFO_F_HEURISTIC_DONE
) && 
1081                     (tp
->t_tfo_stats 
& TFO_S_SYN_DATA_ACKED
) && 
1082                     tp
->t_rxtshift 
> 3) { 
1083                         if (TSTMP_GT(tp
->t_sndtime 
- 10 * TCP_RETRANSHZ
, tp
->t_rcvtime
)) { 
1084                                 tcp_heuristic_tfo_middlebox(tp
); 
1086                                 so
->so_error 
= ENODATA
; 
 /* Pick the back-off table by state: SYN table in SYN_SENT, regular table otherwise. */
1092                 if (tp
->t_state 
== TCPS_SYN_SENT
) { 
1093                         rexmt 
= TCP_REXMTVAL(tp
) * tcp_syn_backoff
[tp
->t_rxtshift
]; 
1094                         tp
->t_stat
.synrxtshift 
= tp
->t_rxtshift
; 
1096                         /* When retransmitting, disable TFO */ 
1097                         if (tfo_enabled(tp
) && 
1098                             (!(so
->so_flags1 
& SOF1_DATA_AUTHENTICATED
) || 
1099                             (tp
->t_flagsext 
& TF_FASTOPEN_HEUR
))) { 
1100                                 tp
->t_flagsext 
&= ~TF_FASTOPEN
; 
1101                                 tp
->t_tfo_flags 
|= TFO_F_SYN_LOSS
; 
1104                         rexmt 
= TCP_REXMTVAL(tp
) * tcp_backoff
[tp
->t_rxtshift
]; 
1107                 TCPT_RANGESET(tp
->t_rxtcur
, rexmt
, tp
->t_rttmin
, TCPTV_REXMTMAX
, 
1108                     TCP_ADD_REXMTSLOP(tp
)); 
1109                 tp
->t_timer
[TCPT_REXMT
] = OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
1111                 if (INP_WAIT_FOR_IF_FEEDBACK(tp
->t_inpcb
)) { 
1115                 tcp_free_sackholes(tp
); 
1117                  * Check for potential Path MTU Discovery Black Hole 
1119                 if (tcp_pmtud_black_hole_detect 
&& 
1120                     !(tp
->t_flagsext 
& TF_NOBLACKHOLE_DETECTION
) && 
1121                     (tp
->t_state 
== TCPS_ESTABLISHED
)) { 
1122                         if ((tp
->t_flags 
& TF_PMTUD
) && 
1123                             ((tp
->t_flags 
& TF_MAXSEGSNT
) 
1124                             || tp
->t_pmtud_lastseg_size 
> tcp_pmtud_black_hole_mss
) && 
1125                             tp
->t_rxtshift 
== 2) { 
1127                                  * Enter Path MTU Black-hole Detection mechanism: 
1128                                  * - Disable Path MTU Discovery (IP "DF" bit). 
1129                                  * - Reduce MTU to lower value than what we 
1130                                  * negotiated with the peer. 
1132                                 /* Disable Path MTU Discovery for now */ 
1133                                 tp
->t_flags 
&= ~TF_PMTUD
; 
1134                                 /* Record that we may have found a black hole */ 
1135                                 tp
->t_flags 
|= TF_BLACKHOLE
; 
1136                                 optlen 
= tp
->t_maxopd 
- tp
->t_maxseg
; 
1137                                 /* Keep track of previous MSS */ 
1138                                 tp
->t_pmtud_saved_maxopd 
= tp
->t_maxopd
; 
1139                                 tp
->t_pmtud_start_ts 
= tcp_now
; 
 /*
  * Never leave the start timestamp at 0 here; 0 is the value written by
  * tcp_pmtud_revert_segment_size() when it resets this field.
  */
1140                                 if (tp
->t_pmtud_start_ts 
== 0) { 
1141                                         tp
->t_pmtud_start_ts
++; 
1143                                 /* Reduce the MSS to intermediary value */ 
1144                                 if (tp
->t_maxopd 
> tcp_pmtud_black_hole_mss
) { 
1145                                         tp
->t_maxopd 
= tcp_pmtud_black_hole_mss
; 
1147                                         tp
->t_maxopd 
= /* use the default MSS */ 
1149                                             isipv6 
? tcp_v6mssdflt 
: 
1153                                 tp
->t_maxseg 
= tp
->t_maxopd 
- optlen
; 
1156                                  * Reset the slow-start flight size 
1157                                  * as it may depend on the new MSS 
1159                                 if (CC_ALGO(tp
)->cwnd_init 
!= NULL
) { 
1160                                         CC_ALGO(tp
)->cwnd_init(tp
); 
1162                                 tp
->snd_cwnd 
= tp
->t_maxseg
; 
1165                          * If further retransmissions are still 
1166                          * unsuccessful with a lowered MTU, maybe this 
1167                          * isn't a Black Hole and we restore the previous 
1168                          * MSS and blackhole detection flags. 
1171                                 if ((tp
->t_flags 
& TF_BLACKHOLE
) && 
1172                                     (tp
->t_rxtshift 
> 4)) { 
1173                                         tcp_pmtud_revert_segment_size(tp
); 
1174                                         tp
->snd_cwnd 
= tp
->t_maxseg
; 
1181                  * Disable rfc1323 and rfc1644 if we haven't got any 
1182                  * response to our SYN (after we reach the threshold) 
1183                  * to work-around some broken terminal servers (most of 
1184                  * which have hopefully been retired) that have bad VJ 
1185                  * header compression code which trashes TCP segments 
1186                  * containing unknown-to-them TCP options. 
1187                  * Do this only on non-local connections. 
1189                 if (tp
->t_state 
== TCPS_SYN_SENT 
&& 
1190                     tp
->t_rxtshift 
== tcp_broken_peer_syn_rxmit_thres
) { 
1191                         tp
->t_flags 
&= ~(TF_REQ_SCALE 
| TF_REQ_TSTMP 
| TF_REQ_CC
); 
1195                  * If losing, let the lower level know and try for 
1196                  * a better route.  Also, if we backed off this far, 
1197                  * our srtt estimate is probably bogus.  Clobber it 
1198                  * so we'll take the next rtt measurement as our srtt; 
1199                  * move the current srtt into rttvar to keep the current 
1200                  * retransmit times until then. 
1202                 if (tp
->t_rxtshift 
> TCP_MAXRXTSHIFT 
/ 4) { 
1205                                 in6_losing(tp
->t_inpcb
); 
1208                         in_losing(tp
->t_inpcb
); 
1209                         tp
->t_rttvar 
+= (tp
->t_srtt 
>> TCP_RTT_SHIFT
); 
 /* Rewind the send pointer to the oldest unacknowledged sequence number. */
1212                 tp
->snd_nxt 
= tp
->snd_una
; 
1214                  * Note:  We overload snd_recover to function also as the 
1215                  * snd_last variable described in RFC 2582 
1217                 tp
->snd_recover 
= tp
->snd_max
; 
1219                  * Force a segment to be sent. 
1221                 tp
->t_flags 
|= TF_ACKNOW
; 
1223                 /* If timing a segment in this window, stop the timer */ 
1226                 if (!IN_FASTRECOVERY(tp
) && tp
->t_rxtshift 
== 1) { 
1227                         tcpstat
.tcps_tailloss_rto
++; 
1232                  * RFC 5681 says: when a TCP sender detects segment loss 
1233                  * using retransmit timer and the given segment has already 
1234                  * been retransmitted by way of the retransmission timer at 
1235                  * least once, the value of ssthresh is held constant 
1237                 if (tp
->t_rxtshift 
== 1 && 
1238                     CC_ALGO(tp
)->after_timeout 
!= NULL
) { 
1239                         CC_ALGO(tp
)->after_timeout(tp
); 
1241                          * CWR notifications are to be sent on new data 
1242                          * right after Fast Retransmits and ECE 
1243                          * notification receipts. 
1245                         if (TCP_ECN_ENABLED(tp
)) { 
1246                                 tp
->ecn_flags 
|= TE_SENDCWR
; 
1250                 EXIT_FASTRECOVERY(tp
); 
1252                 /* Exit cwnd non validated phase */ 
1253                 tp
->t_flagsext 
&= ~TF_CWND_NONVALIDATED
; 
1257                 tcp_ccdbg_trace(tp
, NULL
, TCP_CC_REXMT_TIMEOUT
); 
1259                 (void) tcp_output(tp
); 
1263          * Persistance timer into zero window. 
1264          * Force a byte to be output, if possible. 
1267                 tcpstat
.tcps_persisttimeo
++; 
1269                  * Hack: if the peer is dead/unreachable, we do not 
1270                  * time out if the window is closed.  After a full 
1271                  * backoff, drop the connection if the idle time 
1272                  * (no responses to probes) reaches the maximum 
1273                  * backoff that we would use if retransmitting. 
1275                  * Drop the connection if we reached the maximum allowed time for 
1276                  * Zero Window Probes without a non-zero update from the peer. 
1277                  * See rdar://5805356 
1279                 if ((tp
->t_rxtshift 
== TCP_MAXRXTSHIFT 
&& 
1280                     (idle_time 
>= tcp_maxpersistidle 
|| 
1281                     idle_time 
>= TCP_REXMTVAL(tp
) * tcp_totbackoff
)) || 
1282                     ((tp
->t_persist_stop 
!= 0) && 
1283                     TSTMP_LEQ(tp
->t_persist_stop
, tcp_now
))) { 
1284                         tcpstat
.tcps_persistdrop
++; 
1285                         postevent(so
, 0, EV_TIMEOUT
); 
1287                             (SO_FILT_HINT_LOCKED 
| SO_FILT_HINT_TIMEOUT
)); 
1288                         tp 
= tcp_drop(tp
, ETIMEDOUT
); 
 /* TF_FORCE is set only for the duration of this tcp_output() call. */
1292                 tp
->t_flagsext 
|= TF_FORCE
; 
1293                 (void) tcp_output(tp
); 
1294                 tp
->t_flagsext 
&= ~TF_FORCE
; 
1298          * Keep-alive timer went off; send something 
1299          * or drop connection if idle for too long. 
1302                 tcpstat
.tcps_keeptimeo
++; 
1305                  * Regular TCP connections do not send keepalives after closing 
1306                  * MPTCP must not also, after sending Data FINs. 
1308                 struct mptcb 
*mp_tp 
= tptomptp(tp
); 
1309                 if ((tp
->t_mpflags 
& TMPF_MPTCP_TRUE
) && 
1310                     (tp
->t_state 
> TCPS_ESTABLISHED
)) { 
1312                 } else if (mp_tp 
!= NULL
) { 
1313                         if ((mptcp_ok_to_keepalive(mp_tp
) == 0)) { 
1318                 if (tp
->t_state 
< TCPS_ESTABLISHED
) { 
1321                 if ((always_keepalive 
|| 
1322                     (tp
->t_inpcb
->inp_socket
->so_options 
& SO_KEEPALIVE
) || 
1323                     (tp
->t_flagsext 
& TF_DETECT_READSTALL
) || 
1324                     (tp
->t_tfo_probe_state 
== TFO_PROBE_PROBING
)) && 
1325                     (tp
->t_state 
<= TCPS_CLOSING 
|| tp
->t_state 
== TCPS_FIN_WAIT_2
)) { 
1326                         if (idle_time 
>= TCP_CONN_KEEPIDLE(tp
) + TCP_CONN_MAXIDLE(tp
)) { 
1330                          * Send a packet designed to force a response 
1331                          * if the peer is up and reachable: 
1332                          * either an ACK if the connection is still alive, 
1333                          * or an RST if the peer has closed the connection 
1334                          * due to timeout or reboot. 
1335                          * Using sequence number tp->snd_una-1 
1336                          * causes the transmitted zero-length segment 
1337                          * to lie outside the receive window; 
1338                          * by the protocol spec, this requires the 
1339                          * correspondent TCP to respond. 
1341                         tcpstat
.tcps_keepprobe
++; 
1342                         t_template 
= tcp_maketemplate(tp
); 
1344                                 struct inpcb 
*inp 
= tp
->t_inpcb
; 
1345                                 struct tcp_respond_args tra
; 
 /* Start from a zeroed argument block, then copy the pcb's interface restrictions. */
1347                                 bzero(&tra
, sizeof(tra
)); 
1348                                 tra
.nocell 
= INP_NO_CELLULAR(inp
); 
1349                                 tra
.noexpensive 
= INP_NO_EXPENSIVE(inp
); 
1350                                 tra
.awdl_unrestricted 
= INP_AWDL_UNRESTRICTED(inp
); 
1351                                 tra
.intcoproc_allowed 
= INP_INTCOPROC_ALLOWED(inp
); 
1352                                 if (tp
->t_inpcb
->inp_flags 
& INP_BOUND_IF
) { 
1353                                         tra
.ifscope 
= tp
->t_inpcb
->inp_boundifp
->if_index
; 
1355                                         tra
.ifscope 
= IFSCOPE_NONE
; 
1357                                 tcp_respond(tp
, t_template
->tt_ipgen
, 
1358                                     &t_template
->tt_t
, (struct mbuf 
*)NULL
, 
1359                                     tp
->rcv_nxt
, tp
->snd_una 
- 1, 0, &tra
); 
 /* The template's mbuf is released once the probe has been handed to tcp_respond(). */
1360                                 (void) m_free(dtom(t_template
)); 
1361                                 if (tp
->t_flagsext 
& TF_DETECT_READSTALL
) { 
1362                                         tp
->t_rtimo_probes
++; 
1365                         tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
, 
1366                             TCP_CONN_KEEPINTVL(tp
)); 
1368                         tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
, 
1369                             TCP_CONN_KEEPIDLE(tp
)); 
1371                 if (tp
->t_flagsext 
& TF_DETECT_READSTALL
) { 
1372                         struct ifnet 
*outifp 
= tp
->t_inpcb
->inp_last_outifp
; 
1373                         bool reenable_probe 
= false; 
1375                          * The keep alive packets sent to detect a read 
1376                          * stall did not get a response from the 
1377                          * peer. Generate more keep-alives to confirm this. 
1378                          * If the number of probes sent reaches the limit, 
1379                          * generate an event. 
1381                         if (tp
->t_adaptive_rtimo 
> 0) { 
1382                                 if (tp
->t_rtimo_probes 
> tp
->t_adaptive_rtimo
) { 
1383                                         /* Generate an event */ 
1385                                             (SO_FILT_HINT_LOCKED 
| 
1386                                             SO_FILT_HINT_ADAPTIVE_RTIMO
)); 
1387                                         tcp_keepalive_reset(tp
); 
1389                                         reenable_probe 
= true; 
1391                         } else if (outifp 
!= NULL 
&& 
1392                             (outifp
->if_eflags 
& IFEF_PROBE_CONNECTIVITY
) && 
1393                             tp
->t_rtimo_probes 
<= TCP_CONNECTIVITY_PROBES_MAX
) { 
1394                                 reenable_probe 
= true; 
1396                                 tp
->t_flagsext 
&= ~TF_DETECT_READSTALL
; 
 /* Re-arm the keep-alive timer with a backed-off multiple of TCP_REXMTVAL(). */
1398                         if (reenable_probe
) { 
1399                                 int ind 
= min(tp
->t_rtimo_probes
, 
1401                                 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START( 
1402                                         tp
, tcp_backoff
[ind
] * TCP_REXMTVAL(tp
)); 
1405                 if (tp
->t_tfo_probe_state 
== TFO_PROBE_PROBING
) { 
 /* Clamp the probe count so it stays a valid tcp_backoff[] index. */
1409                         ind 
= min(tp
->t_tfo_probes
, TCP_MAXRXTSHIFT
); 
1412                          * We take the minimum among the time set by true 
1413                          * keepalive (see above) and the backoff'd RTO. That 
1414                          * way we backoff in case of packet-loss but will never 
1415                          * timeout slower than regular keepalive due to the 
1418                         tp
->t_timer
[TCPT_KEEP
] = min(OFFSET_FROM_START( 
1419                                     tp
, tcp_backoff
[ind
] * TCP_REXMTVAL(tp
)), 
1420                             tp
->t_timer
[TCPT_KEEP
]); 
1421                 } else if (!(tp
->t_tfo_flags 
& TFO_F_HEURISTIC_DONE
) && 
1422                     tp
->t_tfo_probe_state 
== TFO_PROBE_WAIT_DATA
) { 
1423                         /* Still no data! Let's assume a TFO-error and err out... */ 
1424                         tcp_heuristic_tfo_middlebox(tp
); 
1426                         so
->so_error 
= ENODATA
; 
1428                         tp
->t_tfo_stats 
|= TFO_S_RECV_BLACKHOLE
; 
1429                         tcpstat
.tcps_tfo_blackhole
++; 
 /* Delayed-ACK expiry: convert the pending delayed ACK into an immediate one. */
1433                 if (tcp_delack_enabled 
&& (tp
->t_flags 
& TF_DELACK
)) { 
1434                         tp
->t_flags 
&= ~TF_DELACK
; 
1435                         tp
->t_timer
[TCPT_DELACK
] = 0; 
1436                         tp
->t_flags 
|= TF_ACKNOW
; 
1439                          * If delayed ack timer fired while stretching 
1440                          * acks, count the number of times the streaming 
1441                          * detection was not correct. If this exceeds a 
1442                          * threshold, disable strech ack on this 
1445                          * Also, go back to acking every other packet. 
1447                         if ((tp
->t_flags 
& TF_STRETCHACK
)) { 
1448                                 if (tp
->t_unacksegs 
> 1 && 
1449                                     tp
->t_unacksegs 
< maxseg_unacked
) { 
1450                                         tp
->t_stretchack_delayed
++; 
1453                                 if (tp
->t_stretchack_delayed 
> 
1454                                     TCP_STRETCHACK_DELAY_THRESHOLD
) { 
1455                                         tp
->t_flagsext 
|= TF_DISABLE_STRETCHACK
; 
1457                                          * Note the time at which stretch 
1458                                          * ack was disabled automatically 
1460                                         tp
->rcv_nostrack_ts 
= tcp_now
; 
1461                                         tcpstat
.tcps_nostretchack
++; 
1462                                         tp
->t_stretchack_delayed 
= 0; 
1463                                         tp
->rcv_nostrack_pkts 
= 0; 
1465                                 tcp_reset_stretch_ack(tp
); 
1469                          * If we are measuring inter packet arrival jitter 
1470                          * for throttling a connection, this delayed ack 
1471                          * might be the reason for accumulating some 
1472                          * jitter. So let's restart the measurement. 
1474                         CLEAR_IAJ_STATE(tp
); 
1476                         tcpstat
.tcps_delack
++; 
1477                         (void) tcp_output(tp
); 
1482         case TCPT_JACK_RXMT
: 
 /* Only retransmit the join ACK for an established, pre-established joined subflow. */
1483                 if ((tp
->t_state 
== TCPS_ESTABLISHED
) && 
1484                     (tp
->t_mpflags 
& TMPF_PREESTABLISHED
) && 
1485                     (tp
->t_mpflags 
& TMPF_JOINED_FLOW
)) { 
1486                         if (++tp
->t_mprxtshift 
> TCP_MAXRXTSHIFT
) { 
1487                                 tcpstat
.tcps_timeoutdrop
++; 
1488                                 postevent(so
, 0, EV_TIMEOUT
); 
1490                                     (SO_FILT_HINT_LOCKED 
| 
1491                                     SO_FILT_HINT_TIMEOUT
)); 
1492                                 tp 
= tcp_drop(tp
, tp
->t_softerror 
? 
1493                                     tp
->t_softerror 
: ETIMEDOUT
); 
1496                         tcpstat
.tcps_join_rxmts
++; 
1497                         tp
->t_mpflags 
|= TMPF_SND_JACK
; 
1498                         tp
->t_flags 
|= TF_ACKNOW
; 
1501                          * No backoff is implemented for simplicity for this 
1504                         (void) tcp_output(tp
); 
1512                 tp
->t_flagsext 
&= ~(TF_SENT_TLPROBE
); 
1515                  * Check if the connection is in the right state to 
1518                 if (tp
->t_state 
!= TCPS_ESTABLISHED 
|| 
1519                     (tp
->t_rxtshift 
> 0 && !(tp
->t_flagsext 
& TF_PROBING
)) || 
1520                     tp
->snd_max 
== tp
->snd_una 
|| 
1521                     !SACK_ENABLED(tp
) || 
1522                     !TAILQ_EMPTY(&tp
->snd_holes
) || 
1523                     IN_FASTRECOVERY(tp
)) { 
1528                  * If there is no new data to send or if the 
1529                  * connection is limited by receive window then 
1530                  * retransmit the last segment, otherwise send 
 /* snd_len: sendable bytes (bounded by the peer window) minus what is already in flight. */
1533                 snd_len 
= min(so
->so_snd
.sb_cc
, tp
->snd_wnd
) 
1534                     - (tp
->snd_max 
- tp
->snd_una
); 
1536                         tp
->snd_nxt 
= tp
->snd_max
; 
1538                         snd_len 
= min((tp
->snd_max 
- tp
->snd_una
), 
1540                         tp
->snd_nxt 
= tp
->snd_max 
- snd_len
; 
1544                 if (tp
->t_flagsext 
& TF_PROBING
) { 
1545                         tcpstat
.tcps_probe_if
++; 
1548                 /* If timing a segment in this window, stop the timer */ 
1550                 /* Note that tail loss probe is being sent */ 
1551                 tp
->t_flagsext 
|= TF_SENT_TLPROBE
; 
1552                 tp
->t_tlpstart 
= tcp_now
; 
 /*
  * Grow cwnd by one segment only around the tcp_output() call below; the
  * same amount is subtracted again right after it.
  */
1554                 tp
->snd_cwnd 
+= tp
->t_maxseg
; 
1557                  * When tail-loss-probe fires, we reset the RTO timer, because 
1558                  * a probe just got sent, so we are good to push out the timer. 
1560                  * Set to 0 to ensure that tcp_output() will reschedule it 
1562                 tp
->t_timer
[TCPT_REXMT
] = 0; 
1564                 (void)tcp_output(tp
); 
1565                 tp
->snd_cwnd 
-= tp
->t_maxseg
; 
1567                 tp
->t_tlphighrxt 
= tp
->snd_nxt
; 
1571                 tp
->t_flagsext 
&= ~TF_DELAY_RECOVERY
; 
1574                  * Don't do anything if one of the following is true: 
1575                  * - the connection is already in recovery 
1576                  * - sequence until snd_recover has been acknowledged. 
1577                  * - retransmit timeout has fired 
1579                 if (IN_FASTRECOVERY(tp
) || 
1580                     SEQ_GEQ(tp
->snd_una
, tp
->snd_recover
) || 
1581                     tp
->t_rxtshift 
> 0) { 
1585                 VERIFY(SACK_ENABLED(tp
)); 
 /* Snapshot state first, then let the congestion-control pre_fr hook run if present. */
1586                 tcp_rexmt_save_state(tp
); 
1587                 if (CC_ALGO(tp
)->pre_fr 
!= NULL
) { 
1588                         CC_ALGO(tp
)->pre_fr(tp
); 
1589                         if (TCP_ECN_ENABLED(tp
)) { 
1590                                 tp
->ecn_flags 
|= TE_SENDCWR
; 
1593                 ENTER_FASTRECOVERY(tp
); 
1595                 tp
->t_timer
[TCPT_REXMT
] = 0; 
1596                 tcpstat
.tcps_sack_recovery_episode
++; 
1597                 tp
->t_sack_recovery_episode
++; 
1598                 tp
->sack_newdata 
= tp
->snd_nxt
; 
1599                 tp
->snd_cwnd 
= tp
->t_maxseg
; 
1600                 tcp_ccdbg_trace(tp
, NULL
, TCP_CC_ENTER_FASTRECOVERY
); 
1601                 (void) tcp_output(tp
); 
 /* Keep-alive drop path: count it, notify the socket, and drop with ETIMEDOUT. */
1604                 tcpstat
.tcps_keepdrops
++; 
1605                 postevent(so
, 0, EV_TIMEOUT
); 
1607                     (SO_FILT_HINT_LOCKED 
| SO_FILT_HINT_TIMEOUT
)); 
1608                 tp 
= tcp_drop(tp
, ETIMEDOUT
); 
 /* Emit a trace record when SO_DEBUG is set on the socket. */
1612         if (tp
->t_inpcb
->inp_socket
->so_options 
& SO_DEBUG
) { 
1613                 tcp_trace(TA_USER
, ostate
, tp
, (void *)0, (struct tcphdr 
*)0, 
1620 /* Remove a timer entry from timer list */ 
1622 tcp_remove_timer(struct tcpcb 
*tp
) 
1624         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1626         socket_lock_assert_owned(tp
->t_inpcb
->inp_socket
); 
1627         if (!(TIMER_IS_ON_LIST(tp
))) { 
1630         lck_mtx_lock(listp
->mtx
); 
1632         /* Check if pcb is on timer list again after acquiring the lock */ 
1633         if (!(TIMER_IS_ON_LIST(tp
))) { 
1634                 lck_mtx_unlock(listp
->mtx
); 
1638         if (listp
->next_te 
!= NULL 
&& listp
->next_te 
== &tp
->tentry
) { 
1639                 listp
->next_te 
= LIST_NEXT(&tp
->tentry
, le
); 
1642         LIST_REMOVE(&tp
->tentry
, le
); 
1643         tp
->t_flags 
&= ~(TF_TIMER_ONLIST
); 
1647         tp
->tentry
.le
.le_next 
= NULL
; 
1648         tp
->tentry
.le
.le_prev 
= NULL
; 
1649         lck_mtx_unlock(listp
->mtx
); 
1653  * Function to check if the timerlist needs to be rescheduled to run 
1654  * the timer entry correctly. Basically, this is to check if we can avoid 
1655  * taking the list lock. 
1659 need_to_resched_timerlist(u_int32_t runtime
, u_int16_t mode
) 
1661         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1665          * If the list is being processed then the state of the list is 
1666          * in flux. In this case always acquire the lock and set the state 
1669         if (listp
->running
) { 
1673         if (!listp
->scheduled
) { 
1677         diff 
= timer_diff(listp
->runtime
, 0, runtime
, 0); 
1679                 /* The list is going to run before this timer */ 
1682                 if (mode 
& TCP_TIMERLIST_10MS_MODE
) { 
1683                         if (diff 
<= TCP_TIMER_10MS_QUANTUM
) { 
1686                 } else if (mode 
& TCP_TIMERLIST_100MS_MODE
) { 
1687                         if (diff 
<= TCP_TIMER_100MS_QUANTUM
) { 
1691                         if (diff 
<= TCP_TIMER_500MS_QUANTUM
) { 
1700 tcp_sched_timerlist(uint32_t offset
) 
1702         uint64_t deadline 
= 0; 
1703         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1705         LCK_MTX_ASSERT(listp
->mtx
, LCK_MTX_ASSERT_OWNED
); 
1707         offset 
= min(offset
, TCP_TIMERLIST_MAX_OFFSET
); 
1708         listp
->runtime 
= tcp_now 
+ offset
; 
1709         listp
->schedtime 
= tcp_now
; 
1710         if (listp
->runtime 
== 0) { 
1715         clock_interval_to_deadline(offset
, USEC_PER_SEC
, &deadline
); 
1717         thread_call_enter_delayed(listp
->call
, deadline
); 
1718         listp
->scheduled 
= TRUE
; 
1722  * Function to run the timers for a connection. 
1724  * Returns the offset of next timer to be run for this connection which 
1725  * can be used to reschedule the timerlist. 
1727  * te_mode is an out parameter that indicates the modes of active 
1728  * timers for this connection. 
1731 tcp_run_conn_timer(struct tcpcb 
*tp
, u_int16_t 
*te_mode
, 
1732     u_int16_t probe_if_index
) 
1735         u_int16_t i 
= 0, index 
= TCPT_NONE
, lo_index 
= TCPT_NONE
; 
1736         u_int32_t timer_val
, offset 
= 0, lo_timer 
= 0; 
1738         boolean_t needtorun
[TCPT_NTIMERS
]; 
1742         bzero(needtorun
, sizeof(needtorun
)); 
1745         socket_lock(tp
->t_inpcb
->inp_socket
, 1); 
1747         so 
= tp
->t_inpcb
->inp_socket
; 
1748         /* Release the want count on inp */ 
1749         if (in_pcb_checkstate(tp
->t_inpcb
, WNT_RELEASE
, 1) 
1751                 if (TIMER_IS_ON_LIST(tp
)) { 
1752                         tcp_remove_timer(tp
); 
1755                 /* Looks like the TCP connection got closed while we 
1756                  * were waiting for the lock.. Done 
1762          * If this connection is over an interface that needs to 
1763          * be probed, send probe packets to reinitiate communication. 
1765         if (probe_if_index 
> 0 && tp
->t_inpcb
->inp_last_outifp 
!= NULL 
&& 
1766             tp
->t_inpcb
->inp_last_outifp
->if_index 
== probe_if_index
) { 
1767                 tp
->t_flagsext 
|= TF_PROBING
; 
1768                 tcp_timers(tp
, TCPT_PTO
); 
1769                 tp
->t_timer
[TCPT_PTO
] = 0; 
1770                 tp
->t_flagsext 
&= ~TF_PROBING
; 
1774          * Since the timer thread needs to wait for tcp lock, it may race 
1775          * with another thread that can cancel or reschedule the timer 
1776          * that is about to run. Check if we need to run anything. 
1778         if ((index 
= tp
->tentry
.index
) == TCPT_NONE
) { 
1782         timer_val 
= tp
->t_timer
[index
]; 
1784         diff 
= timer_diff(tp
->tentry
.runtime
, 0, tcp_now
, 0); 
1786                 if (tp
->tentry
.index 
!= TCPT_NONE
) { 
1788                         *(te_mode
) = tp
->tentry
.mode
; 
1793         tp
->t_timer
[index
] = 0; 
1794         if (timer_val 
> 0) { 
1795                 tp 
= tcp_timers(tp
, index
); 
1802          * Check if there are any other timers that need to be run. 
1803          * While doing it, adjust the timer values wrt tcp_now. 
1805         tp
->tentry
.mode 
= 0; 
1806         for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
1807                 if (tp
->t_timer
[i
] != 0) { 
1808                         diff 
= timer_diff(tp
->tentry
.timer_start
, 
1809                             tp
->t_timer
[i
], tcp_now
, 0); 
1811                                 needtorun
[i
] = TRUE
; 
1814                                 tp
->t_timer
[i
] = diff
; 
1815                                 needtorun
[i
] = FALSE
; 
1816                                 if (lo_timer 
== 0 || diff 
< lo_timer
) { 
1820                                 TCP_SET_TIMER_MODE(tp
->tentry
.mode
, i
); 
1825         tp
->tentry
.timer_start 
= tcp_now
; 
1826         tp
->tentry
.index 
= lo_index
; 
1827         VERIFY(tp
->tentry
.index 
== TCPT_NONE 
|| tp
->tentry
.mode 
> 0); 
1829         if (tp
->tentry
.index 
!= TCPT_NONE
) { 
1830                 tp
->tentry
.runtime 
= tp
->tentry
.timer_start 
+ 
1831                     tp
->t_timer
[tp
->tentry
.index
]; 
1832                 if (tp
->tentry
.runtime 
== 0) { 
1833                         tp
->tentry
.runtime
++; 
1838                 /* run any other timers outstanding at this time. */ 
1839                 for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
1842                                 tp 
= tcp_timers(tp
, i
); 
1850                 tcp_set_lotimer_index(tp
); 
1853         if (tp
->tentry
.index 
< TCPT_NONE
) { 
1854                 offset 
= tp
->t_timer
[tp
->tentry
.index
]; 
1855                 *(te_mode
) = tp
->tentry
.mode
; 
1859         if (tp 
!= NULL 
&& tp
->tentry
.index 
== TCPT_NONE
) { 
1860                 tcp_remove_timer(tp
); 
1864         socket_unlock(so
, 1); 
1869 tcp_run_timerlist(void * arg1
, void * arg2
) 
1871 #pragma unused(arg1, arg2) 
1872         struct tcptimerentry 
*te
, *next_te
; 
1873         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1875         uint32_t next_timer 
= 0; /* offset of the next timer on the list */ 
1876         u_int16_t te_mode 
= 0;  /* modes of all active timers in a tcpcb */ 
1877         u_int16_t list_mode 
= 0; /* cumulative of modes of all tcpcbs */ 
1878         uint32_t active_count 
= 0; 
1880         calculate_tcp_clock(); 
1882         lck_mtx_lock(listp
->mtx
); 
1884         int32_t drift 
= tcp_now 
- listp
->runtime
; 
1886                 tcpstat
.tcps_timer_drift_le_1_ms
++; 
1887         } else if (drift 
<= 10) { 
1888                 tcpstat
.tcps_timer_drift_le_10_ms
++; 
1889         } else if (drift 
<= 20) { 
1890                 tcpstat
.tcps_timer_drift_le_20_ms
++; 
1891         } else if (drift 
<= 50) { 
1892                 tcpstat
.tcps_timer_drift_le_50_ms
++; 
1893         } else if (drift 
<= 100) { 
1894                 tcpstat
.tcps_timer_drift_le_100_ms
++; 
1895         } else if (drift 
<= 200) { 
1896                 tcpstat
.tcps_timer_drift_le_200_ms
++; 
1897         } else if (drift 
<= 500) { 
1898                 tcpstat
.tcps_timer_drift_le_500_ms
++; 
1899         } else if (drift 
<= 1000) { 
1900                 tcpstat
.tcps_timer_drift_le_1000_ms
++; 
1902                 tcpstat
.tcps_timer_drift_gt_1000_ms
++; 
1905         listp
->running 
= TRUE
; 
1907         LIST_FOREACH_SAFE(te
, &listp
->lhead
, le
, next_te
) { 
1908                 uint32_t offset 
= 0; 
1909                 uint32_t runtime 
= te
->runtime
; 
1910                 if (te
->index 
< TCPT_NONE 
&& TSTMP_GT(runtime
, tcp_now
)) { 
1911                         offset 
= timer_diff(runtime
, 0, tcp_now
, 0); 
1912                         if (next_timer 
== 0 || offset 
< next_timer
) { 
1913                                 next_timer 
= offset
; 
1915                         list_mode 
|= te
->mode
; 
1919                 tp 
= TIMERENTRY_TO_TP(te
); 
1922                  * Acquire an inp wantcnt on the inpcb so that the socket 
1923                  * won't get detached even if tcp_close is called 
1925                 if (in_pcb_checkstate(tp
->t_inpcb
, WNT_ACQUIRE
, 0) 
1928                          * Some how this pcb went into dead state while 
1929                          * on the timer list, just take it off the list. 
1930                          * Since the timer list entry pointers are 
1931                          * protected by the timer list lock, we can 
1932                          * do it here without the socket lock. 
1934                         if (TIMER_IS_ON_LIST(tp
)) { 
1935                                 tp
->t_flags 
&= ~(TF_TIMER_ONLIST
); 
1936                                 LIST_REMOVE(&tp
->tentry
, le
); 
1939                                 tp
->tentry
.le
.le_next 
= NULL
; 
1940                                 tp
->tentry
.le
.le_prev 
= NULL
; 
1947                  * Store the next timerentry pointer before releasing the 
1948                  * list lock. If that entry has to be removed when we 
1949                  * release the lock, this pointer will be updated to the 
1950                  * element after that. 
1952                 listp
->next_te 
= next_te
; 
1954                 VERIFY_NEXT_LINK(&tp
->tentry
, le
); 
1955                 VERIFY_PREV_LINK(&tp
->tentry
, le
); 
1957                 lck_mtx_unlock(listp
->mtx
); 
1959                 offset 
= tcp_run_conn_timer(tp
, &te_mode
, 
1960                     listp
->probe_if_index
); 
1962                 lck_mtx_lock(listp
->mtx
); 
1964                 next_te 
= listp
->next_te
; 
1965                 listp
->next_te 
= NULL
; 
1967                 if (offset 
> 0 && te_mode 
!= 0) { 
1968                         list_mode 
|= te_mode
; 
1970                         if (next_timer 
== 0 || offset 
< next_timer
) { 
1971                                 next_timer 
= offset
; 
1976         if (!LIST_EMPTY(&listp
->lhead
)) { 
1977                 u_int16_t next_mode 
= 0; 
1978                 if ((list_mode 
& TCP_TIMERLIST_10MS_MODE
) || 
1979                     (listp
->pref_mode 
& TCP_TIMERLIST_10MS_MODE
)) { 
1980                         next_mode 
= TCP_TIMERLIST_10MS_MODE
; 
1981                 } else if ((list_mode 
& TCP_TIMERLIST_100MS_MODE
) || 
1982                     (listp
->pref_mode 
& TCP_TIMERLIST_100MS_MODE
)) { 
1983                         next_mode 
= TCP_TIMERLIST_100MS_MODE
; 
1985                         next_mode 
= TCP_TIMERLIST_500MS_MODE
; 
1988                 if (next_mode 
!= TCP_TIMERLIST_500MS_MODE
) { 
1989                         listp
->idleruns 
= 0; 
1992                          * the next required mode is slow mode, but if 
1993                          * the last one was a faster mode and we did not 
1994                          * have enough idle runs, repeat the last mode. 
1996                          * We try to keep the timer list in fast mode for 
1997                          * some idle time in expectation of new data. 
1999                         if (listp
->mode 
!= next_mode 
&& 
2000                             listp
->idleruns 
< timer_fastmode_idlemax
) { 
2002                                 next_mode 
= listp
->mode
; 
2003                                 next_timer 
= TCP_TIMER_100MS_QUANTUM
; 
2005                                 listp
->idleruns 
= 0; 
2008                 listp
->mode 
= next_mode
; 
2009                 if (listp
->pref_offset 
!= 0) { 
2010                         next_timer 
= min(listp
->pref_offset
, next_timer
); 
2013                 if (listp
->mode 
== TCP_TIMERLIST_500MS_MODE
) { 
2014                         next_timer 
= max(next_timer
, 
2015                             TCP_TIMER_500MS_QUANTUM
); 
2018                 tcp_sched_timerlist(next_timer
); 
2021                  * No need to reschedule this timer, but always run 
2022                  * periodically at a much higher granularity. 
2024                 tcp_sched_timerlist(TCP_TIMERLIST_MAX_OFFSET
); 
2027         listp
->running 
= FALSE
; 
2028         listp
->pref_mode 
= 0; 
2029         listp
->pref_offset 
= 0; 
2030         listp
->probe_if_index 
= 0; 
2032         lck_mtx_unlock(listp
->mtx
); 
2036  * Function to check if the timerlist needs to be rescheduled to run this 
2037  * connection's timers correctly. 
2040 tcp_sched_timers(struct tcpcb 
*tp
) 
2042         struct tcptimerentry 
*te 
= &tp
->tentry
; 
2043         u_int16_t index 
= te
->index
; 
2044         u_int16_t mode 
= te
->mode
; 
2045         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
2047         boolean_t list_locked 
= FALSE
; 
2049         if (tp
->t_inpcb
->inp_state 
== INPCB_STATE_DEAD
) { 
2050                 /* Just return without adding the dead pcb to the list */ 
2051                 if (TIMER_IS_ON_LIST(tp
)) { 
2052                         tcp_remove_timer(tp
); 
2057         if (index 
== TCPT_NONE
) { 
2058                 /* Nothing to run */ 
2059                 tcp_remove_timer(tp
); 
2064          * compute the offset at which the next timer for this connection 
2067         offset 
= timer_diff(te
->runtime
, 0, tcp_now
, 0); 
2070                 tcp_timer_advanced
++; 
2073         if (!TIMER_IS_ON_LIST(tp
)) { 
2075                         lck_mtx_lock(listp
->mtx
); 
2079                 if (!TIMER_IS_ON_LIST(tp
)) { 
2080                         LIST_INSERT_HEAD(&listp
->lhead
, te
, le
); 
2081                         tp
->t_flags 
|= TF_TIMER_ONLIST
; 
2084                         if (listp
->entries 
> listp
->maxentries
) { 
2085                                 listp
->maxentries 
= listp
->entries
; 
2088                         /* if the list is not scheduled, just schedule it */ 
2089                         if (!listp
->scheduled
) { 
2096          * Timer entry is currently on the list, check if the list needs 
2097          * to be rescheduled. 
2099         if (need_to_resched_timerlist(te
->runtime
, mode
)) { 
2100                 tcp_resched_timerlist
++; 
2103                         lck_mtx_lock(listp
->mtx
); 
2107                 VERIFY_NEXT_LINK(te
, le
); 
2108                 VERIFY_PREV_LINK(te
, le
); 
2110                 if (listp
->running
) { 
2111                         listp
->pref_mode 
|= mode
; 
2112                         if (listp
->pref_offset 
== 0 || 
2113                             offset 
< listp
->pref_offset
) { 
2114                                 listp
->pref_offset 
= offset
; 
2118                          * The list could have got rescheduled while 
2119                          * this thread was waiting for the lock 
2121                         if (listp
->scheduled
) { 
2123                                 diff 
= timer_diff(listp
->runtime
, 0, 
2139          * Since a connection with timers is getting scheduled, the timer 
2140          * list moves from idle to active state and that is why idlegen is 
2143         if (mode 
& TCP_TIMERLIST_10MS_MODE
) { 
2144                 listp
->mode 
= TCP_TIMERLIST_10MS_MODE
; 
2145                 listp
->idleruns 
= 0; 
2146                 offset 
= min(offset
, TCP_TIMER_10MS_QUANTUM
); 
2147         } else if (mode 
& TCP_TIMERLIST_100MS_MODE
) { 
2148                 if (listp
->mode 
> TCP_TIMERLIST_100MS_MODE
) { 
2149                         listp
->mode 
= TCP_TIMERLIST_100MS_MODE
; 
2151                 listp
->idleruns 
= 0; 
2152                 offset 
= min(offset
, TCP_TIMER_100MS_QUANTUM
); 
2154         tcp_sched_timerlist(offset
); 
2158                 lck_mtx_unlock(listp
->mtx
); 
2165 tcp_set_lotimer_index(struct tcpcb 
*tp
) 
2167         uint16_t i
, lo_index 
= TCPT_NONE
, mode 
= 0; 
2168         uint32_t lo_timer 
= 0; 
2169         for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
2170                 if (tp
->t_timer
[i
] != 0) { 
2171                         TCP_SET_TIMER_MODE(mode
, i
); 
2172                         if (lo_timer 
== 0 || tp
->t_timer
[i
] < lo_timer
) { 
2173                                 lo_timer 
= tp
->t_timer
[i
]; 
2178         tp
->tentry
.index 
= lo_index
; 
2179         tp
->tentry
.mode 
= mode
; 
2180         VERIFY(tp
->tentry
.index 
== TCPT_NONE 
|| tp
->tentry
.mode 
> 0); 
2182         if (tp
->tentry
.index 
!= TCPT_NONE
) { 
2183                 tp
->tentry
.runtime 
= tp
->tentry
.timer_start
 
2184                     + tp
->t_timer
[tp
->tentry
.index
]; 
2185                 if (tp
->tentry
.runtime 
== 0) { 
2186                         tp
->tentry
.runtime
++; 
2192 tcp_check_timer_state(struct tcpcb 
*tp
) 
2194         socket_lock_assert_owned(tp
->t_inpcb
->inp_socket
); 
2196         if (tp
->t_inpcb
->inp_flags2 
& INP2_TIMEWAIT
) { 
2200         tcp_set_lotimer_index(tp
); 
2202         tcp_sched_timers(tp
); 
/*
 * Report how much a 32-bit cumulative counter grew since the last report.
 *
 * cur:  current value of the counter.
 * prev: value seen by the previous call; updated to cur on return.
 * dest: receives cur - *prev when that difference, taken as a signed
 *       32-bit value, is positive, and 0 otherwise.
 *
 * Because *prev is advanced on every call, calling this twice in a row
 * with the same arguments stores 0 in *dest the second time.
 */
static inline void
tcp_cumulative_stat(u_int32_t cur, u_int32_t *prev, u_int32_t *dest)
{
	/* handle wrap around */
	int32_t diff = (int32_t) (cur - *prev);
	if (diff > 0) {
		*dest = diff;
	} else {
		*dest = 0;
	}
	*prev = cur;
	return;
}
/*
 * Report how much a 64-bit cumulative counter grew since the last report.
 *
 * cur:  current value of the counter.
 * prev: value seen by the previous call; updated to cur on return.
 * dest: receives cur - *prev when that difference, taken as a signed
 *       64-bit value, is positive, and 0 otherwise.
 */
static inline void
tcp_cumulative_stat64(u_int64_t cur, u_int64_t *prev, u_int64_t *dest)
{
	/* handle wrap around */
	int64_t diff = (int64_t) (cur - *prev);
	if (diff > 0) {
		*dest = diff;
	} else {
		*dest = 0;
	}
	*prev = cur;
	return;
}
2234 __private_extern__ 
void 
2235 tcp_report_stats(void) 
2237         struct nstat_sysinfo_data data
; 
2238         struct sockaddr_in dst
; 
2239         struct sockaddr_in6 dst6
; 
2240         struct rtentry 
*rt 
= NULL
; 
2241         static struct tcp_last_report_stats prev
; 
2242         u_int64_t var
, uptime
; 
2244 #define stat    data.u.tcp_stats 
2245         if (((uptime 
= net_uptime()) - tcp_last_report_time
) < 
2246             tcp_report_stats_interval
) { 
2250         tcp_last_report_time 
= uptime
; 
2252         bzero(&data
, sizeof(data
)); 
2253         data
.flags 
= NSTAT_SYSINFO_TCP_STATS
; 
2255         bzero(&dst
, sizeof(dst
)); 
2256         dst
.sin_len 
= sizeof(dst
); 
2257         dst
.sin_family 
= AF_INET
; 
2260         lck_mtx_lock(rnh_lock
); 
2261         rt 
=  rt_lookup(TRUE
, (struct sockaddr 
*)&dst
, NULL
, 
2262             rt_tables
[AF_INET
], IFSCOPE_NONE
); 
2263         lck_mtx_unlock(rnh_lock
); 
2266                 if (rt_primary_default(rt
, rt_key(rt
)) && 
2267                     rt
->rt_stats 
!= NULL
) { 
2268                         stat
.ipv4_avgrtt 
= rt
->rt_stats
->nstat_avg_rtt
; 
2276         bzero(&dst6
, sizeof(dst6
)); 
2277         dst6
.sin6_len 
= sizeof(dst6
); 
2278         dst6
.sin6_family 
= AF_INET6
; 
2280         lck_mtx_lock(rnh_lock
); 
2281         rt 
= rt_lookup(TRUE
, (struct sockaddr 
*)&dst6
, NULL
, 
2282             rt_tables
[AF_INET6
], IFSCOPE_NONE
); 
2283         lck_mtx_unlock(rnh_lock
); 
2286                 if (rt_primary_default(rt
, rt_key(rt
)) && 
2287                     rt
->rt_stats 
!= NULL
) { 
2288                         stat
.ipv6_avgrtt 
= rt
->rt_stats
->nstat_avg_rtt
; 
2295         /* send packet loss rate, shift by 10 for precision */ 
2296         if (tcpstat
.tcps_sndpack 
> 0 && tcpstat
.tcps_sndrexmitpack 
> 0) { 
2297                 var 
= tcpstat
.tcps_sndrexmitpack 
<< 10; 
2298                 stat
.send_plr 
= (var 
* 100) / tcpstat
.tcps_sndpack
; 
2301         /* recv packet loss rate, shift by 10 for precision */ 
2302         if (tcpstat
.tcps_rcvpack 
> 0 && tcpstat
.tcps_recovered_pkts 
> 0) { 
2303                 var 
= tcpstat
.tcps_recovered_pkts 
<< 10; 
2304                 stat
.recv_plr 
= (var 
* 100) / tcpstat
.tcps_rcvpack
; 
2307         /* RTO after tail loss, shift by 10 for precision */ 
2308         if (tcpstat
.tcps_sndrexmitpack 
> 0 
2309             && tcpstat
.tcps_tailloss_rto 
> 0) { 
2310                 var 
= tcpstat
.tcps_tailloss_rto 
<< 10; 
2311                 stat
.send_tlrto_rate 
= 
2312                     (var 
* 100) / tcpstat
.tcps_sndrexmitpack
; 
2315         /* packet reordering */ 
2316         if (tcpstat
.tcps_sndpack 
> 0 && tcpstat
.tcps_reordered_pkts 
> 0) { 
2317                 var 
= tcpstat
.tcps_reordered_pkts 
<< 10; 
2318                 stat
.send_reorder_rate 
= 
2319                     (var 
* 100) / tcpstat
.tcps_sndpack
; 
2322         if (tcp_ecn_outbound 
== 1) { 
2323                 stat
.ecn_client_enabled 
= 1; 
2325         if (tcp_ecn_inbound 
== 1) { 
2326                 stat
.ecn_server_enabled 
= 1; 
2328         tcp_cumulative_stat(tcpstat
.tcps_connattempt
, 
2329             &prev
.tcps_connattempt
, &stat
.connection_attempts
); 
2330         tcp_cumulative_stat(tcpstat
.tcps_accepts
, 
2331             &prev
.tcps_accepts
, &stat
.connection_accepts
); 
2332         tcp_cumulative_stat(tcpstat
.tcps_ecn_client_setup
, 
2333             &prev
.tcps_ecn_client_setup
, &stat
.ecn_client_setup
); 
2334         tcp_cumulative_stat(tcpstat
.tcps_ecn_server_setup
, 
2335             &prev
.tcps_ecn_server_setup
, &stat
.ecn_server_setup
); 
2336         tcp_cumulative_stat(tcpstat
.tcps_ecn_client_success
, 
2337             &prev
.tcps_ecn_client_success
, &stat
.ecn_client_success
); 
2338         tcp_cumulative_stat(tcpstat
.tcps_ecn_server_success
, 
2339             &prev
.tcps_ecn_server_success
, &stat
.ecn_server_success
); 
2340         tcp_cumulative_stat(tcpstat
.tcps_ecn_not_supported
, 
2341             &prev
.tcps_ecn_not_supported
, &stat
.ecn_not_supported
); 
2342         tcp_cumulative_stat(tcpstat
.tcps_ecn_lost_syn
, 
2343             &prev
.tcps_ecn_lost_syn
, &stat
.ecn_lost_syn
); 
2344         tcp_cumulative_stat(tcpstat
.tcps_ecn_lost_synack
, 
2345             &prev
.tcps_ecn_lost_synack
, &stat
.ecn_lost_synack
); 
2346         tcp_cumulative_stat(tcpstat
.tcps_ecn_recv_ce
, 
2347             &prev
.tcps_ecn_recv_ce
, &stat
.ecn_recv_ce
); 
2348         tcp_cumulative_stat(tcpstat
.tcps_ecn_recv_ece
, 
2349             &prev
.tcps_ecn_recv_ece
, &stat
.ecn_recv_ece
); 
2350         tcp_cumulative_stat(tcpstat
.tcps_ecn_recv_ece
, 
2351             &prev
.tcps_ecn_recv_ece
, &stat
.ecn_recv_ece
); 
2352         tcp_cumulative_stat(tcpstat
.tcps_ecn_sent_ece
, 
2353             &prev
.tcps_ecn_sent_ece
, &stat
.ecn_sent_ece
); 
2354         tcp_cumulative_stat(tcpstat
.tcps_ecn_sent_ece
, 
2355             &prev
.tcps_ecn_sent_ece
, &stat
.ecn_sent_ece
); 
2356         tcp_cumulative_stat(tcpstat
.tcps_ecn_conn_recv_ce
, 
2357             &prev
.tcps_ecn_conn_recv_ce
, &stat
.ecn_conn_recv_ce
); 
2358         tcp_cumulative_stat(tcpstat
.tcps_ecn_conn_recv_ece
, 
2359             &prev
.tcps_ecn_conn_recv_ece
, &stat
.ecn_conn_recv_ece
); 
2360         tcp_cumulative_stat(tcpstat
.tcps_ecn_conn_plnoce
, 
2361             &prev
.tcps_ecn_conn_plnoce
, &stat
.ecn_conn_plnoce
); 
2362         tcp_cumulative_stat(tcpstat
.tcps_ecn_conn_pl_ce
, 
2363             &prev
.tcps_ecn_conn_pl_ce
, &stat
.ecn_conn_pl_ce
); 
2364         tcp_cumulative_stat(tcpstat
.tcps_ecn_conn_nopl_ce
, 
2365             &prev
.tcps_ecn_conn_nopl_ce
, &stat
.ecn_conn_nopl_ce
); 
2366         tcp_cumulative_stat(tcpstat
.tcps_ecn_fallback_synloss
, 
2367             &prev
.tcps_ecn_fallback_synloss
, &stat
.ecn_fallback_synloss
); 
2368         tcp_cumulative_stat(tcpstat
.tcps_ecn_fallback_reorder
, 
2369             &prev
.tcps_ecn_fallback_reorder
, &stat
.ecn_fallback_reorder
); 
2370         tcp_cumulative_stat(tcpstat
.tcps_ecn_fallback_ce
, 
2371             &prev
.tcps_ecn_fallback_ce
, &stat
.ecn_fallback_ce
); 
2372         tcp_cumulative_stat(tcpstat
.tcps_tfo_syn_data_rcv
, 
2373             &prev
.tcps_tfo_syn_data_rcv
, &stat
.tfo_syn_data_rcv
); 
2374         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_req_rcv
, 
2375             &prev
.tcps_tfo_cookie_req_rcv
, &stat
.tfo_cookie_req_rcv
); 
2376         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_sent
, 
2377             &prev
.tcps_tfo_cookie_sent
, &stat
.tfo_cookie_sent
); 
2378         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_invalid
, 
2379             &prev
.tcps_tfo_cookie_invalid
, &stat
.tfo_cookie_invalid
); 
2380         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_req
, 
2381             &prev
.tcps_tfo_cookie_req
, &stat
.tfo_cookie_req
); 
2382         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_rcv
, 
2383             &prev
.tcps_tfo_cookie_rcv
, &stat
.tfo_cookie_rcv
); 
2384         tcp_cumulative_stat(tcpstat
.tcps_tfo_syn_data_sent
, 
2385             &prev
.tcps_tfo_syn_data_sent
, &stat
.tfo_syn_data_sent
); 
2386         tcp_cumulative_stat(tcpstat
.tcps_tfo_syn_data_acked
, 
2387             &prev
.tcps_tfo_syn_data_acked
, &stat
.tfo_syn_data_acked
); 
2388         tcp_cumulative_stat(tcpstat
.tcps_tfo_syn_loss
, 
2389             &prev
.tcps_tfo_syn_loss
, &stat
.tfo_syn_loss
); 
2390         tcp_cumulative_stat(tcpstat
.tcps_tfo_blackhole
, 
2391             &prev
.tcps_tfo_blackhole
, &stat
.tfo_blackhole
); 
2392         tcp_cumulative_stat(tcpstat
.tcps_tfo_cookie_wrong
, 
2393             &prev
.tcps_tfo_cookie_wrong
, &stat
.tfo_cookie_wrong
); 
2394         tcp_cumulative_stat(tcpstat
.tcps_tfo_no_cookie_rcv
, 
2395             &prev
.tcps_tfo_no_cookie_rcv
, &stat
.tfo_no_cookie_rcv
); 
2396         tcp_cumulative_stat(tcpstat
.tcps_tfo_heuristics_disable
, 
2397             &prev
.tcps_tfo_heuristics_disable
, &stat
.tfo_heuristics_disable
); 
2398         tcp_cumulative_stat(tcpstat
.tcps_tfo_sndblackhole
, 
2399             &prev
.tcps_tfo_sndblackhole
, &stat
.tfo_sndblackhole
); 
2402         tcp_cumulative_stat(tcpstat
.tcps_mptcp_handover_attempt
, 
2403             &prev
.tcps_mptcp_handover_attempt
, &stat
.mptcp_handover_attempt
); 
2404         tcp_cumulative_stat(tcpstat
.tcps_mptcp_interactive_attempt
, 
2405             &prev
.tcps_mptcp_interactive_attempt
, &stat
.mptcp_interactive_attempt
); 
2406         tcp_cumulative_stat(tcpstat
.tcps_mptcp_aggregate_attempt
, 
2407             &prev
.tcps_mptcp_aggregate_attempt
, &stat
.mptcp_aggregate_attempt
); 
2408         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_handover_attempt
, 
2409             &prev
.tcps_mptcp_fp_handover_attempt
, &stat
.mptcp_fp_handover_attempt
); 
2410         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_interactive_attempt
, 
2411             &prev
.tcps_mptcp_fp_interactive_attempt
, &stat
.mptcp_fp_interactive_attempt
); 
2412         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_aggregate_attempt
, 
2413             &prev
.tcps_mptcp_fp_aggregate_attempt
, &stat
.mptcp_fp_aggregate_attempt
); 
2414         tcp_cumulative_stat(tcpstat
.tcps_mptcp_heuristic_fallback
, 
2415             &prev
.tcps_mptcp_heuristic_fallback
, &stat
.mptcp_heuristic_fallback
); 
2416         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_heuristic_fallback
, 
2417             &prev
.tcps_mptcp_fp_heuristic_fallback
, &stat
.mptcp_fp_heuristic_fallback
); 
2418         tcp_cumulative_stat(tcpstat
.tcps_mptcp_handover_success_wifi
, 
2419             &prev
.tcps_mptcp_handover_success_wifi
, &stat
.mptcp_handover_success_wifi
); 
2420         tcp_cumulative_stat(tcpstat
.tcps_mptcp_handover_success_cell
, 
2421             &prev
.tcps_mptcp_handover_success_cell
, &stat
.mptcp_handover_success_cell
); 
2422         tcp_cumulative_stat(tcpstat
.tcps_mptcp_interactive_success
, 
2423             &prev
.tcps_mptcp_interactive_success
, &stat
.mptcp_interactive_success
); 
2424         tcp_cumulative_stat(tcpstat
.tcps_mptcp_aggregate_success
, 
2425             &prev
.tcps_mptcp_aggregate_success
, &stat
.mptcp_aggregate_success
); 
2426         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_handover_success_wifi
, 
2427             &prev
.tcps_mptcp_fp_handover_success_wifi
, &stat
.mptcp_fp_handover_success_wifi
); 
2428         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_handover_success_cell
, 
2429             &prev
.tcps_mptcp_fp_handover_success_cell
, &stat
.mptcp_fp_handover_success_cell
); 
2430         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_interactive_success
, 
2431             &prev
.tcps_mptcp_fp_interactive_success
, &stat
.mptcp_fp_interactive_success
); 
2432         tcp_cumulative_stat(tcpstat
.tcps_mptcp_fp_aggregate_success
, 
2433             &prev
.tcps_mptcp_fp_aggregate_success
, &stat
.mptcp_fp_aggregate_success
); 
2434         tcp_cumulative_stat(tcpstat
.tcps_mptcp_handover_cell_from_wifi
, 
2435             &prev
.tcps_mptcp_handover_cell_from_wifi
, &stat
.mptcp_handover_cell_from_wifi
); 
2436         tcp_cumulative_stat(tcpstat
.tcps_mptcp_handover_wifi_from_cell
, 
2437             &prev
.tcps_mptcp_handover_wifi_from_cell
, &stat
.mptcp_handover_wifi_from_cell
); 
2438         tcp_cumulative_stat(tcpstat
.tcps_mptcp_interactive_cell_from_wifi
, 
2439             &prev
.tcps_mptcp_interactive_cell_from_wifi
, &stat
.mptcp_interactive_cell_from_wifi
); 
2440         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_handover_cell_bytes
, 
2441             &prev
.tcps_mptcp_handover_cell_bytes
, &stat
.mptcp_handover_cell_bytes
); 
2442         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_interactive_cell_bytes
, 
2443             &prev
.tcps_mptcp_interactive_cell_bytes
, &stat
.mptcp_interactive_cell_bytes
); 
2444         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_aggregate_cell_bytes
, 
2445             &prev
.tcps_mptcp_aggregate_cell_bytes
, &stat
.mptcp_aggregate_cell_bytes
); 
2446         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_handover_all_bytes
, 
2447             &prev
.tcps_mptcp_handover_all_bytes
, &stat
.mptcp_handover_all_bytes
); 
2448         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_interactive_all_bytes
, 
2449             &prev
.tcps_mptcp_interactive_all_bytes
, &stat
.mptcp_interactive_all_bytes
); 
2450         tcp_cumulative_stat64(tcpstat
.tcps_mptcp_aggregate_all_bytes
, 
2451             &prev
.tcps_mptcp_aggregate_all_bytes
, &stat
.mptcp_aggregate_all_bytes
); 
2452         tcp_cumulative_stat(tcpstat
.tcps_mptcp_back_to_wifi
, 
2453             &prev
.tcps_mptcp_back_to_wifi
, &stat
.mptcp_back_to_wifi
); 
2454         tcp_cumulative_stat(tcpstat
.tcps_mptcp_wifi_proxy
, 
2455             &prev
.tcps_mptcp_wifi_proxy
, &stat
.mptcp_wifi_proxy
); 
2456         tcp_cumulative_stat(tcpstat
.tcps_mptcp_cell_proxy
, 
2457             &prev
.tcps_mptcp_cell_proxy
, &stat
.mptcp_cell_proxy
); 
2458         tcp_cumulative_stat(tcpstat
.tcps_mptcp_triggered_cell
, 
2459             &prev
.tcps_mptcp_triggered_cell
, &stat
.mptcp_triggered_cell
); 
2461         nstat_sysinfo_send_data(&data
); 
/*
 * tcp_interface_send_probe
 *
 * Stores the caller's interface index in the global TCP timer list
 * (tcp_timer_list.probe_if_index) and asks for the timer list to be
 * scheduled TCP_TIMER_10MS_QUANTUM from now.
 *
 * probe_if_index: value written to listp->probe_if_index.
 *
 * Visible sequence:
 *   1. calculate_tcp_clock() refreshes the TCP clock.
 *   2. listp->mtx is taken.
 *   3. If listp->probe_if_index is already > 0, the counter
 *      tcpstat.tcps_probe_if_conflict is incremented.
 *   4. listp->probe_if_index is set and listp->running / listp->scheduled
 *      are examined.
 *   5. listp->mode becomes TCP_TIMERLIST_10MS_MODE, listp->idleruns is
 *      cleared and tcp_sched_timerlist(offset) is called.
 *   6. listp->mtx is released.
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2468-2469, 2478-2480, 2483-2486, 2492, 2494, 2496-2498 and
 * 2503-2504). The return type, the braces, the declarations of `offset`
 * and `diff`, the test applied to `diff`, and whatever runs inside the
 * `if` bodies at 2476, 2482 and 2491 are therefore not visible.
 * Presumably those bodies skip straight to the unlock - confirm against
 * the complete file before relying on the control flow described here.
 */
2467 tcp_interface_send_probe(u_int16_t probe_if_index
) 
2470         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
2472         /* Make sure TCP clock is up to date */ 
2473         calculate_tcp_clock(); 
/* The visible lines below run with listp->mtx held, up to line 2505. */
2475         lck_mtx_lock(listp
->mtx
); 
/* An index is already recorded (> 0): bump the conflict counter. */
2476         if (listp
->probe_if_index 
> 0) { 
2477                 tcpstat
.tcps_probe_if_conflict
++; 
/* Record the interface index supplied by the caller. */
2481         listp
->probe_if_index 
= probe_if_index
; 
2482         if (listp
->running
) { 
2487          * Reschedule the timerlist to run within the next 10ms, which is 
2488          * the fastest that we can do. 
2490         offset 
= TCP_TIMER_10MS_QUANTUM
; 
/* The list already has a run scheduled: compute diff from its runtime. */
2491         if (listp
->scheduled
) { 
2493                 diff 
= timer_diff(listp
->runtime
, 0, tcp_now
, offset
); 
2495                         /* The timer will fire sooner than what's needed */ 
/* Switch the list to 10ms mode, reset idleruns and schedule it. */
2499         listp
->mode 
= TCP_TIMERLIST_10MS_MODE
; 
2500         listp
->idleruns 
= 0; 
2502         tcp_sched_timerlist(offset
); 
2505         lck_mtx_unlock(listp
->mtx
); 
2510  * Enable read probes on this connection, if: 
2511  * - it is in established state 
2512  * - doesn't have any data outstanding 
2513  * - the outgoing ifp matches 
2514  * - we have not already sent any read probes 
/*
 * tcp_enable_read_probe
 *
 * tp:  TCP control block to examine and update.
 * ifp: interface compared against tp->t_inpcb->inp_last_outifp.
 *
 * Acts only when all five visible conditions hold:
 *   - tp->t_state == TCPS_ESTABLISHED
 *   - tp->snd_max == tp->snd_una
 *   - tp->t_inpcb->inp_last_outifp == ifp
 *   - TF_DETECT_READSTALL is not set in tp->t_flagsext
 *   - tp->t_rtimo_probes == 0
 *
 * In that case it sets TF_DETECT_READSTALL, writes 0 to t_rtimo_probes,
 * loads t_timer[TCPT_KEEP] with OFFSET_FROM_START(tp,
 * TCP_TIMER_10MS_QUANTUM), and points tp->tentry at TCPT_KEEP with a
 * runtime of tcp_now + TCP_TIMER_10MS_QUANTUM.
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2518, 2532-2534, 2538 and 2544 onwards). The return type,
 * the braces, the declaration of `diff`, the `else` that presumably
 * separates the two tentry updates, and the test applied to `diff`
 * before the second update are not visible - confirm against the
 * complete file.
 */
2517 tcp_enable_read_probe(struct tcpcb 
*tp
, struct ifnet 
*ifp
) 
2519         if (tp
->t_state 
== TCPS_ESTABLISHED 
&& 
2520             tp
->snd_max 
== tp
->snd_una 
&& 
2521             tp
->t_inpcb
->inp_last_outifp 
== ifp 
&& 
2522             !(tp
->t_flagsext 
& TF_DETECT_READSTALL
) && 
2523             tp
->t_rtimo_probes 
== 0) { 
/* All conditions hold: set TF_DETECT_READSTALL on the connection. */
2524                 tp
->t_flagsext 
|= TF_DETECT_READSTALL
; 
/* t_rtimo_probes is already 0 here, as required by the condition above. */
2525                 tp
->t_rtimo_probes 
= 0; 
/* Load the TCPT_KEEP timer slot with a 10ms-quantum offset. */
2526                 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
, 
2527                     TCP_TIMER_10MS_QUANTUM
); 
/* No timer index selected yet: select TCPT_KEEP, due in one quantum. */
2528                 if (tp
->tentry
.index 
== TCPT_NONE
) { 
2529                         tp
->tentry
.index 
= TCPT_KEEP
; 
2530                         tp
->tentry
.runtime 
= tcp_now 
+ 
2531                             TCP_TIMER_10MS_QUANTUM
; 
2535                         /* Reset runtime to be in next 10ms */ 
2536                         diff 
= timer_diff(tp
->tentry
.runtime
, 0, 
2537                             tcp_now
, TCP_TIMER_10MS_QUANTUM
); 
2539                                 tp
->tentry
.index 
= TCPT_KEEP
; 
2540                                 tp
->tentry
.runtime 
= tcp_now 
+ 
2541                                     TCP_TIMER_10MS_QUANTUM
; 
/*
 * A computed runtime of exactly 0 is bumped to 1. Presumably 0 is
 * reserved as a "not set" value - confirm. Note that the TCPT_NONE path
 * above has no such adjustment in the visible lines.
 */
2542                                 if (tp
->tentry
.runtime 
== 0) { 
2543                                         tp
->tentry
.runtime
++; 
2551  * Disable read probe and reset the keep alive timer 
/*
 * tcp_disable_read_probe
 *
 * tp: TCP control block to update.
 *
 * When tp->t_adaptive_rtimo == 0 and either TF_DETECT_READSTALL is set in
 * tp->t_flagsext or tp->t_rtimo_probes > 0, calls tcp_keepalive_reset(tp).
 * A call to mptcp_reset_keepalive(tp) follows inside the same `if` body.
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2555, 2560-2561 and 2563 onwards). The return type, the
 * braces and whatever guards the mptcp_reset_keepalive() call on line
 * 2561 are not visible - confirm against the complete file.
 */
2554 tcp_disable_read_probe(struct tcpcb 
*tp
) 
2556         if (tp
->t_adaptive_rtimo 
== 0 && 
2557             ((tp
->t_flagsext 
& TF_DETECT_READSTALL
) || 
2558             tp
->t_rtimo_probes 
> 0)) { 
2559                 tcp_keepalive_reset(tp
); 
2562                         mptcp_reset_keepalive(tp
); 
2568  * Reschedule the tcp timerlist in the next 10ms to re-enable read/write 
2569  * probes on connections going over a particular interface. 
/*
 * tcp_probe_connectivity
 *
 * ifp:    interface passed through to tcp_enable_read_probe().
 * enable: not referenced in any visible line; presumably selects between
 *         the enable and disable calls below - confirm.
 *
 * Phase 1 (pcbinfo->ipi_lock held shared): walks every inpcb on
 * tcbinfo.ipi_listhead with LIST_FOREACH_SAFE. For each one it takes a
 * want count (in_pcb_checkstate WNT_ACQUIRE), locks the socket, releases
 * the want count, and then calls tcp_enable_read_probe(tp, ifp) and/or
 * tcp_disable_read_probe(tp) before unlocking the socket. A pcb whose
 * inp_ppcb is NULL, or whose WNT_RELEASE reports WNT_STOPUSING, has its
 * socket unlocked right away.
 *
 * Phase 2 (listp->mtx held): if the timer list is running, it ORs
 * TCP_TIMERLIST_10MS_MODE into listp->pref_mode. The visible lines then
 * compute an offset of TCP_TIMER_10MS_QUANTUM, set listp->mode to
 * TCP_TIMERLIST_10MS_MODE, clear listp->idleruns and call
 * tcp_sched_timerlist(offset).
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2573-2574, 2578-2583, 2595-2598, 2606-2607, 2609, 2611,
 * 2613-2614, 2622-2624, 2628, 2630, 2632-2634 and 2639). The return type,
 * the braces, the declarations of `offset` and `diff`, the right-hand
 * side of the WNT_ACQUIRE comparison, the loop-skip statements, the
 * condition choosing enable versus disable, the test on `diff`, and any
 * early exits are not visible - confirm against the complete file.
 */
2572 tcp_probe_connectivity(struct ifnet 
*ifp
, u_int32_t enable
) 
2575         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
2576         struct inpcbinfo 
*pcbinfo 
= &tcbinfo
; 
2577         struct inpcb 
*inp
, *nxt
; 
2584         calculate_tcp_clock(); 
2587          * Enable keep alive timer on all connections that are 
2588          * active/established on this interface. 
/* The pcb list is walked under the shared ipi_lock, up to line 2617. */
2590         lck_rw_lock_shared(pcbinfo
->ipi_lock
); 
2592         LIST_FOREACH_SAFE(inp
, pcbinfo
->ipi_listhead
, inp_list
, nxt
) { 
2593                 struct tcpcb 
*tp 
= NULL
; 
2594                 if (in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) == 
2599                 /* Acquire lock to look at the state of the connection */ 
2600                 socket_lock(inp
->inp_socket
, 1); 
2602                 /* Release the want count */ 
2603                 if (inp
->inp_ppcb 
== NULL 
|| 
2604                     (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) == WNT_STOPUSING
)) { 
2605                         socket_unlock(inp
->inp_socket
, 1); 
2608                 tp 
= intotcpcb(inp
); 
/* The condition choosing between the next two calls is on omitted lines. */
2610                         tcp_enable_read_probe(tp
, ifp
); 
2612                         tcp_disable_read_probe(tp
); 
2615                 socket_unlock(inp
->inp_socket
, 1); 
2617         lck_rw_done(pcbinfo
->ipi_lock
); 
/* Timer-list state below is updated with listp->mtx held. */
2619         lck_mtx_lock(listp
->mtx
); 
/* List is currently running: record 10ms mode as the preferred mode. */
2620         if (listp
->running
) { 
2621                 listp
->pref_mode 
|= TCP_TIMERLIST_10MS_MODE
; 
2625         /* Reschedule within the next 10ms */ 
2626         offset 
= TCP_TIMER_10MS_QUANTUM
; 
2627         if (listp
->scheduled
) { 
2629                 diff 
= timer_diff(listp
->runtime
, 0, tcp_now
, offset
); 
2631                         /* The timer will fire sooner than what's needed */ 
2635         listp
->mode 
= TCP_TIMERLIST_10MS_MODE
; 
2636         listp
->idleruns 
= 0; 
2638         tcp_sched_timerlist(offset
); 
2640         lck_mtx_unlock(listp
->mtx
); 
/*
 * tcp_update_mss_core
 *
 * tp:  TCP control block whose t_maxopd / t_maxseg / t_cached_maxopd are
 *      adjusted.
 * ifp: interface whose link status is read. ifp->if_link_status is
 *      dereferenced here without a NULL check; the visible caller,
 *      tcp_update_mss_locked(), tests it against NULL before calling.
 *
 * Nothing is changed unless ifsr->valid_bitmask has
 * IF_CELL_UL_MSS_RECOMMENDED_VALID set. When it does:
 *   - optlen = t_maxopd - t_maxseg is captured first so the same gap can
 *     be re-applied after t_maxopd changes.
 *   - mss_recommended == IF_CELL_UL_MSS_RECOMMENDED_NONE, with a cached
 *     value > 0 that is larger than t_maxopd: t_maxopd is restored from
 *     t_cached_maxopd and tcps_mss_to_default is incremented.
 *   - mss_recommended == IF_CELL_UL_MSS_RECOMMENDED_MEDIUM and
 *     t_maxopd > tcp_mss_rec_medium: the current t_maxopd is cached,
 *     t_maxopd becomes tcp_mss_rec_medium, tcps_mss_to_medium is
 *     incremented.
 *   - mss_recommended == IF_CELL_UL_MSS_RECOMMENDED_LOW and
 *     t_maxopd > tcp_mss_rec_low: the current t_maxopd is cached,
 *     t_maxopd becomes tcp_mss_rec_low, tcps_mss_to_low is incremented.
 *   - t_maxseg is recomputed as t_maxopd - optlen.
 *   - t_cached_maxopd is zeroed when it equals t_maxopd.
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2646, 2648, 2652, 2671, 2673-2674, 2676 and 2679 onwards).
 * The return type, the braces and the declaration of `optlen` are not
 * visible - confirm against the complete file.
 */
2645 tcp_update_mss_core(struct tcpcb 
*tp
, struct ifnet 
*ifp
) 
2647         struct if_cellular_status_v1 
*ifsr
; 
2649         ifsr 
= &ifp
->if_link_status
->ifsr_u
.ifsr_cell
.if_cell_u
.if_status_v1
; 
2650         if (ifsr
->valid_bitmask 
& IF_CELL_UL_MSS_RECOMMENDED_VALID
) { 
/* Remember the maxopd/maxseg gap so it can be re-applied at line 2672. */
2651                 optlen 
= tp
->t_maxopd 
- tp
->t_maxseg
; 
/* No recommendation and a larger cached value exists: restore it. */
2653                 if (ifsr
->mss_recommended 
== 
2654                     IF_CELL_UL_MSS_RECOMMENDED_NONE 
&& 
2655                     tp
->t_cached_maxopd 
> 0 && 
2656                     tp
->t_maxopd 
< tp
->t_cached_maxopd
) { 
2657                         tp
->t_maxopd 
= tp
->t_cached_maxopd
; 
2658                         tcpstat
.tcps_mss_to_default
++; 
/* Medium recommendation: cache the current value, then clamp down. */
2659                 } else if (ifsr
->mss_recommended 
== 
2660                     IF_CELL_UL_MSS_RECOMMENDED_MEDIUM 
&& 
2661                     tp
->t_maxopd 
> tcp_mss_rec_medium
) { 
2662                         tp
->t_cached_maxopd 
= tp
->t_maxopd
; 
2663                         tp
->t_maxopd 
= tcp_mss_rec_medium
; 
2664                         tcpstat
.tcps_mss_to_medium
++; 
/* Low recommendation: cache the current value, then clamp down. */
2665                 } else if (ifsr
->mss_recommended 
== 
2666                     IF_CELL_UL_MSS_RECOMMENDED_LOW 
&& 
2667                     tp
->t_maxopd 
> tcp_mss_rec_low
) { 
2668                         tp
->t_cached_maxopd 
= tp
->t_maxopd
; 
2669                         tp
->t_maxopd 
= tcp_mss_rec_low
; 
2670                         tcpstat
.tcps_mss_to_low
++; 
/* Re-derive t_maxseg from the (possibly updated) t_maxopd. */
2672                 tp
->t_maxseg 
= tp
->t_maxopd 
- optlen
; 
2675                  * clear the cached value if it is same as the current 
2677                 if (tp
->t_maxopd 
== tp
->t_cached_maxopd
) { 
2678                         tp
->t_cached_maxopd 
= 0; 
/*
 * tcp_update_mss_locked
 *
 * so:  socket whose inpcb and tcpcb are looked up with sotoinpcb() and
 *      intotcpcb().
 * ifp: interface to consult; when NULL, inp->inp_last_outifp is used
 *      instead (the assignment happens inside the first condition).
 *
 * Visible checks, in order:
 *   - ifp is still NULL after the fallback
 *   - ifp is not cellular (!IFNET_IS_CELLULAR)
 *   - tp->t_state <= TCPS_CLOSE_WAIT; inside that:
 *       - TF_BLACKHOLE is set in tp->t_flags
 *       - ifp->if_link_status is NULL
 *       - tcp_update_mss_core(tp, ifp) is called
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2685, 2688, 2690-2692, 2694, 2696-2699, 2701, 2704,
 * 2706-2707, 2709-2710 and 2712 onwards). The return type, the braces
 * and the statements inside the guard `if` bodies are not visible.
 * Presumably each guard returns without touching the MSS - confirm. The
 * `_locked` suffix suggests the caller holds the socket lock, but no
 * visible line states that - confirm as well.
 */
2684 tcp_update_mss_locked(struct socket 
*so
, struct ifnet 
*ifp
) 
2686         struct inpcb 
*inp 
= sotoinpcb(so
); 
2687         struct tcpcb 
*tp 
= intotcpcb(inp
); 
/* Fall back to the pcb's last output interface when ifp is NULL. */
2689         if (ifp 
== NULL 
&& (ifp 
= inp
->inp_last_outifp
) == NULL
) { 
2693         if (!IFNET_IS_CELLULAR(ifp
)) { 
2695                  * This optimization is implemented for cellular 
2700         if (tp
->t_state 
<= TCPS_CLOSE_WAIT
) { 
2702                  * If the connection is currently doing or has done PMTU 
2703                  * blackhole detection, do not change the MSS 
2705                 if (tp
->t_flags 
& TF_BLACKHOLE
) { 
/* tcp_update_mss_core() dereferences if_link_status, so test it first. */
2708                 if (ifp
->if_link_status 
== NULL
) { 
2711                 tcp_update_mss_core(tp
, ifp
); 
/*
 * tcp_itimer
 *
 * ipi: pcb info block supplying ipi_lock, ipi_flags and ipi_timer_req.
 *
 * Locking: first tries lck_rw_try_lock_exclusive(ipi->ipi_lock). On
 * failure, if tcp_itimer_done is TRUE it is flipped to FALSE and
 * ipi->ipi_timer_req.intimer_fast is atomically incremented. The visible
 * fallback is a blocking lck_rw_lock_exclusive(). Once past the lock,
 * tcp_itimer_done is set to TRUE.
 *
 * Per-pcb work (LIST_FOREACH_SAFE over `tcb`):
 *   - tests inp->inp_ppcb for NULL and takes a want count (WNT_ACQUIRE)
 *   - caches inp->inp_socket in `so` and inp->inp_last_outifp in `ifp`
 *   - releases the want count (WNT_RELEASE), unlocking the socket if
 *     that reports WNT_STOPUSING
 *   - calls so_check_extended_bk_idle_time(so)
 *   - calls tcp_update_mss_locked(so, NULL) when INPCBINFO_UPDATE_MSS is
 *     set in ipi->ipi_flags
 *   - unlocks the socket
 *   - after the unlock, calls socket_defunct(current_proc(), so,
 *     SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) when
 *     INPCBINFO_HANDLE_LQM_ABORT is set, the socket's traffic class
 *     satisfies IS_SO_TC_BACKGROUNDSYSTEM, ifp is a non-NULL cellular
 *     interface, and its interface state has a valid LQM state equal to
 *     IFNET_LQM_THRESH_ABORT
 *
 * After the walk, both INPCBINFO_UPDATE_MSS and
 * INPCBINFO_HANDLE_LQM_ABORT are cleared from ipi->ipi_flags and
 * ipi_lock is released.
 *
 * NOTE(review): this listing omits short source lines (the embedded
 * numbers skip 2717, 2719, 2724-2725, 2728, 2730, 2732-2734, 2737-2738,
 * 2741, 2744-2745, 2749, 2751-2752, 2756 and 2766-2768), and the visible
 * text stops at line 2770. The return type, the braces, the declarations
 * of `so` and `ifp`, the loop-skip statements, any early return after
 * the atomic increment, and the call that locks the socket (presumably
 * on line 2741, before the WNT_RELEASE check) are not visible - confirm
 * against the complete file.
 */
2716 tcp_itimer(struct inpcbinfo 
*ipi
) 
2718         struct inpcb 
*inp
, *nxt
; 
2720         if (lck_rw_try_lock_exclusive(ipi
->ipi_lock
) == FALSE
) { 
/* Lock not obtained: flag the pass as not done and request a fast timer. */
2721                 if (tcp_itimer_done 
== TRUE
) { 
2722                         tcp_itimer_done 
= FALSE
; 
2723                         atomic_add_32(&ipi
->ipi_timer_req
.intimer_fast
, 1); 
2726                 /* Upgrade failed, lost lock now take it again exclusive */ 
2727                 lck_rw_lock_exclusive(ipi
->ipi_lock
); 
2729         tcp_itimer_done 
= TRUE
; 
2731         LIST_FOREACH_SAFE(inp
, &tcb
, inp_list
, nxt
) { 
2735                 if (inp
->inp_ppcb 
== NULL 
|| 
2736                     in_pcb_checkstate(inp
, WNT_ACQUIRE
, 0) == WNT_STOPUSING
) { 
/* Cache the socket and last output interface for use further down. */
2739                 so 
= inp
->inp_socket
; 
2740                 ifp 
= inp
->inp_last_outifp
; 
2742                 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) { 
2743                         socket_unlock(so
, 1); 
2746                 so_check_extended_bk_idle_time(so
); 
2747                 if (ipi
->ipi_flags 
& INPCBINFO_UPDATE_MSS
) { 
2748                         tcp_update_mss_locked(so
, NULL
); 
2750                 socket_unlock(so
, 1); 
2753                  * Defunct all system-initiated background sockets if the 
2754                  * socket is using the cellular interface and the interface 
2755                  * has its LQM set to abort. 
/* This check and socket_defunct() run after the socket_unlock() above. */
2757                 if ((ipi
->ipi_flags 
& INPCBINFO_HANDLE_LQM_ABORT
) && 
2758                     IS_SO_TC_BACKGROUNDSYSTEM(so
->so_traffic_class
) && 
2759                     ifp 
!= NULL 
&& IFNET_IS_CELLULAR(ifp
) && 
2760                     (ifp
->if_interface_state
.valid_bitmask 
& 
2761                     IF_INTERFACE_STATE_LQM_STATE_VALID
) && 
2762                     ifp
->if_interface_state
.lqm_state 
== 
2763                     IFNET_LQM_THRESH_ABORT
) { 
2764                         socket_defunct(current_proc(), so
, 
2765                             SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL
); 
/* Both request flags are consumed once the whole list has been walked. */
2769         ipi
->ipi_flags 
&= ~(INPCBINFO_UPDATE_MSS 
| INPCBINFO_HANDLE_LQM_ABORT
); 
2770         lck_rw_done(ipi
->ipi_lock
);