2  * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 
  30  *      The Regents of the University of California.  All rights reserved. 
  32  * Redistribution and use in source and binary forms, with or without 
  33  * modification, are permitted provided that the following conditions 
  35  * 1. Redistributions of source code must retain the above copyright 
  36  *    notice, this list of conditions and the following disclaimer. 
  37  * 2. Redistributions in binary form must reproduce the above copyright 
  38  *    notice, this list of conditions and the following disclaimer in the 
  39  *    documentation and/or other materials provided with the distribution. 
  40  * 3. All advertising materials mentioning features or use of this software 
  41  *    must display the following acknowledgement: 
  42  *      This product includes software developed by the University of 
  43  *      California, Berkeley and its contributors. 
  44  * 4. Neither the name of the University nor the names of its contributors 
  45  *    may be used to endorse or promote products derived from this software 
  46  *    without specific prior written permission. 
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  60  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 
  61  * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.11 2001/08/22 00:59:12 silby Exp $ 
  65 #include <sys/param.h> 
  66 #include <sys/systm.h> 
  67 #include <sys/kernel.h> 
  69 #include <sys/sysctl.h> 
  70 #include <sys/socket.h> 
  71 #include <sys/socketvar.h> 
  72 #include <sys/protosw.h> 
  73 #include <sys/domain.h> 
  74 #include <sys/mcache.h> 
  75 #include <sys/queue.h> 
  76 #include <kern/locks.h> 
  77 #include <kern/cpu_number.h>    /* before tcp_seq.h, for tcp_random18() */ 
  78 #include <mach/boolean.h> 
  80 #include <net/route.h> 
  81 #include <net/if_var.h> 
  83 #include <netinet/in.h> 
  84 #include <netinet/in_systm.h> 
  85 #include <netinet/in_pcb.h> 
  87 #include <netinet6/in6_pcb.h> 
  89 #include <netinet/ip_var.h> 
  90 #include <netinet/tcp.h> 
  91 #include <netinet/tcp_fsm.h> 
  92 #include <netinet/tcp_seq.h> 
  93 #include <netinet/tcp_timer.h> 
  94 #include <netinet/tcp_var.h> 
  95 #include <netinet/tcp_cc.h> 
  97 #include <netinet6/tcp6_var.h> 
  99 #include <netinet/tcpip.h> 
 101 #include <netinet/tcp_debug.h> 
 103 #include <sys/kdebug.h> 
 104 #include <mach/sdt.h> 
 105 #include <netinet/mptcp_var.h> 
 107 extern void postevent(struct socket 
*, struct sockbuf 
*, 
 109 #define DBG_FNC_TCP_FAST        NETDBG_CODE(DBG_NETTCP, (5 << 8)) 
 110 #define DBG_FNC_TCP_SLOW        NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1) 
 112 #define TIMERENTRY_TO_TP(te) ((struct tcpcb *)((uintptr_t)te - offsetof(struct tcpcb, tentry.le.le_next)))  
 114 #define VERIFY_NEXT_LINK(elm,field) do {        \ 
 115         if (LIST_NEXT((elm),field) != NULL &&   \ 
 116             LIST_NEXT((elm),field)->field.le_prev !=    \ 
 117                 &((elm)->field.le_next))        \ 
 118                 panic("Bad link elm %p next->prev != elm", (elm));      \ 
 121 #define VERIFY_PREV_LINK(elm,field) do {        \ 
 122         if (*(elm)->field.le_prev != (elm))     \ 
 123                 panic("Bad link elm %p prev->next != elm", (elm));      \ 
/* Global list of tcpcbs with pending timers (see tcp_run_conn_timer below) */
struct tcptimerlist tcp_timer_list;

/* List of pcbs in timewait state, protected by tcbinfo's ipi_lock */
struct tcptailq tcp_tw_tailq;

/* Tunable threshold for background-IO handling; units/semantics not
 * visible here -- TODO confirm against the code that reads it. */
static int	background_io_trigger = 5;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, background_io_trigger, CTLFLAG_RW | CTLFLAG_LOCKED,
    &background_io_trigger, 0, "Background IO Trigger Setting");
 137 sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS
 
 139 #pragma unused(arg1, arg2) 
 142         tt 
= *(int *)oidp
->oid_arg1
; 
 143         s 
= tt 
* 1000 / TCP_RETRANSHZ
;; 
 145         error 
= sysctl_handle_int(oidp
, &s
, 0, req
); 
 146         if (error 
|| !req
->newptr
) 
 149         tt 
= s 
* TCP_RETRANSHZ 
/ 1000; 
 153         *(int *)oidp
->oid_arg1 
= tt
; 
/* Keepalive and 2MSL tunables, all stored in ticks but exposed in
 * milliseconds via sysctl_msec_to_ticks. */
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");

SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");

SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");

SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&tcp_keepcnt, 0, "number of times to repeat keepalive");

SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

/*
 * Avoid DoS via TCP Robustness in Persist Condition (see http://www.ietf.org/id/draft-ananth-tcpm-persist-02.txt)
 * by allowing a system wide maximum persistence timeout value when in Zero Window Probe mode.
 * Expressed in milliseconds to be consistent without timeout related values, the TCP socket option is in seconds.
 */
u_int32_t tcp_max_persist_timeout = 0;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, max_persist_timeout, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_max_persist_timeout, 0, sysctl_msec_to_ticks, "I", "Maximum persistence timout for ZWP");

/* When non-zero, treat every TCP connection as if SO_KEEPALIVE were set */
static int	always_keepalive = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
/* This parameter determines how long the timer list will stay in fast mode even
 * though all connections are idle. In fast mode, the timer will fire more frequently
 * anticipating new data.
 */
int timer_fastmode_idlemax = TCP_FASTMODE_IDLEGEN_MAX;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_fastmode_idlemax, CTLFLAG_RW | CTLFLAG_LOCKED,
	&timer_fastmode_idlemax, 0, "Maximum idle generations in fast mode");

/*
 * See tcp_syn_backoff[] for interval values between SYN retransmits;
 * the value set below defines the number of retransmits, before we
 * disable the timestamp and window scaling options during subsequent
 * SYN retransmits.  Setting it to 0 disables the dropping off of those
 * options.
 */
static int tcp_broken_peer_syn_rxmit_thres = 7;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rxmit_thres, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_broken_peer_syn_rxmit_thres, 0, "Number of retransmitted SYNs before "
    "TCP disables rfc1323 and rfc1644 during the rest of attempts");

/* A higher threshold on local connections for disabling RFC 1323 options */
static int tcp_broken_peer_syn_rxmit_thres_local = 10;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, broken_peer_syn_rexmit_thres_local,
	CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_broken_peer_syn_rxmit_thres_local, 0,
	"Number of retransmitted SYNs before disabling RFC 1323 options on local connections");

/* Read-only diagnostic counter */
static int tcp_timer_advanced = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_timer_advanced, CTLFLAG_RD | CTLFLAG_LOCKED,
    &tcp_timer_advanced, 0, "Number of times one of the timers was advanced");

/* Read-only diagnostic counter */
static int tcp_resched_timerlist = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_resched_timerlist, CTLFLAG_RD | CTLFLAG_LOCKED,
    &tcp_resched_timerlist, 0,
    "Number of times timer list was rescheduled as part of processing a packet");

/* Path-MTU black-hole detection, enabled by default (see retransmit timer) */
int	tcp_pmtud_black_hole_detect = 1 ;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_pmtud_black_hole_detect, 0, "Path MTU Discovery Black Hole Detection");

/* Intermediate MSS to fall back to when a PMTU black hole is suspected */
int	tcp_pmtud_black_hole_mss = 1200 ;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss, CTLFLAG_RW | CTLFLAG_LOCKED,
    &tcp_pmtud_black_hole_mss, 0, "Path MTU Discovery Black Hole Detection lowered MSS");

/* performed garbage collection of "used" sockets */
static boolean_t tcp_gc_done = FALSE;

/* max idle probes */
int	tcp_maxpersistidle;

/* TCP delack timer is set to 100 ms. Since the processing of timer list in fast
 * mode will happen no faster than 100 ms, the delayed ack timer will fire some where
 * between 100 and 200 ms.
 */
int	tcp_delack = TCP_RETRANSHZ / 10;

/*
 * MP_JOIN retransmission of 3rd ACK will be every 500 msecs without backoff
 */
int	tcp_jack_rxmt = TCP_RETRANSHZ / 2;

/* The frequency of running through the TCP timer list in
 * fast and slow mode can be configured.
 */
SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_fastquantum, CTLFLAG_RW | CTLFLAG_LOCKED,
	&tcp_timer_list.fast_quantum, TCP_FASTTIMER_QUANTUM,
	"Frequency of running timer list in fast mode");

SYSCTL_UINT(_net_inet_tcp, OID_AUTO, timer_slowquantum, CTLFLAG_RW | CTLFLAG_LOCKED,
	&tcp_timer_list.slow_quantum, TCP_SLOWTIMER_QUANTUM,
	"Frequency of running timer list in slow mode");
/* Internal helpers for per-connection timers and the global timer list */
static void tcp_remove_timer(struct tcpcb *tp);
static void tcp_sched_timerlist(uint32_t offset);
static uint32_t tcp_run_conn_timer(struct tcpcb *tp, uint16_t *next_index);
static void tcp_sched_timers(struct tcpcb *tp);
static inline void tcp_set_lotimer_index(struct tcpcb *);
static void tcp_rexmt_save_state(struct tcpcb *tp);
void tcp_remove_from_time_wait(struct inpcb *inp);

/* Macro to compare two timers. If there is a reset of the sign bit, it is
 * safe to assume that the timer has wrapped around. By doing signed comparision,
 * we take care of wrap around such that the value with the sign bit reset is
 * actually ahead of the other.
 */
 277 static inline int32_t 
 278 timer_diff(uint32_t t1
, uint32_t toff1
, uint32_t t2
, uint32_t toff2
) {  
 279         return (int32_t)((t1 
+ toff1
) - (t2 
+ toff2
)); 
/* Returns true if the timer is on the timer list */
#define TIMER_IS_ON_LIST(tp) ((tp)->t_flags & TF_TIMER_ONLIST)

/* Time-wait list management, defined below */
static void add_to_time_wait_locked(struct tcpcb *tp, uint32_t delay);
void	add_to_time_wait(struct tcpcb *tp, uint32_t delay) ;

static boolean_t tcp_garbage_collect(struct inpcb *, int);
 292  * Add to tcp timewait list, delay is given in milliseconds. 
 295 add_to_time_wait_locked(struct tcpcb 
*tp
, uint32_t delay
) 
 297         struct inpcbinfo 
*pcbinfo 
= &tcbinfo
; 
 298         struct inpcb 
*inp 
= tp
->t_inpcb
; 
 301         /* pcb list should be locked when we get here */ 
 302         lck_rw_assert(pcbinfo
->ipi_lock
, LCK_RW_ASSERT_EXCLUSIVE
); 
 304         /* We may get here multiple times, so check */ 
 305         if (!(inp
->inp_flags2 
& INP2_TIMEWAIT
)) { 
 306                 pcbinfo
->ipi_twcount
++; 
 307                 inp
->inp_flags2 
|= INP2_TIMEWAIT
; 
 309                 /* Remove from global inp list */ 
 310                 LIST_REMOVE(inp
, inp_list
); 
 312                 TAILQ_REMOVE(&tcp_tw_tailq
, tp
, t_twentry
); 
 315         /* Compute the time at which this socket can be closed */ 
 316         timer 
= tcp_now 
+ delay
; 
 318         /* We will use the TCPT_2MSL timer for tracking this delay */ 
 320         if (TIMER_IS_ON_LIST(tp
)) 
 321                 tcp_remove_timer(tp
); 
 322         tp
->t_timer
[TCPT_2MSL
] = timer
; 
 324         TAILQ_INSERT_TAIL(&tcp_tw_tailq
, tp
, t_twentry
); 
 328 add_to_time_wait(struct tcpcb 
*tp
, uint32_t delay
) 
 330         struct inpcbinfo 
*pcbinfo 
= &tcbinfo
; 
 332         if (!lck_rw_try_lock_exclusive(pcbinfo
->ipi_lock
)) { 
 333                 tcp_unlock(tp
->t_inpcb
->inp_socket
, 0, 0); 
 334                 lck_rw_lock_exclusive(pcbinfo
->ipi_lock
); 
 335                 tcp_lock(tp
->t_inpcb
->inp_socket
, 0, 0); 
 337         add_to_time_wait_locked(tp
, delay
); 
 338         lck_rw_done(pcbinfo
->ipi_lock
); 
 340         inpcb_gc_sched(pcbinfo
, INPCB_TIMER_LAZY
); 
 343 /* If this is on time wait queue, remove it. */ 
 345 tcp_remove_from_time_wait(struct inpcb 
*inp
) 
 347         struct tcpcb 
*tp 
= intotcpcb(inp
); 
 348         if (inp
->inp_flags2 
& INP2_TIMEWAIT
) 
 349                 TAILQ_REMOVE(&tcp_tw_tailq
, tp
, t_twentry
); 
/*
 * Attempt to garbage-collect one TCP pcb.  Returns "active" (TRUE)
 * when the pcb cannot be disposed of yet, so the caller keeps the
 * collector timer running.
 *
 * NOTE(review): many structural lines of this function (the storage
 * class of the definition, locals "so"/"tp", early returns, pcb
 * detach/dispose calls and several closing braces) were lost in
 * extraction.  The code below is byte-identical to what is visible
 * and is therefore structurally incomplete -- restore the missing
 * lines from the original before compiling.
 */
tcp_garbage_collect(struct inpcb *inp, int istimewait)
	boolean_t active = FALSE;

	so = inp->inp_socket;

	/*
	 * Skip if still in use or busy; it would have been more efficient
	 * if we were to test so_usecount against 0, but this isn't possible
	 * due to the current implementation of tcp_dropdropablreq() where
	 * overflow sockets that are eligible for garbage collection have
	 * their usecounts set to 1.
	 */
	if (!lck_mtx_try_lock_spin(&inp->inpcb_mtx))

	/* Check again under the lock */
	if (so->so_usecount > 1) {
		if (inp->inp_wantcnt == WNT_STOPUSING)
		lck_mtx_unlock(&inp->inpcb_mtx);
		TSTMP_GEQ(tcp_now, tp->t_timer[TCPT_2MSL]) &&
		tp->t_state != TCPS_CLOSED) {
		/* Become a regular mutex */
		lck_mtx_convert_spin(&inp->inpcb_mtx);

	/*
	 * Overflowed socket dropped from the listening queue?  Do this
	 * only if we are called to clean up the time wait slots, since
	 * tcp_dropdropablreq() considers a socket to have been fully
	 * dropped after add_to_time_wait() is finished.
	 * Also handle the case of connections getting closed by the peer
	 * while in the queue as seen with rdar://6422317
	 */
	if (so->so_usecount == 1 &&
	    ((istimewait && (so->so_flags & SOF_OVERFLOW)) ||
	    ((tp != NULL) && (tp->t_state == TCPS_CLOSED) &&
	    (so->so_head != NULL) &&
	    ((so->so_state & (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE)) ==
	    (SS_INCOMP|SS_CANTSENDMORE|SS_CANTRCVMORE))))) {

		if (inp->inp_state != INPCB_STATE_DEAD) {
			/* Become a regular mutex */
			lck_mtx_convert_spin(&inp->inpcb_mtx);
			/* presumably detaches the v6 pcb here -- detach call lost in extraction */
			if (SOCK_CHECK_DOM(so, PF_INET6))
		if (inp->inp_wantcnt == WNT_STOPUSING)
		lck_mtx_unlock(&inp->inpcb_mtx);
	} else if (inp->inp_wantcnt != WNT_STOPUSING) {
		lck_mtx_unlock(&inp->inpcb_mtx);

	/*
	 * We get here because the PCB is no longer searchable
	 * (WNT_STOPUSING); detach (if needed) and dispose if it is dead
	 * (usecount is 0).  This covers all cases, including overflow
	 * sockets and those that are considered as "embryonic",
	 * i.e. created by sonewconn() in TCP input path, and have
	 * not yet been committed.  For the former, we reduce the usecount
	 * to 0 as done by the code above.  For the latter, the usecount
	 * would have reduced to 0 as part calling soabort() when the
	 * socket is dropped at the end of tcp_input().
	 */
	if (so->so_usecount == 0) {
		DTRACE_TCP4(state__change, void, NULL, struct inpcb *, inp,
			struct tcpcb *, tp, int32_t, TCPS_CLOSED);
		/* Become a regular mutex */
		lck_mtx_convert_spin(&inp->inpcb_mtx);

		/*
		 * If this tp still happens to be on the timer list,
		 * take it out.
		 */
		if (TIMER_IS_ON_LIST(tp)) {
			tcp_remove_timer(tp);

		if (inp->inp_state != INPCB_STATE_DEAD) {
			/* presumably detaches the v6 pcb here -- detach call lost in extraction */
			if (SOCK_CHECK_DOM(so, PF_INET6))

	lck_mtx_unlock(&inp->inpcb_mtx);
/*
 * TCP garbage collector callback (inpcb_timer_func_t).
 *
 * Returns the number of pcbs that will need to be gc-ed soon,
 * returnining > 0 will keep timer active.
 *
 * NOTE(review): several structural lines (the definition's storage
 * class/return type, early returns and some closing braces) were
 * lost in extraction.  The code below is byte-identical to what is
 * visible and is therefore structurally incomplete -- restore the
 * missing lines from the original before compiling.
 */
tcp_gc(struct inpcbinfo *ipi)
	struct inpcb *inp, *nxt;
	struct tcpcb *tw_tp, *tw_ntp;
	static int tws_checked = 0;  /* debug counter fed to KERNEL_DEBUG below */

	KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_START, 0, 0, 0, 0, 0);

	/*
	 * Update tcp_now here as it may get used while
	 * processing the slow timer.
	 */
	calculate_tcp_clock();

	/*
	 * Garbage collect socket/tcpcb: We need to acquire the list lock
	 * exclusively to do this
	 */
	if (lck_rw_try_lock_exclusive(ipi->ipi_lock) == FALSE) {
		/* don't sweat it this time; cleanup was done last time */
		if (tcp_gc_done == TRUE) {
			KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END,
			    tws_checked, cur_tw_slot, 0, 0, 0);
			/* Lock upgrade failed, give up this round */
			atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);
		/* Upgrade failed, lost lock now take it again exclusive */
		lck_rw_lock_exclusive(ipi->ipi_lock);

	/* Sweep all pcbs; any that could not be collected re-arm the fast timer */
	LIST_FOREACH_SAFE(inp, &tcb, inp_list, nxt) {
		if (tcp_garbage_collect(inp, 0))
			atomic_add_32(&ipi->ipi_gc_req.intimer_fast, 1);

	/* Now cleanup the time wait ones */
	TAILQ_FOREACH_SAFE(tw_tp, &tcp_tw_tailq, t_twentry, tw_ntp) {
		/*
		 * We check the timestamp here without holding the
		 * socket lock for better performance. If there are
		 * any pcbs in time-wait, the timer will get rescheduled.
		 * Hence some error in this check can be tolerated.
		 */
		if (TSTMP_GEQ(tcp_now, tw_tp->t_timer[TCPT_2MSL])) {
			if (tcp_garbage_collect(tw_tp->t_inpcb, 1))
				atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);

	/* take into account pcbs that are still in time_wait_slots */
	atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, ipi->ipi_twcount);

	lck_rw_done(ipi->ipi_lock);

	/* Clean up the socache while we are here */
	if (so_cache_timer())
		atomic_add_32(&ipi->ipi_gc_req.intimer_lazy, 1);

	KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked,
	    cur_tw_slot, 0, 0, 0);
 548  * Cancel all timers for TCP tp. 
 556         tcp_remove_timer(tp
); 
 557         for (i 
= 0; i 
< TCPT_NTIMERS
; i
++) 
 559         tp
->tentry
.timer_start 
= tcp_now
; 
 560         tp
->tentry
.index 
= TCPT_NONE
; 
/* Per-shift RTO multipliers for SYN retransmits, indexed by t_rxtshift
 * (0..TCP_MAXRXTSHIFT); first few retries back off slowly. */
int	tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

/* Exponential RTO backoff multipliers for data retransmits, capped at 64 */
int	tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };

static int tcp_totbackoff = 511;	/* sum of tcp_backoff[] */
 571 static void tcp_rexmt_save_state(struct tcpcb 
*tp
) 
 574         if (TSTMP_SUPPORTED(tp
)) { 
 576                  * Since timestamps are supported on the connection,  
 577                  * we can do recovery as described in rfc 4015. 
 579                 fsize 
= tp
->snd_max 
- tp
->snd_una
; 
 580                 tp
->snd_ssthresh_prev 
= max(fsize
, tp
->snd_ssthresh
); 
 581                 tp
->snd_recover_prev 
= tp
->snd_recover
; 
 584                  * Timestamp option is not supported on this connection. 
 585                  * Record ssthresh and cwnd so they can 
 586                  * be recovered if this turns out to be a "bad" retransmit. 
 587                  * A retransmit is considered "bad" if an ACK for this  
 588                  * segment is received within RTT/2 interval; the assumption 
 589                  * here is that the ACK was already in flight.  See  
 590                  * "On Estimating End-to-End Network Path Properties" by 
 591                  * Allman and Paxson for more details. 
 593                 tp
->snd_cwnd_prev 
= tp
->snd_cwnd
; 
 594                 tp
->snd_ssthresh_prev 
= tp
->snd_ssthresh
; 
 595                 tp
->snd_recover_prev 
= tp
->snd_recover
; 
 596                 if (IN_FASTRECOVERY(tp
)) 
 597                         tp
->t_flags 
|= TF_WASFRECOVERY
; 
 599                         tp
->t_flags 
&= ~TF_WASFRECOVERY
; 
 601         tp
->t_srtt_prev 
= (tp
->t_srtt 
>> TCP_RTT_SHIFT
) + 2; 
 602         tp
->t_rttvar_prev 
= (tp
->t_rttvar 
>> TCP_RTTVAR_SHIFT
); 
 603         tp
->t_flagsext 
&= ~(TF_RECOMPUTE_RTT
); 
 607  * TCP timer processing. 
 610 tcp_timers(tp
, timer
) 
 611         register struct tcpcb 
*tp
; 
 616         struct tcptemp 
*t_template
; 
 625         int isipv6 
= (tp
->t_inpcb
->inp_vflag 
& INP_IPV4
) == 0; 
 628         so 
= tp
->t_inpcb
->inp_socket
; 
 629         idle_time 
= tcp_now 
- tp
->t_rcvtime
; 
 634          * 2 MSL timeout in shutdown went off.  If we're closed but 
 635          * still waiting for peer to close and connection has been idle 
 636          * too long, or if 2MSL time is up from TIME_WAIT or FIN_WAIT_2, 
 637          * delete connection control block. 
 638          * Otherwise, (this case shouldn't happen) check again in a bit 
 639          * we keep the socket in the main list in that case. 
 642                 tcp_free_sackholes(tp
); 
 643                 if (tp
->t_state 
!= TCPS_TIME_WAIT 
&& 
 644                     tp
->t_state 
!= TCPS_FIN_WAIT_2 
&& 
 645                     ((idle_time 
> 0) && (idle_time 
< TCP_CONN_MAXIDLE(tp
)))) { 
 646                         tp
->t_timer
[TCPT_2MSL
] = OFFSET_FROM_START(tp
,  
 647                                 (u_int32_t
)TCP_CONN_KEEPINTVL(tp
)); 
 655          * Retransmission timer went off.  Message has not 
 656          * been acked within retransmit interval.  Back off 
 657          * to a longer retransmit interval and retransmit one segment. 
 660                 /* Drop a connection in the retransmit timer 
 661                  * 1. If we have retransmitted more than TCP_MAXRXTSHIFT times 
 662                  * 2. If the time spent in this retransmission episode is more than 
 663                  *    the time limit set with TCP_RXT_CONNDROPTIME socket option 
 664                  * 3. If TCP_RXT_FINDROP socket option was set and we have already 
 665                  *    retransmitted the FIN 3 times without receiving an ack 
 667                 if (++tp
->t_rxtshift 
> TCP_MAXRXTSHIFT 
|| 
 668                         (tp
->t_rxt_conndroptime 
> 0 && tp
->t_rxtstart 
> 0 &&  
 669                         (tcp_now 
- tp
->t_rxtstart
) >= tp
->t_rxt_conndroptime
) || 
 670                         ((tp
->t_flagsext 
& TF_RXTFINDROP
) != 0 && 
 671                         (tp
->t_flags 
& TF_SENTFIN
) != 0 && 
 672                         tp
->t_rxtshift 
>= 4)) { 
 674                         if ((tp
->t_flagsext 
& TF_RXTFINDROP
) != 0) { 
 675                                 tcpstat
.tcps_rxtfindrop
++; 
 677                                 tcpstat
.tcps_timeoutdrop
++; 
 679                         tp
->t_rxtshift 
= TCP_MAXRXTSHIFT
; 
 680                         postevent(so
, 0, EV_TIMEOUT
);                    
 682                             (SO_FILT_HINT_LOCKED
|SO_FILT_HINT_TIMEOUT
)); 
 683                         tp 
= tcp_drop(tp
, tp
->t_softerror 
? 
 684                             tp
->t_softerror 
: ETIMEDOUT
); 
 689                 tcpstat
.tcps_rexmttimeo
++; 
 691                 if (tp
->t_rxtshift 
== 1 &&  
 692                         tp
->t_state 
== TCPS_ESTABLISHED
) { 
 693                         /* Set the time at which retransmission started. */ 
 694                         tp
->t_rxtstart 
= tcp_now
; 
 697                          * if this is the first retransmit timeout, save 
 698                          * the state so that we can recover if the timeout 
 701                         tcp_rexmt_save_state(tp
); 
 704                 if ((tp
->t_rxtshift 
== mptcp_fail_thresh
) && 
 705                     (tp
->t_state 
== TCPS_ESTABLISHED
) && 
 706                     (tp
->t_mpflags 
& TMPF_MPTCP_TRUE
)) { 
 707                         mptcp_act_on_txfail(so
); 
 712                 if (tp
->t_adaptive_wtimo 
> 0 && 
 713                         tp
->t_rxtshift 
> tp
->t_adaptive_wtimo 
&& 
 714                         TCPS_HAVEESTABLISHED(tp
->t_state
)) { 
 715                         /* Send an event to the application */ 
 717                                 (SO_FILT_HINT_LOCKED
| 
 718                                 SO_FILT_HINT_ADAPTIVE_WTIMO
)); 
 721                 if (tp
->t_state 
== TCPS_SYN_SENT
) { 
 722                         rexmt 
= TCP_REXMTVAL(tp
) * tcp_syn_backoff
[tp
->t_rxtshift
]; 
 723                         tp
->t_stat
.synrxtshift 
= tp
->t_rxtshift
; 
 726                         rexmt 
= TCP_REXMTVAL(tp
) * tcp_backoff
[tp
->t_rxtshift
]; 
 727                 TCPT_RANGESET(tp
->t_rxtcur
, rexmt
, 
 728                         tp
->t_rttmin
, TCPTV_REXMTMAX
,  
 729                         TCP_ADD_REXMTSLOP(tp
)); 
 730                 tp
->t_timer
[TCPT_REXMT
] = OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
 732                 if (INP_WAIT_FOR_IF_FEEDBACK(tp
->t_inpcb
)) 
 735                 tcp_free_sackholes(tp
); 
 737                  * Check for potential Path MTU Discovery Black Hole  
 740                 if (tcp_pmtud_black_hole_detect 
&& (tp
->t_state 
== TCPS_ESTABLISHED
)) { 
 741                         if (((tp
->t_flags 
& (TF_PMTUD
|TF_MAXSEGSNT
)) == (TF_PMTUD
|TF_MAXSEGSNT
)) && 
 742                                  (tp
->t_rxtshift 
== 2)) { 
 744                                  * Enter Path MTU Black-hole Detection mechanism: 
 745                                  * - Disable Path MTU Discovery (IP "DF" bit). 
 746                                  * - Reduce MTU to lower value than what we negociated with peer. 
 748                                 /* Disable Path MTU Discovery for now */ 
 749                                 tp
->t_flags 
&= ~TF_PMTUD
; 
 750                                 /* Record that we may have found a black hole */ 
 751                                 tp
->t_flags 
|= TF_BLACKHOLE
; 
 752                                 optlen 
= tp
->t_maxopd 
- tp
->t_maxseg
; 
 753                                 /* Keep track of previous MSS */ 
 754                                 tp
->t_pmtud_saved_maxopd 
= tp
->t_maxopd
; 
 755                                 /* Reduce the MSS to intermediary value */ 
 756                                 if (tp
->t_maxopd 
> tcp_pmtud_black_hole_mss
) { 
 757                                         tp
->t_maxopd 
= tcp_pmtud_black_hole_mss
; 
 759                                         tp
->t_maxopd 
=  /* use the default MSS */ 
 761                                                 isipv6 
? tcp_v6mssdflt 
: 
 765                                 tp
->t_maxseg 
= tp
->t_maxopd 
- optlen
; 
 768                                  * Reset the slow-start flight size  
 769                                  * as it may depend on the new MSS 
 771                                 if (CC_ALGO(tp
)->cwnd_init 
!= NULL
) 
 772                                         CC_ALGO(tp
)->cwnd_init(tp
); 
 775                          * If further retransmissions are still unsuccessful with a lowered MTU, 
 776                          * maybe this isn't a Black Hole and we restore the previous MSS and 
 777                          * blackhole detection flags. 
 781                                 if ((tp
->t_flags 
& TF_BLACKHOLE
) && (tp
->t_rxtshift 
> 4)) { 
 782                                         tp
->t_flags 
|= TF_PMTUD
;  
 783                                         tp
->t_flags 
&= ~TF_BLACKHOLE
;  
 784                                         optlen 
= tp
->t_maxopd 
- tp
->t_maxseg
; 
 785                                         tp
->t_maxopd 
= tp
->t_pmtud_saved_maxopd
; 
 786                                         tp
->t_maxseg 
= tp
->t_maxopd 
- optlen
; 
 788                                          * Reset the slow-start flight size as it  
 789                                          * may depend on the new MSS 
 791                                         if (CC_ALGO(tp
)->cwnd_init 
!= NULL
) 
 792                                                 CC_ALGO(tp
)->cwnd_init(tp
); 
 799                  * Disable rfc1323 and rfc1644 if we haven't got any response to 
 800                  * our SYN (after we reach the threshold) to work-around some 
 801                  * broken terminal servers (most of which have hopefully been 
 802                  * retired) that have bad VJ header compression code which 
 803                  * trashes TCP segments containing unknown-to-them TCP options. 
 804                  * Do this only on non-local connections. 
 806                 if (tp
->t_state 
== TCPS_SYN_SENT 
&& 
 807                     ((!(tp
->t_flags 
& TF_LOCAL
) && 
 808                     tp
->t_rxtshift 
== tcp_broken_peer_syn_rxmit_thres
) || 
 809                     ((tp
->t_flags 
& TF_LOCAL
) &&  
 810                     tp
->t_rxtshift 
== tcp_broken_peer_syn_rxmit_thres_local
))) 
 811                         tp
->t_flags 
&= ~(TF_REQ_SCALE
|TF_REQ_TSTMP
|TF_REQ_CC
); 
 814                  * If losing, let the lower level know and try for 
 815                  * a better route.  Also, if we backed off this far, 
 816                  * our srtt estimate is probably bogus.  Clobber it 
 817                  * so we'll take the next rtt measurement as our srtt; 
 818                  * move the current srtt into rttvar to keep the current 
 819                  * retransmit times until then. 
 821                 if (tp
->t_rxtshift 
> TCP_MAXRXTSHIFT 
/ 4) { 
 824                                 in6_losing(tp
->t_inpcb
); 
 827                         in_losing(tp
->t_inpcb
); 
 828                         tp
->t_rttvar 
+= (tp
->t_srtt 
>> TCP_RTT_SHIFT
); 
 831                 tp
->snd_nxt 
= tp
->snd_una
; 
 833                  * Note:  We overload snd_recover to function also as the 
 834                  * snd_last variable described in RFC 2582 
 836                 tp
->snd_recover 
= tp
->snd_max
; 
 838                  * Force a segment to be sent. 
 840                 tp
->t_flags 
|= TF_ACKNOW
; 
 842                  * If timing a segment in this window, stop the timer. 
 846                 EXIT_FASTRECOVERY(tp
); 
 848                 /* RFC 5681 says: when a TCP sender detects segment loss 
 849                  * using retransmit timer and the given segment has already 
 850                  * been retransmitted by way of the retransmission timer at 
 851                  * least once, the value of ssthresh is held constant 
 853                 if (tp
->t_rxtshift 
== 1 &&  
 854                         CC_ALGO(tp
)->after_timeout 
!= NULL
) 
 855                         CC_ALGO(tp
)->after_timeout(tp
); 
 858                 /* CWR notifications are to be sent on new data right after 
 859                  * RTOs, Fast Retransmits and ECE notification receipts. 
 861                 if ((tp
->ecn_flags 
& TE_ECN_ON
) == TE_ECN_ON
) { 
 862                         tp
->ecn_flags 
|= TE_SENDCWR
; 
 865                 DTRACE_TCP5(cc
, void, NULL
, struct inpcb 
*, tp
->t_inpcb
, 
 866                         struct tcpcb 
*, tp
, struct tcphdr 
*, NULL
, 
 867                         int32_t, TCP_CC_REXMT_TIMEOUT
); 
 869                 (void) tcp_output(tp
); 
 873          * Persistance timer into zero window. 
 874          * Force a byte to be output, if possible. 
 877                 tcpstat
.tcps_persisttimeo
++; 
 879                  * Hack: if the peer is dead/unreachable, we do not 
 880                  * time out if the window is closed.  After a full 
 881                  * backoff, drop the connection if the idle time 
 882                  * (no responses to probes) reaches the maximum 
 883                  * backoff that we would use if retransmitting. 
 885                  * Drop the connection if we reached the maximum allowed time for  
 886                  * Zero Window Probes without a non-zero update from the peer.  
 889                 if ((tp
->t_rxtshift 
== TCP_MAXRXTSHIFT 
&& 
 890                     (idle_time 
>= tcp_maxpersistidle 
|| 
 891                     idle_time 
>= TCP_REXMTVAL(tp
) * tcp_totbackoff
)) ||  
 892                     ((tp
->t_persist_stop 
!= 0) &&  
 893                         TSTMP_LEQ(tp
->t_persist_stop
, tcp_now
))) { 
 894                         tcpstat
.tcps_persistdrop
++; 
 895                         postevent(so
, 0, EV_TIMEOUT
); 
 897                             (SO_FILT_HINT_LOCKED
|SO_FILT_HINT_TIMEOUT
)); 
 898                         tp 
= tcp_drop(tp
, ETIMEDOUT
); 
 903                 (void) tcp_output(tp
); 
 908          * Keep-alive timer went off; send something 
 909          * or drop connection if idle for too long. 
 912                 tcpstat
.tcps_keeptimeo
++; 
 915                  * Regular TCP connections do not send keepalives after closing 
 916                  * MPTCP must not also, after sending Data FINs. 
 918                 struct mptcb 
*mp_tp 
= tp
->t_mptcb
; 
 919                 if ((tp
->t_mpflags 
& TMPF_MPTCP_TRUE
) &&  
 922                 } else if (mp_tp 
!= NULL
) { 
 923                         if ((mptcp_ok_to_keepalive(mp_tp
) == 0)) 
 927                 if (tp
->t_state 
< TCPS_ESTABLISHED
) 
 929                 if ((always_keepalive 
|| 
 930                     (tp
->t_inpcb
->inp_socket
->so_options 
& SO_KEEPALIVE
) || 
 931                     (tp
->t_flagsext 
& TF_DETECT_READSTALL
)) && 
 932                     (tp
->t_state 
<= TCPS_CLOSING 
|| tp
->t_state 
== TCPS_FIN_WAIT_2
)) { 
 933                         if (idle_time 
>= TCP_CONN_KEEPIDLE(tp
) + TCP_CONN_MAXIDLE(tp
)) 
 936                          * Send a packet designed to force a response 
 937                          * if the peer is up and reachable: 
 938                          * either an ACK if the connection is still alive, 
 939                          * or an RST if the peer has closed the connection 
 940                          * due to timeout or reboot. 
 941                          * Using sequence number tp->snd_una-1 
 942                          * causes the transmitted zero-length segment 
 943                          * to lie outside the receive window; 
 944                          * by the protocol spec, this requires the 
 945                          * correspondent TCP to respond. 
 947                         tcpstat
.tcps_keepprobe
++; 
 948                         t_template 
= tcp_maketemplate(tp
); 
 950                                 unsigned int ifscope
, nocell 
= 0; 
 952                                 if (tp
->t_inpcb
->inp_flags 
& INP_BOUND_IF
) 
 953                                         ifscope 
= tp
->t_inpcb
->inp_boundifp
->if_index
; 
 955                                         ifscope 
= IFSCOPE_NONE
; 
 958                                  * If the socket isn't allowed to use the 
 959                                  * cellular interface, indicate it as such. 
 961                                 if (tp
->t_inpcb
->inp_flags 
& INP_NO_IFT_CELLULAR
) 
 964                                 tcp_respond(tp
, t_template
->tt_ipgen
, 
 965                                     &t_template
->tt_t
, (struct mbuf 
*)NULL
, 
 966                                     tp
->rcv_nxt
, tp
->snd_una 
- 1, 0, ifscope
, 
 968                                 (void) m_free(dtom(t_template
)); 
 969                                 if (tp
->t_flagsext 
& TF_DETECT_READSTALL
) 
 970                                         tp
->t_rtimo_probes
++; 
 972                         tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
, 
 973                                 TCP_CONN_KEEPINTVL(tp
)); 
 975                         tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
, 
 976                                 TCP_CONN_KEEPIDLE(tp
)); 
 978                 if (tp
->t_flagsext 
& TF_DETECT_READSTALL
) { 
 980                          * The keep alive packets sent to detect a read 
 981                          * stall did not get a response from the  
 982                          * peer. Generate more keep-alives to confirm this. 
 983                          * If the number of probes sent reaches the limit, 
 986                         if (tp
->t_rtimo_probes 
> tp
->t_adaptive_rtimo
) { 
 987                                 /* Generate an event */ 
 989                                         (SO_FILT_HINT_LOCKED
| 
 990                                         SO_FILT_HINT_ADAPTIVE_RTIMO
)); 
 991                                 tcp_keepalive_reset(tp
); 
 993                                 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START( 
 994                                         tp
, TCP_REXMTVAL(tp
)); 
 999                 if (tcp_delack_enabled 
&& (tp
->t_flags 
& TF_DELACK
)) { 
1000                         tp
->t_flags 
&= ~TF_DELACK
; 
1001                         tp
->t_timer
[TCPT_DELACK
] = 0; 
1002                         tp
->t_flags 
|= TF_ACKNOW
; 
1004                         /* If delayed ack timer fired while stretching acks 
1005                          * go back to acking every other packet 
1007                         if ((tp
->t_flags 
& TF_STRETCHACK
) != 0) 
1008                                 tcp_reset_stretch_ack(tp
); 
1010                         /* If we are measuring inter packet arrival jitter for  
1011                          * throttling a connection, this delayed ack might be  
1012                          * the reason for accumulating some jitter. So let's 
1013                          * restart the measurement. 
1015                         CLEAR_IAJ_STATE(tp
); 
1017                         tcpstat
.tcps_delack
++; 
1018                         (void) tcp_output(tp
); 
1023         case TCPT_JACK_RXMT
: 
1024                 if ((tp
->t_state 
== TCPS_ESTABLISHED
) && 
1025                     (tp
->t_mpflags 
& TMPF_PREESTABLISHED
) && 
1026                     (tp
->t_mpflags 
& TMPF_JOINED_FLOW
)) { 
1027                         if (++tp
->t_mprxtshift 
> TCP_MAXRXTSHIFT
) { 
1028                                 tcpstat
.tcps_timeoutdrop
++; 
1029                                 postevent(so
, 0, EV_TIMEOUT
); 
1031                                     (SO_FILT_HINT_LOCKED
| 
1032                                     SO_FILT_HINT_TIMEOUT
)); 
1033                                 tp 
= tcp_drop(tp
, tp
->t_softerror 
? 
1034                                     tp
->t_softerror 
: ETIMEDOUT
); 
1037                         tcpstat
.tcps_join_rxmts
++; 
1038                         tp
->t_flags 
|= TF_ACKNOW
; 
1041                          * No backoff is implemented for simplicity for this  
1044                         (void) tcp_output(tp
); 
1050         if (tp
->t_inpcb
->inp_socket
->so_options 
& SO_DEBUG
) 
1051                 tcp_trace(TA_USER
, ostate
, tp
, (void *)0, (struct tcphdr 
*)0, 
1055                 tcpstat
.tcps_keepdrops
++; 
1056                 postevent(so
, 0, EV_TIMEOUT
); 
1058                     (SO_FILT_HINT_LOCKED
|SO_FILT_HINT_TIMEOUT
)); 
1059                 tp 
= tcp_drop(tp
, ETIMEDOUT
); 
1065 /* Remove a timer entry from timer list */ 
1067 tcp_remove_timer(struct tcpcb 
*tp
) 
1069         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1071         lck_mtx_assert(&tp
->t_inpcb
->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
); 
1072         if (!(TIMER_IS_ON_LIST(tp
))) { 
1075         lck_mtx_lock(listp
->mtx
); 
1077         /* Check if pcb is on timer list again after acquiring the lock */ 
1078         if (!(TIMER_IS_ON_LIST(tp
))) { 
1079                 lck_mtx_unlock(listp
->mtx
); 
1083         if (listp
->next_te 
!= NULL 
&& listp
->next_te 
== &tp
->tentry
) 
1084                 listp
->next_te 
= LIST_NEXT(&tp
->tentry
, le
); 
1086         LIST_REMOVE(&tp
->tentry
, le
); 
1087         tp
->t_flags 
&= ~(TF_TIMER_ONLIST
); 
1091         tp
->tentry
.le
.le_next 
= NULL
; 
1092         tp
->tentry
.le
.le_prev 
= NULL
; 
1093         lck_mtx_unlock(listp
->mtx
); 
1096 /* Function to check if the timerlist needs to be rescheduled to run 
1097  * the timer entry correctly. Basically, this is to check if we can avoid 
1098  * taking the list lock. 
1102 need_to_resched_timerlist(uint32_t runtime
, uint16_t index
) { 
1103         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1107         if (runtime 
== 0 || index 
== TCPT_NONE
) 
1109         is_fast 
= !(IS_TIMER_SLOW(index
)); 
1111         /* If the list is being processed then the state of the list is in flux. 
1112          * In this case always acquire the lock and set the state correctly. 
1114         if (listp
->running
) { 
1118         diff 
= timer_diff(listp
->runtime
, 0, runtime
, 0); 
1120                 /* The list is going to run before this timer */ 
1124                         if (diff 
<= listp
->fast_quantum
) 
1127                         if (diff 
<= listp
->slow_quantum
) 
1135 tcp_sched_timerlist(uint32_t offset
)  
1138         uint64_t deadline 
= 0; 
1139         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1141         lck_mtx_assert(listp
->mtx
, LCK_MTX_ASSERT_OWNED
); 
1143         listp
->runtime 
= tcp_now 
+ offset
; 
1145         clock_interval_to_deadline(offset
, NSEC_PER_SEC 
/ TCP_RETRANSHZ
, 
1148         thread_call_enter_delayed(listp
->call
, deadline
); 
1151 /* Function to run the timers for a connection. 
1153  * Returns the offset of next timer to be run for this connection which  
1154  * can be used to reschedule the timerlist. 
1157 tcp_run_conn_timer(struct tcpcb 
*tp
, uint16_t *next_index
) { 
1160         uint16_t i 
= 0, index 
= TCPT_NONE
, lo_index 
= TCPT_NONE
; 
1161         uint32_t timer_val
, offset 
= 0, lo_timer 
= 0; 
1163         boolean_t needtorun
[TCPT_NTIMERS
]; 
1167         bzero(needtorun
, sizeof(needtorun
)); 
1169         tcp_lock(tp
->t_inpcb
->inp_socket
, 1, 0); 
1171         so 
= tp
->t_inpcb
->inp_socket
; 
1172         /* Release the want count on inp */  
1173         if (in_pcb_checkstate(tp
->t_inpcb
, WNT_RELEASE
, 1) == WNT_STOPUSING
) { 
1174                 if (TIMER_IS_ON_LIST(tp
)) { 
1175                         tcp_remove_timer(tp
); 
1178                 /* Looks like the TCP connection got closed while we  
1179                  * were waiting for the lock.. Done 
1184         /* Since the timer thread needs to wait for tcp lock, it may race 
1185          * with another thread that can cancel or reschedule the timer that is 
1186          * about to run. Check if we need to run anything. 
1188         index 
= tp
->tentry
.index
; 
1189         timer_val 
= tp
->t_timer
[index
]; 
1191         if (index 
== TCPT_NONE 
|| tp
->tentry
.runtime 
== 0)  
1194         diff 
= timer_diff(tp
->tentry
.runtime
, 0, tcp_now
, 0); 
1196                 if (tp
->tentry
.index 
!= TCPT_NONE
) { 
1198                         *(next_index
) = tp
->tentry
.index
; 
1203         tp
->t_timer
[index
] = 0; 
1204         if (timer_val 
> 0) { 
1205                 tp 
= tcp_timers(tp
, index
); 
1210         /* Check if there are any other timers that need to be run. While doing it, 
1211          * adjust the timer values wrt tcp_now. 
1213         for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
1214                 if (tp
->t_timer
[i
] != 0) { 
1215                         diff 
= timer_diff(tp
->tentry
.timer_start
, tp
->t_timer
[i
], tcp_now
, 0); 
1218                                 needtorun
[i
] = TRUE
; 
1221                                 tp
->t_timer
[i
] = diff
; 
1222                                 needtorun
[i
] = FALSE
; 
1223                                 if (lo_timer 
== 0 || diff 
< lo_timer
) { 
1231         tp
->tentry
.timer_start 
= tcp_now
; 
1232         tp
->tentry
.index 
= lo_index
; 
1233         if (lo_index 
!= TCPT_NONE
) { 
1234                 tp
->tentry
.runtime 
= tp
->tentry
.timer_start 
+ tp
->t_timer
[lo_index
]; 
1236                 tp
->tentry
.runtime 
= 0; 
1240                 /* run any other timers that are also outstanding at this time. */ 
1241                 for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
1244                                 tp 
= tcp_timers(tp
, i
); 
1249                 tcp_set_lotimer_index(tp
); 
1252         if (tp
->tentry
.index 
< TCPT_NONE
) { 
1253                 offset 
= tp
->t_timer
[tp
->tentry
.index
]; 
1254                 *(next_index
) = tp
->tentry
.index
; 
1258         if (tp 
!= NULL 
&& tp
->tentry
.index 
== TCPT_NONE
) { 
1259                 tcp_remove_timer(tp
); 
1261         tcp_unlock(so
, 1, 0); 
1266 tcp_run_timerlist(void * arg1
, void * arg2
) { 
1268 #pragma unused(arg1, arg2) 
1270         struct tcptimerentry 
*te
, *next_te
; 
1271         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1273         uint32_t next_timer 
= 0; 
1274         uint16_t index 
= TCPT_NONE
; 
1275         boolean_t need_fast 
= FALSE
; 
1276         uint32_t active_count 
= 0; 
1277         uint32_t mode 
= TCP_TIMERLIST_FASTMODE
; 
1279         calculate_tcp_clock(); 
1281         lck_mtx_lock(listp
->mtx
); 
1283         listp
->running 
= TRUE
; 
1285         LIST_FOREACH_SAFE(te
, &listp
->lhead
, le
, next_te
) { 
1286                 uint32_t offset 
= 0; 
1287                 uint32_t runtime 
= te
->runtime
; 
1288                 if (TSTMP_GT(runtime
, tcp_now
)) { 
1289                         offset 
= timer_diff(runtime
, 0, tcp_now
, 0); 
1290                         if (next_timer 
== 0 || offset 
< next_timer
) { 
1291                                 next_timer 
= offset
; 
1297                 tp 
= TIMERENTRY_TO_TP(te
); 
1299                 /* Acquire an inp wantcnt on the inpcb so that the socket won't get 
1300                  * detached even if tcp_close is called 
1302                 if (in_pcb_checkstate(tp
->t_inpcb
, WNT_ACQUIRE
, 0) == WNT_STOPUSING
) { 
1303                         /* Some how this pcb went into dead state while on the timer list, 
1304                          * just take it off the list. Since the timer list entry pointers  
1305                          * are protected by the timer list lock, we can do it here 
1307                         if (TIMER_IS_ON_LIST(tp
)) { 
1308                                 tp
->t_flags 
&= ~(TF_TIMER_ONLIST
); 
1309                                 LIST_REMOVE(&tp
->tentry
, le
); 
1312                                 tp
->tentry
.le
.le_next 
= NULL
; 
1313                                 tp
->tentry
.le
.le_prev 
= NULL
; 
1318                 /* Store the next timerentry pointer before releasing the list lock. 
1319                  * If that entry has to be removed when we release the lock, this 
1320                  * pointer will be updated to the element after that. 
1322                 listp
->next_te 
= next_te
;  
1324                 VERIFY_NEXT_LINK(&tp
->tentry
, le
); 
1325                 VERIFY_PREV_LINK(&tp
->tentry
, le
); 
1327                 lck_mtx_unlock(listp
->mtx
); 
1330                 offset 
= tcp_run_conn_timer(tp
, &index
); 
1332                 lck_mtx_lock(listp
->mtx
); 
1334                 next_te 
= listp
->next_te
; 
1335                 listp
->next_te 
= NULL
; 
1338                         if (index 
< TCPT_NONE
) { 
1339                                 /* Check if this is a fast_timer. */ 
1340                                 if (!need_fast 
&& !(IS_TIMER_SLOW(index
))) { 
1344                                 if (next_timer 
== 0 || offset 
< next_timer
) { 
1345                                         next_timer 
= offset
; 
1351         if (!LIST_EMPTY(&listp
->lhead
)) { 
1352                 if (listp
->mode 
== TCP_TIMERLIST_FASTMODE
) { 
1353                         if (need_fast 
|| active_count 
> 0 ||  
1354                                 listp
->pref_mode 
== TCP_TIMERLIST_FASTMODE
) { 
1358                                 if (listp
->idlegen 
> timer_fastmode_idlemax
) { 
1359                                         mode 
= TCP_TIMERLIST_SLOWMODE
; 
1365                                 mode 
= TCP_TIMERLIST_SLOWMODE
; 
1369                 if (mode 
== TCP_TIMERLIST_FASTMODE 
||  
1370                         listp
->pref_mode 
== TCP_TIMERLIST_FASTMODE
) { 
1371                         next_timer 
= listp
->fast_quantum
; 
1373                         if (listp
->pref_offset 
!= 0 &&  
1374                                 listp
->pref_offset 
< next_timer
) 
1375                                 next_timer 
= listp
->pref_offset
; 
1376                         if (next_timer 
< listp
->slow_quantum
) 
1377                                 next_timer 
= listp
->slow_quantum
; 
1382                 tcp_sched_timerlist(next_timer
); 
1384                 /* No need to reschedule this timer */ 
1388         listp
->running 
= FALSE
; 
1389         listp
->pref_mode 
= 0; 
1390         listp
->pref_offset 
= 0; 
1392         lck_mtx_unlock(listp
->mtx
); 
1395 /* Function to verify if a change in timer state is required for a connection */ 
1397 tcp_sched_timers(struct tcpcb 
*tp
)  
1399         struct tcptimerentry 
*te 
= &tp
->tentry
; 
1400         uint16_t index 
= te
->index
; 
1401         struct tcptimerlist 
*listp 
= &tcp_timer_list
; 
1402         uint32_t offset 
= 0; 
1404         int list_locked 
= 0; 
1406         if (tp
->t_inpcb
->inp_state 
== INPCB_STATE_DEAD
) { 
1407                 /* Just return without adding the dead pcb to the list */ 
1408                 if (TIMER_IS_ON_LIST(tp
)) { 
1409                         tcp_remove_timer(tp
); 
1414         if (index 
== TCPT_NONE
) { 
1415                 tcp_remove_timer(tp
); 
1419         is_fast 
= !(IS_TIMER_SLOW(index
)); 
1420         offset 
= te
->runtime 
- tcp_now
; 
1423                 tcp_timer_advanced
++; 
1426                 offset 
= listp
->fast_quantum
; 
1428         if (!TIMER_IS_ON_LIST(tp
)) { 
1430                         lck_mtx_lock(listp
->mtx
); 
1434                 LIST_INSERT_HEAD(&listp
->lhead
, te
, le
); 
1435                 tp
->t_flags 
|= TF_TIMER_ONLIST
; 
1438                 if (listp
->entries 
> listp
->maxentries
) 
1439                         listp
->maxentries 
= listp
->entries
; 
1441                 /* if the list is not scheduled, just schedule it */ 
1442                 if (listp
->runtime 
== 0) 
1448         /* timer entry is currently on the list */ 
1449         if (need_to_resched_timerlist(te
->runtime
, index
)) { 
1450                 tcp_resched_timerlist
++; 
1453                         lck_mtx_lock(listp
->mtx
); 
1457                 VERIFY_NEXT_LINK(te
, le
); 
1458                 VERIFY_PREV_LINK(te
, le
); 
1460                 if (listp
->running
) { 
1462                                 listp
->pref_mode 
= TCP_TIMERLIST_FASTMODE
; 
1463                         } else if (listp
->pref_offset 
== 0 || 
1464                                 ((int)offset
) < listp
->pref_offset
) { 
1465                                 listp
->pref_offset 
= offset
; 
1469                         diff 
= timer_diff(listp
->runtime
, 0, tcp_now
, offset
); 
1471                                 /* The list is going to run before this timer */ 
1482                 listp
->mode 
= TCP_TIMERLIST_FASTMODE
; 
1485         tcp_sched_timerlist(offset
); 
1489                 lck_mtx_unlock(listp
->mtx
); 
1495 tcp_set_lotimer_index(struct tcpcb 
*tp
) { 
1496         uint16_t i
, lo_index 
= TCPT_NONE
; 
1497         uint32_t lo_timer 
= 0; 
1498         for (i 
= 0; i 
< TCPT_NTIMERS
; ++i
) { 
1499                 if (tp
->t_timer
[i
] != 0 && 
1500                         (lo_timer 
== 0 || tp
->t_timer
[i
] < lo_timer
)) { 
1501                         lo_timer 
= tp
->t_timer
[i
]; 
1505         tp
->tentry
.index 
= lo_index
; 
1506         if (lo_index 
!= TCPT_NONE
) { 
1507                 tp
->tentry
.runtime 
= tp
->tentry
.timer_start 
+ tp
->t_timer
[lo_index
]; 
1509                 tp
->tentry
.runtime 
= 0; 
1514 tcp_check_timer_state(struct tcpcb 
*tp
) { 
1516         lck_mtx_assert(&tp
->t_inpcb
->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
); 
1518         tcp_set_lotimer_index(tp
); 
1520         tcp_sched_timers(tp
);