X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8a3053a07cee346dca737a5670e546fd26a7c9d6..HEAD:/bsd/netinet/tcp_timer.h diff --git a/bsd/netinet/tcp_timer.h b/bsd/netinet/tcp_timer.h index f7b775db5..8d09d9cb6 100644 --- a/bsd/netinet/tcp_timer.h +++ b/bsd/netinet/tcp_timer.h @@ -2,7 +2,7 @@ * Copyright (c) 2000-2014 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ - * + * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in @@ -11,10 +11,10 @@ * unlawful or unlicensed copies of an Apple operating system, or to * circumvent, violate, or enable the circumvention or violation of, any * terms of an Apple operating system software license agreement. - * + * * Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this file. - * + * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, @@ -22,7 +22,7 @@ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. - * + * * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* @@ -65,32 +65,23 @@ #define _NETINET_TCP_TIMER_H_ #include -#ifdef KERNEL +#ifdef BSD_KERNEL_PRIVATE #include -#endif /* KERNEL */ - -/* - * Definitions of the TCP timers. - */ -#define TCPT_NTIMERS (TCPT_MAX + 1) +#endif /* BSD_KERNEL_PRIVATE */ /* Keep the external definition the same for binary compatibility */ -#define TCPT_NTIMERS_EXT 4 - -#define TCPT_REXMT 0 /* retransmit */ -#define TCPT_PERSIST 1 /* retransmit persistence */ -#define TCPT_KEEP 2 /* keep alive */ -#define TCPT_2MSL 3 /* 2*msl quiet time timer */ -#define TCPT_DELACK 4 /* delayed ack timer */ -#if MPTCP -#define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ -#define TCPT_MAX 5 -#else /* MPTCP */ -#define TCPT_MAX 4 -#endif /* !MPTCP */ -#define TCPT_NONE (TCPT_MAX + 1) +#define TCPT_NTIMERS_EXT 4 /* + * Definitions of the TCP timers. + * + * The TCPT_PTO timer is used for probing for a tail loss in a send window. + * If this probe gets acknowledged using SACK, it will allow the connection + * to enter fast-recovery instead of hitting a retransmit timeout. A probe + * timeout will send the last unacknowledged segment to generate more acks + * with SACK information which can be used for fast-retransmiting the lost + * packets. This will fire in the order of 10ms. + * * The TCPT_REXMT timer is used to force retransmissions. * The TCP has the TCPT_REXMT timer set whenever segments * have been sent for which ACKs are expected but not yet @@ -101,9 +92,12 @@ * we retransmit one unacknowledged segment, and do a backoff * on the retransmit timer. * + * The TCPT_DELACK timer is used for transmitting delayed acknowledgements + * if an acknowledgement was delayed in anticipation of a new segment. + * * The TCPT_PERSIST timer is used to keep window size information * flowing even if the window goes shut. If all previous transmissions - * have been acknowledged (so that there are no retransmissions in progress), + * have been acknowledged(so that there are no retransmissions in progress), * and the window is too small to bother sending anything, then we start * the TCPT_PERSIST timer. When it expires, if the window is nonzero, * we go to transmit state. Otherwise, at intervals send a single byte @@ -114,178 +108,227 @@ * a window update from the peer. * * The TCPT_KEEP timer is used to keep connections alive. If an - * connection is idle (no segments received) for TCPTV_KEEP_INIT amount of time, - * but not yet established, then we drop the connection. Once the connection - * is established, if the connection is idle for TCPTV_KEEP_IDLE time - * (and keepalives have been enabled on the socket), we begin to probe - * the connection. We force the peer to send us a segment by sending: + * connection is idle (no segments received) for TCPTV_KEEP_INIT amount + * of time, but not yet established, then we drop the connection. + * Once the connection is established, if the connection is idle for + * TCPTV_KEEP_IDLE time (and keepalives have been enabled on the socket), + * we begin to probe the connection. We force the peer to send us a + * segment by sending: * * This segment is (deliberately) outside the window, and should elicit * an ack segment in response from the peer. If, despite the TCPT_KEEP - * initiated segments we cannot elicit a response from a peer in TCPT_MAXIDLE - * amount of time probing, then we drop the connection. + * initiated segments we cannot elicit a response from a peer in + * TCPT_MAXIDLE amount of time probing, then we drop the connection. + * + * The TCPT_2MSL timer is used for keeping the conenction in Time-wait state + * before fully closing it so that the connection 4-tuple can be reused. */ +#ifdef BSD_KERNEL_PRIVATE + +#define TCPT_PTO 0 /* Probe timeout */ +#define TCPT_DELAYFR 1 /* Delay recovery if there is reordering */ +#define TCPT_REXMT 2 /* retransmit */ +#define TCPT_DELACK 3 /* delayed ack */ +#define TCPT_PERSIST 4 /* retransmit persistence */ +#define TCPT_KEEP 5 /* keep alive */ +#define TCPT_2MSL 6 /* 2*msl quiet time timer */ +#if MPTCP +#define TCPT_JACK_RXMT 7 /* retransmit timer for join ack */ +#define TCPT_CELLICON 8 /* Timer to check for cell-activity */ +#define TCPT_MAX 8 +#else /* MPTCP */ +#define TCPT_MAX 6 +#endif /* !MPTCP */ -#ifdef PRIVATE +#define TCPT_NONE (TCPT_MAX + 1) +#define TCPT_NTIMERS (TCPT_MAX + 1) + +/* External definitions */ +#define TCPT_REXMT_EXT 0 +#define TCPT_PERSIST_EXT 1 +#define TCPT_KEEP_EXT 2 +#define TCPT_2MSL_EXT 3 +#define TCPT_DELACK_EXT 4 + +#else /* !BSD_KERNEL_PRIVATE */ +#define TCPT_REXMT 0 /* retransmit */ +#define TCPT_PERSIST 1 /* retransmit persistence */ +#define TCPT_KEEP 2 /* keep alive */ +#define TCPT_2MSL 3 /* 2*msl quiet time timer */ +#define TCPT_DELACK 4 /* delayed ack timer */ +#if MPTCP +#define TCPT_JACK_RXMT 5 /* retransmit timer for join ack */ +#define TCPT_MAX 5 +#else /* MPTCP */ +#define TCPT_MAX 4 +#endif /* !MPTCP */ +#define TCPT_NONE (TCPT_MAX + 1) +#define TCPT_NTIMERS (TCPT_MAX + 1) +#endif /* BSD_KERNEL_PRIVATE */ + +#ifdef BSD_KERNEL_PRIVATE /* * Time constants. */ -#define TCPTV_MSL ( 15*TCP_RETRANSHZ) /* max seg lifetime (hah!) */ -#define TCPTV_SRTTBASE 0 /* base roundtrip time; - if 0, no idea yet */ -#define TCPTV_RTOBASE ( 1*TCP_RETRANSHZ) /* assumed RTO if no info */ -#define TCPTV_SRTTDFLT ( 1*TCP_RETRANSHZ) /* assumed RTT if no info */ - -#define TCPTV_PERSMIN ( 5*TCP_RETRANSHZ) /* retransmit persistence */ -#define TCPTV_PERSMAX ( 60*TCP_RETRANSHZ) /* maximum persist interval */ - -#define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ) /* initial connect keep alive */ -#define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ) /* dflt time before probing */ -#define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ) /* default probe interval */ -#define TCPTV_KEEPCNT 8 /* max probes before drop */ - -#define TCPTV_REXMTMAX ( 64*TCP_RETRANSHZ ) /* max allowable REXMT value */ -#define TCPTV_REXMTMIN ( TCP_RETRANSHZ/33 ) /* min REXMT for non-local connections */ -#define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 ) /* Window for counting rcv bytes to see if - ack-stretching can start (default 100 ms) */ -#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 ) /* Receiver idle time, avoid ack-stretching after that*/ -#define TCPTV_RCVBUFIDLE (TCP_RETRANSHZ/2) /* Receiver idle time, for rcv socket buffer resizing */ - -/* No ack stretching during slow-start, until we see some packets. - * By the time the receiver gets 512 packets, the senders cwnd - * should open by a few hundred packets consdering the +#define TCPTV_MSL ( 15*TCP_RETRANSHZ) /* max seg lifetime */ +#define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ +#define TCPTV_RTOBASE ( 1*TCP_RETRANSHZ) /* assumed RTO if no info */ +#define TCPTV_SRTTDFLT ( 1*TCP_RETRANSHZ) /* assumed RTT if no info */ +#define TCPTV_PERSMIN ( 5*TCP_RETRANSHZ) /* retransmit persistence */ +#define TCPTV_PERSMAX ( 60*TCP_RETRANSHZ) /* maximum persist interval */ + +extern int tcptv_persmin_val; + +#define TCPTV_KEEP_INIT ( 75*TCP_RETRANSHZ) /* connect keep alive */ +#define TCPTV_KEEP_IDLE (120*60*TCP_RETRANSHZ) /* time before probing */ +#define TCPTV_KEEPINTVL ( 75*TCP_RETRANSHZ) /* default probe interval */ +#define TCPTV_KEEPCNT 8 /* max probes before drop */ + +#define TCPTV_REXMTMAX ( 64*TCP_RETRANSHZ ) /* max REXMT value */ +#define TCPTV_REXMTMIN ( TCP_RETRANSHZ/33 ) /* min REXMT for non-local connections */ + +/* + * Window for counting received bytes to see if ack-stretching + * can start (default 100 ms) + */ +#define TCPTV_UNACKWIN ( TCP_RETRANSHZ/10 ) + +/* Receiver idle time, avoid ack-stretching after this idle time */ +#define TCPTV_MAXRCVIDLE (TCP_RETRANSHZ/5 ) + +/* + * No ack stretching during slow-start, until we see some packets. + * By the time the receiver gets 512 packets, the senders cwnd + * should open by a few hundred packets consdering the * slow-start progression. */ #define TCP_RCV_SS_PKTCOUNT 512 -#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ +#define TCPTV_TWTRUNC 8 /* RTO factor to truncate TW */ -#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ +#define TCP_LINGERTIME 120 /* linger at most 2 minutes */ -#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ +#define TCP_MAXRXTSHIFT 12 /* maximum retransmits */ -#ifdef TCPTIMERS +#ifdef TCPTIMERS static char *tcptimers[] = - { "REXMT", "PERSIST", "KEEP", "2MSL" , "DELACK"}; -#endif +{ "REXMT", "PERSIST", "KEEP", "2MSL", "DELACK"}; +#endif /* TCPTIMERS */ -#ifdef KERNEL - -/* We consider persist, keep and 2msl as slow timers which can be coalesced - * at a higher granularity (500 ms). Rexmt and delayed ack are considered fast - * timers which fire in the order of 100ms. +/* + * Persist, keep, 2msl and MPTCP's join-ack timer as slow timers which can + * be coalesced at a higher granularity (500 ms). * - * The following conditional is to check if a timer is one of the slow timers. This - * is fast and works well for now. If we add more slow timers for any reason, - * we may need to change this. + * Rexmt and delayed ack timers are considered as fast timers which run + * in the order of 100ms. + * + * Probe timeout is a quick timer which will run in the order of 10ms. */ -#define IS_TIMER_SLOW(ind) ((ind & 0x3) != 0) +#define IS_TIMER_HZ_500MS(i) ((i) >= TCPT_PERSIST) +#define IS_TIMER_HZ_100MS(i) ((i) >= TCPT_REXMT && (i) < TCPT_PERSIST) +#define IS_TIMER_HZ_10MS(i) ((i) < TCPT_REXMT) struct tcptimerlist; struct tcptimerentry { - LIST_ENTRY(tcptimerentry) le; /* links for timer list */ - uint32_t timer_start; /* tcp clock when the timer was started */ - uint16_t index; /* index of lowest timer that needs to run first */ - uint32_t runtime; /* deadline at which the first timer has to fire */ + LIST_ENTRY(tcptimerentry) le; /* links for timer list */ + uint32_t timer_start; /* tcp clock when the timer was started */ + uint16_t index; /* index of lowest timer that needs to run first */ + uint16_t mode; /* Bit-wise OR of timers that are active */ + uint32_t runtime; /* deadline at which the first timer has to fire */ }; LIST_HEAD(timerlisthead, tcptimerentry); struct tcptimerlist { - struct timerlisthead lhead; /* head of the list of timer entries */ - lck_mtx_t *mtx; /* lock to protect the list */ - lck_attr_t *mtx_attr; /* mutex attributes */ - lck_grp_t *mtx_grp; /* mutex group definition */ - lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ - uint32_t fast_quantum; /* minimum time quantum to coalesce fast timers */ - uint32_t slow_quantum; /* minimum time quantum to coalesce slow timers */ - thread_call_t call; /* call entry */ - uint32_t runtime; /* time at which this list is going to run */ - uint32_t entries; /* Number of entries on the list */ - uint32_t maxentries; /* Max number of entries at any time */ + struct timerlisthead lhead; /* head of the list */ + lck_mtx_t *mtx; /* lock to protect the list */ + lck_attr_t *mtx_attr; /* mutex attributes */ + lck_grp_t *mtx_grp; /* mutex group definition */ + lck_grp_attr_t *mtx_grp_attr; /* mutex group attributes */ + thread_call_t call; /* call entry */ + uint32_t runtime; /* time at which this list is going to run */ + uint32_t schedtime; /* time at which this list was scheduled */ + uint32_t entries; /* Number of entries on the list */ + uint32_t maxentries; /* Max number of entries at any time */ /* Set desired mode when timer list running */ - boolean_t running; /* Set when timer list is being processed */ - boolean_t scheduled; /* Set when timer is scheduled */ -#define TCP_TIMERLIST_FASTMODE 0x1 -#define TCP_TIMERLIST_SLOWMODE 0x2 - uint32_t mode; /* Current mode, fast or slow */ - uint32_t pref_mode; /* Preferred mode set by a connection, fast or slow */ - uint32_t pref_offset; /* Preferred offset set by a connection */ - uint32_t idlegen; /* Number of times the list has been idle in fast mode */ - struct tcptimerentry *next_te; /* Store the next timer entry pointer to process */ - + boolean_t running; /* Set when timer list is being processed */ + boolean_t scheduled; /* set when the timer is scheduled */ +#define TCP_TIMERLIST_10MS_MODE 0x1 +#define TCP_TIMERLIST_100MS_MODE 0x2 +#define TCP_TIMERLIST_500MS_MODE 0x4 + uint32_t mode; /* Current mode of the timer */ + uint32_t pref_mode; /* Preferred mode set by a connection */ + uint32_t pref_offset; /* Preferred offset set by a connection */ + uint32_t idleruns; /* Number of times the list has been idle in fast mode */ + struct tcptimerentry *next_te; /* next timer entry pointer to process */ + u_int16_t probe_if_index; /* Interface index that needs to send probes */ }; -#define TCP_FASTMODE_IDLEGEN_MAX 20 /* Approximately 2 seconds */ +/* number of idle runs allowed for TCP timer list in fast or quick modes */ +#define TCP_FASTMODE_IDLERUN_MAX 10 /* - * Minimum retransmit timeout is set to 30ms. We add a slop of - * 200 ms to the retransmit value to account for processing - * variance and delayed ack. This extra 200ms will help to avoid - * spurious retransmits by taking into consideration the receivers - * that wait for delayed ack timer instead of generating an ack + * Minimum retransmit timeout is set to 30ms. We add a slop of + * 200 ms to the retransmit value to account for processing + * variance and delayed ack. This extra 200ms will help to avoid + * spurious retransmits by taking into consideration the receivers + * that wait for delayed ack timer instead of generating an ack * for every two packets. * * On a local link, the minimum retransmit timeout is 100ms and * variance is set to 0. This will make the sender a little bit more * aggressive on local link. When the connection is not established yet, * there is no need to add an extra 200ms to retransmit timeout because - * the initial value is high (1s) and delayed ack is not a problem in + * the initial value is high (1s) and delayed ack is not a problem in * that case. */ -#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 ) /* rexmt slop allowed (200 ms) */ +#define TCPTV_REXMTSLOP ( TCP_RETRANSHZ/5 ) /* extra 200 ms slop */ /* macro to decide when retransmit slop (described above) should be added */ -#define TCP_ADD_REXMTSLOP(tp) (tp->t_state >= TCPS_ESTABLISHED) +#define TCP_ADD_REXMTSLOP(tp) (tp->t_state >= TCPS_ESTABLISHED) -#define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \ +#define TCPT_RANGESET(tv, value, tvmin, tvmax, addslop) do { \ (tv) = ((addslop) ? tcp_rexmt_slop : 0) + (value); \ if ((uint32_t)(tv) < (uint32_t)(tvmin)) \ - (tv) = (tvmin); \ + (tv) = (tvmin); \ else if ((uint32_t)(tv) > (uint32_t)(tvmax)) \ - (tv) = (tvmax); \ + (tv) = (tvmax); \ } while(0) #define TCP_CONN_KEEPIDLE(tp) \ ((tp)->t_keepidle && \ ((tp)->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) ? \ - (tp)->t_keepidle : tcp_keepidle) + (tp)->t_keepidle : (uint32_t)tcp_keepidle) #define TCP_CONN_KEEPINIT(tp) \ - (((tp)->t_keepinit > 0) ? (tp)->t_keepinit : tcp_keepinit) + (((tp)->t_keepinit > 0) ? (tp)->t_keepinit : (uint32_t)tcp_keepinit) #define TCP_CONN_KEEPCNT(tp) \ - (((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : tcp_keepcnt) + (((tp)->t_keepcnt > 0) ? (tp)->t_keepcnt : (uint32_t)tcp_keepcnt) #define TCP_CONN_KEEPINTVL(tp) \ - (((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : tcp_keepintvl) + (((tp)->t_keepintvl > 0) ? (tp)->t_keepintvl : (uint32_t)tcp_keepintvl) #define TCP_CONN_MAXIDLE(tp) \ (TCP_CONN_KEEPCNT(tp) * TCP_CONN_KEEPINTVL(tp)) -/* Since we did not add rexmt slop for local connections, we should add - * it to idle timeout. Otherwise local connections will reach idle state - * quickly - */ #define TCP_IDLETIMEOUT(tp) \ (((TCP_ADD_REXMTSLOP(tp)) ? 0 : tcp_rexmt_slop) + tp->t_rxtcur) TAILQ_HEAD(tcptailq, tcpcb); -extern int tcp_keepinit; /* time to establish connection */ -extern int tcp_keepidle; /* time before keepalive probes begin */ -extern int tcp_keepintvl; /* time between keepalive probes */ -extern int tcp_keepcnt; /* number of keepalives */ -extern int tcp_delack; /* delayed ack timer */ +extern int tcp_keepinit; /* time to establish connection */ +extern int tcp_keepidle; /* time before keepalive probes begin */ +extern int tcp_keepintvl; /* time between keepalive probes */ +extern int tcp_keepcnt; /* number of keepalives */ +extern int tcp_delack; /* delayed ack timer */ extern int tcp_maxpersistidle; extern int tcp_msl; -extern int tcp_ttl; /* time to live for TCP segs */ +extern int tcp_ttl; /* time to live for TCP segs */ extern int tcp_backoff[]; extern int tcp_rexmt_slop; -extern u_int32_t tcp_max_persist_timeout; /* Maximum persistence for Zero Window Probes */ +extern u_int32_t tcp_max_persist_timeout; /* Maximum persistence for Zero Window Probes */ #define OFFSET_FROM_START(tp, off) ((tcp_now + (off)) - (tp)->tentry.timer_start) -#endif /* KERNEL */ -#endif /* PRIVATE */ +#endif /* BSD_KERNEL_PRIVATE */ #endif /* !_NETINET_TCP_TIMER_H_ */ -