X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..5eebf7385fedb1517b66b53c28e5aa6bb0a2be50:/bsd/netinet/tcp_timer.c?ds=sidebyside diff --git a/bsd/netinet/tcp_timer.c b/bsd/netinet/tcp_timer.c index 3819a60cf..a99efeea7 100644 --- a/bsd/netinet/tcp_timer.c +++ b/bsd/netinet/tcp_timer.c @@ -52,16 +52,14 @@ * SUCH DAMAGE. * * @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95 + * $FreeBSD: src/sys/netinet/tcp_timer.c,v 1.34.2.11 2001/08/22 00:59:12 silby Exp $ */ -#if ISFB31 -#include "opt_compat.h" -#include "opt_tcpdebug.h" -#endif #include #include #include +#include #include #include #include @@ -73,9 +71,10 @@ #include #include -#include -#include #include +#if INET6 +#include +#endif #include #include #include @@ -91,37 +90,68 @@ #define DBG_FNC_TCP_FAST NETDBG_CODE(DBG_NETTCP, (5 << 8)) #define DBG_FNC_TCP_SLOW NETDBG_CODE(DBG_NETTCP, (5 << 8) | 1) +/* + * NOTE - WARNING + * + * + * + * + */ +static int +sysctl_msec_to_ticks SYSCTL_HANDLER_ARGS +{ + int error, s, tt; + + tt = *(int *)oidp->oid_arg1; + s = tt * 1000 / hz; + + error = sysctl_handle_int(oidp, &s, 0, req); + if (error || !req->newptr) + return (error); + + tt = s * hz / 1000; + if (tt < 1) + return (EINVAL); + + *(int *)oidp->oid_arg1 = tt; + return (0); +} + +int tcp_keepinit; +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, + &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", ""); -int tcp_keepinit = TCPTV_KEEP_INIT; -SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, - CTLFLAG_RW, &tcp_keepinit , 0, ""); +int tcp_keepidle; +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW, + &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", ""); -int tcp_keepidle = TCPTV_KEEP_IDLE; -SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, - CTLFLAG_RW, &tcp_keepidle , 0, ""); +int tcp_keepintvl; +SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW, + &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", ""); -static int tcp_keepintvl = TCPTV_KEEPINTVL; -SYSCTL_INT(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, - CTLFLAG_RW, &tcp_keepintvl , 0, ""); +int tcp_delacktime; +SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, + CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I", + "Time before a delayed ACK is sent"); + +int tcp_msl; +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW, + &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime"); static int always_keepalive = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, - CTLFLAG_RW, &always_keepalive , 0, ""); +SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW, + &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections"); static int tcp_keepcnt = TCPTV_KEEPCNT; /* max idle probes */ -static int tcp_maxpersistidle = TCPTV_KEEP_IDLE; +int tcp_maxpersistidle; /* max idle time in persist */ int tcp_maxidle; - - struct inpcbhead time_wait_slots[N_TIME_WAIT_SLOTS]; int cur_tw_slot = 0; u_long *delack_bitmask; -u_long current_active_connections = 0; -u_long last_active_conn_count = 0; void add_to_time_wait(tp) @@ -134,7 +164,7 @@ void add_to_time_wait(tp) if (tp->t_timer[TCPT_2MSL] == 0) tp->t_timer[TCPT_2MSL] = 1; - tp->t_idle += tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1); + tp->t_rcvtime += tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1); tw_slot = (tp->t_timer[TCPT_2MSL] & (N_TIME_WAIT_SLOTS - 1)) + cur_tw_slot; if (tw_slot >= N_TIME_WAIT_SLOTS) tw_slot -= N_TIME_WAIT_SLOTS; @@ -170,9 +200,7 @@ tcp_fasttimo() if (!tcp_delack_enabled) return; - if ((current_active_connections > DELACK_BITMASK_THRESH) && - (last_active_conn_count > DELACK_BITMASK_THRESH)) { - for (i=0; i < (tcbinfo.hashsize / 32); i++) { + for (i=0; i < (tcbinfo.hashsize / 32); i++) { if (delack_bitmask[i]) { temp_mask = 1; for (j=0; j < 32; j++) { @@ -193,22 +221,7 @@ tcp_fasttimo() delack_bitmask[i] = 0; } elem_base += 32; - } - } - else - { - for (inp = tcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { - if ((tp = (struct tcpcb *)inp->inp_ppcb) && - (tp->t_flags & TF_DELACK)) { - tp->t_flags &= ~TF_DELACK; - tp->t_flags |= TF_ACKNOW; - tcpstat.tcps_delack++; - (void) tcp_output(tp); - } - } } - - last_active_conn_count = current_active_connections; KERNEL_DEBUG(DBG_FNC_TCP_FAST | DBG_FUNC_END, delack_checked,tcpstat.tcps_delack,0,0,0); splx(s); @@ -251,6 +264,13 @@ tcp_slowtimo() tp = intotcpcb(ip); if (tp == 0 || tp->t_state == TCPS_LISTEN) continue; + /* + * Bogus state when port owned by SharedIP with loopback as the + * only configured interface: BlueBox does not filters loopback + */ + if (tp->t_state == TCP_NSTATES) + continue; + for (i = 0; i < TCPT_NTIMERS; i++) { if (tp->t_timer[i] && --tp->t_timer[i] == 0) { #if TCPDEBUG @@ -269,10 +289,10 @@ tcp_slowtimo() #endif } } - tp->t_idle++; - tp->t_duration++; - if (tp->t_rtt) - tp->t_rtt++; + tp->t_rcvtime++; + tp->t_starttime++; + if (tp->t_rtttime) + tp->t_rtttime++; tpgone: ; } @@ -293,9 +313,13 @@ tpgone: #endif ipnxt = ip->inp_list.le_next; tp = intotcpcb(ip); + if (tp == NULL) { /* tp already closed, remove from list */ + LIST_REMOVE(ip, inp_list); + continue; + } if (tp->t_timer[TCPT_2MSL] >= N_TIME_WAIT_SLOTS) { tp->t_timer[TCPT_2MSL] -= N_TIME_WAIT_SLOTS; - tp->t_idle += N_TIME_WAIT_SLOTS; + tp->t_rcvtime += N_TIME_WAIT_SLOTS; } else tp->t_timer[TCPT_2MSL] = 0; @@ -306,12 +330,6 @@ tpgone: if (++cur_tw_slot >= N_TIME_WAIT_SLOTS) cur_tw_slot = 0; - - tcp_iss += TCP_ISSINCR/PR_SLOWHZ; /* increment iss */ -#if TCP_COMPAT_42 - if ((int)tcp_iss < 0) - tcp_iss = TCP_ISSINCR; /* XXX */ -#endif tcp_now++; /* for timestamps */ splx(s); KERNEL_DEBUG(DBG_FNC_TCP_SLOW | DBG_FUNC_END, tws_checked, cur_tw_slot,0,0,0); @@ -330,6 +348,9 @@ tcp_canceltimers(tp) tp->t_timer[i] = 0; } +int tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] = + { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 }; + int tcp_backoff[TCP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; @@ -345,10 +366,17 @@ tcp_timers(tp, timer) { register int rexmt; struct socket *so_tmp; + struct tcptemp *t_template; + +#if TCPDEBUG + int ostate; +#endif + #if INET6 int isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV4) == 0; #endif /* INET6 */ + switch (timer) { /* @@ -359,7 +387,7 @@ tcp_timers(tp, timer) */ case TCPT_2MSL: if (tp->t_state != TCPS_TIME_WAIT && - tp->t_idle <= tcp_maxidle) { + tp->t_rcvtime <= tcp_maxidle) { tp->t_timer[TCPT_2MSL] = tcp_keepintvl; add_to_time_wait(tp); } @@ -382,11 +410,39 @@ tcp_timers(tp, timer) postevent(so_tmp, 0, EV_TIMEOUT); break; } + + if (tp->t_rxtshift == 1) { + /* + * first retransmit; record ssthresh and cwnd so they can + * be recovered if this turns out to be a "bad" retransmit. + * A retransmit is considered "bad" if an ACK for this + * segment is received within RTT/2 interval; the assumption + * here is that the ACK was already in flight. See + * "On Estimating End-to-End Network Path Properties" by + * Allman and Paxson for more details. + */ + tp->snd_cwnd_prev = tp->snd_cwnd; + tp->snd_ssthresh_prev = tp->snd_ssthresh; + tp->t_badrxtwin = tcp_now + (tp->t_srtt >> (TCP_RTT_SHIFT + 1)); + } tcpstat.tcps_rexmttimeo++; - rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; + if (tp->t_state == TCPS_SYN_SENT) + rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift]; + else + rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; TCPT_RANGESET(tp->t_rxtcur, rexmt, - tp->t_rttmin, TCPTV_REXMTMAX); + tp->t_rttmin, TCPTV_REXMTMAX); tp->t_timer[TCPT_REXMT] = tp->t_rxtcur; + + /* + * Disable rfc1323 and rfc1644 if we havn't got any response to + * our third SYN to work-around some broken terminal servers + * (most of which have hopefully been retired) that have bad VJ + * header compression code which trashes TCP segments containing + * unknown-to-them TCP options. + */ + if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3)) + tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_REQ_CC); /* * If losing, let the lower level know and try for * a better route. Also, if we backed off this far, @@ -406,6 +462,11 @@ tcp_timers(tp, timer) tp->t_srtt = 0; } tp->snd_nxt = tp->snd_una; + /* + * Note: We overload snd_recover to function also as the + * snd_last variable described in RFC 2582 + */ + tp->snd_recover = tp->snd_max; /* * Force a segment to be sent. */ @@ -413,7 +474,7 @@ tcp_timers(tp, timer) /* * If timing a segment in this window, stop the timer. */ - tp->t_rtt = 0; + tp->t_rtttime = 0; /* * Close the congestion window down to one segment * (we'll open it by one segment for each ack we get). @@ -463,8 +524,8 @@ tcp_timers(tp, timer) * backoff that we would use if retransmitting. */ if (tp->t_rxtshift == TCP_MAXRXTSHIFT && - (tp->t_idle >= tcp_maxpersistidle || - tp->t_idle >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { + (tp->t_rcvtime >= tcp_maxpersistidle || + tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { tcpstat.tcps_persistdrop++; so_tmp = tp->t_inpcb->inp_socket; tp = tcp_drop(tp, ETIMEDOUT); @@ -488,7 +549,7 @@ tcp_timers(tp, timer) if ((always_keepalive || tp->t_inpcb->inp_socket->so_options & SO_KEEPALIVE) && tp->t_state <= TCPS_CLOSING) { - if (tp->t_idle >= tcp_keepidle + tcp_maxidle) + if (tp->t_rcvtime >= TCP_KEEPIDLE(tp) + tcp_maxidle) goto dropit; /* * Send a packet designed to force a response @@ -503,41 +564,23 @@ tcp_timers(tp, timer) * correspondent TCP to respond. */ tcpstat.tcps_keepprobe++; -#if TCP_COMPAT_42 - /* - * The keepalive packet must have nonzero length - * to get a 4.2 host to respond. - */ -#if INET6 - if (isipv6) - tcp_respond(tp, (void *)&tp->t_template->tt_i6, - &tp->t_template->tt_t, - (struct mbuf *)NULL, - tp->rcv_nxt - 1, tp->snd_una - 1, - 0, isipv6); - else -#endif /* INET6 */ - tcp_respond(tp, (void *)&tp->t_template->tt_i, - &tp->t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt - 1, tp->snd_una - 1, 0, - isipv6); -#else -#if INET6 - if (isipv6) - tcp_respond(tp, (void *)&tp->t_template->tt_i6, - &tp->t_template->tt_t, - (struct mbuf *)NULL, tp->rcv_nxt, - tp->snd_una - 1, 0, isipv6); - else -#endif /* INET6 */ - tcp_respond(tp, (void *)&tp->t_template->tt_i, - &tp->t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0, isipv6); -#endif + t_template = tcp_maketemplate(tp); + if (t_template) { + tcp_respond(tp, t_template->tt_ipgen, + &t_template->tt_t, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); + (void) m_free(dtom(t_template)); + } tp->t_timer[TCPT_KEEP] = tcp_keepintvl; } else - tp->t_timer[TCPT_KEEP] = tcp_keepidle; + tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp); break; + +#if TCPDEBUG + if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) + tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, + PRU_SLOWTIMO); +#endif dropit: tcpstat.tcps_keepdrops++; so_tmp = tp->t_inpcb->inp_socket;