X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0a7de7458d150b5d4dffc935ba399be265ef0a1a..f427ee49d309d8fc33ebf3042c3a775f2f530ded:/bsd/netinet/tcp_cc.c diff --git a/bsd/netinet/tcp_cc.c b/bsd/netinet/tcp_cc.c index 3512bc9a1..461b180f9 100644 --- a/bsd/netinet/tcp_cc.c +++ b/bsd/netinet/tcp_cc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Apple Inc. All rights reserved. + * Copyright (c) 2013-2018 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -42,42 +42,9 @@ #include #include -struct tcp_cc_debug_state { - u_int64_t ccd_tsns; - char ccd_srcaddr[INET6_ADDRSTRLEN]; - uint16_t ccd_srcport; - char ccd_destaddr[INET6_ADDRSTRLEN]; - uint16_t ccd_destport; - uint32_t ccd_snd_cwnd; - uint32_t ccd_snd_wnd; - uint32_t ccd_snd_ssthresh; - uint32_t ccd_pipeack; - uint32_t ccd_rttcur; - uint32_t ccd_rxtcur; - uint32_t ccd_srtt; - uint32_t ccd_event; - uint32_t ccd_sndcc; - uint32_t ccd_sndhiwat; - uint32_t ccd_bytes_acked; - u_int8_t ccd_cc_index; - u_int8_t ccd_unused_1__; - u_int16_t ccd_unused_2__; - union { - struct { - uint32_t ccd_last_max; - uint32_t ccd_tcp_win; - uint32_t ccd_target_win; - uint32_t ccd_avg_lastmax; - uint32_t ccd_mean_deviation; - } cubic_state; - struct { - u_int32_t led_base_rtt; - } ledbat_state; - } u; -}; - -SYSCTL_SKMEM_TCP_INT(OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED, - int, tcp_cc_debug, 0, "Enable debug data collection"); +static int tcp_cc_debug; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED, + &tcp_cc_debug, 0, "Enable debug data collection"); extern struct tcp_cc_algo tcp_cc_newreno; SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets, @@ -113,8 +80,6 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, cwnd_nonvalidated, struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT]; struct zone *tcp_cc_zone; -/* Information for colelcting TCP debug information using control socket */ -#define TCP_CCDEBUG_CONTROL_NAME "com.apple.network.tcp_ccdebug" #define TCP_CCDBG_NOUNIT 0xffffffff static kern_ctl_ref tcp_ccdbg_ctlref = NULL; volatile UInt32 tcp_ccdbg_unit = TCP_CCDBG_NOUNIT; @@ -151,12 +116,13 @@ tcp_cc_control_register(void) errno_t err; bzero(&ccdbg_control, sizeof(ccdbg_control)); - strlcpy(ccdbg_control.ctl_name, TCP_CCDEBUG_CONTROL_NAME, + strlcpy(ccdbg_control.ctl_name, TCP_CC_CONTROL_NAME, sizeof(ccdbg_control.ctl_name)); ccdbg_control.ctl_connect = tcp_ccdbg_control_connect; ccdbg_control.ctl_disconnect = tcp_ccdbg_control_disconnect; ccdbg_control.ctl_flags |= CTL_FLAG_PRIVILEGED; ccdbg_control.ctl_flags |= CTL_FLAG_REG_SOCK_STREAM; + ccdbg_control.ctl_sendsize = 32 * 1024; err = ctl_register(&ccdbg_control, &tcp_ccdbg_ctlref); if (err != 0) { @@ -255,8 +221,6 @@ tcp_ccdbg_trace(struct tcpcb *tp, struct tcphdr *th, int32_t event) tp->t_ccstate->cub_last_max; dbg_state.u.cubic_state.ccd_tcp_win = tp->t_ccstate->cub_tcp_win; - dbg_state.u.cubic_state.ccd_target_win = - tp->t_ccstate->cub_target_win; dbg_state.u.cubic_state.ccd_avg_lastmax = tp->t_ccstate->cub_avg_lastmax; dbg_state.u.cubic_state.ccd_mean_deviation = @@ -317,9 +281,6 @@ tcp_bad_rexmt_fix_sndbuf(struct tcpcb *tp) /* * Calculate initial cwnd according to RFC3390. - * - * Keep the old ss_fltsz sysctl for ABI compabitility issues. - * but it will be overriden if tcp_do_rfc3390 sysctl when it is set. */ void tcp_cc_cwnd_init_or_reset(struct tcpcb *tp) @@ -327,12 +288,12 @@ tcp_cc_cwnd_init_or_reset(struct tcpcb *tp) if (tp->t_flags & TF_LOCAL) { tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local; } else { - /* initial congestion window according to RFC 3390 */ - if (tcp_do_rfc3390) { + if (tcp_cubic_minor_fixes) { + tp->snd_cwnd = tcp_initial_cwnd(tp); + } else { + /* initial congestion window according to RFC 3390 */ tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, TCP_CC_CWND_INIT_BYTES)); - } else { - tp->snd_cwnd = tp->t_maxseg * ss_fltsz; } } } @@ -340,7 +301,7 @@ tcp_cc_cwnd_init_or_reset(struct tcpcb *tp) /* * Indicate whether this ack should be delayed. * Here is the explanation for different settings of tcp_delack_enabled: - * - when set to 1, the bhavior is same as when set to 2. We kept this + * - when set to 1, the behavior is same as when set to 2. We kept this * for binary compatibility. * - when set to 2, will "ack every other packet" * - if our last ack wasn't a 0-sized window. @@ -369,12 +330,50 @@ tcp_cc_delay_ack(struct tcpcb *tp, struct tcphdr *th) } break; case 3: - if ((tp->t_flags & TF_RXWIN0SENT) == 0 && - (th->th_flags & TH_PUSH) == 0 && - ((tp->t_unacksegs == 1) || - ((tp->t_flags & TF_STRETCHACK) != 0 && - tp->t_unacksegs < (maxseg_unacked)))) { - return 1; + if (tcp_ack_strategy == TCP_ACK_STRATEGY_LEGACY) { + if ((tp->t_flags & TF_RXWIN0SENT) == 0 && + (th->th_flags & TH_PUSH) == 0 && + ((tp->t_unacksegs == 1) || + ((tp->t_flags & TF_STRETCHACK) && + tp->t_unacksegs < maxseg_unacked))) { + return 1; + } + } else { + uint32_t recwin; + + /* Get the receive-window we would announce */ + recwin = tcp_sbspace(tp); + if (recwin > (uint32_t)(TCP_MAXWIN << tp->rcv_scale)) { + recwin = (uint32_t)(TCP_MAXWIN << tp->rcv_scale); + } + + /* Delay ACK, if: + * + * 1. We are not sending a zero-window + * 2. We are not forcing fast ACKs + * 3. We have more than the low-water mark in receive-buffer + * 4. The receive-window is not increasing + * 5. We have less than or equal of an MSS unacked or + * Window actually has been growing larger than the initial value by half of it. + * (this makes sure that during ramp-up we ACK every second MSS + * until we pass the tcp_recvspace * 1.5-threshold) + * 6. We haven't waited for half a BDP + * + * (a note on 6: The receive-window is + * roughly 2 BDP. Thus, recwin / 4 means half a BDP and + * thus we enforce an ACK roughly twice per RTT - even + * if the app does not read) + */ + if ((tp->t_flags & TF_RXWIN0SENT) == 0 && + tp->t_forced_acks == 0 && + tp->t_inpcb->inp_socket->so_rcv.sb_cc > tp->t_inpcb->inp_socket->so_rcv.sb_lowat && + recwin <= tp->t_last_recwin && + (tp->rcv_nxt - tp->last_ack_sent <= tp->t_maxseg || + recwin > (uint32_t)(tcp_recvspace + (tcp_recvspace >> 1))) && + (tp->rcv_nxt - tp->last_ack_sent) < (recwin >> 2)) { + tp->t_stat.acks_delayed++; + return 1; + } } break; } @@ -466,7 +465,11 @@ tcp_cc_adjust_nonvalidated_cwnd(struct tcpcb *tp) tp->t_pipeack = tcp_get_max_pipeack(tp); tcp_clear_pipeack_state(tp); tp->snd_cwnd = (max(tp->t_pipeack, tp->t_lossflightsize) >> 1); - tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES); + if (tcp_cubic_minor_fixes) { + tp->snd_cwnd = max(tp->snd_cwnd, tp->t_maxseg); + } else { + tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES); + } tp->snd_cwnd += tp->t_maxseg * tcprexmtthresh; tp->t_flagsext &= ~TF_CWND_NONVALIDATED; }