/*
- * Copyright (c) 2013-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2013-2018 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <mach/sdt.h>
#include <libkern/OSAtomic.h>
-struct tcp_cc_debug_state {
- u_int64_t ccd_tsns;
- char ccd_srcaddr[INET6_ADDRSTRLEN];
- uint16_t ccd_srcport;
- char ccd_destaddr[INET6_ADDRSTRLEN];
- uint16_t ccd_destport;
- uint32_t ccd_snd_cwnd;
- uint32_t ccd_snd_wnd;
- uint32_t ccd_snd_ssthresh;
- uint32_t ccd_pipeack;
- uint32_t ccd_rttcur;
- uint32_t ccd_rxtcur;
- uint32_t ccd_srtt;
- uint32_t ccd_event;
- uint32_t ccd_sndcc;
- uint32_t ccd_sndhiwat;
- uint32_t ccd_bytes_acked;
- u_int8_t ccd_cc_index;
- u_int8_t ccd_unused_1__;
- u_int16_t ccd_unused_2__;
- union {
- struct {
- uint32_t ccd_last_max;
- uint32_t ccd_tcp_win;
- uint32_t ccd_target_win;
- uint32_t ccd_avg_lastmax;
- uint32_t ccd_mean_deviation;
- } cubic_state;
- struct {
- u_int32_t led_base_rtt;
- } ledbat_state;
- } u;
-};
-
-SYSCTL_SKMEM_TCP_INT(OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
- int, tcp_cc_debug, 0, "Enable debug data collection");
+static int tcp_cc_debug;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, cc_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
+ &tcp_cc_debug, 0, "Enable debug data collection");
extern struct tcp_cc_algo tcp_cc_newreno;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno_sockets,
struct tcp_cc_algo* tcp_cc_algo_list[TCP_CC_ALGO_COUNT];
struct zone *tcp_cc_zone;
-/* Information for colelcting TCP debug information using control socket */
-#define TCP_CCDEBUG_CONTROL_NAME "com.apple.network.tcp_ccdebug"
#define TCP_CCDBG_NOUNIT 0xffffffff
static kern_ctl_ref tcp_ccdbg_ctlref = NULL;
volatile UInt32 tcp_ccdbg_unit = TCP_CCDBG_NOUNIT;
errno_t err;
bzero(&ccdbg_control, sizeof(ccdbg_control));
- strlcpy(ccdbg_control.ctl_name, TCP_CCDEBUG_CONTROL_NAME,
+ strlcpy(ccdbg_control.ctl_name, TCP_CC_CONTROL_NAME,
sizeof(ccdbg_control.ctl_name));
ccdbg_control.ctl_connect = tcp_ccdbg_control_connect;
ccdbg_control.ctl_disconnect = tcp_ccdbg_control_disconnect;
ccdbg_control.ctl_flags |= CTL_FLAG_PRIVILEGED;
ccdbg_control.ctl_flags |= CTL_FLAG_REG_SOCK_STREAM;
+ ccdbg_control.ctl_sendsize = 32 * 1024;
err = ctl_register(&ccdbg_control, &tcp_ccdbg_ctlref);
if (err != 0) {
tp->t_ccstate->cub_last_max;
dbg_state.u.cubic_state.ccd_tcp_win =
tp->t_ccstate->cub_tcp_win;
- dbg_state.u.cubic_state.ccd_target_win =
- tp->t_ccstate->cub_target_win;
dbg_state.u.cubic_state.ccd_avg_lastmax =
tp->t_ccstate->cub_avg_lastmax;
dbg_state.u.cubic_state.ccd_mean_deviation =
/*
* Calculate initial cwnd according to RFC3390.
- *
- * Keep the old ss_fltsz sysctl for ABI compabitility issues.
- * but it will be overriden if tcp_do_rfc3390 sysctl when it is set.
*/
void
tcp_cc_cwnd_init_or_reset(struct tcpcb *tp)
if (tp->t_flags & TF_LOCAL) {
tp->snd_cwnd = tp->t_maxseg * ss_fltsz_local;
} else {
- /* initial congestion window according to RFC 3390 */
- if (tcp_do_rfc3390) {
+ if (tcp_cubic_minor_fixes) {
+ tp->snd_cwnd = tcp_initial_cwnd(tp);
+ } else {
+ /* initial congestion window according to RFC 3390 */
tp->snd_cwnd = min(4 * tp->t_maxseg,
max(2 * tp->t_maxseg, TCP_CC_CWND_INIT_BYTES));
- } else {
- tp->snd_cwnd = tp->t_maxseg * ss_fltsz;
}
}
}
/*
* Indicate whether this ack should be delayed.
* Here is the explanation for different settings of tcp_delack_enabled:
- * - when set to 1, the bhavior is same as when set to 2. We kept this
+ * - when set to 1, the behavior is same as when set to 2. We kept this
* for binary compatibility.
* - when set to 2, will "ack every other packet"
* - if our last ack wasn't a 0-sized window.
}
break;
case 3:
- if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
- (th->th_flags & TH_PUSH) == 0 &&
- ((tp->t_unacksegs == 1) ||
- ((tp->t_flags & TF_STRETCHACK) != 0 &&
- tp->t_unacksegs < (maxseg_unacked)))) {
- return 1;
+ if (tcp_ack_strategy == TCP_ACK_STRATEGY_LEGACY) {
+ if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
+ (th->th_flags & TH_PUSH) == 0 &&
+ ((tp->t_unacksegs == 1) ||
+ ((tp->t_flags & TF_STRETCHACK) &&
+ tp->t_unacksegs < maxseg_unacked))) {
+ return 1;
+ }
+ } else {
+ uint32_t recwin;
+
+ /* Get the receive-window we would announce */
+ recwin = tcp_sbspace(tp);
+ if (recwin > (uint32_t)(TCP_MAXWIN << tp->rcv_scale)) {
+ recwin = (uint32_t)(TCP_MAXWIN << tp->rcv_scale);
+ }
+
+ /* Delay ACK, if:
+ *
+ * 1. We are not sending a zero-window
+ * 2. We are not forcing fast ACKs
+ * 3. We have more than the low-water mark in receive-buffer
+ * 4. The receive-window is not increasing
+ * 5. We have no more than one MSS of unacked data, or
+ *    the window has grown beyond the initial value by half of it.
+ * (this makes sure that during ramp-up we ACK every second MSS
+ * until we pass the tcp_recvspace * 1.5-threshold)
+ * 6. We haven't waited for half a BDP
+ *
+ * (a note on 6: The receive-window is
+ * roughly 2 BDP. Thus, recwin / 4 means half a BDP and
+ * thus we enforce an ACK roughly twice per RTT - even
+ * if the app does not read)
+ */
+ if ((tp->t_flags & TF_RXWIN0SENT) == 0 &&
+ tp->t_forced_acks == 0 &&
+ tp->t_inpcb->inp_socket->so_rcv.sb_cc > tp->t_inpcb->inp_socket->so_rcv.sb_lowat &&
+ recwin <= tp->t_last_recwin &&
+ (tp->rcv_nxt - tp->last_ack_sent <= tp->t_maxseg ||
+ recwin > (uint32_t)(tcp_recvspace + (tcp_recvspace >> 1))) &&
+ (tp->rcv_nxt - tp->last_ack_sent) < (recwin >> 2)) {
+ tp->t_stat.acks_delayed++;
+ return 1;
+ }
}
break;
}
tp->t_pipeack = tcp_get_max_pipeack(tp);
tcp_clear_pipeack_state(tp);
tp->snd_cwnd = (max(tp->t_pipeack, tp->t_lossflightsize) >> 1);
- tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES);
+ if (tcp_cubic_minor_fixes) {
+ tp->snd_cwnd = max(tp->snd_cwnd, tp->t_maxseg);
+ } else {
+ tp->snd_cwnd = max(tp->snd_cwnd, TCP_CC_CWND_INIT_BYTES);
+ }
tp->snd_cwnd += tp->t_maxseg * tcprexmtthresh;
tp->t_flagsext &= ~TF_CWND_NONVALIDATED;
}