/*
- * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
#if INET6
#include <netinet6/in6_pcb.h>
#endif
#if TCPDEBUG
#include <netinet/tcp_debug.h>
#endif
+#include <netinet/tcp_log.h>
+
#include <sys/kdebug.h>
#include <mach/sdt.h>
#include <netinet/mptcp_var.h>
int error, s, tt;
tt = *(int *)arg1;
- s = tt * 1000 / TCP_RETRANSHZ;;
+ if (tt < 0 || tt >= INT_MAX / 1000) {
+ return EINVAL;
+ }
+ s = tt * 1000 / TCP_RETRANSHZ;
error = sysctl_handle_int(oidp, &s, 0, req);
if (error || !req->newptr) {
CTLFLAG_RW | CTLFLAG_LOCKED, int, tcp_pmtud_black_hole_mss, 1200,
"Path MTU Discovery Black Hole Detection lowered MSS");
+#if (DEBUG || DEVELOPMENT)
+int tcp_probe_if_fix_port = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, probe_if_fix_port,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+ &tcp_probe_if_fix_port, 0, "");
+#endif /* (DEBUG || DEVELOPMENT) */
+
static u_int32_t tcp_mss_rec_medium = 1200;
static u_int32_t tcp_mss_rec_low = 512;
static void tcp_sched_timerlist(uint32_t offset);
static u_int32_t tcp_run_conn_timer(struct tcpcb *tp, u_int16_t *mode,
u_int16_t probe_if_index);
-static void tcp_sched_timers(struct tcpcb *tp);
static inline void tcp_set_lotimer_index(struct tcpcb *);
__private_extern__ void tcp_remove_from_time_wait(struct inpcb *inp);
static inline void tcp_update_mss_core(struct tcpcb *tp, struct ifnet *ifp);
timer_diff(uint32_t t1, uint32_t toff1, uint32_t t2, uint32_t toff2)
{
return (int32_t)((t1 + toff1) - (t2 + toff2));
-};
+}
/*
* Add to tcp timewait list, delay is given in milliseconds.
active = TRUE;
goto out;
}
+ if (mpsotomppcb(mp_so)->mpp_inside > 0) {
+ os_log(mptcp_log_handle, "%s - %lx: Still inside %d usecount %d\n", __func__,
+ (unsigned long)VM_KERNEL_ADDRPERM(mpsotompte(mp_so)),
+ mpsotomppcb(mp_so)->mpp_inside,
+ mp_so->so_usecount);
+ socket_unlock(mp_so, 0);
+ mp_so = NULL;
+ active = TRUE;
+ goto out;
+ }
+ /* We call socket_unlock with refcount further below */
mp_so->so_usecount++;
+ tptomptp(tp)->mpt_mpte->mpte_mppcb->mpp_inside++;
}
/*
* is spurious.
*/
tcp_rexmt_save_state(tp);
+ tcp_ccdbg_trace(tp, NULL, TCP_CC_FIRST_REXMT);
}
#if MPTCP
if ((tp->t_rxtshift >= mptcp_fail_thresh) &&
mptcp_act_on_txfail(so);
}
- if (so->so_flags & SOF_MP_SUBFLOW) {
+ if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+ (so->so_flags & SOF_MP_SUBFLOW)) {
struct mptses *mpte = tptomptp(tp)->mpt_mpte;
- mptcp_check_subflows_and_add(mpte);
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ mptcp_check_subflows_and_add(mpte);
+ }
}
#endif /* MPTCP */
tp->t_flagsext &= ~(TF_DELAY_RECOVERY);
}
- if (tp->t_state == TCPS_SYN_RECEIVED) {
+ if (!(tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) &&
+ tp->t_state == TCPS_SYN_RECEIVED) {
tcp_disable_tfo(tp);
}
- if (!(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
+ if (!(tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) &&
+ !(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT) &&
!(tp->t_tfo_flags & TFO_F_NO_SNDPROBING) &&
((tp->t_state != TCPS_SYN_SENT && tp->t_rxtshift > 1) ||
tcp_heuristic_tfo_middlebox(tp);
so->so_error = ENODATA;
+ soevent(so,
+ (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MP_SUB_ERROR));
sorwakeup(so);
sowwakeup(so);
tcpstat.tcps_tfo_sndblackhole++;
}
- if (!(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
+ if (!(tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) &&
+ !(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) &&
tp->t_rxtshift > 3) {
if (TSTMP_GT(tp->t_sndtime - 10 * TCP_RETRANSHZ, tp->t_rcvtime)) {
tcp_heuristic_tfo_middlebox(tp);
so->so_error = ENODATA;
+ soevent(so,
+ (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MP_SUB_ERROR));
sorwakeup(so);
sowwakeup(so);
}
if (tp->t_state == TCPS_SYN_SENT) {
rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
tp->t_stat.synrxtshift = tp->t_rxtshift;
+ tp->t_stat.rxmitsyns++;
/* When retransmitting, disable TFO */
if (tfo_enabled(tp) &&
- (!(so->so_flags1 & SOF1_DATA_AUTHENTICATED) ||
- (tp->t_flagsext & TF_FASTOPEN_HEUR))) {
- tp->t_flagsext &= ~TF_FASTOPEN;
+ !(tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE)) {
+ tcp_disable_tfo(tp);
tp->t_tfo_flags |= TFO_F_SYN_LOSS;
}
} else {
TCP_ADD_REXMTSLOP(tp));
tp->t_timer[TCPT_REXMT] = OFFSET_FROM_START(tp, tp->t_rxtcur);
+ TCP_LOG_RTT_INFO(tp);
+
if (INP_WAIT_FOR_IF_FEEDBACK(tp->t_inpcb)) {
goto fc_output;
}
bzero(&tra, sizeof(tra));
tra.nocell = INP_NO_CELLULAR(inp);
tra.noexpensive = INP_NO_EXPENSIVE(inp);
+ tra.noconstrained = INP_NO_CONSTRAINED(inp);
tra.awdl_unrestricted = INP_AWDL_UNRESTRICTED(inp);
tra.intcoproc_allowed = INP_INTCOPROC_ALLOWED(inp);
+ tra.keep_alive = 1;
if (tp->t_inpcb->inp_flags & INP_BOUND_IF) {
tra.ifscope = tp->t_inpcb->inp_boundifp->if_index;
} else {
tp->t_rtimo_probes++;
}
}
+
+ TCP_LOG_KEEP_ALIVE(tp, idle_time);
+
tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
TCP_CONN_KEEPINTVL(tp));
} else {
tp->t_timer[TCPT_KEEP] = min(OFFSET_FROM_START(
tp, tcp_backoff[ind] * TCP_REXMTVAL(tp)),
tp->t_timer[TCPT_KEEP]);
- } else if (!(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
+ } else if (!(tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) &&
+ !(tp->t_tfo_flags & TFO_F_HEURISTIC_DONE) &&
tp->t_tfo_probe_state == TFO_PROBE_WAIT_DATA) {
/* Still no data! Let's assume a TFO-error and err out... */
tcp_heuristic_tfo_middlebox(tp);
so->so_error = ENODATA;
+ soevent(so,
+ (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MP_SUB_ERROR));
sorwakeup(so);
tp->t_tfo_stats |= TFO_S_RECV_BLACKHOLE;
tcpstat.tcps_tfo_blackhole++;
(void) tcp_output(tp);
}
break;
+ case TCPT_CELLICON:
+ {
+ struct mptses *mpte = tptomptp(tp)->mpt_mpte;
+
+ tp->t_timer[TCPT_CELLICON] = 0;
+
+ if (mpte->mpte_cellicon_increments == 0) {
+ /* Cell-icon not set by this connection */
+ break;
+ }
+
+ if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
+ mptcp_unset_cellicon(mpte, NULL, 1);
+ }
+
+ if (mpte->mpte_cellicon_increments) {
+ tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
+ }
+
+ break;
+ }
#endif /* MPTCP */
case TCPT_PTO:
{
- int32_t snd_len;
- tp->t_flagsext &= ~(TF_SENT_TLPROBE);
+ int32_t ret = 0;
+ if (!(tp->t_flagsext & TF_IF_PROBING)) {
+ tp->t_flagsext &= ~(TF_SENT_TLPROBE);
+ }
/*
* Check if the connection is in the right state to
* send a probe
*/
- if (tp->t_state != TCPS_ESTABLISHED ||
- (tp->t_rxtshift > 0 && !(tp->t_flagsext & TF_PROBING)) ||
+ if ((tp->t_state != TCPS_ESTABLISHED ||
+ tp->t_rxtshift > 0 ||
tp->snd_max == tp->snd_una ||
!SACK_ENABLED(tp) ||
!TAILQ_EMPTY(&tp->snd_holes) ||
- IN_FASTRECOVERY(tp)) {
+ IN_FASTRECOVERY(tp)) &&
+ !(tp->t_flagsext & TF_IF_PROBING)) {
break;
}
/*
- * If there is no new data to send or if the
- * connection is limited by receive window then
- * retransmit the last segment, otherwise send
- * new data.
+	 * When the interface state is changed, explicitly reset the retransmission
+ * timer state for both SYN and data packets because we do not want to
+ * wait unnecessarily or timeout too quickly if the link characteristics
+ * have changed drastically
*/
- snd_len = min(so->so_snd.sb_cc, tp->snd_wnd)
- - (tp->snd_max - tp->snd_una);
- if (snd_len > 0) {
- tp->snd_nxt = tp->snd_max;
+ if (tp->t_flagsext & TF_IF_PROBING) {
+ tp->t_rxtshift = 0;
+ if (tp->t_state == TCPS_SYN_SENT) {
+ tp->t_stat.synrxtshift = tp->t_rxtshift;
+ }
+ /*
+			 * Reset to the default RTO
+ */
+ tp->t_srtt = TCPTV_SRTTBASE;
+ tp->t_rttvar =
+ ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
+ tp->t_rttmin = tp->t_flags & TF_LOCAL ? tcp_TCPTV_MIN :
+ TCPTV_REXMTMIN;
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ tp->t_rttmin, TCPTV_REXMTMAX, TCP_ADD_REXMTSLOP(tp));
+ TCP_LOG_RTT_INFO(tp);
+ }
+
+ if (tp->t_state == TCPS_SYN_SENT) {
+ /*
+ * The PTO for SYN_SENT reinitializes TCP as if it was a fresh
+ * connection attempt
+ */
+ tp->snd_nxt = tp->snd_una;
+ /*
+ * Note: We overload snd_recover to function also as the
+ * snd_last variable described in RFC 2582
+ */
+ tp->snd_recover = tp->snd_max;
+ /*
+ * Force a segment to be sent.
+ */
+ tp->t_flags |= TF_ACKNOW;
+
+ /* If timing a segment in this window, stop the timer */
+ tp->t_rtttime = 0;
} else {
- snd_len = min((tp->snd_max - tp->snd_una),
- tp->t_maxseg);
- tp->snd_nxt = tp->snd_max - snd_len;
+ int32_t snd_len;
+
+ /*
+ * If there is no new data to send or if the
+ * connection is limited by receive window then
+ * retransmit the last segment, otherwise send
+ * new data.
+ */
+ snd_len = min(so->so_snd.sb_cc, tp->snd_wnd)
+ - (tp->snd_max - tp->snd_una);
+ if (snd_len > 0) {
+ tp->snd_nxt = tp->snd_max;
+ } else {
+ snd_len = min((tp->snd_max - tp->snd_una),
+ tp->t_maxseg);
+ tp->snd_nxt = tp->snd_max - snd_len;
+ }
}
tcpstat.tcps_pto++;
- if (tp->t_flagsext & TF_PROBING) {
+ if (tp->t_flagsext & TF_IF_PROBING) {
tcpstat.tcps_probe_if++;
}
/* If timing a segment in this window, stop the timer */
tp->t_rtttime = 0;
- /* Note that tail loss probe is being sent */
- tp->t_flagsext |= TF_SENT_TLPROBE;
- tp->t_tlpstart = tcp_now;
+ /* Note that tail loss probe is being sent. Exclude IF probe */
+ if (!(tp->t_flagsext & TF_IF_PROBING)) {
+ tp->t_flagsext |= TF_SENT_TLPROBE;
+ tp->t_tlpstart = tcp_now;
+ }
tp->snd_cwnd += tp->t_maxseg;
-
/*
* When tail-loss-probe fires, we reset the RTO timer, because
* a probe just got sent, so we are good to push out the timer.
* Set to 0 to ensure that tcp_output() will reschedule it
*/
tp->t_timer[TCPT_REXMT] = 0;
+ ret = tcp_output(tp);
+
+#if (DEBUG || DEVELOPMENT)
+ if ((tp->t_flagsext & TF_IF_PROBING) &&
+ ((IFNET_IS_COMPANION_LINK(tp->t_inpcb->inp_last_outifp)) ||
+ tp->t_state == TCPS_SYN_SENT)) {
+ if (ret == 0 && tcp_probe_if_fix_port > 0 &&
+ tcp_probe_if_fix_port <= IPPORT_HILASTAUTO) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ tcp_set_lotimer_index(tp);
+ }
+
+ os_log(OS_LOG_DEFAULT,
+ "%s: sent %s probe for %u > %u on interface %s"
+ " (%u) %s(%d)",
+ __func__,
+ tp->t_state == TCPS_SYN_SENT ? "SYN" : "data",
+ ntohs(tp->t_inpcb->inp_lport),
+ ntohs(tp->t_inpcb->inp_fport),
+ if_name(tp->t_inpcb->inp_last_outifp),
+ tp->t_inpcb->inp_last_outifp->if_index,
+ ret == 0 ? "succeeded" :"failed", ret);
+ }
+#endif /* DEBUG || DEVELOPMENT */
- (void)tcp_output(tp);
+ /*
+ * When the connection is not idle, make sure the retransmission timer
+ * is armed because it was set to zero above
+ */
+ if ((tp->t_timer[TCPT_REXMT] == 0 || tp->t_timer[TCPT_PERSIST] == 0) &&
+ (tp->t_inpcb->inp_socket->so_snd.sb_cc != 0 || tp->t_state == TCPS_SYN_SENT ||
+ tp->t_state == TCPS_SYN_RECEIVED)) {
+ tp->t_timer[TCPT_REXMT] =
+ OFFSET_FROM_START(tp, tp->t_rxtcur);
+
+ os_log(OS_LOG_DEFAULT,
+ "%s: tcp_output() returned %u with retransmission timer disabled "
+ "for %u > %u in state %d, reset timer to %d",
+ __func__, ret,
+ ntohs(tp->t_inpcb->inp_lport),
+ ntohs(tp->t_inpcb->inp_fport),
+ tp->t_state,
+ tp->t_timer[TCPT_REXMT]);
+
+ tcp_check_timer_state(tp);
+ }
tp->snd_cwnd -= tp->t_maxseg;
- tp->t_tlphighrxt = tp->snd_nxt;
+ if (!(tp->t_flagsext & TF_IF_PROBING)) {
+ tp->t_tlphighrxt = tp->snd_nxt;
+ }
break;
}
case TCPT_DELAYFR:
* If this connection is over an interface that needs to
* be probed, send probe packets to reinitiate communication.
*/
- if (probe_if_index > 0 && tp->t_inpcb->inp_last_outifp != NULL &&
- tp->t_inpcb->inp_last_outifp->if_index == probe_if_index) {
- tp->t_flagsext |= TF_PROBING;
+ if (TCP_IF_STATE_CHANGED(tp, probe_if_index)) {
+ tp->t_flagsext |= TF_IF_PROBING;
tcp_timers(tp, TCPT_PTO);
tp->t_timer[TCPT_PTO] = 0;
- tp->t_flagsext &= ~TF_PROBING;
+ tp->t_flagsext &= ~TF_IF_PROBING;
}
/*
LIST_FOREACH_SAFE(te, &listp->lhead, le, next_te) {
uint32_t offset = 0;
uint32_t runtime = te->runtime;
- if (te->index < TCPT_NONE && TSTMP_GT(runtime, tcp_now)) {
+
+ tp = TIMERENTRY_TO_TP(te);
+
+ /*
+ * An interface probe may need to happen before the previously scheduled runtime
+ */
+ if (te->index < TCPT_NONE && TSTMP_GT(runtime, tcp_now) &&
+ !TCP_IF_STATE_CHANGED(tp, listp->probe_if_index)) {
offset = timer_diff(runtime, 0, tcp_now, 0);
if (next_timer == 0 || offset < next_timer) {
next_timer = offset;
continue;
}
- tp = TIMERENTRY_TO_TP(te);
-
/*
* Acquire an inp wantcnt on the inpcb so that the socket
* won't get detached even if tcp_close is called
calculate_tcp_clock();
lck_mtx_lock(listp->mtx);
- if (listp->probe_if_index > 0) {
+ if (listp->probe_if_index > 0 && listp->probe_if_index != probe_if_index) {
tcpstat.tcps_probe_if_conflict++;
+ os_log(OS_LOG_DEFAULT,
+ "%s: probe_if_index %u conflicts with %u, tcps_probe_if_conflict %u\n",
+ __func__, probe_if_index, listp->probe_if_index,
+ tcpstat.tcps_probe_if_conflict);
goto done;
}
listp->probe_if_index = probe_if_index;
if (listp->running) {
+ os_log(OS_LOG_DEFAULT, "%s: timer list already running for if_index %u\n",
+ __func__, probe_if_index);
goto done;
}
diff = timer_diff(listp->runtime, 0, tcp_now, offset);
if (diff <= 0) {
/* The timer will fire sooner than what's needed */
+ os_log(OS_LOG_DEFAULT,
+ "%s: timer will fire sooner than needed for if_index %u\n",
+ __func__, probe_if_index);
goto done;
}
}