/*
- * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
int mptcp_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_enable, 0, "Enable Multipath TCP Support");
+ &mptcp_enable, 0, "Enable Multipath TCP Support");
-/* Number of times to try negotiating MPTCP on SYN retransmissions */
-int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
+/*
+ * Number of times to try negotiating MPTCP on SYN retransmissions.
+ * We haven't seen any reports of a middlebox that drops all SYN segments
+ * carrying an MPTCP option. Thus, let's be generous and retransmit it 4 times.
+ */
+int mptcp_mpcap_retries = 4;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
- CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
+ CTLFLAG_RW | CTLFLAG_LOCKED,
+ &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
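/*
 * Illustrative user-space sketch (not part of this change): the knob
 * registered above surfaces as "net.inet.mptcp.mptcp_cap_retr" and can
 * be read (or, with newp/newlen, set) via sysctlbyname(3):
 *
 *     int retries;
 *     size_t len = sizeof(retries);
 *     if (sysctlbyname("net.inet.mptcp.mptcp_cap_retr",
 *         &retries, &len, NULL, 0) == 0)
 *         printf("MP_CAPABLE SYN retries: %d\n", retries);
 */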
/*
* By default, DSS checksum is turned off, revisit if we ever do
* MPTCP for non-SSL traffic.
*/
int mptcp_dss_csum = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_dss_csum, 0, "Enable DSS checksum");
+ &mptcp_dss_csum, 0, "Enable DSS checksum");
/*
* When mptcp_fail_thresh number of retransmissions are sent, subflow failover
* is attempted on a different path.
*/
int mptcp_fail_thresh = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_fail_thresh, 0, "Failover threshold");
-
+ &mptcp_fail_thresh, 0, "Failover threshold");
/*
* MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime
* as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
* Some carrier networks have a timeout of 10 or 15 minutes.
*/
-int mptcp_subflow_keeptime = 60*14;
+int mptcp_subflow_keeptime = 60 * 14;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
+ &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
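/*
 * Worked out: 60 * 14 = 840 seconds, i.e. a 14-minute keepalive, which
 * stays just under the 15-minute NAT timeouts mentioned above (a
 * 10-minute NAT would still expire first).
 */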
int mptcp_rtthist_rtthresh = 600;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
-
-/*
- * Use RTO history for sending new data
- */
-int mptcp_use_rto = 1;
-SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_use_rto, 0, "Disable RTO for subflow selection");
+ &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
int mptcp_rtothresh = 1500;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_rtothresh, 0, "RTO threshold");
+ &mptcp_rtothresh, 0, "RTO threshold");
/*
* Probe the preferred path when it is not in use
*/
uint32_t mptcp_probeto = 1000;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_probeto, 0, "Disable probing by setting to 0");
+ &mptcp_probeto, 0, "Disable probing by setting to 0");
uint32_t mptcp_probecnt = 5;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
- &mptcp_probecnt, 0, "Number of probe writes");
-
-/*
- * Static declarations
- */
-static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, uint64_t,
- uint32_t, uint16_t, uint16_t);
+ &mptcp_probecnt, 0, "Number of probe writes");
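/*
 * Read together (a loose paraphrase of the two knobs above, not new
 * behavior): once the preferred path has been idle for the mptcp_probeto
 * timeout, up to mptcp_probecnt probe writes are scheduled on it;
 * setting mptcp_probeto to 0 disables probing entirely.
 */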
static int
mptcp_reass_present(struct socket *mp_so)
{
- struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
+ struct mptses *mpte = mpsotompte(mp_so);
+ struct mptcb *mp_tp = mpte->mpte_mptcb;
struct tseg_qent *q;
int dowakeup = 0;
+ int flags = 0;
/*
* Present data to user, advancing rcv_nxt through
* completed sequence space.
*/
- if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
- return (0);
+ if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+ return flags;
+ }
q = LIST_FIRST(&mp_tp->mpt_segq);
- if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt)
- return (0);
+ if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
+ return flags;
+ }
/*
* If there is already another thread doing reassembly for this
* connection, it is better to let it finish the job --
* (radar 16316196)
*/
- if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG)
- return (0);
+ if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
+ return flags;
+ }
mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;
if (mp_so->so_state & SS_CANTRCVMORE) {
m_freem(q->tqe_m);
} else {
- if (sbappendstream(&mp_so->so_rcv, q->tqe_m))
+ flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+ if (sbappendstream_rcvdemux(mp_so, q->tqe_m)) {
dowakeup = 1;
+ }
}
zfree(tcp_reass_zone, q);
mp_tp->mpt_reassqlen--;
} while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;
- if (dowakeup)
+ if (dowakeup) {
sorwakeup(mp_so); /* done with socket lock held */
- return (0);
-
+ }
+ return flags;
}
static int
struct tseg_qent *p = NULL;
struct tseg_qent *nq;
struct tseg_qent *te = NULL;
- u_int16_t qlimit;
+ uint32_t qlimit;
/*
* Limit the number of segments in the reassembly queue to prevent
* holding on to too many segments (and thus running out of mbufs).
* Make sure to let the missing segment through which caused this
* queue. Always keep one global queue entry spare to be able to
* process the missing segment.
*/
- qlimit = min(max(100, mp_so->so_rcv.sb_hiwat >> 10),
+ qlimit = MIN(MAX(100, mp_so->so_rcv.sb_hiwat >> 10),
(tcp_autorcvbuf_max >> 10));
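/*
 * Worked example (illustrative numbers): with sb_hiwat = 512 KiB the
 * shift yields 512, so qlimit = MIN(MAX(100, 512), tcp_autorcvbuf_max >> 10);
 * assuming a 2 MiB autorcvbuf cap, that is MIN(512, 2048) = 512 entries.
 */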
if (mb_dsn != mp_tp->mpt_rcvnxt &&
(mp_tp->mpt_reassqlen + 1) >= qlimit) {
tcpstat.tcps_mptcp_rcvmemdrop++;
m_freem(m);
*tlenp = 0;
- return (0);
+ return 0;
}
/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
if (te == NULL) {
tcpstat.tcps_mptcp_rcvmemdrop++;
m_freem(m);
- return (0);
+ return 0;
}
mp_tp->mpt_reassqlen++;
* Find a segment which begins after this one does.
*/
LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
- if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn))
+ if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
break;
+ }
p = q;
}
*/
goto out;
}
- m_adj(m, i);
+ VERIFY(i <= INT_MAX);
+ m_adj(m, (int)i);
*tlenp -= i;
phdr->mp_dsn += i;
}
*/
while (q) {
int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
- if (i <= 0)
+ if (i <= 0) {
break;
+ }
if (i < q->tqe_len) {
q->tqe_m->m_pkthdr.mp_dsn += i;
q->tqe_len -= i;
- m_adj(q->tqe_m, i);
+
+ VERIFY(i <= INT_MAX);
+ m_adj(q->tqe_m, (int)i);
break;
}
}
out:
- return (mptcp_reass_present(mp_so));
+ return mptcp_reass_present(mp_so);
}
/*
VERIFY(m->m_flags & M_PKTHDR);
- mpte_lock_assert_held(mpte); /* same as MP socket lock */
-
mp_so = mptetoso(mpte);
mp_tp = mpte->mpte_mptcb;
+ socket_lock_assert_owned(mp_so);
+
DTRACE_MPTCP(input);
mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
* In the degraded fallback case, data is accepted without DSS map
*/
if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
+ struct mbuf *iter;
+ int mb_dfin = 0;
fallback:
mptcp_sbrcv_grow(mp_tp);
+ iter = m;
+ while (iter) {
+ if ((iter->m_flags & M_PKTHDR) &&
+ (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
+ mb_dfin = 1;
+ }
+
+ if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
+ /* Don't add zero-length packets, so skip this one! */
+ if (prev == NULL) {
+ m = iter->m_next;
+ m_free(iter);
+ iter = m;
+ } else {
+ prev->m_next = iter->m_next;
+ m_free(iter);
+ iter = prev->m_next;
+ }
+
+ /* It was a zero-length packet so next one must be a pkthdr */
+ VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
+ } else {
+ prev = iter;
+ iter = iter->m_next;
+ }
+ }
+
/*
* assume degraded flow as this may be the first packet
* without DSS, and the subflow state is not updated yet.
*/
- if (sbappendstream(&mp_so->so_rcv, m))
+ if (sbappendstream_rcvdemux(mp_so, m)) {
sorwakeup(mp_so);
+ }
+
DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
struct socket *, mp_so,
struct sockbuf *, &mp_so->so_rcv,
struct sockbuf *, &mp_so->so_snd,
struct mptses *, mpte);
count = mp_so->so_rcv.sb_cc - count;
- mptcplog((LOG_DEBUG, "%s: Fallback read %d bytes\n", __func__,
- count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
+
+ mp_tp->mpt_rcvnxt += count;
+
+ if (mb_dfin) {
+ mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
+ socantrcvmore(mp_so);
+ }
return;
}
u_int64_t mb_dsn;
int32_t mb_datalen;
int64_t todrop;
+ int mb_dfin = 0;
+
+ VERIFY(m->m_flags & M_PKTHDR);
/* If fallback occurs, mbufs will not have PKTF_MPTCP set */
- if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+ if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
goto fallback;
+ }
save = m->m_next;
/*
prev = save;
save = save->m_next;
}
- if (prev)
+ if (prev) {
prev->m_next = NULL;
- else
+ } else {
m->m_next = NULL;
+ }
mb_dsn = m->m_pkthdr.mp_dsn;
mb_datalen = m->m_pkthdr.mp_rlen;
if (todrop > 0) {
tcpstat.tcps_mptcp_rcvpackafterwin++;
+ os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
+ mp_tp->mpt_rcvwnd, todrop);
+
if (todrop >= mb_datalen) {
- if (freelist == NULL)
+ if (freelist == NULL) {
freelist = m;
- else
+ } else {
tail->m_next = m;
+ }
- if (prev != NULL)
+ if (prev != NULL) {
tail = prev;
- else
+ } else {
tail = m;
+ }
m = save;
prev = save = NULL;
continue;
} else {
- m_adj(m, -todrop);
+ VERIFY(todrop <= INT_MAX);
+ m_adj(m, (int)-todrop);
mb_datalen -= todrop;
+ m->m_pkthdr.mp_rlen -= todrop;
}
- }
- if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
- !LIST_EMPTY(&mp_tp->mpt_segq)) {
- mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
-
- goto next;
+ /*
+ * We drop from the right edge of the mbuf, thus the
+ * DATA_FIN is dropped as well
+ */
+ m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
}
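/*
 * Worked example of the right-edge trim above (illustrative numbers):
 * with mb_dsn = 100, mb_datalen = 50 and the receive window ending at
 * DSN 130, todrop is 20; m_adj() cuts the last 20 bytes, mp_rlen drops
 * to 30, and PKTF_MPTCP_DFIN is cleared since the FIN sat past the edge.
 */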
if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
mp_tp->mpt_rcvnxt)) {
- if (freelist == NULL)
+ if (freelist == NULL) {
freelist = m;
- else
+ } else {
tail->m_next = m;
+ }
- if (prev != NULL)
+ if (prev != NULL) {
tail = prev;
- else
+ } else {
tail = m;
+ }
m = save;
prev = save = NULL;
continue;
} else {
- m_adj(m, (mp_tp->mpt_rcvnxt - mb_dsn));
+ VERIFY((mp_tp->mpt_rcvnxt - mb_dsn) <= INT_MAX);
+ m_adj(m, (int)(mp_tp->mpt_rcvnxt - mb_dsn));
+ mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
+ mb_dsn = mp_tp->mpt_rcvnxt;
+ VERIFY(mb_datalen >= 0 && mb_datalen <= USHRT_MAX);
+ m->m_pkthdr.mp_rlen = (uint16_t)mb_datalen;
+ m->m_pkthdr.mp_dsn = mb_dsn;
}
- mptcplog((LOG_INFO, "%s: Left Edge %llu\n", __func__,
- mp_tp->mpt_rcvnxt),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
}
+ if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
+ !LIST_EMPTY(&mp_tp->mpt_segq)) {
+ mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
+
+ goto next;
+ }
+ mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+
mptcp_sbrcv_grow(mp_tp);
- if (sbappendstream(&mp_so->so_rcv, m))
+ if (sbappendstream_rcvdemux(mp_so, m)) {
wakeup = 1;
+ }
DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
struct sockbuf *, &mp_so->so_rcv,
count = mp_so->so_rcv.sb_cc - count;
tcpstat.tcps_mp_rcvtotal++;
tcpstat.tcps_mp_rcvbytes += count;
- mptcplog((LOG_DEBUG, "%s: Read %d bytes\n", __func__, count),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
mp_tp->mpt_rcvnxt += count;
next:
+ if (mb_dfin) {
+ mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
+ socantrcvmore(mp_so);
+ }
m = save;
prev = save = NULL;
count = mp_so->so_rcv.sb_cc;
} while (m);
- if (freelist)
+ if (freelist) {
m_freem(freelist);
+ }
- if (wakeup)
+ if (wakeup) {
sorwakeup(mp_so);
+ }
}
-static boolean_t
-mptcp_can_send_more(struct mptcb *mp_tp)
+boolean_t
+mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
{
struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
/*
* Always send if there is data in the reinject-queue.
*/
- if (mp_tp->mpt_mpte->mpte_reinjectq)
- return (TRUE);
+ if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
+ return TRUE;
+ }
/*
* Don't send, if:
*
* 1. snd_nxt >= snd_max: Everything has been sent. Except when using
*    TFO, we might be doing a 0-byte write.
* 2. snd_una + snd_wnd <= snd_nxt: No space left in the send window.
* 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
*/
- if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax))
- return (FALSE);
+ if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
+ return FALSE;
+ }
- if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt))
- return (FALSE);
+ if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
+ return FALSE;
+ }
- if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
- return (FALSE);
+ if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+ return FALSE;
+ }
- if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2)
- return (FALSE);
+ if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
+ return FALSE;
+ }
- return (TRUE);
+ return TRUE;
}
/*
uint64_t old_snd_nxt;
int error = 0;
- mpte_lock_assert_held(mpte);
mp_so = mptetoso(mpte);
mp_tp = mpte->mpte_mptcb;
+ socket_lock_assert_owned(mp_so);
+
+ if (mp_so->so_flags & SOF_DEFUNCT) {
+ return 0;
+ }
+
VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;
- mptcplog((LOG_DEBUG, "%s: snxt %u sndmax %u suna %u swnd %u reinjectq %u state %u\n",
- __func__, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
- (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_sndwnd,
- mpte->mpte_reinjectq ? 1 : 0,
- mp_tp->mpt_state),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
old_snd_nxt = mp_tp->mpt_sndnxt;
- while (mptcp_can_send_more(mp_tp)) {
+ while (mptcp_can_send_more(mp_tp, FALSE)) {
/* get the "best" subflow to be used for transmission */
- mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts);
+ mpts = mptcp_get_subflow(mpte, &preferred_mpts);
if (mpts == NULL) {
mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
break;
}
- mptcplog((LOG_DEBUG, "%s: using id %u\n", __func__, mpts->mpts_connid),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
/* In case there's just one flow, we reattempt later */
if (mpts_tried != NULL &&
(mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
mpts_tried->mpts_flags |= MPTSF_ACTIVE;
mptcp_start_timer(mpte, MPTT_REXMT);
- mptcplog((LOG_DEBUG, "%s: retry later\n", __func__),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
break;
}
* 2. send buffer is filled to 7/8th with data (so we actually
* have data to make use of it);
*/
- if (tcp_do_autosendbuf == 1 &&
- (mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
+ if ((mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
tcp_cansbgrow(&mp_so->so_snd)) {
if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat &&
mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) {
min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
tcp_autosndbuf_max)) == 1) {
mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
-
- mptcplog((LOG_DEBUG, "%s: increased snd hiwat to %u lowat %u\n",
- __func__, mp_so->so_snd.sb_hiwat,
- mp_so->so_snd.sb_lowat),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
}
}
mpts->mpts_flags |= MPTSF_FAILINGOVER;
mpts->mpts_flags &= ~MPTSF_ACTIVE;
mpts_tried = mpts;
- mptcplog((LOG_ERR, "%s: Error = %d mpts_flags %#x\n", __func__,
- error, mpts->mpts_flags),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
+ if (error != ECANCELED) {
+ os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+ error, mpts->mpts_flags);
+ }
break;
}
/* The model is to have only one active flow at a time */
if (mpte->mpte_active_sub == NULL) {
mpte->mpte_active_sub = mpts;
} else if (mpte->mpte_active_sub != mpts) {
- struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
- struct tcpcb *acttp = sototcpcb(mpte->mpte_active_sub->mpts_socket);
-
- mptcplog((LOG_DEBUG, "%s: switch [%u, srtt %d] to [%u, srtt %d]\n", __func__,
- mpte->mpte_active_sub->mpts_connid, acttp->t_srtt >> TCP_RTT_SHIFT,
- mpts->mpts_connid, tp->t_srtt >> TCP_RTT_SHIFT),
- (MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG), MPTCP_LOGLVL_LOG);
-
mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
mpte->mpte_active_sub = mpts;
}
}
+ if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+ if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
+ mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
+ mptcp_finish_usrclosed(mpte);
+ }
+ }
+
mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);
/* subflow errors should not be percolated back up */
- return (0);
+ return 0;
}
*/
if (tp->t_srtt && *currtt > tp->t_srtt &&
(curbest == NULL || tp->t_rxtshift == 0 ||
- sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
+ sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
*currtt = tp->t_srtt;
- return (mpts);
+ return mpts;
}
/*
sototcpcb(curbest->mpts_socket)->t_rxtshift &&
tp->t_rxtshift == 0) {
*currtt = tp->t_srtt;
- return (mpts);
+ return mpts;
}
- return (curbest != NULL ? curbest : mpts);
+ return curbest != NULL ? curbest : mpts;
}
static struct mptsub *
mptcp_return_subflow(struct mptsub *mpts)
{
- if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0)
- return (NULL);
+ if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
+ return NULL;
+ }
+
+ return mpts;
+}
+
+static boolean_t
+mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
+{
+ struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
+ int fail_thresh = mptcp_fail_thresh;
+
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+ fail_thresh *= 2;
+ }
- return (mpts);
+ return tp->t_rxtshift >= fail_thresh &&
+ (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
}
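/*
 * Reading the helper above with the defaults from this file: with
 * mptcp_fail_thresh = 1, a subflow with pending data counts as slow
 * after a single retransmission timeout; in (pure) handover mode the
 * bar doubles to two timeouts before a switch is considered.
 */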
/*
* Return the most eligible subflow to be used for sending data.
*/
struct mptsub *
-mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred)
+mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
{
struct tcpcb *besttp, *secondtp;
struct inpcb *bestinp, *secondinp;
struct tcpcb *tp = sototcpcb(so);
struct inpcb *inp = sotoinpcb(so);
- mptcplog((LOG_DEBUG, "%s mpts %u ignore %d, mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
- __func__, mpts->mpts_connid, ignore ? ignore->mpts_connid : -1, mpts->mpts_flags,
- INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
- inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
- tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
- mptcp_subflow_cwnd_space(so)),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+ mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
+ __func__, mpts->mpts_connid, mpts->mpts_flags,
+ INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
+ inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
+ tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
+ mptcp_subflow_cwnd_space(so)),
+ MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
/*
* First, the hard conditions to reject subflows
* (e.g., not connected,...)
*/
- if (mpts == ignore || inp->inp_last_outifp == NULL)
+ if (inp->inp_last_outifp == NULL) {
continue;
+ }
- if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+ if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
continue;
+ }
/* There can only be one subflow in degraded state */
if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
/*
* If this subflow is waiting to finally send, do it!
*/
- if (so->so_flags1 & SOF1_PRECONNECT_DATA)
- return (mptcp_return_subflow(mpts));
+ if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+ return mptcp_return_subflow(mpts);
+ }
/*
* Only send if the subflow is MP_CAPABLE. The exceptions to
* this rule (degraded or TFO) have been taken care of above.
*/
- if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE))
+ if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
continue;
+ }
if ((so->so_state & SS_ISDISCONNECTED) ||
!(so->so_state & SS_ISCONNECTED) ||
!TCPS_HAVEESTABLISHED(tp->t_state) ||
- tp->t_state > TCPS_CLOSE_WAIT)
+ tp->t_state > TCPS_CLOSE_WAIT) {
continue;
+ }
/*
* Second, the soft conditions to find the subflow with best
* conditions for each set (aka cellular vs non-cellular)
*/
- if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
+ if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
second_best = mptcp_choose_subflow(mpts, second_best,
- &exp_rtt);
- else
+ &exp_rtt);
+ } else {
best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
+ }
}
/*
* If there is no preferred or backup subflow, and there is no active
* subflow, use the last usable subflow.
*/
- if (best == NULL)
- return (mptcp_return_subflow(second_best));
+ if (best == NULL) {
+ return mptcp_return_subflow(second_best);
+ }
- if (second_best == NULL)
- return (mptcp_return_subflow(best));
+ if (second_best == NULL) {
+ return mptcp_return_subflow(best);
+ }
besttp = sototcpcb(best->mpts_socket);
bestinp = sotoinpcb(best->mpts_socket);
secondtp = sototcpcb(second_best->mpts_socket);
secondinp = sotoinpcb(second_best->mpts_socket);
- if (preferred != NULL)
+ if (preferred != NULL) {
*preferred = mptcp_return_subflow(best);
+ }
/*
* Second Step: Among best and second_best. Choose the one that is
* most appropriate for this particular service-type.
*/
- if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+ if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+ return mptcp_return_subflow(best);
+ } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
/*
* Only handover if Symptoms tells us to do so.
*/
- if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
- mptcp_is_wifi_unusable() &&
- besttp->t_rxtshift >= mptcp_fail_thresh)
- return (mptcp_return_subflow(second_best));
+ if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+ mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
+ return mptcp_return_subflow(second_best);
+ }
- return (mptcp_return_subflow(best));
+ return mptcp_return_subflow(best);
} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
int rto_thresh = mptcp_rtothresh;
/* Adjust with symptoms information */
- if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
- mptcp_is_wifi_unusable()) {
+ if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+ mptcp_is_wifi_unusable_for_session(mpte) != 0) {
rtt_thresh /= 2;
rto_thresh /= 2;
}
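/*
 * With the defaults above (rtthist_thresh 600, rto_thresh 1500), this
 * halves the bars to roughly a 300 ms smoothed RTT and a 750 ms RTO,
 * making the move to the second-best (cellular) subflow twice as eager
 * while Symptoms reports the Wi-Fi as unusable.
 */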
second_best->mpts_connid,
secondtp->t_srtt >> TCP_RTT_SHIFT),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
- return (mptcp_return_subflow(second_best));
+ return mptcp_return_subflow(second_best);
}
- if (besttp->t_rxtshift >= mptcp_fail_thresh &&
+ if (mptcp_subflow_is_slow(mpte, best) &&
secondtp->t_rxtshift == 0) {
- return (mptcp_return_subflow(second_best));
+ return mptcp_return_subflow(second_best);
}
/* Compare RTOs, select second_best if best's rto exceeds rtothresh */
second_best->mpts_connid, secondtp->t_rxtcur),
MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
- return (mptcp_return_subflow(second_best));
+ return mptcp_return_subflow(second_best);
}
/*
* were true. So, let's schedule on the best one, if it still
* has some space in the congestion-window.
*/
- return (mptcp_return_subflow(best));
- } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_AGGREGATE) {
+ return mptcp_return_subflow(best);
+ } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
struct mptsub *tmp;
/*
}
/* Is there still space in the congestion window? */
- if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0)
- return (mptcp_return_subflow(second_best));
+ if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
+ return mptcp_return_subflow(second_best);
+ }
- return (mptcp_return_subflow(best));
+ return mptcp_return_subflow(best);
} else {
panic("Unknown service-type configured for MPTCP");
}
- return (NULL);
+ return NULL;
}
static const char *
c = "MPCE_RECV_DATA_FIN";
break;
}
- return (c);
+ return c;
}
static const char *
mptcp_state_to_str(mptcp_state_t state)
{
- const char *c = "UNDEFINED";
+ const char *c = "UNDEFINED";
switch (state) {
case MPTCPS_CLOSED:
c = "MPTCPS_CLOSED";
c = "MPTCPS_TERMINATE";
break;
}
- return (c);
+ return c;
}
void
mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
{
- mpte_lock_assert_held(mp_tp->mpt_mpte);
+ struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
+
+ socket_lock_assert_owned(mp_so);
+
mptcp_state_t old_state = mp_tp->mpt_state;
DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
switch (mp_tp->mpt_state) {
case MPTCPS_CLOSED:
case MPTCPS_LISTEN:
- mp_tp->mpt_state = MPTCPS_CLOSED;
+ mp_tp->mpt_state = MPTCPS_TERMINATE;
break;
case MPTCPS_ESTABLISHED:
break;
case MPTCPS_CLOSING:
- if (event == MPCE_RECV_DATA_ACK)
+ if (event == MPCE_RECV_DATA_ACK) {
mp_tp->mpt_state = MPTCPS_TIME_WAIT;
+ }
break;
case MPTCPS_LAST_ACK:
- if (event == MPCE_RECV_DATA_ACK)
+ if (event == MPCE_RECV_DATA_ACK) {
mptcp_close(mp_tp->mpt_mpte, mp_tp);
+ }
break;
case MPTCPS_FIN_WAIT_2:
mptcp_update_rcv_state_meat(mp_tp, tp,
full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
csum);
-
}
void
uint16_t csum)
{
if (mdss_data_len == 0) {
- mptcplog((LOG_INFO, "%s: Infinite Mapping.\n", __func__),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+ os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
- mptcplog((LOG_ERR, "%s: Bad checksum %x \n", __func__,
- csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
+ os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
}
mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
return;
}
- mptcplog((LOG_DEBUG,
- "%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", __func__,
- seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
-
- /* Process a Data FIN packet , handled in mptcp_do_fin_opt */
- if ((seqn == 0) && (mdss_data_len == 1)) {
- mptcplog((LOG_INFO, "%s: Data FIN in %s state \n", __func__,
- mptcp_state_to_str(mp_tp->mpt_state)),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
- return;
- }
+
mptcp_notify_mpready(tp->t_inpcb->inp_socket);
+
tp->t_rcv_map.mpt_dsn = full_dsn;
tp->t_rcv_map.mpt_sseq = seqn;
tp->t_rcv_map.mpt_len = mdss_data_len;
{
u_int32_t datalen;
- if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+ if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
return 0;
+ }
datalen = m->m_pkthdr.mp_rlen;
/* unacceptable DSS option, fallback to TCP */
if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
- mptcplog((LOG_ERR, "%s: mbuf len %d, MPTCP expected %d",
- __func__, m->m_pkthdr.len, datalen),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+ os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
} else {
return 0;
}
}
int
-mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen)
+mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
+ int drop_hdrlen)
{
- mptcp_insert_rmap(tp, m);
+ mptcp_insert_rmap(tp, m, th);
if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
- drop_hdrlen) != 0)
+ drop_hdrlen) != 0) {
return -1;
- return 0;
-}
-
-/*
- * MPTCP Checksum support
- * The checksum is calculated whenever the MPTCP DSS option is included
- * in the TCP packet. The checksum includes the sum of the MPTCP psuedo
- * header and the actual data indicated by the length specified in the
- * DSS option.
- */
-
-int
-mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
- uint32_t sseq, uint16_t dlen, uint16_t csum)
-{
- uint16_t mptcp_csum;
-
- mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum);
- if (mptcp_csum) {
- tp->t_mpflags |= TMPF_SND_MPFAIL;
- mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
- m_freem(m);
- tcpstat.tcps_mp_badcsum++;
- return (-1);
}
- return (0);
+ return 0;
}
static uint16_t
mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
- uint16_t dlen, uint16_t csum)
+ uint16_t dlen, uint16_t csum, int dfin)
{
struct mptcb *mp_tp = tptomptp(tp);
+ int real_len = dlen - dfin;
uint32_t sum = 0;
- if (mp_tp == NULL)
- return (0);
+ VERIFY(real_len >= 0);
- if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
- return (0);
+ if (mp_tp == NULL) {
+ return 0;
+ }
- if (tp->t_mpflags & TMPF_TCP_FALLBACK)
- return (0);
+ if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
+ return 0;
+ }
+
+ if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
+ return 0;
+ }
/*
* The remote side may send a packet with fewer bytes than the
* claimed DSS checksum length.
*/
- if ((int)m_length2(m, NULL) < dlen)
- return (0xffff);
+ if ((int)m_length2(m, NULL) < real_len) {
+ return 0xffff;
+ }
- if (dlen != 0)
- sum = m_sum16(m, 0, dlen);
+ if (real_len != 0) {
+ sum = m_sum16(m, 0, real_len);
+ }
sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
ADDCARRY(sum);
+
DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
uint32_t, sum);
- mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
- MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
- return (~sum & 0xffff);
+ return ~sum & 0xffff;
}
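/*
 * Sketch of the arithmetic above: a DATA_FIN occupies one byte of DSS
 * sequence space but carries no payload byte, hence the 16-bit one's
 * complement sum runs over real_len = dlen - dfin bytes of data while
 * the pseudo-header is still summed with the full dlen:
 *
 *     sum = m_sum16(m, 0, dlen - dfin);
 *     sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
 *     ADDCARRY(sum);
 *     return ~sum & 0xffff;    // 0 iff the received checksum matches
 */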
-uint32_t
+/*
+ * MPTCP Checksum support
+ * The checksum is calculated whenever the MPTCP DSS option is included
+ * in the TCP packet. The checksum includes the sum of the MPTCP pseudo
+ * header and the actual data indicated by the length specified in the
+ * DSS option.
+ */
+
+int
+mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
+ uint32_t sseq, uint16_t dlen, uint16_t csum, int dfin)
+{
+ uint16_t mptcp_csum;
+
+ mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin);
+ if (mptcp_csum) {
+ tp->t_mpflags |= TMPF_SND_MPFAIL;
+ mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
+ m_freem(m);
+ tcpstat.tcps_mp_badcsum++;
+ return -1;
+ }
+ return 0;
+}
+
+uint16_t
mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
{
- u_int32_t sum = 0;
+ uint32_t sum = 0;
- if (dlen)
+ if (dlen) {
sum = m_sum16(m, 0, dlen);
+ }
dss_val = mptcp_hton64(dss_val);
sseq = htonl(sseq);
sum = ~sum & 0xffff;
DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
- MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
+ MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
- return sum;
+ return (uint16_t)sum;
}
/*
__func__, spike,
tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
tp->t_rttcur),
- (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
-
+ (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
}
- if (spike > 0 ) {
- return (FALSE);
+ if (spike > 0) {
+ return FALSE;
} else {
- return (TRUE);
+ return TRUE;
}
}
VERIFY(mpp->mpp_flags & flag);
mpp->mpp_flags &= ~flag;
- if (mptcp_should_defer_upcall(mpp))
+ if (mptcp_should_defer_upcall(mpp)) {
return;
+ }
if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;
sowwakeup(mpp->mpp_socket);
}
-
- if (mpp->mpp_flags & MPP_SET_CELLICON) {
- mpp->mpp_flags &= ~MPP_SET_CELLICON;
-
- mptcp_set_cellicon(mpp->mpp_pcbe);
- }
-
- if (mpp->mpp_flags & MPP_UNSET_CELLICON) {
- mpp->mpp_flags &= ~MPP_UNSET_CELLICON;
-
- mptcp_unset_cellicon();
- }
-}
-
-static void
-mptcp_ask_for_nat64(struct ifnet *ifp)
-{
- in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);
-
- mptcplog((LOG_DEBUG, "%s: asked for NAT64-prefix on %s\n",
- __func__, ifp->if_name), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
static void
mptcp_reset_itfinfo(struct mpt_itf_info *info)
{
- info->ifindex = 0;
- info->has_v4_conn = 0;
- info->has_v6_conn = 0;
+ memset(info, 0, sizeof(*info));
}
void
-mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
+mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
+ uint32_t necp_flags, __unused bool *viable)
{
+ boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
+ boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+ boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
+ boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
struct mppcb *mp = (struct mppcb *)handle;
struct mptses *mpte = mptompte(mp);
struct socket *mp_so;
struct mptcb *mp_tp;
- int locked = 0;
uint32_t i, ifindex;
+ struct ifnet *ifp;
+ int locked = 0;
- ifindex = flow->interface_index;
+ ifindex = interface_index;
VERIFY(ifindex != IFSCOPE_NONE);
- /* ToDo - remove after rdar://problem/32007628 */
- if (!IF_INDEX_IN_RANGE(ifindex))
- printf("%s 1 ifindex %u not in range of flow %p action %d\n",
- __func__, ifindex, flow, action);
-
/* About to be garbage-collected (see note about MPTCP/NECP interactions) */
- if (mp->mpp_socket->so_usecount == 0)
+ if (mp->mpp_socket->so_usecount == 0) {
return;
+ }
+
+ mp_so = mptetoso(mpte);
if (action != NECP_CLIENT_CBACTION_INITIAL) {
- mpte_lock(mpte);
+ socket_lock(mp_so, 1);
locked = 1;
/* Check again, because it might have changed while waiting */
- if (mp->mpp_socket->so_usecount == 0)
+ if (mp->mpp_socket->so_usecount == 0) {
goto out;
+ }
}
+ socket_lock_assert_owned(mp_so);
+
mp_tp = mpte->mpte_mptcb;
- mp_so = mptetoso(mpte);
- mptcplog((LOG_DEBUG, "%s, action: %u ifindex %u usecount %u mpt_flags %#x state %u\n",
- __func__, action, ifindex, mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+ ifnet_head_lock_shared();
+ ifp = ifindex2ifnet[ifindex];
+ ifnet_head_done();
+
+ os_log(mptcp_log_handle, "%s - %lx: action: %u ifindex %u delegated to %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
+ ifp && ifp->if_delegated.ifp ? ifp->if_delegated.ifp->if_index : IFSCOPE_NONE,
+ mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
+ has_v4, has_v6, has_nat64, low_power);
/* No need on fallen back sockets */
- if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)
+ if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
goto out;
+ }
+
+ /*
+ * When the interface goes into low-power mode, we don't want to establish
+ * new subflows on it. Thus, mark it internally as non-viable.
+ */
+ if (low_power) {
+ action = NECP_CLIENT_CBACTION_NONVIABLE;
+ }
if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
- if (mpte->mpte_itfinfo[i].ifindex == ifindex)
+ if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
+ continue;
+ }
+
+ if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
+ }
}
mptcp_sched_create_subflows(mpte);
} else if (action == NECP_CLIENT_CBACTION_VIABLE ||
- action == NECP_CLIENT_CBACTION_INITIAL) {
- int found_empty = 0, empty_index = -1;
- struct ifnet *ifp;
+ action == NECP_CLIENT_CBACTION_INITIAL) {
+ int found_slot = 0, slot_index = -1;
+ struct sockaddr *dst;
- /* ToDo - remove after rdar://problem/32007628 */
- if (!IF_INDEX_IN_RANGE(ifindex))
- printf("%s 2 ifindex %u not in range of flow %p action %d\n",
- __func__, ifindex, flow, action);
-
- ifnet_head_lock_shared();
- ifp = ifindex2ifnet[ifindex];
- ifnet_head_done();
-
- /* ToDo - remove after rdar://problem/32007628 */
- if (!IF_INDEX_IN_RANGE(ifindex))
- printf("%s 3 ifindex %u not in range of flow %p action %d\n",
- __func__, ifindex, flow, action);
+ if (ifp == NULL) {
+ goto out;
+ }
- if (ifp == NULL)
+ if (IFNET_IS_COMPANION_LINK(ifp)) {
goto out;
+ }
if (IFNET_IS_EXPENSIVE(ifp) &&
- (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
goto out;
+ }
+
+ if (IFNET_IS_CONSTRAINED(ifp) &&
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+ goto out;
+ }
if (IFNET_IS_CELLULAR(ifp) &&
- (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
goto out;
+ }
+ if (IS_INTF_CLAT46(ifp)) {
+ has_v4 = FALSE;
+ }
+
+ /* Look for a slot in which to store or update the interface info. */
for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
+ /* Found a potential empty slot where we can put it */
if (mpte->mpte_itfinfo[i].ifindex == 0) {
- found_empty = 1;
- empty_index = i;
+ found_slot = 1;
+ slot_index = i;
+ }
+
+ /*
+ * The interface is already in our array. Check if we
+ * need to update it.
+ */
+ if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
+ (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
+ mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
+ mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
+ found_slot = 1;
+ slot_index = i;
+ break;
}
if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
- /* Ok, it's already there */
+ /*
+ * Ok, it's already there and we don't need
+ * to update it
+ */
goto out;
}
}
- if ((mpte->mpte_dst.sa_family == AF_INET || mpte->mpte_dst.sa_family == 0) &&
- !(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4) &&
- ifnet_get_nat64prefix(ifp, NULL) == ENOENT) {
- mptcp_ask_for_nat64(ifp);
+ dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
+ if (dst && dst->sa_family == AF_INET &&
+ has_v6 && !has_nat64 && !has_v4) {
+ if (found_slot) {
+ mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+ mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+ mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+ mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
+ }
goto out;
}
- if (found_empty == 0) {
+ if (found_slot == 0) {
int new_size = mpte->mpte_itfinfo_size * 2;
struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);
if (info == NULL) {
- mptcplog((LOG_ERR, "%s malloc failed for %u\n", __func__, new_size),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+ os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
+ __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
goto out;
}
memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));
- if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE)
+ if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
_FREE(mpte->mpte_itfinfo, M_TEMP);
+ }
/* We allocated a new one, thus the first must be empty */
- empty_index = mpte->mpte_itfinfo_size;
+ slot_index = mpte->mpte_itfinfo_size;
mpte->mpte_itfinfo = info;
mpte->mpte_itfinfo_size = new_size;
-
- mptcplog((LOG_DEBUG, "%s Needed to realloc to %u\n", __func__, new_size),
- MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
- VERIFY(empty_index >= 0 && empty_index < (int)mpte->mpte_itfinfo_size);
- mpte->mpte_itfinfo[empty_index].ifindex = ifindex;
- mpte->mpte_itfinfo[empty_index].has_v4_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
- mpte->mpte_itfinfo[empty_index].has_v6_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+ VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
+ mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+ mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+ mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+ mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
mptcp_sched_create_subflows(mpte);
}
out:
- if (locked)
- mpte_unlock(mpte);
+ if (locked) {
+ socket_unlock(mp_so, 1);
+ }
}
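/*
 * Net effect of the callback above, summarized: NONVIABLE (or a
 * low-power interface) clears the matching mpte_itfinfo slot, while
 * VIABLE/INITIAL records {ifindex, has_v4, has_v6, has_nat64} in a free
 * or stale slot (growing the array if needed); both paths end in
 * mptcp_sched_create_subflows() so the subflow set is re-evaluated.
 */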
void
struct mptses *mpte = mpsotompte(mp_so);
uint32_t i;
- mpte_lock_assert_held(mpte);
+ socket_lock_assert_owned(mp_so);
ifnet_head_lock_shared();
uint32_t ifindex = info->ifindex;
struct ifnet *ifp;
- if (ifindex == IFSCOPE_NONE)
+ if (ifindex == IFSCOPE_NONE) {
continue;
+ }
ifp = ifindex2ifnet[ifindex];
+ if (ifp == NULL) {
+ continue;
+ }
if (IFNET_IS_EXPENSIVE(ifp) &&
- (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
info->ifindex = IFSCOPE_NONE;
+ }
+
+ if (IFNET_IS_CONSTRAINED(ifp) &&
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+ info->ifindex = IFSCOPE_NONE;
+ }
if (IFNET_IS_CELLULAR(ifp) &&
- (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+ (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
info->ifindex = IFSCOPE_NONE;
+ }
}
ifnet_head_done();
}
-