X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5c9f46613a83ebfc29a5b1f099448259e96a98f0..cb3231590a3c94ab4375e2228bd5e86b0cf1ad7e:/bsd/netinet/mptcp.c

diff --git a/bsd/netinet/mptcp.c b/bsd/netinet/mptcp.c
index 8cf437f64..a2883309e 100644
--- a/bsd/netinet/mptcp.c
+++ b/bsd/netinet/mptcp.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -109,13 +109,17 @@ int mptcp_enable = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_enable, 0, "Enable Multipath TCP Support");
+    &mptcp_enable, 0, "Enable Multipath TCP Support");
 
-/* Number of times to try negotiating MPTCP on SYN retransmissions */
-int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
+/*
+ * Number of times to try negotiating MPTCP on SYN retransmissions.
+ * We haven't seen any reports of a middlebox that is dropping all SYN-segments
+ * that have an MPTCP-option. Thus, let's be generous and retransmit it 4 times.
+ */
+int mptcp_mpcap_retries = 4;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
-	CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
 
 /*
  * By default, DSS checksum is turned off, revisit if we ever do
@@ -123,7 +127,7 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
  */
 int mptcp_dss_csum = 0;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_dss_csum, 0, "Enable DSS checksum");
+    &mptcp_dss_csum, 0, "Enable DSS checksum");
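All of these tunables are registered under the net.inet.mptcp sysctl node, so they can be read and adjusted at runtime without a rebuild. A minimal userspace sketch, assuming the standard Darwin sysctlbyname(3) interface (illustration only, not part of this change):

    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int retries;
            size_t len = sizeof(retries);

            /* Read the MP_CAPABLE retry count declared above. */
            if (sysctlbyname("net.inet.mptcp.mptcp_cap_retr",
                &retries, &len, NULL, 0) == -1) {
                    perror("sysctlbyname");
                    return 1;
            }
            printf("MP_CAPABLE SYN retries: %d\n", retries);
            return 0;
    }

Writing works the same way through the new-value arguments, since the OIDs above are all CTLFLAG_RW.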
 
 /*
  * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
@@ -131,7 +135,7 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
  */
 int mptcp_fail_thresh = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_fail_thresh, 0, "Failover threshold");
+    &mptcp_fail_thresh, 0, "Failover threshold");
 
 
 /*
@@ -139,46 +143,47 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
  * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
  * Some carrier networks have a timeout of 10 or 15 minutes.
  */
-int mptcp_subflow_keeptime = 60*14;
+int mptcp_subflow_keeptime = 60 * 14;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_subflow_keeptime, 0, "Keepalive in seconds");
+    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
 
 int mptcp_rtthist_rtthresh = 600;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_rtthist_rtthresh, 0, "Rtt threshold");
+    &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
 
 /*
  * Use RTO history for sending new data
  */
 int mptcp_use_rto = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_use_rto, 0, "Disable RTO for subflow selection");
+    &mptcp_use_rto, 0, "Disable RTO for subflow selection");
 
 int mptcp_rtothresh = 1500;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_rtothresh, 0, "RTO threshold");
+    &mptcp_rtothresh, 0, "RTO threshold");
 
 /*
  * Probe the preferred path, when it is not in use
  */
 uint32_t mptcp_probeto = 1000;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_probeto, 0, "Disable probing by setting to 0");
+    &mptcp_probeto, 0, "Disable probing by setting to 0");
 
 uint32_t mptcp_probecnt = 5;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
-	&mptcp_probecnt, 0, "Number of probe writes");
+    &mptcp_probecnt, 0, "Number of probe writes");
 
 /*
  * Static declarations
  */
 static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, uint64_t,
-	uint32_t, uint16_t, uint16_t, uint16_t);
+    uint32_t, uint16_t, uint16_t, uint16_t);
 
 static int
 mptcp_reass_present(struct socket *mp_so)
 {
-	struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
+	struct mptses *mpte = mpsotompte(mp_so);
+	struct mptcb *mp_tp = mpte->mpte_mptcb;
 	struct tseg_qent *q;
 	int dowakeup = 0;
 	int flags = 0;
@@ -187,19 +192,22 @@ mptcp_reass_present(struct socket *mp_so)
 	 * Present data to user, advancing rcv_nxt through
 	 * completed sequence space.
 	 */
-	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
-		return (flags);
+	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+		return flags;
+	}
 	q = LIST_FIRST(&mp_tp->mpt_segq);
-	if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt)
-		return (flags);
+	if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
+		return flags;
+	}
 
 	/*
 	 * If there is already another thread doing reassembly for this
 	 * connection, it is better to let it finish the job --
 	 * (radar 16316196)
 	 */
-	if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG)
-		return (flags);
+	if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
+		return flags;
+	}
 
 	mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;
 
@@ -210,8 +218,9 @@ mptcp_reass_present(struct socket *mp_so)
 			m_freem(q->tqe_m);
 		} else {
 			flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
-			if (sbappendstream_rcvdemux(mp_so, q->tqe_m, 0, 0))
+			if (sbappendstream_rcvdemux(mp_so, q->tqe_m, 0, 0)) {
 				dowakeup = 1;
+			}
 		}
 		zfree(tcp_reass_zone, q);
 		mp_tp->mpt_reassqlen--;
@@ -219,10 +228,10 @@ mptcp_reass_present(struct socket *mp_so)
 	} while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
 	mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;
 
-	if (dowakeup)
+	if (dowakeup) {
 		sorwakeup(mp_so); /* done with socket lock held */
-	return (flags);
-
+	}
+	return flags;
 }
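The mp_dsn/mpt_rcvnxt bookkeeping above lives in the 64-bit data sequence (DSN) space, and every ordering test goes through the MPTCP_SEQ_* macros, the 64-bit analogue of TCP's SEQ_LT/SEQ_GT family. They are defined in bsd/netinet/mptcp_var.h, essentially as follows (paraphrased sketch for reference; not part of this diff):

    /*
     * Wrap-safe ordering in the 64-bit DSN space: (a) is "after" (b)
     * iff the signed difference is positive.
     */
    #define MPTCP_SEQ_LT(a, b)      ((int64_t)((a) - (b)) < 0)
    #define MPTCP_SEQ_LEQ(a, b)     ((int64_t)((a) - (b)) <= 0)
    #define MPTCP_SEQ_GT(a, b)      ((int64_t)((a) - (b)) > 0)
    #define MPTCP_SEQ_GEQ(a, b)     ((int64_t)((a) - (b)) >= 0)

The plain `!=` comparison against mpt_rcvnxt in mptcp_reass_present() needs no such care, since equality is wrap-agnostic.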
 
 static int
@@ -250,7 +259,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
 		tcpstat.tcps_mptcp_rcvmemdrop++;
 		m_freem(m);
 		*tlenp = 0;
-		return (0);
+		return 0;
 	}
 
 	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
@@ -258,7 +267,7 @@
 	if (te == NULL) {
 		tcpstat.tcps_mptcp_rcvmemdrop++;
 		m_freem(m);
-		return (0);
+		return 0;
 	}
 	mp_tp->mpt_reassqlen++;
 
@@ -267,8 +276,9 @@
 	 * Find a segment which begins after this one does.
 	 */
 	LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
-		if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn))
+		if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
 			break;
+		}
 		p = q;
 	}
 
@@ -310,8 +320,9 @@
 	 */
 	while (q) {
 		int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
-		if (i <= 0)
+		if (i <= 0) {
 			break;
+		}
 
 		if (i < q->tqe_len) {
 			q->tqe_m->m_pkthdr.mp_dsn += i;
@@ -340,7 +351,7 @@
 	}
 
 out:
-	return (mptcp_reass_present(mp_so));
+	return mptcp_reass_present(mp_so);
 }
 
 /*
@@ -357,11 +368,11 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
 
 	VERIFY(m->m_flags & M_PKTHDR);
 
-	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
-
 	mp_so = mptetoso(mpte);
 	mp_tp = mpte->mpte_mptcb;
 
+	socket_lock_assert_owned(mp_so);
+
 	DTRACE_MPTCP(input);
 
 	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
@@ -383,11 +394,30 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
 fallback:
 		mptcp_sbrcv_grow(mp_tp);
 
-		for (iter = m; iter; iter = iter->m_next) {
+		iter = m;
+		while (iter) {
 			if ((iter->m_flags & M_PKTHDR) &&
 			    (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
 				mb_dfin = 1;
-				break;
+			}
+
+			if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
+				/* Don't add zero-length packets, so jump it! */
+				if (prev == NULL) {
+					m = iter->m_next;
+					m_free(iter);
+					iter = m;
+				} else {
+					prev->m_next = iter->m_next;
+					m_free(iter);
+					iter = prev->m_next;
+				}
+
+				/* It was a zero-length packet so next one must be a pkthdr */
+				VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
+			} else {
+				prev = iter;
+				iter = iter->m_next;
 			}
 		}
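The rewritten loop is the classic unlink-with-prev pattern for a singly linked list: the DFIN scan no longer breaks early, and any zero-length mbuf is spliced out so the socket buffer never sees an empty packet. The same pattern in isolation, on a hypothetical node type with free() standing in for m_free() (sketch, not kernel code):

    #include <stdlib.h>

    struct node {
            struct node *next;
            int len;
    };

    /* Unlink and free every zero-length node; returns the new head. */
    static struct node *
    drop_empty(struct node *head)
    {
            struct node *prev = NULL, *iter = head;

            while (iter != NULL) {
                    if (iter->len == 0) {
                            struct node *next = iter->next;

                            if (prev == NULL)       /* removing the head */
                                    head = next;
                            else                    /* splice around iter */
                                    prev->next = next;
                            free(iter);
                            iter = next;            /* prev stays put */
                    } else {
                            prev = iter;
                            iter = iter->next;
                    }
            }
            return head;
    }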
@@ -395,8 +425,9 @@ fallback:
 		 * assume degraded flow as this may be the first packet
 		 * without DSS, and the subflow state is not updated yet.
 		 */
-		if (sbappendstream_rcvdemux(mp_so, m, 0, 0))
+		if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
 			sorwakeup(mp_so);
+		}
 
 		DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
 		    struct socket *, mp_so,
@@ -411,9 +442,6 @@ fallback:
 			mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
 			socantrcvmore(mp_so);
 		}
-
-		mptcplog((LOG_DEBUG, "%s: Fallback read %d bytes\n", __func__,
-		    count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 		return;
 	}
 
@@ -423,9 +451,12 @@ fallback:
 		int64_t todrop;
 		int mb_dfin = 0;
 
+		VERIFY(m->m_flags & M_PKTHDR);
+
 		/* If fallback occurs, mbufs will not have PKTF_MPTCP set */
-		if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+		if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
 			goto fallback;
+		}
 
 		save = m->m_next;
 		/*
@@ -442,10 +473,11 @@ fallback:
 			prev = save;
 			save = save->m_next;
 		}
-		if (prev)
+		if (prev) {
 			prev->m_next = NULL;
-		else
+		} else {
 			m->m_next = NULL;
+		}
 
 		mb_dsn = m->m_pkthdr.mp_dsn;
 		mb_datalen = m->m_pkthdr.mp_rlen;
@@ -454,16 +486,23 @@ fallback:
 
 		if (todrop > 0) {
 			tcpstat.tcps_mptcp_rcvpackafterwin++;
 
+			os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
+			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+			    (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
+			    mp_tp->mpt_rcvwnd, todrop);
+
 			if (todrop >= mb_datalen) {
-				if (freelist == NULL)
+				if (freelist == NULL) {
 					freelist = m;
-				else
+				} else {
 					tail->m_next = m;
+				}
 
-				if (prev != NULL)
+				if (prev != NULL) {
 					tail = prev;
-				else
+				} else {
 					tail = m;
+				}
 
 				m = save;
 				prev = save = NULL;
@@ -471,6 +510,7 @@ fallback:
 			} else {
 				m_adj(m, -todrop);
 				mb_datalen -= todrop;
+				m->m_pkthdr.mp_rlen -= todrop;
 			}
 
 			/*
@@ -480,42 +520,46 @@ fallback:
 			m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
 		}
 
-		if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
-		    !LIST_EMPTY(&mp_tp->mpt_segq)) {
-			mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
-
-			goto next;
-		}
-		mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
-
 		if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
 			if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen), mp_tp->mpt_rcvnxt)) {
-				if (freelist == NULL)
+				if (freelist == NULL) {
 					freelist = m;
-				else
+				} else {
 					tail->m_next = m;
+				}
 
-				if (prev != NULL)
+				if (prev != NULL) {
 					tail = prev;
-				else
+				} else {
 					tail = m;
+				}
 
 				m = save;
 				prev = save = NULL;
 				continue;
 			} else {
 				m_adj(m, (mp_tp->mpt_rcvnxt - mb_dsn));
+				mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
+				mb_dsn = mp_tp->mpt_rcvnxt;
+				m->m_pkthdr.mp_rlen = mb_datalen;
+				m->m_pkthdr.mp_dsn = mb_dsn;
 			}
-			mptcplog((LOG_INFO, "%s: Left Edge %llu\n", __func__,
-			    mp_tp->mpt_rcvnxt),
-			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 		}
 
+		if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
+		    !LIST_EMPTY(&mp_tp->mpt_segq)) {
+			mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
+
+			goto next;
+		}
+		mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+
 		mptcp_sbrcv_grow(mp_tp);
 
-		if (sbappendstream_rcvdemux(mp_so, m, 0, 0))
+		if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
 			wakeup = 1;
+		}
 
 		DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
 		    struct sockbuf *, &mp_so->so_rcv,
@@ -525,8 +569,6 @@ fallback:
 		count = mp_so->so_rcv.sb_cc - count;
 		tcpstat.tcps_mp_rcvtotal++;
 		tcpstat.tcps_mp_rcvbytes += count;
-		mptcplog((LOG_DEBUG, "%s: Read %d bytes\n", __func__, count),
-		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
 		mp_tp->mpt_rcvnxt += count;
 
@@ -540,23 +582,26 @@ next:
 		count = mp_so->so_rcv.sb_cc;
 	} while (m);
 
-	if (freelist)
+	if (freelist) {
 		m_freem(freelist);
+	}
 
-	if (wakeup)
+	if (wakeup) {
 		sorwakeup(mp_so);
+	}
 }
 
-static boolean_t
-mptcp_can_send_more(struct mptcb *mp_tp)
+boolean_t
+mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
 {
 	struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
 
 	/*
 	 * Always send if there is data in the reinject-queue.
 	 */
-	if (mp_tp->mpt_mpte->mpte_reinjectq)
-		return (TRUE);
+	if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
+		return TRUE;
+	}
 
 	/*
 	 * Don't send, if:
@@ -567,19 +612,23 @@ mptcp_can_send_more(struct mptcb *mp_tp)
 	 * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
 	 */
 
-	if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax))
-		return (FALSE);
+	if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
+		return FALSE;
+	}
 
-	if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt))
-		return (FALSE);
+	if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
+		return FALSE;
+	}
 
-	if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
-		return (FALSE);
+	if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+		return FALSE;
+	}
 
-	if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2)
-		return (FALSE);
+	if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
+		return FALSE;
+	}
 
-	return (TRUE);
+	return TRUE;
 }
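A worked example of the window check, with made-up numbers and the macro semantics sketched earlier:

    /* Hypothetical send-state snapshot. */
    uint64_t snduna = 1000;  /* oldest unacknowledged DSN */
    uint64_t sndwnd = 4096;  /* peer-advertised send window */
    uint64_t sndnxt = 5096;  /* next DSN to send */

    /*
     * Right edge = snduna + sndwnd = 5096. sndnxt has reached it, so
     * MPTCP_SEQ_LEQ(snduna + sndwnd, sndnxt) is true and
     * mptcp_can_send_more() returns FALSE until an ACK advances
     * snduna or the peer reopens the window.
     */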
 
 /*
@@ -596,41 +645,29 @@ mptcp_output(struct mptses *mpte)
 	uint64_t old_snd_nxt;
 	int error = 0;
 
-	mpte_lock_assert_held(mpte);
 	mp_so = mptetoso(mpte);
+	socket_lock_assert_owned(mp_so);
 	mp_tp = mpte->mpte_mptcb;
 
 	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
 	mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;
 
-	mptcplog((LOG_DEBUG, "%s: snxt %u sndmax %u suna %u swnd %u reinjectq %u state %u\n",
-	    __func__, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
-	    (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_sndwnd,
-	    mpte->mpte_reinjectq ? 1 : 0,
-	    mp_tp->mpt_state),
-	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
 	old_snd_nxt = mp_tp->mpt_sndnxt;
-	while (mptcp_can_send_more(mp_tp)) {
+	while (mptcp_can_send_more(mp_tp, FALSE)) {
 		/* get the "best" subflow to be used for transmission */
-		mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts);
+		mpts = mptcp_get_subflow(mpte, &preferred_mpts);
 		if (mpts == NULL) {
 			mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
 			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 			break;
 		}
 
-		mptcplog((LOG_DEBUG, "%s: using id %u\n", __func__, mpts->mpts_connid),
-		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
 		/* In case there's just one flow, we reattempt later */
 		if (mpts_tried != NULL &&
 		    (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
 			mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
 			mpts_tried->mpts_flags |= MPTSF_ACTIVE;
 			mptcp_start_timer(mpte, MPTT_REXMT);
-			mptcplog((LOG_DEBUG, "%s: retry later\n", __func__),
-			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 			break;
 		}
 
@@ -650,11 +687,6 @@ mptcp_output(struct mptses *mpte)
 				    min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
 				    tcp_autosndbuf_max)) == 1) {
 					mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
-
-					mptcplog((LOG_DEBUG, "%s: increased snd hiwat to %u lowat %u\n",
-					    __func__, mp_so->so_snd.sb_hiwat,
-					    mp_so->so_snd.sb_lowat),
-					    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 				}
 			}
 		}
@@ -667,9 +699,11 @@ mptcp_output(struct mptses *mpte)
 			mpts->mpts_flags |= MPTSF_FAILINGOVER;
 			mpts->mpts_flags &= ~MPTSF_ACTIVE;
 			mpts_tried = mpts;
-			mptcplog((LOG_ERR, "%s: Error = %d mpts_flags %#x\n", __func__,
-			    error, mpts->mpts_flags),
-			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
+			if (error != ECANCELED) {
+				os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
+				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+				    error, mpts->mpts_flags);
+			}
 			break;
 		}
 		/* The model is to have only one active flow at a time */
@@ -695,14 +729,6 @@
 		if (mpte->mpte_active_sub == NULL) {
 			mpte->mpte_active_sub = mpts;
 		} else if (mpte->mpte_active_sub != mpts) {
-			struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
-			struct tcpcb *acttp = sototcpcb(mpte->mpte_active_sub->mpts_socket);
-
-			mptcplog((LOG_DEBUG, "%s: switch [%u, srtt %d] to [%u, srtt %d]\n", __func__,
-			    mpte->mpte_active_sub->mpts_connid, acttp->t_srtt >> TCP_RTT_SHIFT,
-			    mpts->mpts_connid, tp->t_srtt >> TCP_RTT_SHIFT),
-			    (MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG), MPTCP_LOGLVL_LOG);
-
 			mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
 			mpte->mpte_active_sub = mpts;
 
@@ -710,10 +736,17 @@
 		}
 	}
 
+	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+		if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
+		    mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
+			mptcp_finish_usrclosed(mpte);
+		}
+	}
+
 	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);
 
 	/* subflow errors should not be percolated back up */
-	return (0);
+	return 0;
 }
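With the ignore argument gone from mptcp_get_subflow() (see its new signature below), every call site reduces to the shape used in mptcp_output() above; the out-parameter still reports the preferred subflow even when a different one gets scheduled:

    struct mptsub *preferred_mpts = NULL;
    struct mptsub *mpts;

    /* Pick the best subflow for this burst; learn the preferred one too. */
    mpts = mptcp_get_subflow(mpte, &preferred_mpts);
    if (mpts == NULL) {
            /* No usable subflow right now; retry via the rexmt timer. */
    }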
 
@@ -729,9 +762,9 @@ mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
 	 */
 	if (tp->t_srtt && *currtt > tp->t_srtt &&
 	    (curbest == NULL || tp->t_rxtshift == 0 ||
-	     sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
+	    sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
 		*currtt = tp->t_srtt;
-		return (mpts);
+		return mpts;
 	}
 
 	/*
@@ -741,26 +774,41 @@
 	    sototcpcb(curbest->mpts_socket)->t_rxtshift &&
 	    tp->t_rxtshift == 0) {
 		*currtt = tp->t_srtt;
-		return (mpts);
+		return mpts;
 	}
 
-	return (curbest != NULL ? curbest : mpts);
+	return curbest != NULL ? curbest : mpts;
 }
 
 static struct mptsub *
 mptcp_return_subflow(struct mptsub *mpts)
 {
-	if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0)
-		return (NULL);
+	if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
+		return NULL;
+	}
 
-	return (mpts);
+	return mpts;
+}
+
+static boolean_t
+mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
+{
+	struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
+	int fail_thresh = mptcp_fail_thresh;
+
+	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+		fail_thresh *= 2;
+	}
+
+	return tp->t_rxtshift >= fail_thresh &&
+	       (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
 }
 
 /*
  * Return the most eligible subflow to be used for sending data.
  */
 struct mptsub *
-mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred)
+mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
 {
 	struct tcpcb *besttp, *secondtp;
 	struct inpcb *bestinp, *secondinp;
@@ -779,23 +827,25 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
 		struct tcpcb *tp = sototcpcb(so);
 		struct inpcb *inp = sotoinpcb(so);
 
-		mptcplog((LOG_DEBUG, "%s mpts %u ignore %d, mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
-		    __func__, mpts->mpts_connid, ignore ? ignore->mpts_connid : -1, mpts->mpts_flags,
-		    INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
-		    inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
-		    tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
-		    mptcp_subflow_cwnd_space(so)),
-		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+		mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
+		    __func__, mpts->mpts_connid, mpts->mpts_flags,
+		    INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
+		    inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
+		    tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
+		    mptcp_subflow_cwnd_space(so)),
+		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
 		/*
 		 * First, the hard conditions to reject subflows
 		 * (e.g., not connected,...)
 		 */
-		if (mpts == ignore || inp->inp_last_outifp == NULL)
+		if (inp->inp_last_outifp == NULL) {
 			continue;
+		}
 
-		if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+		if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
 			continue;
+		}
 
 		/* There can only be one subflow in degraded state */
 		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
@@ -806,50 +856,57 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
 		/*
 		 * If this subflow is waiting to finally send, do it!
 		 */
-		if (so->so_flags1 & SOF1_PRECONNECT_DATA)
-			return (mptcp_return_subflow(mpts));
+		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+			return mptcp_return_subflow(mpts);
+		}
 
 		/*
 		 * Only send if the subflow is MP_CAPABLE. The exceptions to
 		 * this rule (degraded or TFO) have been taken care of above.
 		 */
-		if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE))
+		if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
 			continue;
+		}
 
 		if ((so->so_state & SS_ISDISCONNECTED) ||
 		    !(so->so_state & SS_ISCONNECTED) ||
 		    !TCPS_HAVEESTABLISHED(tp->t_state) ||
-		    tp->t_state > TCPS_CLOSE_WAIT)
+		    tp->t_state > TCPS_CLOSE_WAIT) {
 			continue;
+		}
 
 		/*
 		 * Second, the soft conditions to find the subflow with best
 		 * conditions for each set (aka cellular vs non-cellular)
 		 */
-		if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
+		if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
 			second_best = mptcp_choose_subflow(mpts, second_best,
-							   &exp_rtt);
-		else
+			    &exp_rtt);
+		} else {
 			best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
+		}
 	}
 
 	/*
 	 * If there is no preferred or backup subflow, and there is no active
 	 * subflow use the last usable subflow.
 	 */
-	if (best == NULL)
-		return (mptcp_return_subflow(second_best));
+	if (best == NULL) {
+		return mptcp_return_subflow(second_best);
+	}
 
-	if (second_best == NULL)
-		return (mptcp_return_subflow(best));
+	if (second_best == NULL) {
+		return mptcp_return_subflow(best);
+	}
 
 	besttp = sototcpcb(best->mpts_socket);
 	bestinp = sotoinpcb(best->mpts_socket);
 	secondtp = sototcpcb(second_best->mpts_socket);
 	secondinp = sotoinpcb(second_best->mpts_socket);
 
-	if (preferred != NULL)
+	if (preferred != NULL) {
 		*preferred = mptcp_return_subflow(best);
+	}
 
 	/*
 	 * Second Step: Among best and second_best. Choose the one that is
@@ -859,19 +916,19 @@
 		/*
 		 * Only handover if Symptoms tells us to do so.
 		 */
-		if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-		    mptcp_is_wifi_unusable() &&
-		    besttp->t_rxtshift >= mptcp_fail_thresh)
-			return (mptcp_return_subflow(second_best));
+		if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+		    mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
+			return mptcp_return_subflow(second_best);
+		}
 
-		return (mptcp_return_subflow(best));
+		return mptcp_return_subflow(best);
 	} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
 		int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
 		int rto_thresh = mptcp_rtothresh;
 
 		/* Adjust with symptoms information */
-		if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-		    mptcp_is_wifi_unusable()) {
+		if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+		    mptcp_is_wifi_unusable_for_session(mpte) != 0) {
 			rtt_thresh /= 2;
 			rto_thresh /= 2;
 		}
@@ -885,12 +942,12 @@
 			    second_best->mpts_connid,
 			    secondtp->t_srtt >> TCP_RTT_SHIFT),
 			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 
-			return (mptcp_return_subflow(second_best));
+			return mptcp_return_subflow(second_best);
 		}
 
-		if (besttp->t_rxtshift >= mptcp_fail_thresh &&
+		if (mptcp_subflow_is_slow(mpte, best) &&
 		    secondtp->t_rxtshift == 0) {
-			return (mptcp_return_subflow(second_best));
+			return mptcp_return_subflow(second_best);
 		}
 
 		/* Compare RTOs, select second_best if best's rto exceeds rtothresh */
@@ -903,7 +960,7 @@
 			    second_best->mpts_connid, secondtp->t_rxtcur),
 			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 
-			return (mptcp_return_subflow(second_best));
+			return mptcp_return_subflow(second_best);
 		}
 
 		/*
@@ -911,8 +968,8 @@
 		 * were true. So, let's schedule on the best one, if he still
 		 * has some space in the congestion-window.
 		 */
-		return (mptcp_return_subflow(best));
-	} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_AGGREGATE) {
+		return mptcp_return_subflow(best);
+	} else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
 		struct mptsub *tmp;
 
 		/*
@@ -930,15 +987,16 @@
 		}
 
 		/* Is there still space in the congestion window? */
-		if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0)
-			return (mptcp_return_subflow(second_best));
+		if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
+			return mptcp_return_subflow(second_best);
+		}
 
-		return (mptcp_return_subflow(best));
+		return mptcp_return_subflow(best);
 	} else {
 		panic("Unknown service-type configured for MPTCP");
 	}
 
-	return (NULL);
+	return NULL;
 }
 
 static const char *
@@ -956,13 +1014,13 @@ mptcp_event_to_str(uint32_t event)
 		c = "MPCE_RECV_DATA_FIN";
 		break;
 	}
-	return (c);
+	return c;
 }
 
 static const char *
 mptcp_state_to_str(mptcp_state_t state)
 {
-	const char *c = "UNDEFINED";
+	const char *c = "UNDEFINED";
 	switch (state) {
 	case MPTCPS_CLOSED:
 		c = "MPTCPS_CLOSED";
 		break;
@@ -995,13 +1053,16 @@
 		c = "MPTCPS_TERMINATE";
 		break;
 	}
-	return (c);
+	return c;
 }
 
 void
 mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
 {
-	mpte_lock_assert_held(mp_tp->mpt_mpte);
+	struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
+
+	socket_lock_assert_owned(mp_so);
+
 	mptcp_state_t old_state = mp_tp->mpt_state;
 
 	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
@@ -1010,7 +1071,7 @@
 	switch (mp_tp->mpt_state) {
 	case MPTCPS_CLOSED:
 	case MPTCPS_LISTEN:
-		mp_tp->mpt_state = MPTCPS_CLOSED;
+		mp_tp->mpt_state = MPTCPS_TERMINATE;
 		break;
 
 	case MPTCPS_ESTABLISHED:
@@ -1040,13 +1101,15 @@
 		break;
 
 	case MPTCPS_CLOSING:
-		if (event == MPCE_RECV_DATA_ACK)
+		if (event == MPCE_RECV_DATA_ACK) {
 			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
+		}
 		break;
 
 	case MPTCPS_LAST_ACK:
-		if (event == MPCE_RECV_DATA_ACK)
+		if (event == MPCE_RECV_DATA_ACK) {
 			mptcp_close(mp_tp->mpt_mpte, mp_tp);
+		}
 		break;
 
 	case MPTCPS_FIN_WAIT_2:
@@ -1090,7 +1153,6 @@ mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
 
 	mptcp_update_rcv_state_meat(mp_tp, tp, full_dsn,
 	    dss_info->mdss_subflow_seqn, dss_info->mdss_data_len, csum);
-
 }
 
 void
@@ -1099,20 +1161,16 @@ mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
     uint16_t csum)
 {
 	if (mdss_data_len == 0) {
-		mptcplog((LOG_INFO, "%s: Infinite Mapping.\n", __func__),
-		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+		os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
+		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
 
 		if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
-			mptcplog((LOG_ERR, "%s: Bad checksum %x \n", __func__,
-			    csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
+			os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
+			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
 		}
 		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
 		return;
 	}
-	mptcplog((LOG_DEBUG,
-	    "%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", __func__,
-	    seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt),
-	    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
 	mptcp_notify_mpready(tp->t_inpcb->inp_socket);
@@ -1130,16 +1188,16 @@ mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
 {
 	u_int32_t datalen;
 
-	if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+	if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
 		return 0;
+	}
 
 	datalen = m->m_pkthdr.mp_rlen;
 
 	/* unacceptable DSS option, fallback to TCP */
 	if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
-		mptcplog((LOG_ERR, "%s: mbuf len %d, MPTCP expected %d",
-		    __func__, m->m_pkthdr.len, datalen),
-		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+		os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
+		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
 	} else {
 		return 0;
 	}
@@ -1151,12 +1209,13 @@
 
 int
 mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-	int drop_hdrlen)
+    int drop_hdrlen)
 {
 	mptcp_insert_rmap(tp, m, th);
 	if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
-	    drop_hdrlen) != 0)
+	    drop_hdrlen) != 0) {
 		return -1;
+	}
 	return 0;
 }
 
@@ -1170,7 +1229,7 @@
 
 int
 mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
-	uint32_t sseq, uint16_t dlen, uint16_t csum, uint16_t dfin)
+    uint32_t sseq, uint16_t dlen, uint16_t csum, uint16_t dfin)
 {
 	uint16_t mptcp_csum;
 
@@ -1180,38 +1239,42 @@
 		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
 		m_freem(m);
 		tcpstat.tcps_mp_badcsum++;
-		return (-1);
+		return -1;
 	}
-	return (0);
+	return 0;
 }
 
 static uint16_t
 mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
-	uint16_t dlen, uint16_t csum, uint16_t dfin)
+    uint16_t dlen, uint16_t csum, uint16_t dfin)
 {
 	struct mptcb *mp_tp = tptomptp(tp);
 	uint16_t real_len = dlen - dfin;
 	uint32_t sum = 0;
 
-	if (mp_tp == NULL)
-		return (0);
+	if (mp_tp == NULL) {
+		return 0;
+	}
 
-	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
-		return (0);
+	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
+		return 0;
+	}
 
-	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
-		return (0);
+	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
+		return 0;
+	}
 
 	/*
 	 * The remote side may send a packet with fewer bytes than the
 	 * claimed DSS checksum length.
 	 */
 	if ((int)m_length2(m, NULL) < real_len) {
-		return (0xffff);
+		return 0xffff;
 	}
 
-	if (real_len != 0)
+	if (real_len != 0) {
 		sum = m_sum16(m, 0, real_len);
+	}
 
 	sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
 	ADDCARRY(sum);
@@ -1220,16 +1283,17 @@ mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
 	mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
 	    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
-	return (~sum & 0xffff);
+	return ~sum & 0xffff;
 }
 
 uint32_t
 mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
 {
-	u_int32_t sum = 0;
+	uint32_t sum = 0;
 
-	if (dlen)
+	if (dlen) {
 		sum = m_sum16(m, 0, dlen);
+	}
 
 	dss_val = mptcp_hton64(dss_val);
 	sseq = htonl(sseq);
@@ -1240,7 +1304,7 @@ mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen
 	sum = ~sum & 0xffff;
 	DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
 	mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
-		MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
+	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 
 	return sum;
 }
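For reference, the DSS checksum that mptcp_input_csum() verifies and mptcp_output_csum() generates is the standard Internet one's-complement checksum over the payload plus a pseudo-header carrying the 64-bit DSN, the 32-bit subflow sequence number, the 16-bit data-level length, and the 16-bit checksum field (RFC 6824). A plain-buffer sketch of the same computation (illustration only; the kernel walks mbuf chains with m_sum16() and folds the pseudo-header through in_pseudo64()):

    #include <stdint.h>
    #include <stddef.h>

    /* 16-bit one's-complement accumulation, big-endian pairs,
     * odd trailing byte zero-padded. */
    static uint32_t
    sum16(const uint8_t *p, size_t len, uint32_t sum)
    {
            while (len > 1) {
                    sum += ((uint32_t)p[0] << 8) | p[1];
                    p += 2;
                    len -= 2;
            }
            if (len)
                    sum += (uint32_t)p[0] << 8;
            return sum;
    }

    static uint16_t
    dss_csum(uint64_t dsn, uint32_t ssn, uint16_t dlen, uint16_t csum_field,
        const uint8_t *payload, size_t len)
    {
            uint8_t ph[16];
            uint32_t sum;
            int i;

            /* Pseudo-header in network byte order: DSN, SSN, length, csum. */
            for (i = 0; i < 8; i++)
                    ph[i] = (uint8_t)(dsn >> (56 - 8 * i));
            for (i = 0; i < 4; i++)
                    ph[8 + i] = (uint8_t)(ssn >> (24 - 8 * i));
            ph[12] = (uint8_t)(dlen >> 8);
            ph[13] = (uint8_t)dlen;
            ph[14] = (uint8_t)(csum_field >> 8);
            ph[15] = (uint8_t)csum_field;

            sum = sum16(payload, len, sum16(ph, sizeof(ph), 0));
            while (sum >> 16)       /* fold carries, like ADDCARRY() */
                    sum = (sum & 0xffff) + (sum >> 16);
            return (uint16_t)(~sum & 0xffff);
    }

On the sender csum_field is zero and the return value is the checksum to place in the DSS option; on the receiver, summing with the received checksum included yields 0xffff before complementing, so a valid segment comes back as 0 — the same "non-zero means corrupt" convention mptcp_validate_csum() applies.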
 
@@ -1263,14 +1327,13 @@ mptcp_no_rto_spike(struct socket *so)
 		    __func__, spike,
 		    tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
 		    tp->t_rttcur),
-		    (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
-
+		    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
 	}
 
-	if (spike > 0 ) {
-		return (FALSE);
+	if (spike > 0) {
+		return FALSE;
 	} else {
-		return (TRUE);
+		return TRUE;
 	}
 }
 
@@ -1280,8 +1343,9 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
 	VERIFY(mpp->mpp_flags & flag);
 	mpp->mpp_flags &= ~flag;
 
-	if (mptcp_should_defer_upcall(mpp))
+	if (mptcp_should_defer_upcall(mpp)) {
 		return;
+	}
 
 	if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
 		mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;
@@ -1300,40 +1364,32 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
 
 		sowwakeup(mpp->mpp_socket);
 	}
-
-	if (mpp->mpp_flags & MPP_SET_CELLICON) {
-		mpp->mpp_flags &= ~MPP_SET_CELLICON;
-
-		mptcp_set_cellicon(mpp->mpp_pcbe);
-	}
-
-	if (mpp->mpp_flags & MPP_UNSET_CELLICON) {
-		mpp->mpp_flags &= ~MPP_UNSET_CELLICON;
-
-		mptcp_unset_cellicon();
-	}
 }
 
-static void
+void
 mptcp_ask_for_nat64(struct ifnet *ifp)
 {
 	in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);
 
-	mptcplog((LOG_DEBUG, "%s: asked for NAT64-prefix on %s\n",
-	    __func__, ifp->if_name), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+	os_log_info(mptcp_log_handle,
+	    "%s: asked for NAT64-prefix on %s\n", __func__,
+	    ifp->if_name);
 }
 
 static void
 mptcp_reset_itfinfo(struct mpt_itf_info *info)
 {
-	info->ifindex = 0;
-	info->has_v4_conn = 0;
-	info->has_v6_conn = 0;
+	memset(info, 0, sizeof(*info));
 }
 
 void
-mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
+mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
+    uint32_t necp_flags, __unused bool *viable)
 {
+	boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
+	boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+	boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
+	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
 	struct mppcb *mp = (struct mppcb *)handle;
 	struct mptses *mpte = mptompte(mp);
 	struct socket *mp_so;
@@ -1341,130 +1397,171 @@ mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
 	int locked = 0;
 	uint32_t i, ifindex;
 
-	ifindex = flow->interface_index;
+	ifindex = interface_index;
 	VERIFY(ifindex != IFSCOPE_NONE);
 
-	/* ToDo - remove after rdar://problem/32007628 */
-	if (!IF_INDEX_IN_RANGE(ifindex))
-		printf("%s 1 ifindex %u not in range of flow %p action %d\n",
-		    __func__, ifindex, flow, action);
-
 	/* About to be garbage-collected (see note about MPTCP/NECP interactions) */
-	if (mp->mpp_socket->so_usecount == 0)
+	if (mp->mpp_socket->so_usecount == 0) {
 		return;
+	}
+
+	mp_so = mptetoso(mpte);
 
 	if (action != NECP_CLIENT_CBACTION_INITIAL) {
-		mpte_lock(mpte);
+		socket_lock(mp_so, 1);
 		locked = 1;
 
 		/* Check again, because it might have changed while waiting */
-		if (mp->mpp_socket->so_usecount == 0)
+		if (mp->mpp_socket->so_usecount == 0) {
 			goto out;
+		}
 	}
 
+	socket_lock_assert_owned(mp_so);
+
 	mp_tp = mpte->mpte_mptcb;
-	mp_so = mptetoso(mpte);
 
-	mptcplog((LOG_DEBUG, "%s, action: %u ifindex %u usecount %u mpt_flags %#x state %u\n",
-	    __func__, action, ifindex, mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state),
-	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+	os_log_info(mptcp_log_handle, "%s - %lx: action: %u ifindex %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
+	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
+	    mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
+	    has_v4, has_v6, has_nat64, low_power);
 
 	/* No need on fallen back sockets */
-	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)
+	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
 		goto out;
+	}
+
+	/*
+	 * When the interface goes in low-power mode we don't want to establish
+	 * new subflows on it. Thus, mark it internally as non-viable.
+	 */
+	if (low_power) {
+		action = NECP_CLIENT_CBACTION_NONVIABLE;
+	}
 
 	if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
 		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
-			if (mpte->mpte_itfinfo[i].ifindex == ifindex)
+			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
+				continue;
+			}
+
+			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
 				mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
+			}
 		}
 
 		mptcp_sched_create_subflows(mpte);
 	} else if (action == NECP_CLIENT_CBACTION_VIABLE ||
-		   action == NECP_CLIENT_CBACTION_INITIAL) {
-		int found_empty = 0, empty_index = -1;
+	    action == NECP_CLIENT_CBACTION_INITIAL) {
+		int found_slot = 0, slot_index = -1;
+		struct sockaddr *dst;
 		struct ifnet *ifp;
 
-		/* ToDo - remove after rdar://problem/32007628 */
-		if (!IF_INDEX_IN_RANGE(ifindex))
-			printf("%s 2 ifindex %u not in range of flow %p action %d\n",
-			    __func__, ifindex, flow, action);
-
 		ifnet_head_lock_shared();
 		ifp = ifindex2ifnet[ifindex];
 		ifnet_head_done();
 
-		/* ToDo - remove after rdar://problem/32007628 */
-		if (!IF_INDEX_IN_RANGE(ifindex))
-			printf("%s 3 ifindex %u not in range of flow %p action %d\n",
-			    __func__, ifindex, flow, action);
-
-		if (ifp == NULL)
+		if (ifp == NULL) {
 			goto out;
+		}
 
 		if (IFNET_IS_EXPENSIVE(ifp) &&
-		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
 			goto out;
+		}
+
+		if (IFNET_IS_CONSTRAINED(ifp) &&
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+			goto out;
+		}
 
 		if (IFNET_IS_CELLULAR(ifp) &&
-		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
 			goto out;
+		}
 
+		if (IS_INTF_CLAT46(ifp)) {
+			has_v4 = FALSE;
+		}
+
+		/* Look for the slot on where to store/update the interface-info. */
 		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
+			/* Found a potential empty slot where we can put it */
 			if (mpte->mpte_itfinfo[i].ifindex == 0) {
-				found_empty = 1;
-				empty_index = i;
+				found_slot = 1;
+				slot_index = i;
+			}
+
+			/*
+			 * The interface is already in our array. Check if we
+			 * need to update it.
+			 */
+			if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
+			    (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
+			    mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
+			    mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
+				found_slot = 1;
+				slot_index = i;
+				break;
 			}
 
 			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
-				/* Ok, it's already there */
+				/*
+				 * Ok, it's already there and we don't need
+				 * to update it
+				 */
 				goto out;
 			}
 		}
 
-		if ((mpte->mpte_dst.sa_family == AF_INET || mpte->mpte_dst.sa_family == 0) &&
-		    !(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4) &&
-		    ifnet_get_nat64prefix(ifp, NULL) == ENOENT) {
+		dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
+		if (dst && (dst->sa_family == AF_INET || dst->sa_family == 0) &&
+		    has_v6 && !has_nat64 && !has_v4) {
+			if (found_slot) {
+				mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+				mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+				mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
+			}
 			mptcp_ask_for_nat64(ifp);
 			goto out;
 		}
 
-		if (found_empty == 0) {
+		if (found_slot == 0) {
 			int new_size = mpte->mpte_itfinfo_size * 2;
 			struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);
 
 			if (info == NULL) {
-				mptcplog((LOG_ERR, "%s malloc failed for %u\n", __func__, new_size),
-				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+				os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
+				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
 				goto out;
 			}
 
 			memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));
 
-			if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE)
+			if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
 				_FREE(mpte->mpte_itfinfo, M_TEMP);
+			}
 
 			/* We allocated a new one, thus the first must be empty */
-			empty_index = mpte->mpte_itfinfo_size;
+			slot_index = mpte->mpte_itfinfo_size;
 
 			mpte->mpte_itfinfo = info;
 			mpte->mpte_itfinfo_size = new_size;
-
-			mptcplog((LOG_DEBUG, "%s Needed to realloc to %u\n", __func__, new_size),
-			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 		}
 
-		VERIFY(empty_index >= 0 && empty_index < (int)mpte->mpte_itfinfo_size);
-		mpte->mpte_itfinfo[empty_index].ifindex = ifindex;
-		mpte->mpte_itfinfo[empty_index].has_v4_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
-		mpte->mpte_itfinfo[empty_index].has_v6_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+		VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
+		mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+		mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+		mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+		mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
 
 		mptcp_sched_create_subflows(mpte);
 	}
 
 out:
-	if (locked)
-		mpte_unlock(mpte);
+	if (locked) {
+		socket_unlock(mp_so, 1);
+	}
 }
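The interface-info bookkeeping above grows its array geometrically: start from the MPTE_ITFINFO_SIZE slots that come with the session, double on overflow, and free the previous block only once it is itself a heap generation (that is what the MPTE_ITFINFO_SIZE comparison before _FREE guards). The bare pattern as a self-contained sketch — hypothetical names, plain calloc/free standing in for the kernel's _MALLOC/_FREE:

    #include <stdlib.h>
    #include <string.h>

    #define INLINE_SLOTS 4          /* stands in for MPTE_ITFINFO_SIZE */

    struct itf_info {
            unsigned int ifindex;
    };

    /*
     * Double the array of *sizep slots; returns the new array, or NULL
     * (leaving the old array untouched) on allocation failure.
     */
    static struct itf_info *
    grow_itfinfo(struct itf_info *array, unsigned int *sizep)
    {
            unsigned int new_size = *sizep * 2;
            struct itf_info *info = calloc(new_size, sizeof(*info));

            if (info == NULL)
                    return NULL;
            memcpy(info, array, *sizep * sizeof(*info));

            /* The first generation is embedded in the session object;
             * only later, heap-allocated generations may be freed. */
            if (*sizep > INLINE_SLOTS)
                    free(array);

            *sizep = new_size;
            return info;
    }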
 
 void
@@ -1473,7 +1570,7 @@ mptcp_set_restrictions(struct socket *mp_so)
 	struct mptses *mpte = mpsotompte(mp_so);
 	uint32_t i;
 
-	mpte_lock_assert_held(mpte);
+	socket_lock_assert_owned(mp_so);
 
 	ifnet_head_lock_shared();
 
@@ -1482,20 +1579,30 @@ mptcp_set_restrictions(struct socket *mp_so)
 		uint32_t ifindex = info->ifindex;
 		struct ifnet *ifp;
 
-		if (ifindex == IFSCOPE_NONE)
+		if (ifindex == IFSCOPE_NONE) {
 			continue;
+		}
 
 		ifp = ifindex2ifnet[ifindex];
+		if (ifp == NULL) {
+			continue;
+		}
 
 		if (IFNET_IS_EXPENSIVE(ifp) &&
-		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
+			info->ifindex = IFSCOPE_NONE;
+		}
+
+		if (IFNET_IS_CONSTRAINED(ifp) &&
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
 			info->ifindex = IFSCOPE_NONE;
+		}
 
 		if (IFNET_IS_CELLULAR(ifp) &&
-		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
 			info->ifindex = IFSCOPE_NONE;
+		}
 	}
 
 	ifnet_head_done();
 }
-