]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/mptcp.c
xnu-6153.11.26.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp.c
index 8cf437f64795a6865b477fe05616ae64599d4743..a2883309e5a9c756d20a56b522cd6e58b1536059 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 
 int mptcp_enable = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_enable, 0, "Enable Multipath TCP Support");
+    &mptcp_enable, 0, "Enable Multipath TCP Support");
 
-/* Number of times to try negotiating MPTCP on SYN retransmissions */
-int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
+/*
+ * Number of times to try negotiating MPTCP on SYN retransmissions.
+ * We haven't seen any reports of a middlebox that is dropping all SYN-segments
+ * that have an MPTCP-option. Thus, let's be generous and retransmit it 4 times.
+ */
+int mptcp_mpcap_retries = 4;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
-       CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
 
 /*
  * By default, DSS checksum is turned off, revisit if we ever do
@@ -123,7 +127,7 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
  */
 int mptcp_dss_csum = 0;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_dss_csum, 0, "Enable DSS checksum");
+    &mptcp_dss_csum, 0, "Enable DSS checksum");
 
 /*
  * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
@@ -131,7 +135,7 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
  */
 int mptcp_fail_thresh = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_fail_thresh, 0, "Failover threshold");
+    &mptcp_fail_thresh, 0, "Failover threshold");
 
 
 /*
@@ -139,46 +143,47 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
  * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
  * Some carrier networks have a timeout of 10 or 15 minutes.
  */
-int mptcp_subflow_keeptime = 60*14;
+int mptcp_subflow_keeptime = 60 * 14;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
+    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
 
 int mptcp_rtthist_rtthresh = 600;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
+    &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
 
 /*
  * Use RTO history for sending new data
  */
 int mptcp_use_rto = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_use_rto, 0, "Disable RTO for subflow selection");
+    &mptcp_use_rto, 0, "Disable RTO for subflow selection");
 
 int mptcp_rtothresh = 1500;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_rtothresh, 0, "RTO threshold");
+    &mptcp_rtothresh, 0, "RTO threshold");
 
 /*
  * Probe the preferred path, when it is not in use
  */
 uint32_t mptcp_probeto = 1000;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_probeto, 0, "Disable probing by setting to 0");
+    &mptcp_probeto, 0, "Disable probing by setting to 0");
 
 uint32_t mptcp_probecnt = 5;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_probecnt, 0, "Number of probe writes");
+    &mptcp_probecnt, 0, "Number of probe writes");
 
 /*
  * Static declarations
  */
 static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, uint64_t,
-                                uint32_t, uint16_t, uint16_t, uint16_t);
+    uint32_t, uint16_t, uint16_t, uint16_t);
 
 static int
 mptcp_reass_present(struct socket *mp_so)
 {
-       struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
+       struct mptses *mpte = mpsotompte(mp_so);
+       struct mptcb *mp_tp = mpte->mpte_mptcb;
        struct tseg_qent *q;
        int dowakeup = 0;
        int flags = 0;
@@ -187,19 +192,22 @@ mptcp_reass_present(struct socket *mp_so)
         * Present data to user, advancing rcv_nxt through
         * completed sequence space.
         */
-       if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
-               return (flags);
+       if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+               return flags;
+       }
        q = LIST_FIRST(&mp_tp->mpt_segq);
-       if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt)
-               return (flags);
+       if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
+               return flags;
+       }
 
        /*
         * If there is already another thread doing reassembly for this
         * connection, it is better to let it finish the job --
         * (radar 16316196)
         */
-       if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG)
-               return (flags);
+       if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
+               return flags;
+       }
 
        mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;
 
@@ -210,8 +218,9 @@ mptcp_reass_present(struct socket *mp_so)
                        m_freem(q->tqe_m);
                } else {
                        flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
-                       if (sbappendstream_rcvdemux(mp_so, q->tqe_m, 0, 0))
+                       if (sbappendstream_rcvdemux(mp_so, q->tqe_m, 0, 0)) {
                                dowakeup = 1;
+                       }
                }
                zfree(tcp_reass_zone, q);
                mp_tp->mpt_reassqlen--;
@@ -219,10 +228,10 @@ mptcp_reass_present(struct socket *mp_so)
        } while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
        mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;
 
-       if (dowakeup)
+       if (dowakeup) {
                sorwakeup(mp_so); /* done with socket lock held */
-       return (flags);
-
+       }
+       return flags;
 }
 
 static int
@@ -250,7 +259,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
                tcpstat.tcps_mptcp_rcvmemdrop++;
                m_freem(m);
                *tlenp = 0;
-               return (0);
+               return 0;
        }
 
        /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
@@ -258,7 +267,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
        if (te == NULL) {
                tcpstat.tcps_mptcp_rcvmemdrop++;
                m_freem(m);
-               return (0);
+               return 0;
        }
 
        mp_tp->mpt_reassqlen++;
@@ -267,8 +276,9 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
         * Find a segment which begins after this one does.
         */
        LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
-               if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn))
+               if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
                        break;
+               }
                p = q;
        }
 
@@ -310,8 +320,9 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
         */
        while (q) {
                int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
-               if (i <= 0)
+               if (i <= 0) {
                        break;
+               }
 
                if (i < q->tqe_len) {
                        q->tqe_m->m_pkthdr.mp_dsn += i;
@@ -340,7 +351,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
        }
 
 out:
-       return (mptcp_reass_present(mp_so));
+       return mptcp_reass_present(mp_so);
 }
 
 /*
@@ -357,11 +368,11 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
 
        VERIFY(m->m_flags & M_PKTHDR);
 
-       mpte_lock_assert_held(mpte);    /* same as MP socket lock */
-
        mp_so = mptetoso(mpte);
        mp_tp = mpte->mpte_mptcb;
 
+       socket_lock_assert_owned(mp_so);
+
        DTRACE_MPTCP(input);
 
        mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
@@ -383,11 +394,30 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
 fallback:
                mptcp_sbrcv_grow(mp_tp);
 
-               for (iter = m; iter; iter = iter->m_next) {
+               iter = m;
+               while (iter) {
                        if ((iter->m_flags & M_PKTHDR) &&
                            (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
                                mb_dfin = 1;
-                               break;
+                       }
+
+                       if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
+                               /* Don't add zero-length packets; unlink and free this one. */
+                               if (prev == NULL) {
+                                       m = iter->m_next;
+                                       m_free(iter);
+                                       iter = m;
+                               } else {
+                                       prev->m_next = iter->m_next;
+                                       m_free(iter);
+                                       iter = prev->m_next;
+                               }
+
+                               /* It was a zero-length packet so next one must be a pkthdr */
+                               VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
+                       } else {
+                               prev = iter;
+                               iter = iter->m_next;
                        }
                }
 
@@ -395,8 +425,9 @@ fallback:
                 * assume degraded flow as this may be the first packet
                 * without DSS, and the subflow state is not updated yet.
                 */
-               if (sbappendstream_rcvdemux(mp_so, m, 0, 0))
+               if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
                        sorwakeup(mp_so);
+               }
 
                DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
                    struct socket *, mp_so,
@@ -411,9 +442,6 @@ fallback:
                        mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
                        socantrcvmore(mp_so);
                }
-
-               mptcplog((LOG_DEBUG, "%s: Fallback read %d bytes\n", __func__,
-                   count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
                return;
        }
 
@@ -423,9 +451,12 @@ fallback:
                int64_t todrop;
                int mb_dfin = 0;
 
+               VERIFY(m->m_flags & M_PKTHDR);
+
                /* If fallback occurs, mbufs will not have PKTF_MPTCP set */
-               if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+               if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
                        goto fallback;
+               }
 
                save = m->m_next;
                /*
@@ -442,10 +473,11 @@ fallback:
                        prev = save;
                        save = save->m_next;
                }
-               if (prev)
+               if (prev) {
                        prev->m_next = NULL;
-               else
+               } else {
                        m->m_next = NULL;
+               }
 
                mb_dsn = m->m_pkthdr.mp_dsn;
                mb_datalen = m->m_pkthdr.mp_rlen;
@@ -454,16 +486,23 @@ fallback:
                if (todrop > 0) {
                        tcpstat.tcps_mptcp_rcvpackafterwin++;
 
+                       os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
+                           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+                           (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
+                           mp_tp->mpt_rcvwnd, todrop);
+
                        if (todrop >= mb_datalen) {
-                               if (freelist == NULL)
+                               if (freelist == NULL) {
                                        freelist = m;
-                               else
+                               } else {
                                        tail->m_next = m;
+                               }
 
-                               if (prev != NULL)
+                               if (prev != NULL) {
                                        tail = prev;
-                               else
+                               } else {
                                        tail = m;
+                               }
 
                                m = save;
                                prev = save = NULL;
@@ -471,6 +510,7 @@ fallback:
                        } else {
                                m_adj(m, -todrop);
                                mb_datalen -= todrop;
+                               m->m_pkthdr.mp_rlen -= todrop;
                        }
 
                        /*
@@ -480,42 +520,46 @@ fallback:
                        m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
                }
 
-               if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
-                   !LIST_EMPTY(&mp_tp->mpt_segq)) {
-                       mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
-
-                       goto next;
-               }
-               mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
-
                if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
                        if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
                            mp_tp->mpt_rcvnxt)) {
-                               if (freelist == NULL)
+                               if (freelist == NULL) {
                                        freelist = m;
-                               else
+                               } else {
                                        tail->m_next = m;
+                               }
 
-                               if (prev != NULL)
+                               if (prev != NULL) {
                                        tail = prev;
-                               else
+                               } else {
                                        tail = m;
+                               }
 
                                m = save;
                                prev = save = NULL;
                                continue;
                        } else {
                                m_adj(m, (mp_tp->mpt_rcvnxt - mb_dsn));
+                               mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
+                               mb_dsn = mp_tp->mpt_rcvnxt;
+                               m->m_pkthdr.mp_rlen = mb_datalen;
+                               m->m_pkthdr.mp_dsn = mb_dsn;
                        }
-                       mptcplog((LOG_INFO, "%s: Left Edge %llu\n", __func__,
-                           mp_tp->mpt_rcvnxt),
-                           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
                }
 
+               if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
+                   !LIST_EMPTY(&mp_tp->mpt_segq)) {
+                       mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
+
+                       goto next;
+               }
+               mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+
                mptcp_sbrcv_grow(mp_tp);
 
-               if (sbappendstream_rcvdemux(mp_so, m, 0, 0))
+               if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
                        wakeup = 1;
+               }
 
                DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
                    struct sockbuf *, &mp_so->so_rcv,
@@ -525,8 +569,6 @@ fallback:
                count = mp_so->so_rcv.sb_cc - count;
                tcpstat.tcps_mp_rcvtotal++;
                tcpstat.tcps_mp_rcvbytes += count;
-               mptcplog((LOG_DEBUG, "%s: Read %d bytes\n", __func__, count),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
                mp_tp->mpt_rcvnxt += count;
 
@@ -540,23 +582,26 @@ next:
                count = mp_so->so_rcv.sb_cc;
        } while (m);
 
-       if (freelist)
+       if (freelist) {
                m_freem(freelist);
+       }
 
-       if (wakeup)
+       if (wakeup) {
                sorwakeup(mp_so);
+       }
 }
 
-static boolean_t
-mptcp_can_send_more(struct mptcb *mp_tp)
+boolean_t
+mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
 {
        struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
 
        /*
         * Always send if there is data in the reinject-queue.
         */
-       if (mp_tp->mpt_mpte->mpte_reinjectq)
-               return (TRUE);
+       if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
+               return TRUE;
+       }
 
        /*
         * Don't send, if:
@@ -567,19 +612,23 @@ mptcp_can_send_more(struct mptcb *mp_tp)
         * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
         */
 
-       if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax))
-               return (FALSE);
+       if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
+               return FALSE;
+       }
 
-       if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt))
-               return (FALSE);
+       if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
+               return FALSE;
+       }
 
-       if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
-               return (FALSE);
+       if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+               return FALSE;
+       }
 
-       if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2)
-               return (FALSE);
+       if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
+               return FALSE;
+       }
 
-       return (TRUE);
+       return TRUE;
 }
 
 /*
@@ -596,41 +645,29 @@ mptcp_output(struct mptses *mpte)
        uint64_t old_snd_nxt;
        int error = 0;
 
-       mpte_lock_assert_held(mpte);
        mp_so = mptetoso(mpte);
+       socket_lock_assert_owned(mp_so);
        mp_tp = mpte->mpte_mptcb;
 
        VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
        mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;
 
-       mptcplog((LOG_DEBUG, "%s: snxt %u sndmax %u suna %u swnd %u reinjectq %u state %u\n",
-                 __func__, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
-                 (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_sndwnd,
-                 mpte->mpte_reinjectq ? 1 : 0,
-                 mp_tp->mpt_state),
-                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
        old_snd_nxt = mp_tp->mpt_sndnxt;
-       while (mptcp_can_send_more(mp_tp)) {
+       while (mptcp_can_send_more(mp_tp, FALSE)) {
                /* get the "best" subflow to be used for transmission */
-               mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts);
+               mpts = mptcp_get_subflow(mpte, &preferred_mpts);
                if (mpts == NULL) {
                        mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                        break;
                }
 
-               mptcplog((LOG_DEBUG, "%s: using id %u\n", __func__, mpts->mpts_connid),
-                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
                /* In case there's just one flow, we reattempt later */
                if (mpts_tried != NULL &&
                    (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
                        mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
                        mpts_tried->mpts_flags |= MPTSF_ACTIVE;
                        mptcp_start_timer(mpte, MPTT_REXMT);
-                       mptcplog((LOG_DEBUG, "%s: retry later\n", __func__),
-                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                        break;
                }
 
@@ -650,11 +687,6 @@ mptcp_output(struct mptses *mpte)
                                    min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
                                    tcp_autosndbuf_max)) == 1) {
                                        mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
-
-                                       mptcplog((LOG_DEBUG, "%s: increased snd hiwat to %u lowat %u\n",
-                                                 __func__, mp_so->so_snd.sb_hiwat,
-                                                 mp_so->so_snd.sb_lowat),
-                                                 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                                }
                        }
                }
@@ -667,9 +699,11 @@ mptcp_output(struct mptses *mpte)
                        mpts->mpts_flags |= MPTSF_FAILINGOVER;
                        mpts->mpts_flags &= ~MPTSF_ACTIVE;
                        mpts_tried = mpts;
-                       mptcplog((LOG_ERR, "%s: Error = %d mpts_flags %#x\n", __func__,
-                                 error, mpts->mpts_flags),
-                                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
+                       if (error != ECANCELED) {
+                               os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
+                                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+                                   error, mpts->mpts_flags);
+                       }
                        break;
                }
                /* The model is to have only one active flow at a time */
@@ -695,14 +729,6 @@ mptcp_output(struct mptses *mpte)
                if (mpte->mpte_active_sub == NULL) {
                        mpte->mpte_active_sub = mpts;
                } else if (mpte->mpte_active_sub != mpts) {
-                       struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
-                       struct tcpcb *acttp = sototcpcb(mpte->mpte_active_sub->mpts_socket);
-
-                       mptcplog((LOG_DEBUG, "%s: switch [%u, srtt %d] to [%u, srtt %d]\n", __func__,
-                           mpte->mpte_active_sub->mpts_connid, acttp->t_srtt >> TCP_RTT_SHIFT,
-                           mpts->mpts_connid, tp->t_srtt >> TCP_RTT_SHIFT),
-                           (MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG), MPTCP_LOGLVL_LOG);
-
                        mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
                        mpte->mpte_active_sub = mpts;
 
@@ -710,10 +736,17 @@ mptcp_output(struct mptses *mpte)
                }
        }
 
+       if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+               if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
+                   mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
+                       mptcp_finish_usrclosed(mpte);
+               }
+       }
+
        mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);
 
        /* subflow errors should not be percolated back up */
-       return (0);
+       return 0;
 }
 
 
@@ -729,9 +762,9 @@ mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
         */
        if (tp->t_srtt && *currtt > tp->t_srtt &&
            (curbest == NULL || tp->t_rxtshift == 0 ||
-            sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
+           sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
                *currtt = tp->t_srtt;
-               return (mpts);
+               return mpts;
        }
 
        /*
@@ -741,26 +774,41 @@ mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
            sototcpcb(curbest->mpts_socket)->t_rxtshift &&
            tp->t_rxtshift == 0) {
                *currtt = tp->t_srtt;
-               return (mpts);
+               return mpts;
        }
 
-       return (curbest != NULL ? curbest : mpts);
+       return curbest != NULL ? curbest : mpts;
 }
 
 static struct mptsub *
 mptcp_return_subflow(struct mptsub *mpts)
 {
-       if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0)
-               return (NULL);
+       if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) { /* gate: a candidate with no congestion-window space left is not handed back */
+               return NULL;    /* report "no subflow" rather than one that cannot send */
+       }
 
-       return (mpts);
+       return mpts;    /* returned unchanged; a NULL mpts passes straight through */
+}
+
+static boolean_t
+mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
+{      /* true when mpts has reached the failover threshold while the session still has data to send */
+       struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
+       int fail_thresh = mptcp_fail_thresh;    /* start from the mptcp_fail_thresh tunable */
+
+       if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+               fail_thresh *= 2;       /* handover sessions use twice the threshold */
+       }
+
+       return tp->t_rxtshift >= fail_thresh && /* t_rxtshift: presumably the subflow's retransmit backoff count -- confirm */
+              (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);  /* only "slow" if the MPTCP send buffer or the reinject queue is non-empty */
 }
 
 /*
  * Return the most eligible subflow to be used for sending data.
  */
 struct mptsub *
-mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred)
+mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
 {
        struct tcpcb *besttp, *secondtp;
        struct inpcb *bestinp, *secondinp;
@@ -779,23 +827,25 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                struct tcpcb *tp = sototcpcb(so);
                struct inpcb *inp = sotoinpcb(so);
 
-               mptcplog((LOG_DEBUG, "%s mpts %u ignore %d, mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
-                         __func__, mpts->mpts_connid, ignore ? ignore->mpts_connid : -1, mpts->mpts_flags,
-                         INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
-                         inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
-                         tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
-                         mptcp_subflow_cwnd_space(so)),
-                         MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+               mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
+                   __func__, mpts->mpts_connid, mpts->mpts_flags,
+                   INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
+                   inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
+                   tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
+                   mptcp_subflow_cwnd_space(so)),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
                /*
                 * First, the hard conditions to reject subflows
                 * (e.g., not connected,...)
                 */
-               if (mpts == ignore || inp->inp_last_outifp == NULL)
+               if (inp->inp_last_outifp == NULL) {
                        continue;
+               }
 
-               if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+               if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
                        continue;
+               }
 
                /* There can only be one subflow in degraded state */
                if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
@@ -806,50 +856,57 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                /*
                 * If this subflow is waiting to finally send, do it!
                 */
-               if (so->so_flags1 & SOF1_PRECONNECT_DATA)
-                       return (mptcp_return_subflow(mpts));
+               if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+                       return mptcp_return_subflow(mpts);
+               }
 
                /*
                 * Only send if the subflow is MP_CAPABLE. The exceptions to
                 * this rule (degraded or TFO) have been taken care of above.
                 */
-               if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE))
+               if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
                        continue;
+               }
 
                if ((so->so_state & SS_ISDISCONNECTED) ||
                    !(so->so_state & SS_ISCONNECTED) ||
                    !TCPS_HAVEESTABLISHED(tp->t_state) ||
-                   tp->t_state > TCPS_CLOSE_WAIT)
+                   tp->t_state > TCPS_CLOSE_WAIT) {
                        continue;
+               }
 
                /*
                 * Second, the soft conditions to find the subflow with best
                 * conditions for each set (aka cellular vs non-cellular)
                 */
-               if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
+               if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
                        second_best = mptcp_choose_subflow(mpts, second_best,
-                                                          &exp_rtt);
-               else
+                           &exp_rtt);
+               } else {
                        best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
+               }
        }
 
        /*
         * If there is no preferred or backup subflow, and there is no active
         * subflow use the last usable subflow.
         */
-       if (best == NULL)
-               return (mptcp_return_subflow(second_best));
+       if (best == NULL) {
+               return mptcp_return_subflow(second_best);
+       }
 
-       if (second_best == NULL)
-               return (mptcp_return_subflow(best));
+       if (second_best == NULL) {
+               return mptcp_return_subflow(best);
+       }
 
        besttp = sototcpcb(best->mpts_socket);
        bestinp = sotoinpcb(best->mpts_socket);
        secondtp = sototcpcb(second_best->mpts_socket);
        secondinp = sotoinpcb(second_best->mpts_socket);
 
-       if (preferred != NULL)
+       if (preferred != NULL) {
                *preferred = mptcp_return_subflow(best);
+       }
 
        /*
         * Second Step: Among best and second_best. Choose the one that is
@@ -859,19 +916,19 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                /*
                 * Only handover if Symptoms tells us to do so.
                 */
-               if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-                   mptcp_is_wifi_unusable() &&
-                   besttp->t_rxtshift >= mptcp_fail_thresh)
-                       return (mptcp_return_subflow(second_best));
+               if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+                   mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
+                       return mptcp_return_subflow(second_best);
+               }
 
-               return (mptcp_return_subflow(best));
+               return mptcp_return_subflow(best);
        } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
                int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
                int rto_thresh = mptcp_rtothresh;
 
                /* Adjust with symptoms information */
-               if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-                   mptcp_is_wifi_unusable()) {
+               if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+                   mptcp_is_wifi_unusable_for_session(mpte) != 0) {
                        rtt_thresh /= 2;
                        rto_thresh /= 2;
                }
@@ -885,12 +942,12 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                            second_best->mpts_connid,
                            secondtp->t_srtt >> TCP_RTT_SHIFT),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
-               if (besttp->t_rxtshift >= mptcp_fail_thresh &&
+               if (mptcp_subflow_is_slow(mpte, best) &&
                    secondtp->t_rxtshift == 0) {
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
                /* Compare RTOs, select second_best if best's rto exceeds rtothresh */
@@ -903,7 +960,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                            second_best->mpts_connid, secondtp->t_rxtcur),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
                /*
@@ -911,8 +968,8 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                 * were true. So, let's schedule on the best one, if he still
                 * has some space in the congestion-window.
                 */
-               return (mptcp_return_subflow(best));
-       } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_AGGREGATE) {
+               return mptcp_return_subflow(best);
+       } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
                struct mptsub *tmp;
 
                /*
@@ -930,15 +987,16 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                }
 
                /* Is there still space in the congestion window? */
-               if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0)
-                       return (mptcp_return_subflow(second_best));
+               if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
+                       return mptcp_return_subflow(second_best);
+               }
 
-               return (mptcp_return_subflow(best));
+               return mptcp_return_subflow(best);
        } else {
                panic("Unknown service-type configured for MPTCP");
        }
 
-       return (NULL);
+       return NULL;
 }
 
 static const char *
@@ -956,13 +1014,13 @@ mptcp_event_to_str(uint32_t event)
                c = "MPCE_RECV_DATA_FIN";
                break;
        }
-       return (c);
+       return c;
 }
 
 static const char *
 mptcp_state_to_str(mptcp_state_t state)
 {
-        const char *c = "UNDEFINED";
+       const char *c = "UNDEFINED";
        switch (state) {
        case MPTCPS_CLOSED:
                c = "MPTCPS_CLOSED";
@@ -995,13 +1053,16 @@ mptcp_state_to_str(mptcp_state_t state)
                c = "MPTCPS_TERMINATE";
                break;
        }
-       return (c);
+       return c;
 }
 
 void
 mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
 {
-       mpte_lock_assert_held(mp_tp->mpt_mpte);
+       struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
+
+       socket_lock_assert_owned(mp_so);
+
        mptcp_state_t old_state = mp_tp->mpt_state;
 
        DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
@@ -1010,7 +1071,7 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
        switch (mp_tp->mpt_state) {
        case MPTCPS_CLOSED:
        case MPTCPS_LISTEN:
-               mp_tp->mpt_state = MPTCPS_CLOSED;
+               mp_tp->mpt_state = MPTCPS_TERMINATE;
                break;
 
        case MPTCPS_ESTABLISHED:
@@ -1040,13 +1101,15 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
                break;
 
        case MPTCPS_CLOSING:
-               if (event == MPCE_RECV_DATA_ACK)
+               if (event == MPCE_RECV_DATA_ACK) {
                        mp_tp->mpt_state = MPTCPS_TIME_WAIT;
+               }
                break;
 
        case MPTCPS_LAST_ACK:
-               if (event == MPCE_RECV_DATA_ACK)
+               if (event == MPCE_RECV_DATA_ACK) {
                        mptcp_close(mp_tp->mpt_mpte, mp_tp);
+               }
                break;
 
        case MPTCPS_FIN_WAIT_2:
@@ -1090,7 +1153,6 @@ mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
        mptcp_update_rcv_state_meat(mp_tp, tp,
            full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
            csum);
-
 }
 
 void
@@ -1099,20 +1161,16 @@ mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
     uint16_t csum)
 {
        if (mdss_data_len == 0) {
-               mptcplog((LOG_INFO, "%s: Infinite Mapping.\n", __func__),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+               os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
+                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
 
                if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
-                       mptcplog((LOG_ERR, "%s: Bad checksum %x \n", __func__,
-                           csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
+                       os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
+                           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
                }
                mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
                return;
        }
-       mptcplog((LOG_DEBUG,
-           "%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", __func__,
-           seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt),
-           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
        mptcp_notify_mpready(tp->t_inpcb->inp_socket);
 
@@ -1130,16 +1188,16 @@ mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
 {
        u_int32_t datalen;
 
-       if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+       if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
                return 0;
+       }
 
        datalen = m->m_pkthdr.mp_rlen;
 
        /* unacceptable DSS option, fallback to TCP */
        if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
-               mptcplog((LOG_ERR, "%s: mbuf len %d, MPTCP expected %d",
-                   __func__, m->m_pkthdr.len, datalen),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+               os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
+                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
        } else {
                return 0;
        }
@@ -1151,12 +1209,13 @@ mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
 
 int
 mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
-                   int drop_hdrlen)
+    int drop_hdrlen)
 {
        mptcp_insert_rmap(tp, m, th);
        if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
-           drop_hdrlen) != 0)
+           drop_hdrlen) != 0) {
                return -1;
+       }
        return 0;
 }
 
@@ -1170,7 +1229,7 @@ mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
 
 int
 mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
-                   uint32_t sseq, uint16_t dlen, uint16_t csum, uint16_t dfin)
+    uint32_t sseq, uint16_t dlen, uint16_t csum, uint16_t dfin)
 {
        uint16_t mptcp_csum;
 
@@ -1180,38 +1239,42 @@ mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
                mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
                m_freem(m);
                tcpstat.tcps_mp_badcsum++;
-               return (-1);
+               return -1;
        }
-       return (0);
+       return 0;
 }
 
 static uint16_t
 mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
-                uint16_t dlen, uint16_t csum, uint16_t dfin)
+    uint16_t dlen, uint16_t csum, uint16_t dfin)
 {
        struct mptcb *mp_tp = tptomptp(tp);
        uint16_t real_len = dlen - dfin;
        uint32_t sum = 0;
 
-       if (mp_tp == NULL)
-               return (0);
+       if (mp_tp == NULL) {
+               return 0;
+       }
 
-       if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
-               return (0);
+       if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
+               return 0;
+       }
 
-       if (tp->t_mpflags & TMPF_TCP_FALLBACK)
-               return (0);
+       if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
+               return 0;
+       }
 
        /*
         * The remote side may send a packet with fewer bytes than the
         * claimed DSS checksum length.
         */
        if ((int)m_length2(m, NULL) < real_len) {
-               return (0xffff);
+               return 0xffff;
        }
 
-       if (real_len != 0)
+       if (real_len != 0) {
                sum = m_sum16(m, 0, real_len);
+       }
 
        sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
        ADDCARRY(sum);
@@ -1220,16 +1283,17 @@ mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
 
        mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
            MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
-       return (~sum & 0xffff);
+       return ~sum & 0xffff;
 }
 
 uint32_t
 mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
 {
-       u_int32_t sum = 0;
+       uint32_t sum = 0;
 
-       if (dlen)
+       if (dlen) {
                sum = m_sum16(m, 0, dlen);
+       }
 
        dss_val = mptcp_hton64(dss_val);
        sseq = htonl(sseq);
@@ -1240,7 +1304,7 @@ mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen
        sum = ~sum & 0xffff;
        DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
        mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
-                 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 
        return sum;
 }
@@ -1263,14 +1327,13 @@ mptcp_no_rto_spike(struct socket *so)
                    __func__, spike,
                    tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
                    tp->t_rttcur),
-                   (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
-
+                   (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
        }
 
-       if (spike > 0 ) {
-               return (FALSE);
+       if (spike > 0) {
+               return FALSE;
        } else {
-               return (TRUE);
+               return TRUE;
        }
 }
 
@@ -1280,8 +1343,9 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
        VERIFY(mpp->mpp_flags & flag);
        mpp->mpp_flags &= ~flag;
 
-       if (mptcp_should_defer_upcall(mpp))
+       if (mptcp_should_defer_upcall(mpp)) {
                return;
+       }
 
        if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
                mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;
@@ -1300,40 +1364,32 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
 
                sowwakeup(mpp->mpp_socket);
        }
-
-       if (mpp->mpp_flags & MPP_SET_CELLICON) {
-               mpp->mpp_flags &= ~MPP_SET_CELLICON;
-
-               mptcp_set_cellicon(mpp->mpp_pcbe);
-       }
-
-       if (mpp->mpp_flags & MPP_UNSET_CELLICON) {
-               mpp->mpp_flags &= ~MPP_UNSET_CELLICON;
-
-               mptcp_unset_cellicon();
-       }
 }
 
-static void
+void
 mptcp_ask_for_nat64(struct ifnet *ifp)
 {
        in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);
 
-       mptcplog((LOG_DEBUG, "%s: asked for NAT64-prefix on %s\n",
-                __func__, ifp->if_name), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+       os_log_info(mptcp_log_handle,
+           "%s: asked for NAT64-prefix on %s\n", __func__,
+           ifp->if_name);
 }
 
 static void
 mptcp_reset_itfinfo(struct mpt_itf_info *info)
 {
-       info->ifindex = 0;
-       info->has_v4_conn = 0;
-       info->has_v6_conn = 0;
+       memset(info, 0, sizeof(*info));
 }
 
 void
-mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
+mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
+    uint32_t necp_flags, __unused bool *viable)
 {
+       boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
+       boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+       boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
+       boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
        struct mppcb *mp = (struct mppcb *)handle;
        struct mptses *mpte = mptompte(mp);
        struct socket *mp_so;
@@ -1341,130 +1397,171 @@ mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
        int locked = 0;
        uint32_t i, ifindex;
 
-       ifindex = flow->interface_index;
+       ifindex = interface_index;
        VERIFY(ifindex != IFSCOPE_NONE);
 
-       /* ToDo - remove after rdar://problem/32007628 */
-       if (!IF_INDEX_IN_RANGE(ifindex))
-               printf("%s 1 ifindex %u not in range of flow %p action %d\n",
-                      __func__, ifindex, flow, action);
-
        /* About to be garbage-collected (see note about MPTCP/NECP interactions) */
-       if (mp->mpp_socket->so_usecount == 0)
+       if (mp->mpp_socket->so_usecount == 0) {
                return;
+       }
+
+       mp_so = mptetoso(mpte);
 
        if (action != NECP_CLIENT_CBACTION_INITIAL) {
-               mpte_lock(mpte);
+               socket_lock(mp_so, 1);
                locked = 1;
 
                /* Check again, because it might have changed while waiting */
-               if (mp->mpp_socket->so_usecount == 0)
+               if (mp->mpp_socket->so_usecount == 0) {
                        goto out;
+               }
        }
 
+       socket_lock_assert_owned(mp_so);
+
        mp_tp = mpte->mpte_mptcb;
-       mp_so = mptetoso(mpte);
 
-       mptcplog((LOG_DEBUG, "%s, action: %u ifindex %u usecount %u mpt_flags %#x state %u\n",
-                __func__, action, ifindex, mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state),
-                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+       os_log_info(mptcp_log_handle, "%s - %lx: action: %u ifindex %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
+           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
+           mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
+           has_v4, has_v6, has_nat64, low_power);
 
        /* No need on fallen back sockets */
-       if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)
+       if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
                goto out;
+       }
+
+       /*
+        * When the interface goes in low-power mode we don't want to establish
+        * new subflows on it. Thus, mark it internally as non-viable.
+        */
+       if (low_power) {
+               action = NECP_CLIENT_CBACTION_NONVIABLE;
+       }
 
        if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
                for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
-                       if (mpte->mpte_itfinfo[i].ifindex == ifindex)
+                       if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
+                               continue;
+                       }
+
+                       if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
                                mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
+                       }
                }
 
                mptcp_sched_create_subflows(mpte);
        } else if (action == NECP_CLIENT_CBACTION_VIABLE ||
-                  action == NECP_CLIENT_CBACTION_INITIAL) {
-               int found_empty = 0, empty_index = -1;
+           action == NECP_CLIENT_CBACTION_INITIAL) {
+               int found_slot = 0, slot_index = -1;
+               struct sockaddr *dst;
                struct ifnet *ifp;
 
-               /* ToDo - remove after rdar://problem/32007628 */
-               if (!IF_INDEX_IN_RANGE(ifindex))
-                       printf("%s 2 ifindex %u not in range of flow %p action %d\n",
-                              __func__, ifindex, flow, action);
-
                ifnet_head_lock_shared();
                ifp = ifindex2ifnet[ifindex];
                ifnet_head_done();
 
-               /* ToDo - remove after rdar://problem/32007628 */
-               if (!IF_INDEX_IN_RANGE(ifindex))
-                       printf("%s 3 ifindex %u not in range of flow %p action %d\n",
-                              __func__, ifindex, flow, action);
-
-               if (ifp == NULL)
+               if (ifp == NULL) {
                        goto out;
+               }
 
                if (IFNET_IS_EXPENSIVE(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
                        goto out;
+               }
+
+               if (IFNET_IS_CONSTRAINED(ifp) &&
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+                       goto out;
+               }
 
                if (IFNET_IS_CELLULAR(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
                        goto out;
+               }
 
+               if (IS_INTF_CLAT46(ifp)) {
+                       has_v4 = FALSE;
+               }
+
+               /* Look for the slot on where to store/update the interface-info. */
                for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
+                       /* Found a potential empty slot where we can put it */
                        if (mpte->mpte_itfinfo[i].ifindex == 0) {
-                               found_empty = 1;
-                               empty_index = i;
+                               found_slot = 1;
+                               slot_index = i;
+                       }
+
+                       /*
+                        * The interface is already in our array. Check if we
+                        * need to update it.
+                        */
+                       if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
+                           (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
+                           mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
+                           mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
+                               found_slot = 1;
+                               slot_index = i;
+                               break;
                        }
 
                        if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
-                               /* Ok, it's already there */
+                               /*
+                                * Ok, it's already there and we don't need
+                                * to update it
+                                */
                                goto out;
                        }
                }
 
-               if ((mpte->mpte_dst.sa_family == AF_INET || mpte->mpte_dst.sa_family == 0) &&
-                   !(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4) &&
-                   ifnet_get_nat64prefix(ifp, NULL) == ENOENT) {
+               dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
+               if (dst && (dst->sa_family == AF_INET || dst->sa_family == 0) &&
+                   has_v6 && !has_nat64 && !has_v4) {
+                       if (found_slot) {
+                               mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+                               mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+                               mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
+                       }
                        mptcp_ask_for_nat64(ifp);
                        goto out;
                }
 
-               if (found_empty == 0) {
+               if (found_slot == 0) {
                        int new_size = mpte->mpte_itfinfo_size * 2;
                        struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);
 
                        if (info == NULL) {
-                               mptcplog((LOG_ERR, "%s malloc failed for %u\n", __func__, new_size),
-                                        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+                               os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
+                                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
                                goto out;
                        }
 
                        memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));
 
-                       if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE)
+                       if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
                                _FREE(mpte->mpte_itfinfo, M_TEMP);
+                       }
 
                        /* We allocated a new one, thus the first must be empty */
-                       empty_index = mpte->mpte_itfinfo_size;
+                       slot_index = mpte->mpte_itfinfo_size;
 
                        mpte->mpte_itfinfo = info;
                        mpte->mpte_itfinfo_size = new_size;
-
-                       mptcplog((LOG_DEBUG, "%s Needed to realloc to %u\n", __func__, new_size),
-                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                }
 
-               VERIFY(empty_index >= 0 && empty_index < (int)mpte->mpte_itfinfo_size);
-               mpte->mpte_itfinfo[empty_index].ifindex = ifindex;
-               mpte->mpte_itfinfo[empty_index].has_v4_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
-               mpte->mpte_itfinfo[empty_index].has_v6_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+               VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
+               mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+               mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+               mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+               mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
 
                mptcp_sched_create_subflows(mpte);
        }
 
 out:
-       if (locked)
-               mpte_unlock(mpte);
+       if (locked) {
+               socket_unlock(mp_so, 1);
+       }
 }
 
 void
@@ -1473,7 +1570,7 @@ mptcp_set_restrictions(struct socket *mp_so)
        struct mptses *mpte = mpsotompte(mp_so);
        uint32_t i;
 
-       mpte_lock_assert_held(mpte);
+       socket_lock_assert_owned(mp_so);
 
        ifnet_head_lock_shared();
 
@@ -1482,20 +1579,30 @@ mptcp_set_restrictions(struct socket *mp_so)
                uint32_t ifindex = info->ifindex;
                struct ifnet *ifp;
 
-               if (ifindex == IFSCOPE_NONE)
+               if (ifindex == IFSCOPE_NONE) {
                        continue;
+               }
 
                ifp = ifindex2ifnet[ifindex];
+               if (ifp == NULL) {
+                       continue;
+               }
 
                if (IFNET_IS_EXPENSIVE(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
+                       info->ifindex = IFSCOPE_NONE;
+               }
+
+               if (IFNET_IS_CONSTRAINED(ifp) &&
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
                        info->ifindex = IFSCOPE_NONE;
+               }
 
                if (IFNET_IS_CELLULAR(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
                        info->ifindex = IFSCOPE_NONE;
+               }
        }
 
        ifnet_head_done();
 }
-