git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/mptcp.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp.c
index 5d901a9da48770ce22fd5a5fb867c4fb291712b7..85a8cebc17245d25404a55f3c89ef91adf5d12e7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
+ * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
 
 int mptcp_enable = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_enable, 0, "Enable Multipath TCP Support");
+    &mptcp_enable, 0, "Enable Multipath TCP Support");
 
-/* Number of times to try negotiating MPTCP on SYN retransmissions */
-int mptcp_mpcap_retries = MPTCP_CAPABLE_RETRIES;
+/*
+ * Number of times to try negotiating MPTCP on SYN retransmissions.
+ * We haven't seen any reports of a middlebox dropping all SYN segments that
+ * carry an MPTCP option. Thus, be generous: retry MPTCP on 4 SYN retransmits.
+ */
+int mptcp_mpcap_retries = 4;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
-       CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
+    CTLFLAG_RW | CTLFLAG_LOCKED,
+    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
 
 /*
  * By default, DSS checksum is turned off, revisit if we ever do
@@ -123,7 +127,7 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
  */
 int mptcp_dss_csum = 0;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_dss_csum, 0, "Enable DSS checksum");
+    &mptcp_dss_csum, 0, "Enable DSS checksum");
 
 /*
  * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
@@ -131,74 +135,65 @@ SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
  */
 int mptcp_fail_thresh = 1;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_fail_thresh, 0, "Failover threshold");
-
+    &mptcp_fail_thresh, 0, "Failover threshold");
 
 /*
  * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime
  * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
  * Some carrier networks have a timeout of 10 or 15 minutes.
  */
-int mptcp_subflow_keeptime = 60*14;
+int mptcp_subflow_keeptime = 60 * 14;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
+    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
 
 int mptcp_rtthist_rtthresh = 600;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
-
-/*
- * Use RTO history for sending new data
- */
-int mptcp_use_rto = 1;
-SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_use_rto, 0, "Disable RTO for subflow selection");
+    &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
 
 int mptcp_rtothresh = 1500;
 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_rtothresh, 0, "RTO threshold");
+    &mptcp_rtothresh, 0, "RTO threshold");
 
 /*
  * Probe the preferred path, when it is not in use
  */
 uint32_t mptcp_probeto = 1000;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_probeto, 0, "Disable probing by setting to 0");
+    &mptcp_probeto, 0, "Disable probing by setting to 0");
 
 uint32_t mptcp_probecnt = 5;
 SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
-       &mptcp_probecnt, 0, "Number of probe writes");
-
-/*
- * Static declarations
- */
-static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, uint64_t,
-                                uint32_t, uint16_t, uint16_t);
+    &mptcp_probecnt, 0, "Number of probe writes");
 
 static int
 mptcp_reass_present(struct socket *mp_so)
 {
-       struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
+       struct mptses *mpte = mpsotompte(mp_so);
+       struct mptcb *mp_tp = mpte->mpte_mptcb;
        struct tseg_qent *q;
        int dowakeup = 0;
+       int flags = 0;
 
        /*
         * Present data to user, advancing rcv_nxt through
         * completed sequence space.
         */
-       if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
-               return (0);
+       if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
+               return flags;
+       }
        q = LIST_FIRST(&mp_tp->mpt_segq);
-       if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt)
-               return (0);
+       if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
+               return flags;
+       }
 
        /*
         * If there is already another thread doing reassembly for this
         * connection, it is better to let it finish the job --
         * (radar 16316196)
         */
-       if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG)
-               return (0);
+       if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
+               return flags;
+       }
 
        mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;
 
@@ -208,8 +203,10 @@ mptcp_reass_present(struct socket *mp_so)
                if (mp_so->so_state & SS_CANTRCVMORE) {
                        m_freem(q->tqe_m);
                } else {
-                       if (sbappendstream(&mp_so->so_rcv, q->tqe_m))
+                       flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+                       if (sbappendstream_rcvdemux(mp_so, q->tqe_m)) {
                                dowakeup = 1;
+                       }
                }
                zfree(tcp_reass_zone, q);
                mp_tp->mpt_reassqlen--;
@@ -217,10 +214,10 @@ mptcp_reass_present(struct socket *mp_so)
        } while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
        mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;
 
-       if (dowakeup)
+       if (dowakeup) {
                sorwakeup(mp_so); /* done with socket lock held */
-       return (0);
-
+       }
+       return flags;
 }
 
 static int
@@ -232,7 +229,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
        struct tseg_qent *p = NULL;
        struct tseg_qent *nq;
        struct tseg_qent *te = NULL;
-       u_int16_t qlimit;
+       uint32_t qlimit;
 
        /*
         * Limit the number of segments in the reassembly queue to prevent
@@ -241,14 +238,14 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
         * queue.  Always keep one global queue entry spare to be able to
         * process the missing segment.
         */
-       qlimit = min(max(100, mp_so->so_rcv.sb_hiwat >> 10),
+       qlimit = MIN(MAX(100, mp_so->so_rcv.sb_hiwat >> 10),
            (tcp_autorcvbuf_max >> 10));
        if (mb_dsn != mp_tp->mpt_rcvnxt &&
            (mp_tp->mpt_reassqlen + 1) >= qlimit) {
                tcpstat.tcps_mptcp_rcvmemdrop++;
                m_freem(m);
                *tlenp = 0;
-               return (0);
+               return 0;
        }
 
        /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
@@ -256,7 +253,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
        if (te == NULL) {
                tcpstat.tcps_mptcp_rcvmemdrop++;
                m_freem(m);
-               return (0);
+               return 0;
        }
 
        mp_tp->mpt_reassqlen++;
@@ -265,8 +262,9 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
         * Find a segment which begins after this one does.
         */
        LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
-               if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn))
+               if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
                        break;
+               }
                p = q;
        }
 
@@ -294,7 +292,8 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
                                 */
                                goto out;
                        }
-                       m_adj(m, i);
+                       VERIFY(i <= INT_MAX);
+                       m_adj(m, (int)i);
                        *tlenp -= i;
                        phdr->mp_dsn += i;
                }
@@ -308,13 +307,16 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
         */
        while (q) {
                int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
-               if (i <= 0)
+               if (i <= 0) {
                        break;
+               }
 
                if (i < q->tqe_len) {
                        q->tqe_m->m_pkthdr.mp_dsn += i;
                        q->tqe_len -= i;
-                       m_adj(q->tqe_m, i);
+
+                       VERIFY(i <= INT_MAX);
+                       m_adj(q->tqe_m, (int)i);
                        break;
                }
 
@@ -338,7 +340,7 @@ mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *
        }
 
 out:
-       return (mptcp_reass_present(mp_so));
+       return mptcp_reass_present(mp_so);
 }
 
 /*
@@ -355,11 +357,11 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
 
        VERIFY(m->m_flags & M_PKTHDR);
 
-       mpte_lock_assert_held(mpte);    /* same as MP socket lock */
-
        mp_so = mptetoso(mpte);
        mp_tp = mpte->mpte_mptcb;
 
+       socket_lock_assert_owned(mp_so);
+
        DTRACE_MPTCP(input);
 
        mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
@@ -376,23 +378,59 @@ mptcp_input(struct mptses *mpte, struct mbuf *m)
         * In the degraded fallback case, data is accepted without DSS map
         */
        if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
+               struct mbuf *iter;
+               int mb_dfin = 0;
 fallback:
                mptcp_sbrcv_grow(mp_tp);
 
+               iter = m;
+               while (iter) {
+                       if ((iter->m_flags & M_PKTHDR) &&
+                           (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
+                               mb_dfin = 1;
+                       }
+
+                       if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
+                               /* Don't append zero-length packets; unlink and free this one. */
+                               if (prev == NULL) {
+                                       m = iter->m_next;
+                                       m_free(iter);
+                                       iter = m;
+                               } else {
+                                       prev->m_next = iter->m_next;
+                                       m_free(iter);
+                                       iter = prev->m_next;
+                               }
+
+                               /* It was a zero-length packet so next one must be a pkthdr */
+                               VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
+                       } else {
+                               prev = iter;
+                               iter = iter->m_next;
+                       }
+               }
+
                /*
                 * assume degraded flow as this may be the first packet
                 * without DSS, and the subflow state is not updated yet.
                 */
-               if (sbappendstream(&mp_so->so_rcv, m))
+               if (sbappendstream_rcvdemux(mp_so, m)) {
                        sorwakeup(mp_so);
+               }
+
                DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
                    struct socket *, mp_so,
                    struct sockbuf *, &mp_so->so_rcv,
                    struct sockbuf *, &mp_so->so_snd,
                    struct mptses *, mpte);
                count = mp_so->so_rcv.sb_cc - count;
-               mptcplog((LOG_DEBUG, "%s: Fallback read %d bytes\n", __func__,
-                   count), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
+
+               mp_tp->mpt_rcvnxt += count;
+
+               if (mb_dfin) {
+                       mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
+                       socantrcvmore(mp_so);
+               }
                return;
        }
 
@@ -400,10 +438,14 @@ fallback:
                u_int64_t mb_dsn;
                int32_t mb_datalen;
                int64_t todrop;
+               int mb_dfin = 0;
+
+               VERIFY(m->m_flags & M_PKTHDR);
 
                /* If fallback occurs, mbufs will not have PKTF_MPTCP set */
-               if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+               if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
                        goto fallback;
+               }
 
                save = m->m_next;
                /*
@@ -420,10 +462,11 @@ fallback:
                        prev = save;
                        save = save->m_next;
                }
-               if (prev)
+               if (prev) {
                        prev->m_next = NULL;
-               else
+               } else {
                        m->m_next = NULL;
+               }
 
                mb_dsn = m->m_pkthdr.mp_dsn;
                mb_datalen = m->m_pkthdr.mp_rlen;
@@ -432,61 +475,83 @@ fallback:
                if (todrop > 0) {
                        tcpstat.tcps_mptcp_rcvpackafterwin++;
 
+                       os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
+                           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+                           (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
+                           mp_tp->mpt_rcvwnd, todrop);
+
                        if (todrop >= mb_datalen) {
-                               if (freelist == NULL)
+                               if (freelist == NULL) {
                                        freelist = m;
-                               else
+                               } else {
                                        tail->m_next = m;
+                               }
 
-                               if (prev != NULL)
+                               if (prev != NULL) {
                                        tail = prev;
-                               else
+                               } else {
                                        tail = m;
+                               }
 
                                m = save;
                                prev = save = NULL;
                                continue;
                        } else {
-                               m_adj(m, -todrop);
+                               VERIFY(todrop <= INT_MAX);
+                               m_adj(m, (int)-todrop);
                                mb_datalen -= todrop;
+                               m->m_pkthdr.mp_rlen -= todrop;
                        }
-               }
 
-               if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
-                   !LIST_EMPTY(&mp_tp->mpt_segq)) {
-                       mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
-
-                       goto next;
+                       /*
+                        * We drop from the right edge of the mbuf, thus the
+                        * DATA_FIN is dropped as well
+                        */
+                       m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
                }
 
                if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
                        if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
                            mp_tp->mpt_rcvnxt)) {
-                               if (freelist == NULL)
+                               if (freelist == NULL) {
                                        freelist = m;
-                               else
+                               } else {
                                        tail->m_next = m;
+                               }
 
-                               if (prev != NULL)
+                               if (prev != NULL) {
                                        tail = prev;
-                               else
+                               } else {
                                        tail = m;
+                               }
 
                                m = save;
                                prev = save = NULL;
                                continue;
                        } else {
-                               m_adj(m, (mp_tp->mpt_rcvnxt - mb_dsn));
+                               VERIFY((mp_tp->mpt_rcvnxt - mb_dsn) <= INT_MAX);
+                               m_adj(m, (int)(mp_tp->mpt_rcvnxt - mb_dsn));
+                               mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
+                               mb_dsn = mp_tp->mpt_rcvnxt;
+                               VERIFY(mb_datalen >= 0 && mb_datalen <= USHRT_MAX);
+                               m->m_pkthdr.mp_rlen = (uint16_t)mb_datalen;
+                               m->m_pkthdr.mp_dsn = mb_dsn;
                        }
-                       mptcplog((LOG_INFO, "%s: Left Edge %llu\n", __func__,
-                           mp_tp->mpt_rcvnxt),
-                           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
                }
 
+               if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
+                   !LIST_EMPTY(&mp_tp->mpt_segq)) {
+                       mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
+
+                       goto next;
+               }
+               mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
+
                mptcp_sbrcv_grow(mp_tp);
 
-               if (sbappendstream(&mp_so->so_rcv, m))
+               if (sbappendstream_rcvdemux(mp_so, m)) {
                        wakeup = 1;
+               }
 
                DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
                    struct sockbuf *, &mp_so->so_rcv,
@@ -496,34 +561,39 @@ fallback:
                count = mp_so->so_rcv.sb_cc - count;
                tcpstat.tcps_mp_rcvtotal++;
                tcpstat.tcps_mp_rcvbytes += count;
-               mptcplog((LOG_DEBUG, "%s: Read %d bytes\n", __func__, count),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
 
                mp_tp->mpt_rcvnxt += count;
 
 next:
+               if (mb_dfin) {
+                       mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
+                       socantrcvmore(mp_so);
+               }
                m = save;
                prev = save = NULL;
                count = mp_so->so_rcv.sb_cc;
        } while (m);
 
-       if (freelist)
+       if (freelist) {
                m_freem(freelist);
+       }
 
-       if (wakeup)
+       if (wakeup) {
                sorwakeup(mp_so);
+       }
 }
 
-static boolean_t
-mptcp_can_send_more(struct mptcb *mp_tp)
+boolean_t
+mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
 {
        struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
 
        /*
         * Always send if there is data in the reinject-queue.
         */
-       if (mp_tp->mpt_mpte->mpte_reinjectq)
-               return (TRUE);
+       if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
+               return TRUE;
+       }
 
        /*
         * Don't send, if:
@@ -534,19 +604,23 @@ mptcp_can_send_more(struct mptcb *mp_tp)
         * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
         */
 
-       if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax))
-               return (FALSE);
+       if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
+               return FALSE;
+       }
 
-       if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt))
-               return (FALSE);
+       if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
+               return FALSE;
+       }
 
-       if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
-               return (FALSE);
+       if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+               return FALSE;
+       }
 
-       if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2)
-               return (FALSE);
+       if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
+               return FALSE;
+       }
 
-       return (TRUE);
+       return TRUE;
 }
 
 /*
@@ -563,41 +637,34 @@ mptcp_output(struct mptses *mpte)
        uint64_t old_snd_nxt;
        int error = 0;
 
-       mpte_lock_assert_held(mpte);
        mp_so = mptetoso(mpte);
        mp_tp = mpte->mpte_mptcb;
 
+       socket_lock_assert_owned(mp_so);
+
+       if (mp_so->so_flags & SOF_DEFUNCT) {
+               return 0;
+       }
+
        VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
        mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;
 
-       mptcplog((LOG_DEBUG, "%s: snxt %u sndmax %u suna %u swnd %u reinjectq %u state %u\n",
-                 __func__, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
-                 (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_sndwnd,
-                 mpte->mpte_reinjectq ? 1 : 0,
-                 mp_tp->mpt_state),
-                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
        old_snd_nxt = mp_tp->mpt_sndnxt;
-       while (mptcp_can_send_more(mp_tp)) {
+       while (mptcp_can_send_more(mp_tp, FALSE)) {
                /* get the "best" subflow to be used for transmission */
-               mpts = mptcp_get_subflow(mpte, NULL, &preferred_mpts);
+               mpts = mptcp_get_subflow(mpte, &preferred_mpts);
                if (mpts == NULL) {
                        mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
                        break;
                }
 
-               mptcplog((LOG_DEBUG, "%s: using id %u\n", __func__, mpts->mpts_connid),
-                   MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
-
                /* In case there's just one flow, we reattempt later */
                if (mpts_tried != NULL &&
                    (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
                        mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
                        mpts_tried->mpts_flags |= MPTSF_ACTIVE;
                        mptcp_start_timer(mpte, MPTT_REXMT);
-                       mptcplog((LOG_DEBUG, "%s: retry later\n", __func__),
-                           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                        break;
                }
 
@@ -608,8 +675,7 @@ mptcp_output(struct mptses *mpte)
                 *      2. send buffer is filled to 7/8th with data (so we actually
                 *         have data to make use of it);
                 */
-               if (tcp_do_autosendbuf == 1 &&
-                   (mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
+               if ((mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
                    tcp_cansbgrow(&mp_so->so_snd)) {
                        if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat &&
                            mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) {
@@ -617,11 +683,6 @@ mptcp_output(struct mptses *mpte)
                                    min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
                                    tcp_autosndbuf_max)) == 1) {
                                        mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
-
-                                       mptcplog((LOG_DEBUG, "%s: increased snd hiwat to %u lowat %u\n",
-                                                 __func__, mp_so->so_snd.sb_hiwat,
-                                                 mp_so->so_snd.sb_lowat),
-                                                 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
                                }
                        }
                }
@@ -634,9 +695,11 @@ mptcp_output(struct mptses *mpte)
                        mpts->mpts_flags |= MPTSF_FAILINGOVER;
                        mpts->mpts_flags &= ~MPTSF_ACTIVE;
                        mpts_tried = mpts;
-                       mptcplog((LOG_ERR, "%s: Error = %d mpts_flags %#x\n", __func__,
-                                 error, mpts->mpts_flags),
-                                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
+                       if (error != ECANCELED) {
+                               os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
+                                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
+                                   error, mpts->mpts_flags);
+                       }
                        break;
                }
                /* The model is to have only one active flow at a time */
@@ -662,14 +725,6 @@ mptcp_output(struct mptses *mpte)
                if (mpte->mpte_active_sub == NULL) {
                        mpte->mpte_active_sub = mpts;
                } else if (mpte->mpte_active_sub != mpts) {
-                       struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
-                       struct tcpcb *acttp = sototcpcb(mpte->mpte_active_sub->mpts_socket);
-
-                       mptcplog((LOG_DEBUG, "%s: switch [%u, srtt %d] to [%u, srtt %d]\n", __func__,
-                           mpte->mpte_active_sub->mpts_connid, acttp->t_srtt >> TCP_RTT_SHIFT,
-                           mpts->mpts_connid, tp->t_srtt >> TCP_RTT_SHIFT),
-                           (MPTCP_SENDER_DBG | MPTCP_SOCKET_DBG), MPTCP_LOGLVL_LOG);
-
                        mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
                        mpte->mpte_active_sub = mpts;
 
@@ -677,10 +732,17 @@ mptcp_output(struct mptses *mpte)
                }
        }
 
+       if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
+               if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
+                   mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
+                       mptcp_finish_usrclosed(mpte);
+               }
+       }
+
        mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);
 
        /* subflow errors should not be percolated back up */
-       return (0);
+       return 0;
 }
 
 
@@ -696,9 +758,9 @@ mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
         */
        if (tp->t_srtt && *currtt > tp->t_srtt &&
            (curbest == NULL || tp->t_rxtshift == 0 ||
-            sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
+           sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
                *currtt = tp->t_srtt;
-               return (mpts);
+               return mpts;
        }
 
        /*
@@ -708,26 +770,41 @@ mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
            sototcpcb(curbest->mpts_socket)->t_rxtshift &&
            tp->t_rxtshift == 0) {
                *currtt = tp->t_srtt;
-               return (mpts);
+               return mpts;
        }
 
-       return (curbest != NULL ? curbest : mpts);
+       return curbest != NULL ? curbest : mpts;
 }
 
 static struct mptsub *
 mptcp_return_subflow(struct mptsub *mpts)
 {
-       if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0)
-               return (NULL);
+       if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
+               return NULL;
+       }
+
+       return mpts;
+}
+
+static boolean_t
+mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
+{
+       struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
+       int fail_thresh = mptcp_fail_thresh;
+
+       if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+               fail_thresh *= 2;
+       }
 
-       return (mpts);
+       return tp->t_rxtshift >= fail_thresh &&
+              (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
 }
 
 /*
  * Return the most eligible subflow to be used for sending data.
  */
 struct mptsub *
-mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **preferred)
+mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
 {
        struct tcpcb *besttp, *secondtp;
        struct inpcb *bestinp, *secondinp;
@@ -746,23 +823,25 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                struct tcpcb *tp = sototcpcb(so);
                struct inpcb *inp = sotoinpcb(so);
 
-               mptcplog((LOG_DEBUG, "%s mpts %u ignore %d, mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
-                         __func__, mpts->mpts_connid, ignore ? ignore->mpts_connid : -1, mpts->mpts_flags,
-                         INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
-                         inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
-                         tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
-                         mptcp_subflow_cwnd_space(so)),
-                         MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+               mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
+                   __func__, mpts->mpts_connid, mpts->mpts_flags,
+                   INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
+                   inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
+                   tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
+                   mptcp_subflow_cwnd_space(so)),
+                   MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 
                /*
                 * First, the hard conditions to reject subflows
                 * (e.g., not connected,...)
                 */
-               if (mpts == ignore || inp->inp_last_outifp == NULL)
+               if (inp->inp_last_outifp == NULL) {
                        continue;
+               }
 
-               if (INP_WAIT_FOR_IF_FEEDBACK(inp))
+               if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
                        continue;
+               }
 
                /* There can only be one subflow in degraded state */
                if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
@@ -773,72 +852,81 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                /*
                 * If this subflow is waiting to finally send, do it!
                 */
-               if (so->so_flags1 & SOF1_PRECONNECT_DATA)
-                       return (mptcp_return_subflow(mpts));
+               if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
+                       return mptcp_return_subflow(mpts);
+               }
 
                /*
                 * Only send if the subflow is MP_CAPABLE. The exceptions to
                 * this rule (degraded or TFO) have been taken care of above.
                 */
-               if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE))
+               if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
                        continue;
+               }
 
                if ((so->so_state & SS_ISDISCONNECTED) ||
                    !(so->so_state & SS_ISCONNECTED) ||
                    !TCPS_HAVEESTABLISHED(tp->t_state) ||
-                   tp->t_state > TCPS_CLOSE_WAIT)
+                   tp->t_state > TCPS_CLOSE_WAIT) {
                        continue;
+               }
 
                /*
                 * Second, the soft conditions to find the subflow with best
                 * conditions for each set (aka cellular vs non-cellular)
                 */
-               if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
+               if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
                        second_best = mptcp_choose_subflow(mpts, second_best,
-                                                          &exp_rtt);
-               else
+                           &exp_rtt);
+               } else {
                        best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
+               }
        }
 
        /*
         * If there is no preferred or backup subflow, and there is no active
         * subflow use the last usable subflow.
         */
-       if (best == NULL)
-               return (mptcp_return_subflow(second_best));
+       if (best == NULL) {
+               return mptcp_return_subflow(second_best);
+       }
 
-       if (second_best == NULL)
-               return (mptcp_return_subflow(best));
+       if (second_best == NULL) {
+               return mptcp_return_subflow(best);
+       }
 
        besttp = sototcpcb(best->mpts_socket);
        bestinp = sotoinpcb(best->mpts_socket);
        secondtp = sototcpcb(second_best->mpts_socket);
        secondinp = sotoinpcb(second_best->mpts_socket);
 
-       if (preferred != NULL)
+       if (preferred != NULL) {
                *preferred = mptcp_return_subflow(best);
+       }
 
        /*
         * Second Step: Among best and second_best. Choose the one that is
         * most appropriate for this particular service-type.
         */
-       if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
+       if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
+               return mptcp_return_subflow(best);
+       } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
                /*
                 * Only handover if Symptoms tells us to do so.
                 */
-               if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-                   mptcp_is_wifi_unusable() &&
-                   besttp->t_rxtshift >= mptcp_fail_thresh)
-                       return (mptcp_return_subflow(second_best));
+               if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+                   mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
+                       return mptcp_return_subflow(second_best);
+               }
 
-               return (mptcp_return_subflow(best));
+               return mptcp_return_subflow(best);
        } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
                int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
                int rto_thresh = mptcp_rtothresh;
 
                /* Adjust with symptoms information */
-               if (IFNET_IS_WIFI(bestinp->inp_last_outifp) &&
-                   mptcp_is_wifi_unusable()) {
+               if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
+                   mptcp_is_wifi_unusable_for_session(mpte) != 0) {
                        rtt_thresh /= 2;
                        rto_thresh /= 2;
                }
@@ -852,12 +940,12 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                            second_best->mpts_connid,
                            secondtp->t_srtt >> TCP_RTT_SHIFT),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
-               if (besttp->t_rxtshift >= mptcp_fail_thresh &&
+               if (mptcp_subflow_is_slow(mpte, best) &&
                    secondtp->t_rxtshift == 0) {
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
                /* Compare RTOs, select second_best if best's rto exceeds rtothresh */
@@ -870,7 +958,7 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                            second_best->mpts_connid, secondtp->t_rxtcur),
                            MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
 
-                       return (mptcp_return_subflow(second_best));
+                       return mptcp_return_subflow(second_best);
                }
 
                /*
@@ -878,8 +966,8 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                 * were true. So, let's schedule on the best one, if he still
                 * has some space in the congestion-window.
                 */
-               return (mptcp_return_subflow(best));
-       } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_AGGREGATE) {
+               return mptcp_return_subflow(best);
+       } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
                struct mptsub *tmp;
 
                /*
@@ -897,15 +985,16 @@ mptcp_get_subflow(struct mptses *mpte, struct mptsub *ignore, struct mptsub **pr
                }
 
                /* Is there still space in the congestion window? */
-               if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0)
-                       return (mptcp_return_subflow(second_best));
+               if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
+                       return mptcp_return_subflow(second_best);
+               }
 
-               return (mptcp_return_subflow(best));
+               return mptcp_return_subflow(best);
        } else {
                panic("Unknown service-type configured for MPTCP");
        }
 
-       return (NULL);
+       return NULL;
 }
 
 static const char *
@@ -923,13 +1012,13 @@ mptcp_event_to_str(uint32_t event)
                c = "MPCE_RECV_DATA_FIN";
                break;
        }
-       return (c);
+       return c;
 }
 
 static const char *
 mptcp_state_to_str(mptcp_state_t state)
 {
-        const char *c = "UNDEFINED";
+       const char *c = "UNDEFINED";
        switch (state) {
        case MPTCPS_CLOSED:
                c = "MPTCPS_CLOSED";
@@ -962,13 +1051,16 @@ mptcp_state_to_str(mptcp_state_t state)
                c = "MPTCPS_TERMINATE";
                break;
        }
-       return (c);
+       return c;
 }
 
 void
 mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
 {
-       mpte_lock_assert_held(mp_tp->mpt_mpte);
+       struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
+
+       socket_lock_assert_owned(mp_so);
+
        mptcp_state_t old_state = mp_tp->mpt_state;
 
        DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
@@ -977,7 +1069,7 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
        switch (mp_tp->mpt_state) {
        case MPTCPS_CLOSED:
        case MPTCPS_LISTEN:
-               mp_tp->mpt_state = MPTCPS_CLOSED;
+               mp_tp->mpt_state = MPTCPS_TERMINATE;
                break;
 
        case MPTCPS_ESTABLISHED:
@@ -1007,13 +1099,15 @@ mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
                break;
 
        case MPTCPS_CLOSING:
-               if (event == MPCE_RECV_DATA_ACK)
+               if (event == MPCE_RECV_DATA_ACK) {
                        mp_tp->mpt_state = MPTCPS_TIME_WAIT;
+               }
                break;
 
        case MPTCPS_LAST_ACK:
-               if (event == MPCE_RECV_DATA_ACK)
+               if (event == MPCE_RECV_DATA_ACK) {
                        mptcp_close(mp_tp->mpt_mpte, mp_tp);
+               }
                break;
 
        case MPTCPS_FIN_WAIT_2:
@@ -1057,7 +1151,6 @@ mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
        mptcp_update_rcv_state_meat(mp_tp, tp,
            full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
            csum);
-
 }
 
 void
@@ -1066,29 +1159,19 @@ mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
     uint16_t csum)
 {
        if (mdss_data_len == 0) {
-               mptcplog((LOG_INFO, "%s: Infinite Mapping.\n", __func__),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+               os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
+                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
 
                if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
-                       mptcplog((LOG_ERR, "%s: Bad checksum %x \n", __func__,
-                           csum), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
+                       os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
+                           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
                }
                mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
                return;
        }
-               mptcplog((LOG_DEBUG,
-                   "%s: seqn = %x len = %x full = %llx rcvnxt = %llu \n", __func__,
-                   seqn, mdss_data_len, full_dsn, mp_tp->mpt_rcvnxt),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
-
-       /* Process a Data FIN packet , handled in mptcp_do_fin_opt */
-       if ((seqn == 0) && (mdss_data_len == 1)) {
-               mptcplog((LOG_INFO, "%s: Data FIN in %s state \n", __func__,
-                   mptcp_state_to_str(mp_tp->mpt_state)),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
-               return;
-       }
+
        mptcp_notify_mpready(tp->t_inpcb->inp_socket);
+
        tp->t_rcv_map.mpt_dsn = full_dsn;
        tp->t_rcv_map.mpt_sseq = seqn;
        tp->t_rcv_map.mpt_len = mdss_data_len;
@@ -1103,16 +1186,16 @@ mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
 {
        u_int32_t datalen;
 
-       if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP))
+       if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
                return 0;
+       }
 
        datalen = m->m_pkthdr.mp_rlen;
 
        /* unacceptable DSS option, fallback to TCP */
        if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
-               mptcplog((LOG_ERR, "%s: mbuf len %d, MPTCP expected %d",
-                   __func__, m->m_pkthdr.len, datalen),
-                   MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
+               os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
+                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
        } else {
                return 0;
        }
@@ -1123,83 +1206,93 @@ mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
 }
 
 int
-mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, int drop_hdrlen)
+mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
+    int drop_hdrlen)
 {
-       mptcp_insert_rmap(tp, m);
+       mptcp_insert_rmap(tp, m, th);
        if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
-           drop_hdrlen) != 0)
+           drop_hdrlen) != 0) {
                return -1;
-       return 0;
-}
-
-/*
- * MPTCP Checksum support
- * The checksum is calculated whenever the MPTCP DSS option is included
- * in the TCP packet. The checksum includes the sum of the MPTCP psuedo
- * header and the actual data indicated by the length specified in the
- * DSS option.
- */
-
-int
-mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
-                   uint32_t sseq, uint16_t dlen, uint16_t csum)
-{
-       uint16_t mptcp_csum;
-
-       mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum);
-       if (mptcp_csum) {
-               tp->t_mpflags |= TMPF_SND_MPFAIL;
-               mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
-               m_freem(m);
-               tcpstat.tcps_mp_badcsum++;
-               return (-1);
        }
-       return (0);
+       return 0;
 }
 
 static uint16_t
 mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
-                uint16_t dlen, uint16_t csum)
+    uint16_t dlen, uint16_t csum, int dfin)
 {
        struct mptcb *mp_tp = tptomptp(tp);
+       int real_len = dlen - dfin;
        uint32_t sum = 0;
 
-       if (mp_tp == NULL)
-               return (0);
+       VERIFY(real_len >= 0);
 
-       if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM))
-               return (0);
+       if (mp_tp == NULL) {
+               return 0;
+       }
 
-       if (tp->t_mpflags & TMPF_TCP_FALLBACK)
-               return (0);
+       if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
+               return 0;
+       }
+
+       if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
+               return 0;
+       }
 
        /*
         * The remote side may send a packet with fewer bytes than the
         * claimed DSS checksum length.
         */
-       if ((int)m_length2(m, NULL) < dlen)
-               return (0xffff);
+       if ((int)m_length2(m, NULL) < real_len) {
+               return 0xffff;
+       }
 
-       if (dlen != 0)
-               sum = m_sum16(m, 0, dlen);
+       if (real_len != 0) {
+               sum = m_sum16(m, 0, real_len);
+       }
 
        sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
        ADDCARRY(sum);
+
        DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
            uint32_t, sum);
 
-       mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
-           MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
-       return (~sum & 0xffff);
+       return ~sum & 0xffff;
 }
 
-uint32_t
+/*
+ * MPTCP Checksum support
+ * The checksum is calculated whenever the MPTCP DSS option is included
+ * in the TCP packet. The checksum includes the sum of the MPTCP pseudo
+ * header and the actual data indicated by the length specified in the
+ * DSS option.
+ */
+
+int
+mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
+    uint32_t sseq, uint16_t dlen, uint16_t csum, int dfin)
+{
+       uint16_t mptcp_csum;
+
+       mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin);
+       if (mptcp_csum) {
+               tp->t_mpflags |= TMPF_SND_MPFAIL;
+               mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
+               m_freem(m);
+               tcpstat.tcps_mp_badcsum++;
+               return -1;
+       }
+       return 0;
+}
+
+uint16_t
 mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
 {
-       u_int32_t sum = 0;
+       uint32_t sum = 0;
 
-       if (dlen)
+       if (dlen) {
                sum = m_sum16(m, 0, dlen);
+       }
 
        dss_val = mptcp_hton64(dss_val);
        sseq = htonl(sseq);
@@ -1210,9 +1303,9 @@ mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen
        sum = ~sum & 0xffff;
        DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
        mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
-                 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
+           MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
 
-       return sum;
+       return (uint16_t)sum;
 }
 
 /*
@@ -1233,14 +1326,13 @@ mptcp_no_rto_spike(struct socket *so)
                    __func__, spike,
                    tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
                    tp->t_rttcur),
-                   (MPTCP_SOCKET_DBG|MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
-
+                   (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
        }
 
-       if (spike > 0 ) {
-               return (FALSE);
+       if (spike > 0) {
+               return FALSE;
        } else {
-               return (TRUE);
+               return TRUE;
        }
 }
 
@@ -1250,8 +1342,9 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
        VERIFY(mpp->mpp_flags & flag);
        mpp->mpp_flags &= ~flag;
 
-       if (mptcp_should_defer_upcall(mpp))
+       if (mptcp_should_defer_upcall(mpp)) {
                return;
+       }
 
        if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
                mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;
@@ -1270,171 +1363,199 @@ mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
 
                sowwakeup(mpp->mpp_socket);
        }
-
-       if (mpp->mpp_flags & MPP_SET_CELLICON) {
-               mpp->mpp_flags &= ~MPP_SET_CELLICON;
-
-               mptcp_set_cellicon(mpp->mpp_pcbe);
-       }
-
-       if (mpp->mpp_flags & MPP_UNSET_CELLICON) {
-               mpp->mpp_flags &= ~MPP_UNSET_CELLICON;
-
-               mptcp_unset_cellicon();
-       }
-}
-
-static void
-mptcp_ask_for_nat64(struct ifnet *ifp)
-{
-       in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);
-
-       mptcplog((LOG_DEBUG, "%s: asked for NAT64-prefix on %s\n",
-                __func__, ifp->if_name), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
 }
 
 static void
 mptcp_reset_itfinfo(struct mpt_itf_info *info)
 {
-       info->ifindex = 0;
-       info->has_v4_conn = 0;
-       info->has_v6_conn = 0;
+       memset(info, 0, sizeof(*info));
 }
 
 void
-mptcp_session_necp_cb(void *handle, int action, struct necp_client_flow *flow)
+mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
+    uint32_t necp_flags, __unused bool *viable)
 {
+       boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
+       boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+       boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
+       boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
        struct mppcb *mp = (struct mppcb *)handle;
        struct mptses *mpte = mptompte(mp);
        struct socket *mp_so;
        struct mptcb *mp_tp;
-       int locked = 0;
        uint32_t i, ifindex;
+       struct ifnet *ifp;
+       int locked = 0;
 
-       ifindex = flow->interface_index;
+       ifindex = interface_index;
        VERIFY(ifindex != IFSCOPE_NONE);
 
-       /* ToDo - remove after rdar://problem/32007628 */
-       if (!IF_INDEX_IN_RANGE(ifindex))
-               printf("%s 1 ifindex %u not in range of flow %p action %d\n",
-                      __func__, ifindex, flow, action);
-
        /* About to be garbage-collected (see note about MPTCP/NECP interactions) */
-       if (mp->mpp_socket->so_usecount == 0)
+       if (mp->mpp_socket->so_usecount == 0) {
                return;
+       }
+
+       mp_so = mptetoso(mpte);
 
        if (action != NECP_CLIENT_CBACTION_INITIAL) {
-               mpte_lock(mpte);
+               socket_lock(mp_so, 1);
                locked = 1;
 
                /* Check again, because it might have changed while waiting */
-               if (mp->mpp_socket->so_usecount == 0)
+               if (mp->mpp_socket->so_usecount == 0) {
                        goto out;
+               }
        }
 
+       socket_lock_assert_owned(mp_so);
+
        mp_tp = mpte->mpte_mptcb;
-       mp_so = mptetoso(mpte);
 
-       mptcplog((LOG_DEBUG, "%s, action: %u ifindex %u usecount %u mpt_flags %#x state %u\n",
-                __func__, action, ifindex, mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state),
-                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
+       ifnet_head_lock_shared();
+       ifp = ifindex2ifnet[ifindex];
+       ifnet_head_done();
+
+       os_log(mptcp_log_handle, "%s - %lx: action: %u ifindex %u delegated to %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
+           __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
+           ifp && ifp->if_delegated.ifp ? ifp->if_delegated.ifp->if_index : IFSCOPE_NONE,
+           mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
+           has_v4, has_v6, has_nat64, low_power);
 
        /* No need on fallen back sockets */
-       if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)
+       if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
                goto out;
+       }
+
+       /*
+        * When the interface goes into low-power mode we don't want to establish
+        * new subflows on it. Thus, mark it internally as non-viable.
+        */
+       if (low_power) {
+               action = NECP_CLIENT_CBACTION_NONVIABLE;
+       }
 
        if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
                for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
-                       if (mpte->mpte_itfinfo[i].ifindex == ifindex)
+                       if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
+                               continue;
+                       }
+
+                       if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
                                mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
+                       }
                }
 
                mptcp_sched_create_subflows(mpte);
        } else if (action == NECP_CLIENT_CBACTION_VIABLE ||
-                  action == NECP_CLIENT_CBACTION_INITIAL) {
-               int found_empty = 0, empty_index = -1;
-               struct ifnet *ifp;
+           action == NECP_CLIENT_CBACTION_INITIAL) {
+               int found_slot = 0, slot_index = -1;
+               struct sockaddr *dst;
 
-               /* ToDo - remove after rdar://problem/32007628 */
-               if (!IF_INDEX_IN_RANGE(ifindex))
-                       printf("%s 2 ifindex %u not in range of flow %p action %d\n",
-                              __func__, ifindex, flow, action);
-
-               ifnet_head_lock_shared();
-               ifp = ifindex2ifnet[ifindex];
-               ifnet_head_done();
-
-               /* ToDo - remove after rdar://problem/32007628 */
-               if (!IF_INDEX_IN_RANGE(ifindex))
-                       printf("%s 3 ifindex %u not in range of flow %p action %d\n",
-                              __func__, ifindex, flow, action);
+               if (ifp == NULL) {
+                       goto out;
+               }
 
-               if (ifp == NULL)
+               if (IFNET_IS_COMPANION_LINK(ifp)) {
                        goto out;
+               }
 
                if (IFNET_IS_EXPENSIVE(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
                        goto out;
+               }
+
+               if (IFNET_IS_CONSTRAINED(ifp) &&
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+                       goto out;
+               }
 
                if (IFNET_IS_CELLULAR(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
                        goto out;
+               }
 
+               if (IS_INTF_CLAT46(ifp)) {
+                       has_v4 = FALSE;
+               }
+
+               /* Look for the slot in which to store/update the interface-info. */
                for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
+                       /* Found a potential empty slot where we can put it */
                        if (mpte->mpte_itfinfo[i].ifindex == 0) {
-                               found_empty = 1;
-                               empty_index = i;
+                               found_slot = 1;
+                               slot_index = i;
+                       }
+
+                       /*
+                        * The interface is already in our array. Check if we
+                        * need to update it.
+                        */
+                       if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
+                           (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
+                           mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
+                           mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
+                               found_slot = 1;
+                               slot_index = i;
+                               break;
                        }
 
                        if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
-                               /* Ok, it's already there */
+                               /*
+                                * Ok, it's already there and we don't need
+                                * to update it
+                                */
                                goto out;
                        }
                }
 
-               if ((mpte->mpte_dst.sa_family == AF_INET || mpte->mpte_dst.sa_family == 0) &&
-                   !(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4) &&
-                   ifnet_get_nat64prefix(ifp, NULL) == ENOENT) {
-                       mptcp_ask_for_nat64(ifp);
+               dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
+               if (dst && dst->sa_family == AF_INET &&
+                   has_v6 && !has_nat64 && !has_v4) {
+                       if (found_slot) {
+                               mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+                               mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+                               mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+                               mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
+                       }
                        goto out;
                }
 
-               if (found_empty == 0) {
+               if (found_slot == 0) {
                        int new_size = mpte->mpte_itfinfo_size * 2;
                        struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);
 
                        if (info == NULL) {
-                               mptcplog((LOG_ERR, "%s malloc failed for %u\n", __func__, new_size),
-                                        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+                               os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
+                                   __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
                                goto out;
                        }
 
                        memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));
 
-                       if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE)
+                       if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
                                _FREE(mpte->mpte_itfinfo, M_TEMP);
+                       }
 
                        /* We allocated a new one, thus the first must be empty */
-                       empty_index = mpte->mpte_itfinfo_size;
+                       slot_index = mpte->mpte_itfinfo_size;
 
                        mpte->mpte_itfinfo = info;
                        mpte->mpte_itfinfo_size = new_size;
-
-                       mptcplog((LOG_DEBUG, "%s Needed to realloc to %u\n", __func__, new_size),
-                           MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
                }
 
-               VERIFY(empty_index >= 0 && empty_index < (int)mpte->mpte_itfinfo_size);
-               mpte->mpte_itfinfo[empty_index].ifindex = ifindex;
-               mpte->mpte_itfinfo[empty_index].has_v4_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
-               mpte->mpte_itfinfo[empty_index].has_v6_conn = !!(flow->necp_flow_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
+               VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
+               mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
+               mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
+               mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
+               mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
 
                mptcp_sched_create_subflows(mpte);
        }
 
 out:
-       if (locked)
-               mpte_unlock(mpte);
+       if (locked) {
+               socket_unlock(mp_so, 1);
+       }
 }
 
 void
@@ -1443,7 +1564,7 @@ mptcp_set_restrictions(struct socket *mp_so)
        struct mptses *mpte = mpsotompte(mp_so);
        uint32_t i;
 
-       mpte_lock_assert_held(mpte);
+       socket_lock_assert_owned(mp_so);
 
        ifnet_head_lock_shared();
 
@@ -1452,20 +1573,30 @@ mptcp_set_restrictions(struct socket *mp_so)
                uint32_t ifindex = info->ifindex;
                struct ifnet *ifp;
 
-               if (ifindex == IFSCOPE_NONE)
+               if (ifindex == IFSCOPE_NONE) {
                        continue;
+               }
 
                ifp = ifindex2ifnet[ifindex];
+               if (ifp == NULL) {
+                       continue;
+               }
 
                if (IFNET_IS_EXPENSIVE(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
                        info->ifindex = IFSCOPE_NONE;
+               }
+
+               if (IFNET_IS_CONSTRAINED(ifp) &&
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
+                       info->ifindex = IFSCOPE_NONE;
+               }
 
                if (IFNET_IS_CELLULAR(ifp) &&
-                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR))
+                   (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
                        info->ifindex = IFSCOPE_NONE;
+               }
        }
 
        ifnet_head_done();
 }
-