X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/39236c6e673c41db228275375ab7fdb0f837b292..7e41aa883dd258f888d0470250eead40a53ef1f5:/bsd/netinet/mptcp_usrreq.c diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index d4ea19cd1..e0b8fbcbc 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2013 Apple Inc. All rights reserved. + * Copyright (c) 2012-2015 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -56,24 +56,25 @@ static int mptcp_usr_detach(struct socket *); static int mptcp_attach(struct socket *, struct proc *); static int mptcp_detach(struct socket *, struct mppcb *); static int mptcp_connectx(struct mptses *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t); static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **, - struct sockaddr_list **, struct proc *, uint32_t, associd_t, connid_t *, - uint32_t, void *, uint32_t); + struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t, + sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t); -static int mptcp_getconnids(struct mptses *, associd_t, uint32_t *, +static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *, user_addr_t); -static int mptcp_getconninfo(struct mptses *, connid_t *, uint32_t *, +static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *, uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *, uint32_t *, user_addr_t, uint32_t *); static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); -static int mptcp_disconnectx(struct mptses *, associd_t, connid_t); -static int 
mptcp_usr_disconnectx(struct socket *, associd_t, connid_t); +static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t); +static int mptcp_usr_disconnect(struct socket *); +static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t); static struct mptses *mptcp_usrclosed(struct mptses *); -static int mptcp_usr_peeloff(struct socket *, associd_t, struct socket **); -static int mptcp_peeloff(struct mptses *, associd_t, struct socket **); +static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **); +static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **); static int mptcp_usr_rcvd(struct socket *, int); static int mptcp_usr_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); @@ -87,12 +88,14 @@ static int mptcp_setopt(struct mptses *, struct sockopt *); static int mptcp_getopt(struct mptses *, struct sockopt *); static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *); static void mptcp_connorder_helper(struct mptsub *mpts); +static int mptcp_usr_preconnect(struct socket *so); struct pr_usrreqs mptcp_usrreqs = { .pru_attach = mptcp_usr_attach, .pru_connectx = mptcp_usr_connectx, .pru_control = mptcp_usr_control, .pru_detach = mptcp_usr_detach, + .pru_disconnect = mptcp_usr_disconnect, .pru_disconnectx = mptcp_usr_disconnectx, .pru_peeloff = mptcp_usr_peeloff, .pru_rcvd = mptcp_usr_rcvd, @@ -101,8 +104,21 @@ struct pr_usrreqs mptcp_usrreqs = { .pru_sosend = mptcp_usr_sosend, .pru_soreceive = soreceive, .pru_socheckopt = mptcp_usr_socheckopt, + .pru_preconnect = mptcp_usr_preconnect, }; +/* + * Sysctl for testing and tuning the send-buffer space that MPTCP reserves for connectx-with-data (preconnect) writes. + * Mirrors tcp_preconnect_sbspace for now. 
+ */ +#define MPTCP_PRECONNECT_SBSZ_MAX 1460 +#define MPTCP_PRECONNECT_SBSZ_MIN (TCP_MSS) +#define MPTCP_PRECONNECT_SBSZ_DEF (TCP6_MSS) +static int mptcp_preconnect_sbspace = MPTCP_PRECONNECT_SBSZ_DEF; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mp_preconn_sbsz, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_preconnect_sbspace, 0, "Maximum preconnect space"); + + /* * Attaches an MPTCP control block to a socket. */ @@ -152,9 +168,9 @@ static int mptcp_attach(struct socket *mp_so, struct proc *p) { #pragma unused(p) - struct mptses *mpte; - struct mptcb *mp_tp; - struct mppcb *mpp; + struct mptses *mpte = NULL; + struct mptcb *mp_tp = NULL; + struct mppcb *mpp = NULL; int error = 0; if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) { @@ -163,6 +179,11 @@ mptcp_attach(struct socket *mp_so, struct proc *p) goto out; } + if (mp_so->so_snd.sb_preconn_hiwat == 0) { + soreserve_preconnect(mp_so, imin(MPTCP_PRECONNECT_SBSZ_MAX, + imax(mptcp_preconnect_sbspace, MPTCP_PRECONNECT_SBSZ_MIN))); + } + /* * MPTCP socket buffers cannot be compressed, due to the * fact that each mbuf chained via m_next is a M_PKTHDR @@ -175,25 +196,16 @@ mptcp_attach(struct socket *mp_so, struct proc *p) mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE; mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE; - if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) + if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) { goto out; + } mpp = sotomppcb(mp_so); VERIFY(mpp != NULL); - - mpte = mptcp_sescreate(mp_so, mpp); - if (mpte == NULL) { - mp_pcbdetach(mpp); - error = ENOBUFS; - goto out; - } + mpte = (struct mptses *)mpp->mpp_pcbe; + VERIFY(mpte != NULL); mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); - - MPT_LOCK(mp_tp); - mp_tp->mpt_state = MPTCPS_CLOSED; - MPT_UNLOCK(mp_tp); - out: return (error); } @@ -215,7 +227,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) mppi = mpp->mpp_pcbinfo; VERIFY(mppi != NULL); - mpte = &((struct mpp_mtp *)mpp)->mpp_ses; + __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses); 
VERIFY(mpte->mpte_mppcb == mpp); MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ @@ -228,7 +240,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) */ mp_pcbdetach(mpp); - (void) mptcp_disconnectx(mpte, ASSOCID_ALL, CONNID_ALL); + (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL); /* * XXX: adi@apple.com @@ -248,7 +260,7 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) static int mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, uint32_t arglen) { #pragma unused(p, aid, flags, arg, arglen) @@ -262,9 +274,12 @@ mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl, VERIFY(dst_sl != NULL && *dst_sl != NULL); VERIFY(pcid != NULL); - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); - DTRACE_MPTCP3(connectx, struct mptses *, mpte, associd_t, aid, + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + + DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid, struct socket *, mp_so); mpts = mptcp_subflow_alloc(M_WAITOK); @@ -309,12 +324,14 @@ out: static int mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl, struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope, - associd_t aid, connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, + uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written) { -#pragma unused(arg, arglen) struct mppcb *mpp = sotomppcb(mp_so); - struct mptses *mpte; + struct mptses *mpte = NULL; + struct mptcb *mp_tp = NULL; + user_ssize_t datalen; + int error = 0; if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { @@ -324,8 
+341,43 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl, mpte = mptompte(mpp); VERIFY(mpte != NULL); + mp_tp = mpte->mpte_mptcb; + VERIFY(mp_tp != NULL); + + if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { + error = EINVAL; + goto out; + } + error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope, aid, pcid, flags, arg, arglen); + + /* If there is data, copy it */ + if (auio != NULL) { + datalen = uio_resid(auio); + socket_unlock(mp_so, 0); + error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL, + (uio_t) auio, NULL, NULL, 0); + /* check if this can be supported with fast Join also. XXX */ + if (error == 0 || error == EWOULDBLOCK) + *bytes_written = datalen - uio_resid(auio); + + if (error == EWOULDBLOCK) + error = EINPROGRESS; + + socket_lock(mp_so, 0); + MPT_LOCK(mp_tp); + if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) { + *bytes_written = datalen - uio_resid(auio); + /* + * Override errors like EPIPE that occur as + * a result of doing TFO during TCP fallback. + */ + error = EPROTO; + } + MPT_UNLOCK(mp_tp); + } + out: return (error); } @@ -339,7 +391,7 @@ mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ /* MPTCP has at most 1 association */ - *cnt = (mpte->mpte_associd != ASSOCID_ANY) ? 1 : 0; + *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0; /* just asking how many there are? */ if (aidp == USER_ADDR_NULL) @@ -353,7 +405,7 @@ mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain. 
*/ static int -mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, +mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt, user_addr_t cidp) { struct mptsub *mpts; @@ -361,7 +413,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && aid != mpte->mpte_associd) return (EINVAL); @@ -386,7 +438,7 @@ mptcp_getconnids(struct mptses *mpte, associd_t aid, uint32_t *cnt, * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain. */ static int -mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, +mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags, uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data, uint32_t *aux_len) @@ -399,15 +451,15 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (*cid == CONNID_ALL) + if (*cid == SAE_CONNID_ALL) return (EINVAL); TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == *cid || *cid == CONNID_ANY) + if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY) break; } if (mpts == NULL) - return ((*cid == CONNID_ANY) ? ENXIO : EINVAL); + return ((*cid == SAE_CONNID_ANY) ? 
ENXIO : EINVAL); MPTS_LOCK(mpts); ifp = mpts->mpts_outif; @@ -481,6 +533,11 @@ mptcp_getconninfo(struct mptses *mpte, connid_t *cid, uint32_t *flags, goto out; } } + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d flags %x \n", + __func__, mpts->mpts_connid, mpts->mpts_flags), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + out: MPTS_UNLOCK(mpts); return (error); @@ -490,15 +547,17 @@ out: * Handle SIOCSCONNORDER */ int -mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank) +mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank) { struct mptsub *mpts, *mpts1; int error = 0; MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mptcplog((LOG_DEBUG, "%s: cid %d rank %d \n", __func__, cid, rank)); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: cid %d rank %d \n", __func__, cid, rank), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); - if (cid == CONNID_ANY || cid == CONNID_ALL) { + if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { error = EINVAL; goto out; } @@ -551,7 +610,7 @@ mptcp_setconnorder(struct mptses *mpte, connid_t cid, uint32_t rank) if (mpts1 != mpts && (mpts1->mpts_flags & MPTSF_PREFERRED)) { mpts1->mpts_flags &= ~MPTSF_PREFERRED; - if (mpte->mpte_nummpcapflows > 1) + if (mpte->mpte_nummpcapflows > 1) mptcp_connorder_helper(mpts1); } else if (mpts1 == mpts) { mpts1->mpts_rank = 1; @@ -583,8 +642,7 @@ mptcp_connorder_helper(struct mptsub *mpts) tp->t_mpflags &= ~TMPF_BACKUP_PATH; else tp->t_mpflags |= TMPF_BACKUP_PATH; - mptcplog((LOG_DEBUG, "%s cid %d flags %x", __func__, - mpts->mpts_connid, mpts->mpts_flags)); + socket_unlock(so, 0); } @@ -593,7 +651,7 @@ mptcp_connorder_helper(struct mptsub *mpts) * Handle SIOCSGONNORDER */ int -mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank) +mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank) { struct mptsub *mpts; int error = 0; @@ -602,7 +660,7 @@ mptcp_getconnorder(struct mptses *mpte, connid_t cid, uint32_t *rank) VERIFY(rank != 
NULL); *rank = 0; - if (cid == CONNID_ANY || cid == CONNID_ALL) { + if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { error = EINVAL; goto out; } @@ -743,7 +801,7 @@ out: * connection while keeping the MPTCP-level connection (association). */ static int -mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) +mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid) { struct mptsub *mpts; struct socket *mp_so; @@ -755,16 +813,19 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) mp_so = mpte->mpte_mppcb->mpp_socket; mp_tp = mpte->mpte_mptcb; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx aid %d cid %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid)); - DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, associd_t, aid, - connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); - VERIFY(aid == ASSOCID_ANY || aid == ASSOCID_ALL || + DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid, + sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); + + VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL || aid == mpte->mpte_associd); /* terminate the association? 
*/ if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { /* if we're not detached, go thru socket state checks */ if (!(mp_so->so_flags & SOF_PCBCLEARING)) { if (!(mp_so->so_state & (SS_ISCONNECTED| @@ -794,10 +855,27 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) (void) mptcp_output(mpte); } } else { + bool disconnect_embryonic_subflows = false; + struct socket *so = NULL; + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { if (mpts->mpts_connid != cid) continue; + MPTS_LOCK(mpts); + /* + * Check if disconnected subflow is the one used + * to initiate MPTCP connection. + * If it is and the connection is not yet join ready + * disconnect all other subflows. + */ + so = mpts->mpts_socket; + if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && + so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) { + disconnect_embryonic_subflows = true; + } + + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, FALSE); MPTS_UNLOCK(mpts); break; @@ -807,6 +885,16 @@ mptcp_disconnectx(struct mptses *mpte, associd_t aid, connid_t cid) error = EINVAL; goto out; } + + if (disconnect_embryonic_subflows) { + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid == cid) + continue; + MPTS_LOCK(mpts); + mptcp_subflow_disconnect(mpte, mpts, TRUE); + MPTS_UNLOCK(mpts); + } + } } if (error == 0) @@ -822,11 +910,23 @@ out: return (error); }
+/*
+ * User-protocol pru_disconnect callback.
+ * Plain disconnect on the MP socket: forwards to mptcp_usr_disconnectx()
+ * with SAE_ASSOCID_ALL and SAE_CONNID_ALL and returns its result.
+ */
+static int
+mptcp_usr_disconnect(struct socket *mp_so)
+{
+	return (mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL,
+	    SAE_CONNID_ALL));
+}
+
 /* * User-protocol pru_disconnectx callback. 
*/ static int -mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid) +mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid) { struct mppcb *mpp = sotomppcb(mp_so); struct mptses *mpte; @@ -840,7 +940,7 @@ mptcp_usr_disconnectx(struct socket *mp_so, associd_t aid, connid_t cid) VERIFY(mpte != NULL); MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL && + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && aid != mpte->mpte_associd) { error = EINVAL; goto out; @@ -868,31 +968,27 @@ mptcp_usrclosed(struct mptses *mpte) MPT_LOCK(mp_tp); mptcp_close_fsm(mp_tp, MPCE_CLOSE); - if (mp_tp->mpt_state == TCPS_CLOSED) { + if (mp_tp->mpt_state == MPTCPS_CLOSED) { mpte = mptcp_close(mpte, mp_tp); MPT_UNLOCK(mp_tp); } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) { MPT_UNLOCK(mp_tp); soisdisconnected(mp_so); + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; + MPTS_UNLOCK(mpts); + } } else { - mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */ MPT_UNLOCK(mp_tp); TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { MPTS_LOCK(mpts); + mpts->mpts_flags |= MPTSF_USER_DISCONNECT; mptcp_subflow_disconnect(mpte, mpts, FALSE); MPTS_UNLOCK(mpts); } } - /* - * XXX: adi@apple.com - * - * Do we need to handle time wait specially here? We need to handle - * the case where MPTCP has been established, but we have not usable - * subflow to use. Do we want to wait a while before forcibly - * tearing this MPTCP down, in case we have one or more subflows - * that are flow controlled? - */ return (mpte); } @@ -901,7 +997,7 @@ mptcp_usrclosed(struct mptses *mpte) * User-protocol pru_peeloff callback. 
*/ static int -mptcp_usr_peeloff(struct socket *mp_so, associd_t aid, struct socket **psop) +mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop) { struct mppcb *mpp = sotomppcb(mp_so); struct mptses *mpte; @@ -928,7 +1024,7 @@ out: * yet associated (MPTCP-level connection has not been established.) */ static int -mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) +mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop) { struct socket *so = NULL, *mp_so; struct mptsub *mpts; @@ -940,16 +1036,16 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) VERIFY(psop != NULL); *psop = NULL; - DTRACE_MPTCP3(peeloff, struct mptses *, mpte, associd_t, aid, + DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid, struct socket *, mp_so); /* peeloff cannot happen after an association is established */ - if (mpte->mpte_associd != ASSOCID_ANY) { + if (mpte->mpte_associd != SAE_ASSOCID_ANY) { error = EINVAL; goto out; } - if (aid != ASSOCID_ANY && aid != ASSOCID_ALL) { + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) { error = EINVAL; goto out; } @@ -992,8 +1088,11 @@ mptcp_peeloff(struct mptses *mpte, associd_t aid, struct socket **psop) } *psop = so; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so))); + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + out: return (error); } @@ -1055,7 +1154,8 @@ mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m, mpte = mptompte(mpp); VERIFY(mpte != NULL); - if (!(mp_so->so_state & SS_ISCONNECTED)) { + if (!(mp_so->so_state & SS_ISCONNECTED) && + (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA))) { error = ENOTCONN; goto out; } @@ -1065,15 +1165,22 @@ mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m, (void) sbappendstream(&mp_so->so_snd, m); m = NULL; - if 
(mpte != NULL) { - /* - * XXX: adi@apple.com - * - * PRUS_MORETOCOME could be set, but we don't check it now. - */ - error = mptcp_output(mpte); + /* + * XXX: adi@apple.com + * + * PRUS_MORETOCOME could be set, but we don't check it now. + */ + error = mptcp_output(mpte); + if (error != 0) + goto out; + + if (mp_so->so_state & SS_ISCONNECTING) { + if (mp_so->so_state & SS_NBIO) + error = EWOULDBLOCK; + else + error = sbwait(&mp_so->so_snd); } - + out: if (error) { if (m != NULL) @@ -1324,6 +1431,10 @@ out: if (control != NULL) m_freem(control); + /* clear SOF1_PRECONNECT_DATA after one write */ + if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) + mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + return (error); } @@ -1398,6 +1509,8 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) case SO_RECV_ANYIF: /* MP + subflow */ case SO_RESTRICTIONS: /* MP + subflow */ case SO_FLUSH: /* MP + subflow */ + case SO_MPTCP_FASTJOIN: /* MP + subflow */ + case SO_NOWAKEFROMSLEEP: /* * Tell the caller that these options are to be processed; * these will also be recorded later by mptcp_setopt(). 
@@ -1574,6 +1687,8 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case SO_PRIVILEGED_TRAFFIC_CLASS: case SO_RECV_ANYIF: case SO_RESTRICTIONS: + case SO_NOWAKEFROMSLEEP: + case SO_MPTCP_FASTJOIN: /* record it */ break; case SO_FLUSH: @@ -1596,6 +1711,26 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case PERSIST_TIMEOUT: /* eligible; record it */ break; + case TCP_NOTSENT_LOWAT: + /* record at MPTCP level */ + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + goto out; + if (optval < 0) { + error = EINVAL; + goto out; + } else { + if (optval == 0) { + mp_so->so_flags &= ~SOF_NOTSENT_LOWAT; + error = mptcp_set_notsent_lowat(mpte,0); + } else { + mp_so->so_flags |= SOF_NOTSENT_LOWAT; + error = mptcp_set_notsent_lowat(mpte, + optval); + } + } + goto out; default: /* not eligible */ error = ENOPROTOOPT; @@ -1615,13 +1750,15 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) if (mpo == NULL) { error = ENOBUFS; } else { - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s " "val %d %s\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, optname, buf, sizeof (buf)), optval, (mpo->mpo_flags & MPOF_ATTACHED) ? - "updated" : "recorded")); + "updated" : "recorded"), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); /* initialize or update, as needed */ mpo->mpo_intval = optval; @@ -1657,16 +1794,20 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) } out: if (error == 0 && mpo != NULL) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d set %s\n", + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s val %d set %s\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, optname, buf, sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ? 
- "pending" : "successful")); + "pending" : "successful"), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } else if (error != 0) { - mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s can't be issued " + mptcplog((LOG_ERR, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s can't be issued " "error %d\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, - optname, buf, sizeof (buf)), error)); + optname, buf, sizeof (buf)), error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } return (error); } @@ -1701,6 +1842,7 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: case PERSIST_TIMEOUT: + case TCP_NOTSENT_LOWAT: /* eligible; get the default value just in case */ error = mptcp_default_tcp_optval(mpte, sopt, &optval); break; @@ -1710,6 +1852,15 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) break; } + switch (sopt->sopt_name) { + case TCP_NOTSENT_LOWAT: + if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT) + optval = mptcp_get_notsent_lowat(mpte); + else + optval = 0; + goto out; + } + /* * Search for a previously-issued TCP level socket option and * return the recorded option value. 
This assumes that the @@ -1752,6 +1903,7 @@ mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval) case TCP_KEEPCNT: case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: + case TCP_NOTSENT_LOWAT: *optval = 0; break; @@ -1792,10 +1944,12 @@ mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt) /* we only handle socket and TCP-level socket options for MPTCP */ if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) { char buf[32]; - mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s level not " + mptcplog((LOG_DEBUG, "MPTCP Socket: " + "%s: mp_so 0x%llx sopt %s level not " "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(sopt->sopt_level, - sopt->sopt_name, buf, sizeof (buf)))); + sopt->sopt_name, buf, sizeof (buf))), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); error = EINVAL; goto out; } @@ -1922,6 +2076,12 @@ mptcp_sopt2str(int level, int optname, char *dst, int size) case SO_RECV_ANYIF: o = "SO_RECV_ANYIF"; break; + case SO_NOWAKEFROMSLEEP: + o = "SO_NOWAKEFROMSLEEP"; + break; + case SO_MPTCP_FASTJOIN: + o = "SO_MPTCP_FASTJOIN"; + break; } break; case IPPROTO_TCP: @@ -1952,3 +2112,56 @@ mptcp_sopt2str(int level, int optname, char *dst, int size) (void) snprintf(dst, size, "<%s,%s>", l, o); return (dst); }
+
+/*
+ * User-protocol pru_preconnect callback.
+ *
+ * Picks a subflow via mptcp_get_subflow(), clears the TFO-request markers
+ * (MPTSF_TFO_REQD on the subflow, TMPF_TFO_REQUEST on its tcpcb) and calls
+ * tcp_output() on the subflow socket.  SOF1_PRECONNECT_DATA is cleared on
+ * the MP socket once tcp_output() has been attempted.
+ *
+ * Returns EINVAL if the PCB is gone or no subflow is available, otherwise
+ * the result of tcp_output().
+ */
+static int
+mptcp_usr_preconnect(struct socket *mp_so)
+{
+	struct mptsub *mpts = NULL;
+	struct mppcb *mpp = sotomppcb(mp_so);
+	struct mptses *mpte;
+	struct socket *so;
+	struct tcpcb *tp = NULL;
+	int error = 0;
+
+	/* mirror the PCB sanity check done by mptcp_usr_connectx() */
+	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD)
+		return (EINVAL);
+
+	mpte = mptompte(mpp);
+	VERIFY(mpte != NULL);
+	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
+
+	mpts = mptcp_get_subflow(mpte, NULL, NULL);
+	if (mpts == NULL) {
+		mptcplog((LOG_ERR, "MPTCP Socket: "
+		    "%s: mp_so 0x%llx invalid preconnect\n", __func__,
+		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
+		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
+		return (EINVAL);
+	}
+
+	MPTS_LOCK(mpts);
+	mpts->mpts_flags &= ~MPTSF_TFO_REQD;
+	so = mpts->mpts_socket;
+	socket_lock(so, 0);
+	/* sototcpcb(so) is intotcpcb(sotoinpcb(so)); look it up once */
+	tp = intotcpcb(sotoinpcb(so));
+	tp->t_mpflags &= ~TMPF_TFO_REQUEST;
+	error = tcp_output(tp);
+	socket_unlock(so, 0);
+	MPTS_UNLOCK(mpts);
+
+	mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
+	return (error);
+}