X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4d15aeb193b2c68f1d38666c317f8d3734f5f083..5ba3f43ea354af8ad55bea84372a2bc834d8757c:/bsd/netinet/mptcp_usrreq.c diff --git a/bsd/netinet/mptcp_usrreq.c b/bsd/netinet/mptcp_usrreq.c index cc84cb40e..1a5589901 100644 --- a/bsd/netinet/mptcp_usrreq.c +++ b/bsd/netinet/mptcp_usrreq.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015 Apple Inc. All rights reserved. + * Copyright (c) 2012-2017 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include #include @@ -54,10 +56,6 @@ static int mptcp_usr_attach(struct socket *, int, struct proc *); static int mptcp_usr_detach(struct socket *); static int mptcp_attach(struct socket *, struct proc *); -static int mptcp_detach(struct socket *, struct mppcb *); -static int mptcp_connectx(struct mptses *, struct sockaddr *, - struct sockaddr *, struct proc *, uint32_t, sae_associd_t, - sae_connid_t *, uint32_t, void *, uint32_t); static int mptcp_usr_connectx(struct socket *, struct sockaddr *, struct sockaddr *, struct proc *, uint32_t, sae_associd_t, sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *); @@ -69,25 +67,20 @@ static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *, uint32_t *, user_addr_t, uint32_t *); static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *, struct proc *); -static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t); +static int mptcp_disconnect(struct mptses *); static int mptcp_usr_disconnect(struct socket *); static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t); static struct mptses *mptcp_usrclosed(struct mptses *); -static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **); -static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **); static int mptcp_usr_rcvd(struct socket *, int); static int mptcp_usr_send(struct socket *, int, struct mbuf *, struct sockaddr *, struct mbuf *, struct proc *); static int mptcp_usr_shutdown(struct socket *); -static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **); static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *, struct mbuf *, struct mbuf *, int); static int mptcp_usr_socheckopt(struct socket *, struct sockopt *); -static int mptcp_setopt_apply(struct mptses *, struct mptopt *); static int mptcp_setopt(struct mptses *, struct sockopt *); static int mptcp_getopt(struct mptses *, struct sockopt *); static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *); -static void mptcp_connorder_helper(struct mptsub *mpts); static int mptcp_usr_preconnect(struct socket *so); struct pr_usrreqs mptcp_usrreqs = { @@ -97,16 +90,29 @@ struct pr_usrreqs mptcp_usrreqs = { .pru_detach = mptcp_usr_detach, .pru_disconnect = mptcp_usr_disconnect, .pru_disconnectx = mptcp_usr_disconnectx, - .pru_peeloff = mptcp_usr_peeloff, + .pru_peeraddr = mp_getpeeraddr, .pru_rcvd = mptcp_usr_rcvd, .pru_send = mptcp_usr_send, .pru_shutdown = mptcp_usr_shutdown, + .pru_sockaddr = mp_getsockaddr, .pru_sosend = mptcp_usr_sosend, .pru_soreceive = soreceive, .pru_socheckopt = mptcp_usr_socheckopt, .pru_preconnect = mptcp_usr_preconnect, }; + +#if (DEVELOPMENT || DEBUG) +static int mptcp_disable_entitlements = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, disable_entitlements, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_disable_entitlements, 0, "Disable Multipath TCP Entitlement Checking"); +#endif + +int mptcp_developer_mode = 0; +SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED, + &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode"); + + /* * Attaches an MPTCP control block to a socket. */ @@ -116,7 +122,7 @@ mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p) #pragma unused(proto) int error; - VERIFY(sotomppcb(mp_so) == NULL); + VERIFY(mpsotomppcb(mp_so) == NULL); error = mptcp_attach(mp_so, p); if (error != 0) @@ -138,14 +144,27 @@ out: static int mptcp_usr_detach(struct socket *mp_so) { - struct mppcb *mpp = sotomppcb(mp_so); - int error = 0; + struct mptses *mpte = mpsotompte(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); - VERIFY(mpp != NULL); - VERIFY(mpp->mpp_socket != NULL); + if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + mptcplog((LOG_ERR, "%s state: %d\n", __func__, + mpp ? mpp->mpp_state : -1), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + return (EINVAL); + } - error = mptcp_detach(mp_so, mpp); - return (error); + /* + * We are done with this MPTCP socket (it has been closed); + * trigger all subflows to be disconnected, if not already, + * by initiating the PCB detach sequence (SOF_PCBCLEARING + * will be set.) + */ + mp_pcbdetach(mp_so); + + mptcp_disconnect(mpte); + + return (0); } /* @@ -162,7 +181,7 @@ mptcp_attach(struct socket *mp_so, struct proc *p) int error = 0; if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) { - error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX); + error = soreserve(mp_so, tcp_sendspace, tcp_recvspace); if (error != 0) goto out; } @@ -171,6 +190,11 @@ mptcp_attach(struct socket *mp_so, struct proc *p) soreserve_preconnect(mp_so, 2048); } + if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) + mp_so->so_rcv.sb_flags |= SB_AUTOSIZE; + if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) + mp_so->so_snd.sb_flags |= SB_AUTOSIZE; + /* * MPTCP socket buffers cannot be compressed, due to the * fact that each mbuf chained via m_next is a M_PKTHDR @@ -179,15 +203,11 @@ mptcp_attach(struct socket *mp_so, struct proc *p) mp_so->so_snd.sb_flags |= SB_NOCOMPRESS; mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS; - /* Disable socket buffer auto-tuning. */ - mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE; - mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE; - if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) { goto out; } - mpp = sotomppcb(mp_so); + mpp = mpsotomppcb(mp_so); VERIFY(mpp != NULL); mpte = (struct mptses *)mpp->mpp_pcbe; VERIFY(mpte != NULL); @@ -197,43 +217,41 @@ out: return (error); } -/* - * Called when the socket layer loses its final reference to the socket; - * at this point, there is only one case in which we will keep things - * around: time wait. - */ static int -mptcp_detach(struct socket *mp_so, struct mppcb *mpp) +mptcp_entitlement_check(struct socket *mp_so) { - struct mptses *mpte; - struct mppcbinfo *mppi; + struct mptses *mpte = mpsotompte(mp_so); - VERIFY(mp_so->so_pcb == mpp); - VERIFY(mpp->mpp_socket == mp_so); - - mppi = mpp->mpp_pcbinfo; - VERIFY(mppi != NULL); - - __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses); - VERIFY(mpte->mpte_mppcb == mpp); + if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE) == 0) { + /* + * This means the app has the extended entitlement. Thus, + * it's a first party app and can run without restrictions. + */ + mpte->mpte_flags |= MPTE_FIRSTPARTY; + goto grant; + } - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ +#if (DEVELOPMENT || DEBUG) + if (mptcp_disable_entitlements) + goto grant; +#endif - /* - * We are done with this MPTCP socket (it has been closed); - * trigger all subflows to be disconnected, if not already, - * by initiating the PCB detach sequence (SOF_PCBCLEARING - * will be set.) - */ - mp_pcbdetach(mpp); + if (soopt_cred_check(mp_so, PRIV_NET_PRIVILEGED_MULTIPATH, TRUE)) { + mptcplog((LOG_NOTICE, "%s Multipath Capability needed\n", __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + return (-1); + } - (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL); + if (mpte->mpte_svctype > MPTCP_SVCTYPE_INTERACTIVE && + mptcp_developer_mode == 0) { + mptcplog((LOG_NOTICE, "%s need to set allow_aggregate sysctl\n", + __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + return (-1); + } - /* - * XXX: adi@apple.com - * - * Here, we would want to handle time wait state. - */ +grant: + mptcplog((LOG_NOTICE, "%s entitlement granted for %u\n", __func__, mpte->mpte_svctype), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); return (0); } @@ -246,74 +264,20 @@ mptcp_detach(struct socket *mp_so, struct mppcb *mpp) */ static int mptcp_connectx(struct mptses *mpte, struct sockaddr *src, - struct sockaddr *dst, struct proc *p, uint32_t ifscope, - sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, - uint32_t arglen) + struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid) { -#pragma unused(p, aid, flags, arg, arglen) - struct mptsub *mpts; - struct socket *mp_so; + struct socket *mp_so = mptetoso(mpte); int error = 0; - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; - VERIFY(dst != NULL); VERIFY(pcid != NULL); - mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: mp_so 0x%llx\n", __func__, + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); + DTRACE_MPTCP2(connectx, struct mptses *, mpte, struct socket *, mp_so); - DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid, - struct socket *, mp_so); - - mpts = mptcp_subflow_alloc(M_WAITOK); - if (mpts == NULL) { - error = ENOBUFS; - goto out; - } - MPTS_ADDREF(mpts); /* for this routine */ - - if (src != NULL) { - int len = src->sa_len; - - MALLOC(mpts->mpts_src, struct sockaddr *, len, M_SONAME, - M_WAITOK | M_ZERO); - if (mpts->mpts_src == NULL) { - error = ENOBUFS; - goto out; - } - bcopy(src, mpts->mpts_src, len); - } - - MALLOC(mpts->mpts_dst, struct sockaddr *, dst->sa_len, M_SONAME, - M_WAITOK | M_ZERO); - if (mpts->mpts_dst == NULL) { - error = ENOBUFS; - goto out; - } - bcopy(dst, mpts->mpts_dst, dst->sa_len); - - error = mptcp_subflow_add(mpte, mpts, p, ifscope); - if (error == 0 && pcid != NULL) - *pcid = mpts->mpts_connid; - -out: - if (mpts != NULL) { - if ((error != 0) && (error != EWOULDBLOCK)) { - MPTS_LOCK(mpts); - if (mpts->mpts_flags & MPTSF_ATTACHED) { - MPTS_UNLOCK(mpts); - MPTS_REMREF(mpts); - mptcp_subflow_del(mpte, mpts, TRUE); - return (error); - } - MPTS_UNLOCK(mpts); - } - MPTS_REMREF(mpts); - } + error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid); return (error); } @@ -327,30 +291,90 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src, sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg, uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written) { - struct mppcb *mpp = sotomppcb(mp_so); +#pragma unused(p, aid, flags, arg, arglen) + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte = NULL; struct mptcb *mp_tp = NULL; user_ssize_t datalen; - int error = 0; if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { + mptcplog((LOG_ERR, "%s state %d\n", __func__, + mpp ? mpp->mpp_state : -1), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); error = EINVAL; goto out; } mpte = mptompte(mpp); VERIFY(mpte != NULL); + mpte_lock_assert_held(mpte); mp_tp = mpte->mpte_mptcb; VERIFY(mp_tp != NULL); if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) { + mptcplog((LOG_ERR, "%s fell back to TCP\n", __func__), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); error = EINVAL; goto out; } - error = mptcp_connectx(mpte, src, dst, p, ifscope, - aid, pcid, flags, arg, arglen); + if (dst->sa_family == AF_INET && + dst->sa_len != sizeof(mpte->__mpte_dst_v4)) { + mptcplog((LOG_ERR, "%s IPv4 dst len %u\n", __func__, + dst->sa_len), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + error = EINVAL; + goto out; + } + + if (dst->sa_family == AF_INET6 && + dst->sa_len != sizeof(mpte->__mpte_dst_v6)) { + mptcplog((LOG_ERR, "%s IPv6 dst len %u\n", __func__, + dst->sa_len), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + error = EINVAL; + goto out; + } + + if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) { + if (mptcp_entitlement_check(mp_so) < 0) { + error = EPERM; + goto out; + } + + mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED; + } + + if ((mp_so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { + memcpy(&mpte->mpte_dst, dst, dst->sa_len); + } + + if (src) { + if (src->sa_family == AF_INET && + src->sa_len != sizeof(mpte->__mpte_src_v4)) { + mptcplog((LOG_ERR, "%s IPv4 src len %u\n", __func__, + src->sa_len), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + error = EINVAL; + goto out; + } + + if (src->sa_family == AF_INET6 && + src->sa_len != sizeof(mpte->__mpte_src_v6)) { + mptcplog((LOG_ERR, "%s IPv6 src len %u\n", __func__, + src->sa_len), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + error = EINVAL; + goto out; + } + + if ((mp_so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0) { + memcpy(&mpte->mpte_src, src, src->sa_len); + } + } + + error = mptcp_connectx(mpte, src, dst, ifscope, pcid); /* If there is data, copy it */ if (auio != NULL) { @@ -358,7 +382,7 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src, socket_unlock(mp_so, 0); error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL, (uio_t) auio, NULL, NULL, 0); - /* check if this can be supported with fast Join also. XXX */ + if (error == 0 || error == EWOULDBLOCK) *bytes_written = datalen - uio_resid(auio); @@ -366,16 +390,6 @@ mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src, error = EINPROGRESS; socket_lock(mp_so, 0); - MPT_LOCK(mp_tp); - if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) { - *bytes_written = datalen - uio_resid(auio); - /* - * Override errors like EPIPE that occur as - * a result of doing TFO during TCP fallback. - */ - error = EPROTO; - } - MPT_UNLOCK(mp_tp); } out: @@ -388,7 +402,7 @@ out: static int mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp) { - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ /* MPTCP has at most 1 association */ *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0; @@ -411,7 +425,7 @@ mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt, struct mptsub *mpts; int error = 0; - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && aid != mpte->mpte_associd) @@ -443,237 +457,132 @@ mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags, user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type, user_addr_t aux_data, uint32_t *aux_len) { -#pragma unused(aux_data) - struct ifnet *ifp = NULL; + struct socket *so; + struct inpcb *inp; struct mptsub *mpts; int error = 0; - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - - if (*cid == SAE_CONNID_ALL) - return (EINVAL); - - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY) - break; - } - if (mpts == NULL) - return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL); - - MPTS_LOCK(mpts); - ifp = mpts->mpts_outif; - *cid = mpts->mpts_connid; - *ifindex = ((ifp != NULL) ? ifp->if_index : 0); - *soerror = mpts->mpts_soerror; *flags = 0; - if (mpts->mpts_flags & MPTSF_CONNECTING) - *flags |= CIF_CONNECTING; - if (mpts->mpts_flags & MPTSF_CONNECTED) - *flags |= CIF_CONNECTED; - if (mpts->mpts_flags & MPTSF_DISCONNECTING) - *flags |= CIF_DISCONNECTING; - if (mpts->mpts_flags & MPTSF_DISCONNECTED) - *flags |= CIF_DISCONNECTED; - if (mpts->mpts_flags & MPTSF_BOUND_IF) - *flags |= CIF_BOUND_IF; - if (mpts->mpts_flags & MPTSF_BOUND_IP) - *flags |= CIF_BOUND_IP; - if (mpts->mpts_flags & MPTSF_BOUND_PORT) - *flags |= CIF_BOUND_PORT; - if (mpts->mpts_flags & MPTSF_PREFERRED) - *flags |= CIF_PREFERRED; - if (mpts->mpts_flags & MPTSF_MP_CAPABLE) - *flags |= CIF_MP_CAPABLE; - if (mpts->mpts_flags & MPTSF_MP_DEGRADED) - *flags |= CIF_MP_DEGRADED; - if (mpts->mpts_flags & MPTSF_MP_READY) - *flags |= CIF_MP_READY; - if (mpts->mpts_flags & MPTSF_ACTIVE) - *flags |= CIF_MP_ACTIVE; + *aux_type = 0; + *ifindex = 0; + *soerror = 0; + + if (*cid == SAE_CONNID_ALL) { + struct socket *mp_so = mptetoso(mpte); + struct mptcb *mp_tp = mpte->mpte_mptcb; + struct conninfo_multipathtcp mptcp_ci; + + if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) + return (EINVAL); + + if (mp_so->so_state & SS_ISCONNECTING) + *flags |= CIF_CONNECTING; + if (mp_so->so_state & SS_ISCONNECTED) + *flags |= CIF_CONNECTED; + if (mp_so->so_state & SS_ISDISCONNECTING) + *flags |= CIF_DISCONNECTING; + if (mp_so->so_state & SS_ISDISCONNECTED) + *flags |= CIF_DISCONNECTED; + if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) + *flags |= CIF_MP_CAPABLE; + if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) + *flags |= CIF_MP_DEGRADED; + + *src_len = 0; + *dst_len = 0; + + *aux_type = CIAUX_MPTCP; + *aux_len = sizeof(mptcp_ci); - VERIFY(mpts->mpts_src != NULL); - *src_len = mpts->mpts_src->sa_len; - if (src != USER_ADDR_NULL) { - error = copyout(mpts->mpts_src, src, mpts->mpts_src->sa_len); - if (error != 0) - goto out; - } + if (aux_data != USER_ADDR_NULL) { + unsigned long i = 0; + int initial_info_set = 0; - VERIFY(mpts->mpts_dst != NULL); - *dst_len = mpts->mpts_dst->sa_len; - if (dst != USER_ADDR_NULL) { - error = copyout(mpts->mpts_dst, dst, mpts->mpts_dst->sa_len); - if (error != 0) - goto out; - } + bzero(&mptcp_ci, sizeof (mptcp_ci)); + mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows; + mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches; - *aux_type = 0; - *aux_len = 0; - if (mpts->mpts_socket != NULL) { - struct conninfo_tcp tcp_ci; + VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats)); + memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats)); - *aux_type = CIAUX_TCP; - *aux_len = sizeof (tcp_ci); + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) + break; + mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid; - if (aux_data != USER_ADDR_NULL) { - struct socket *so = mpts->mpts_socket; - - VERIFY(SOCK_PROTO(so) == IPPROTO_TCP); - bzero(&tcp_ci, sizeof (tcp_ci)); - socket_lock(so, 0); - tcp_getconninfo(so, &tcp_ci); - socket_unlock(so, 0); - error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci)); - if (error != 0) - goto out; - } - } - mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: cid %d flags %x \n", - __func__, mpts->mpts_connid, mpts->mpts_flags), - MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + if (mpts->mpts_flags & MPTSF_INITIAL_SUB) { + inp = sotoinpcb(mpts->mpts_socket); -out: - MPTS_UNLOCK(mpts); - return (error); -} + mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes; + mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes; + initial_info_set = 1; + } -/* - * Handle SIOCSCONNORDER - */ -int -mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank) -{ - struct mptsub *mpts, *mpts1; - int error = 0; + mptcpstats_update(mptcp_ci.mptcpci_itfstats, mpts); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: cid %d rank %d \n", __func__, cid, rank), - MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + i++; + } - if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { - error = EINVAL; - goto out; - } + if (initial_info_set == 0) { + mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes; + mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes; + } - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == cid) - break; - } - if (mpts == NULL) { - error = ENXIO; - goto out; - } + if (mpte->mpte_flags & MPTE_FIRSTPARTY) + mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY; - if (rank == 0 || rank > 1) { - /* - * If rank is 0, determine whether this should be the - * primary or backup subflow, depending on what we have. - * - * Otherwise, if greater than 0, make it a backup flow. - */ - TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK(mpts1); - if (mpts1->mpts_flags & MPTSF_PREFERRED) { - MPTS_UNLOCK(mpts1); - break; + error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci)); + if (error != 0) { + mptcplog((LOG_ERR, "%s copyout failed: %d\n", + __func__, error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + return (error); } - MPTS_UNLOCK(mpts1); } - MPTS_LOCK(mpts); - mpts->mpts_flags &= ~MPTSF_PREFERRED; - mpts->mpts_rank = rank; - if (mpts1 != NULL && mpts != mpts1) { - /* preferred subflow found; set rank as necessary */ - if (rank == 0) - mpts->mpts_rank = (mpts1->mpts_rank + 1); - } else if (rank == 0) { - /* no preferred one found; promote this */ - rank = 1; - } - MPTS_UNLOCK(mpts); + return (0); } - if (rank == 1) { - /* - * If rank is 1, promote this subflow to be preferred. - */ - TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK(mpts1); - if (mpts1 != mpts && - (mpts1->mpts_flags & MPTSF_PREFERRED)) { - mpts1->mpts_flags &= ~MPTSF_PREFERRED; - if (mpte->mpte_nummpcapflows > 1) - mptcp_connorder_helper(mpts1); - } else if (mpts1 == mpts) { - mpts1->mpts_rank = 1; - if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) { - mpts1->mpts_flags |= MPTSF_PREFERRED; - if (mpte->mpte_nummpcapflows > 1) - mptcp_connorder_helper(mpts1); - } - } - MPTS_UNLOCK(mpts1); - } + TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { + if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY) + break; } + if (mpts == NULL) + return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL); -out: - return (error); -} - -static void -mptcp_connorder_helper(struct mptsub *mpts) -{ - struct socket *so = mpts->mpts_socket; - struct tcpcb *tp = NULL; - - socket_lock(so, 0); + so = mpts->mpts_socket; + inp = sotoinpcb(so); - tp = intotcpcb(sotoinpcb(so)); - tp->t_mpflags |= TMPF_SND_MPPRIO; - if (mpts->mpts_flags & MPTSF_PREFERRED) - tp->t_mpflags &= ~TMPF_BACKUP_PATH; + if (inp->inp_vflag & INP_IPV4) + error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex, + soerror, src, src_len, dst, dst_len, + aux_type, aux_data, aux_len); else - tp->t_mpflags |= TMPF_BACKUP_PATH; - - socket_unlock(so, 0); - -} - -/* - * Handle SIOCSGONNORDER - */ -int -mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank) -{ - struct mptsub *mpts; - int error = 0; - - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - VERIFY(rank != NULL); - *rank = 0; + error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex, + soerror, src, src_len, dst, dst_len, + aux_type, aux_data, aux_len); - if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { - error = EINVAL; - goto out; + if (error != 0) { + mptcplog((LOG_ERR, "%s error from in_getconninfo %d\n", + __func__, error), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + return (error); } - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == cid) - break; - } - if (mpts == NULL) { - error = ENXIO; - goto out; - } + if (mpts->mpts_flags & MPTSF_MP_CAPABLE) + *flags |= CIF_MP_CAPABLE; + if (mpts->mpts_flags & MPTSF_MP_DEGRADED) + *flags |= CIF_MP_DEGRADED; + if (mpts->mpts_flags & MPTSF_MP_READY) + *flags |= CIF_MP_READY; + if (mpts->mpts_flags & MPTSF_ACTIVE) + *flags |= CIF_MP_ACTIVE; - MPTS_LOCK(mpts); - *rank = mpts->mpts_rank; - MPTS_UNLOCK(mpts); -out: - return (error); + mptcplog((LOG_DEBUG, "%s: cid %d flags %x \n", __func__, + mpts->mpts_connid, mpts->mpts_flags), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE); + + return (0); } /* @@ -684,7 +593,7 @@ mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data, struct ifnet *ifp, struct proc *p) { #pragma unused(ifp, p) - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; int error = 0; @@ -695,7 +604,7 @@ mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data, mpte = mptompte(mpp); VERIFY(mpte != NULL); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ switch (cmd) { case SIOCGASSOCIDS32: { /* struct so_aidreq32 */ @@ -764,24 +673,6 @@ mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data, break; } - case SIOCSCONNORDER: { /* struct so_cordreq */ - struct so_cordreq cor; - bcopy(data, &cor, sizeof (cor)); - error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank); - if (error == 0) - bcopy(&cor, data, sizeof (cor)); - break; - } - - case SIOCGCONNORDER: { /* struct so_cordreq */ - struct so_cordreq cor; - bcopy(data, &cor, sizeof (cor)); - error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank); - if (error == 0) - bcopy(&cor, data, sizeof (cor)); - break; - } - default: error = EOPNOTSUPP; break; @@ -790,131 +681,66 @@ out: return (error); } -/* - * Initiate a disconnect. MPTCP-level disconnection is specified by - * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow - * connection while keeping the MPTCP-level connection (association). - */ static int -mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid) +mptcp_disconnect(struct mptses *mpte) { - struct mptsub *mpts; struct socket *mp_so; struct mptcb *mp_tp; int error = 0; - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; + mp_so = mptetoso(mpte); mp_tp = mpte->mpte_mptcb; mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error), + "%s: mp_so 0x%llx %d\n", __func__, + (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); - DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid, - sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp); - - VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL || - aid == mpte->mpte_associd); - - /* terminate the association? */ - if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) { - /* if we're not detached, go thru socket state checks */ - if (!(mp_so->so_flags & SOF_PCBCLEARING)) { - if (!(mp_so->so_state & (SS_ISCONNECTED| - SS_ISCONNECTING))) { - error = ENOTCONN; - goto out; - } - if (mp_so->so_state & SS_ISDISCONNECTING) { - error = EALREADY; - goto out; - } - } - MPT_LOCK(mp_tp); - mptcp_cancel_all_timers(mp_tp); - if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { - (void) mptcp_close(mpte, mp_tp); - MPT_UNLOCK(mp_tp); - } else if ((mp_so->so_options & SO_LINGER) && - mp_so->so_linger == 0) { - (void) mptcp_drop(mpte, mp_tp, 0); - MPT_UNLOCK(mp_tp); - } else { - MPT_UNLOCK(mp_tp); - soisdisconnecting(mp_so); - sbflush(&mp_so->so_rcv); - if (mptcp_usrclosed(mpte) != NULL) - (void) mptcp_output(mpte); - } - } else { - bool disconnect_embryonic_subflows = false; - struct socket *so = NULL; - - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid != cid) - continue; - - MPTS_LOCK(mpts); - /* - * Check if disconnected subflow is the one used - * to initiate MPTCP connection. - * If it is and the connection is not yet join ready - * disconnect all other subflows. - */ - so = mpts->mpts_socket; - if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && - so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) { - disconnect_embryonic_subflows = true; - } + DTRACE_MPTCP3(disconnectx, struct mptses *, mpte, + struct socket *, mp_so, struct mptcb *, mp_tp); - mpts->mpts_flags |= MPTSF_USER_DISCONNECT; - mptcp_subflow_disconnect(mpte, mpts, FALSE); - MPTS_UNLOCK(mpts); - break; - } - - if (mpts == NULL) { - error = EINVAL; + /* if we're not detached, go thru socket state checks */ + if (!(mp_so->so_flags & SOF_PCBCLEARING)) { + if (!(mp_so->so_state & (SS_ISCONNECTED| + SS_ISCONNECTING))) { + error = ENOTCONN; goto out; } - - if (disconnect_embryonic_subflows) { - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - if (mpts->mpts_connid == cid) - continue; - MPTS_LOCK(mpts); - mptcp_subflow_disconnect(mpte, mpts, TRUE); - MPTS_UNLOCK(mpts); - } + if (mp_so->so_state & SS_ISDISCONNECTING) { + error = EALREADY; + goto out; } } - if (error == 0) - mptcp_thread_signal(mpte); - - if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) == - (SS_CANTRCVMORE | SS_CANTSENDMORE)) { - /* the socket has been shutdown, no more sockopt's */ - mptcp_flush_sopts(mpte); + mptcp_cancel_all_timers(mp_tp); + if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) { + mptcp_close(mpte, mp_tp); + } else if ((mp_so->so_options & SO_LINGER) && + mp_so->so_linger == 0) { + mptcp_drop(mpte, mp_tp, 0); + } else { + soisdisconnecting(mp_so); + sbflush(&mp_so->so_rcv); + if (mptcp_usrclosed(mpte) != NULL) + mptcp_output(mpte); } + if (error == 0) + mptcp_subflow_workloop(mpte); + out: return (error); } /* - * Wrapper function to support disconnect on socket + * Wrapper function to support disconnect on socket */ static int mptcp_usr_disconnect(struct socket *mp_so) { - int error = 0; - - error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL); - return (error); + return (mptcp_disconnect(mpsotompte(mp_so))); } /* @@ -923,27 +749,13 @@ mptcp_usr_disconnect(struct socket *mp_so) static int mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid) { - struct mppcb *mpp = sotomppcb(mp_so); - struct mptses *mpte; - int error = 0; - - if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { - error = EINVAL; - goto out; - } - mpte = mptompte(mpp); - VERIFY(mpte != NULL); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) + return (EINVAL); - if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL && - aid != mpte->mpte_associd) { - error = EINVAL; - goto out; - } + if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) + return (EINVAL); - error = mptcp_disconnectx(mpte, aid, cid); -out: - return (error); + return (mptcp_usr_disconnect(mp_so)); } /* @@ -956,142 +768,29 @@ mptcp_usrclosed(struct mptses *mpte) struct mptcb *mp_tp; struct mptsub *mpts; - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; + mpte_lock_assert_held(mpte); /* same as MP socket lock */ + mp_so = mptetoso(mpte); mp_tp = mpte->mpte_mptcb; - MPT_LOCK(mp_tp); mptcp_close_fsm(mp_tp, MPCE_CLOSE); if (mp_tp->mpt_state == MPTCPS_CLOSED) { mpte = mptcp_close(mpte, mp_tp); - MPT_UNLOCK(mp_tp); } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) { - MPT_UNLOCK(mp_tp); soisdisconnected(mp_so); - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK(mpts); - mpts->mpts_flags |= MPTSF_USER_DISCONNECT; - MPTS_UNLOCK(mpts); - } } else { - MPT_UNLOCK(mp_tp); - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK(mpts); - mpts->mpts_flags |= MPTSF_USER_DISCONNECT; - mptcp_subflow_disconnect(mpte, mpts, FALSE); - MPTS_UNLOCK(mpts); + if ((mp_so->so_state & (SS_CANTRCVMORE|SS_CANTSENDMORE)) == + (SS_CANTRCVMORE | SS_CANTSENDMORE)) + mptcp_subflow_disconnect(mpte, mpts); + else + mptcp_subflow_shutdown(mpte, mpts); } } return (mpte); } -/* - * User-protocol pru_peeloff callback. - */ -static int -mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop) -{ - struct mppcb *mpp = sotomppcb(mp_so); - struct mptses *mpte; - int error = 0; - - VERIFY(psop != NULL); - - if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) { - error = EINVAL; - goto out; - } - mpte = mptompte(mpp); - VERIFY(mpte != NULL); - - error = mptcp_peeloff(mpte, aid, psop); -out: - return (error); -} - -/* - * Transform a previously connected TCP subflow connection which has - * failed to negotiate MPTCP to its own socket which can be externalized - * with a file descriptor. Valid only when the MPTCP socket is not - * yet associated (MPTCP-level connection has not been established.) - */ -static int -mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop) -{ - struct socket *so = NULL, *mp_so; - struct mptsub *mpts; - int error = 0; - - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; - - VERIFY(psop != NULL); - *psop = NULL; - - DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid, - struct socket *, mp_so); - - /* peeloff cannot happen after an association is established */ - if (mpte->mpte_associd != SAE_ASSOCID_ANY) { - error = EINVAL; - goto out; - } - - if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) { - error = EINVAL; - goto out; - } - - TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { - MPTS_LOCK(mpts); - if (mpts->mpts_flags & MPTSF_MP_CAPABLE) { - panic("%s: so %p is MPTCP capable but mp_so %p " - "aid is %d\n", __func__, so, mp_so, - mpte->mpte_associd); - /* NOTREACHED */ - } - MPTS_ADDREF_LOCKED(mpts); /* for us */ - so = mpts->mpts_socket; - VERIFY(so != NULL); - /* - * This subflow socket is about to be externalized; make it - * appear as if it has the same properties as the MPTCP socket, - * undo what's done earlier in mptcp_subflow_add(). - */ - mptcp_subflow_sopeeloff(mpte, mpts, so); - MPTS_UNLOCK(mpts); - - mptcp_subflow_del(mpte, mpts, FALSE); - MPTS_REMREF(mpts); /* ours */ - /* - * XXX adi@apple.com - * - * Here we need to make sure the subflow socket is not - * flow controlled; need to clear both INP_FLOW_CONTROLLED - * and INP_FLOW_SUSPENDED on the subflow socket, since - * we will no longer be monitoring its events. - */ - break; - } - - if (so == NULL) { - error = EINVAL; - goto out; - } - *psop = so; - - mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: mp_so 0x%llx\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), - MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); - -out: - return (error); -} - /* * After a receive, possible send some update to peer. */ @@ -1099,7 +798,7 @@ static int mptcp_usr_rcvd(struct socket *mp_so, int flags) { #pragma unused(flags) - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; int error = 0; @@ -1123,7 +822,7 @@ mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m, struct sockaddr *nam, struct mbuf *control, struct proc *p) { #pragma unused(nam, p) - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; int error = 0; @@ -1150,21 +849,16 @@ mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m, VERIFY(mpte != NULL); if (!(mp_so->so_state & SS_ISCONNECTED) && - (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA))) { + !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) { error = ENOTCONN; goto out; } mptcp_insert_dsn(mpp, m); VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS); - (void) sbappendstream(&mp_so->so_snd, m); + sbappendstream(&mp_so->so_snd, m); m = NULL; - /* - * XXX: adi@apple.com - * - * PRUS_MORETOCOME could be set, but we don't check it now. - */ error = mptcp_output(mpte); if (error != 0) goto out; @@ -1192,7 +886,7 @@ out: static int mptcp_usr_shutdown(struct socket *mp_so) { - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; int error = 0; @@ -1426,9 +1120,7 @@ out: if (control != NULL) m_freem(control); - /* clear SOF1_PRECONNECT_DATA after one write */ - if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) - mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + soclearfastopen(mp_so); return (error); } @@ -1457,13 +1149,7 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) * * Need to consider the following cases: * - * a. In the event peeloff(2) occurs on the subflow socket, - * we may want to issue those options which are now - * handled at the MP socket. In that case, we will need - * to record them in mptcp_setopt() so that they can - * be replayed during peeloff. - * - * b. Certain socket options don't have a clear definition + * a. Certain socket options don't have a clear definition * on the expected behavior post connect(2). At the time * those options are issued on the MP socket, there may * be existing subflow sockets that are already connected. @@ -1489,6 +1175,12 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) case SO_DEFUNCTOK: /* MP */ case SO_ISDEFUNCT: /* MP */ case SO_TRAFFIC_CLASS_DBG: /* MP */ + case SO_DELEGATED: /* MP */ + case SO_DELEGATED_UUID: /* MP */ +#if NECP + case SO_NECP_ATTRIBUTES: + case SO_NECP_CLIENTUUID: +#endif /* NECP */ /* * Tell the caller that these options are to be processed. */ @@ -1504,9 +1196,9 @@ mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt) case SO_RECV_ANYIF: /* MP + subflow */ case SO_RESTRICTIONS: /* MP + subflow */ case SO_FLUSH: /* MP + subflow */ - case SO_MPTCP_FASTJOIN: /* MP + subflow */ case SO_NOWAKEFROMSLEEP: case SO_NOAPNFALLBK: + case SO_MARK_CELLFALLBACK: /* * Tell the caller that these options are to be processed; * these will also be recorded later by mptcp_setopt(). @@ -1556,8 +1248,8 @@ mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo) goto out; } - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; + mpte_lock_assert_held(mpte); /* same as MP socket lock */ + mp_so = mptetoso(mpte); /* * Don't bother going further if there's no subflow; mark the option @@ -1581,33 +1273,25 @@ mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo) TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { struct socket *so; - MPTS_LOCK(mpts); mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG); mpts->mpts_oldintval = 0; smpo.mpo_intval = 0; VERIFY(mpts->mpts_socket != NULL); so = mpts->mpts_socket; - socket_lock(so, 0); if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) { mpts->mpts_flags |= MPTSF_SOPT_OLDVAL; mpts->mpts_oldintval = smpo.mpo_intval; } - socket_unlock(so, 0); - MPTS_UNLOCK(mpts); } /* apply socket option */ TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { struct socket *so; - MPTS_LOCK(mpts); mpts->mpts_flags |= MPTSF_SOPT_INPROG; VERIFY(mpts->mpts_socket != NULL); so = mpts->mpts_socket; - socket_lock(so, 0); - error = mptcp_subflow_sosetopt(mpte, so, mpo); - socket_unlock(so, 0); - MPTS_UNLOCK(mpts); + error = mptcp_subflow_sosetopt(mpte, mpts, mpo); if (error != 0) break; } @@ -1616,32 +1300,26 @@ mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo) TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) { struct socket *so; - MPTS_LOCK(mpts); if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) { /* clear in case it's set */ mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL; mpts->mpts_oldintval = 0; - MPTS_UNLOCK(mpts); continue; } if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) { mpts->mpts_flags &= ~MPTSF_SOPT_INPROG; VERIFY(mpts->mpts_oldintval == 0); - MPTS_UNLOCK(mpts); continue; } /* error during sosetopt, so roll it back */ if (error != 0) { VERIFY(mpts->mpts_socket != NULL); so = mpts->mpts_socket; - socket_lock(so, 0); smpo.mpo_intval = mpts->mpts_oldintval; - (void) mptcp_subflow_sosetopt(mpte, so, &smpo); - socket_unlock(so, 0); + mptcp_subflow_sosetopt(mpte, mpts, &smpo); } mpts->mpts_oldintval = 0; mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG); - MPTS_UNLOCK(mpts); } out: @@ -1654,18 +1332,17 @@ out: static int mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) { - int error = 0, optval, level, optname, rec = 1; + int error = 0, optval = 0, level, optname, rec = 1; struct mptopt smpo, *mpo = NULL; struct socket *mp_so; - char buf[32]; level = sopt->sopt_level; optname = sopt->sopt_name; VERIFY(sopt->sopt_dir == SOPT_SET); VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ - mp_so = mpte->mpte_mppcb->mpp_socket; + mpte_lock_assert_held(mpte); /* same as MP socket lock */ + mp_so = mptetoso(mpte); /* * Record socket options which are applicable to subflow sockets so @@ -1684,16 +1361,46 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case SO_RECV_ANYIF: case SO_RESTRICTIONS: case SO_NOWAKEFROMSLEEP: - case SO_MPTCP_FASTJOIN: case SO_NOAPNFALLBK: + case SO_MARK_CELLFALLBACK: /* record it */ break; case SO_FLUSH: /* don't record it */ rec = 0; break; + + /* Next ones, record at MPTCP-level */ +#if NECP + case SO_NECP_CLIENTUUID: + if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) { + error = EINVAL; + goto out; + } + + error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid, + sizeof(uuid_t), sizeof(uuid_t)); + if (error != 0) { + goto out; + } + + mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb; + error = necp_client_register_multipath_cb(mp_so->last_pid, + mpsotomppcb(mp_so)->necp_client_uuid, + mpsotomppcb(mp_so)); + if (error) + goto out; + + if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) { + error = EINVAL; + goto out; + } + + goto out; + case SO_NECP_ATTRIBUTES: +#endif /* NECP */ default: - /* nothing to do; just return success */ + /* nothing to do; just return */ goto out; } } else { @@ -1706,6 +1413,8 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: case PERSIST_TIMEOUT: + case TCP_ADAPTIVE_READ_TIMEOUT: + case TCP_ADAPTIVE_WRITE_TIMEOUT: /* eligible; record it */ break; case TCP_NOTSENT_LOWAT: @@ -1727,6 +1436,27 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) optval); } } + goto out; + case MPTCP_SERVICE_TYPE: + /* record at MPTCP level */ + error = sooptcopyin(sopt, &optval, sizeof(optval), + sizeof(optval)); + if (error) + goto out; + if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) { + error = EINVAL; + goto out; + } + + mpte->mpte_svctype = optval; + + if (mptcp_entitlement_check(mp_so) < 0) { + error = EACCES; + goto out; + } + + mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED; + goto out; default: /* not eligible */ @@ -1747,12 +1477,9 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) if (mpo == NULL) { error = ENOBUFS; } else { - mptcplog((LOG_DEBUG, "MPTCP Socket: " - "%s: mp_so 0x%llx sopt %s " - "val %d %s\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mptcp_sopt2str(level, optname, buf, - sizeof (buf)), optval, + mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s val %d %s\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(level, optname), optval, (mpo->mpo_flags & MPOF_ATTACHED) ? "updated" : "recorded"), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); @@ -1791,19 +1518,16 @@ mptcp_setopt(struct mptses *mpte, struct sockopt *sopt) } out: if (error == 0 && mpo != NULL) { - mptcplog((LOG_ERR, "MPTCP Socket: " - "%s: mp_so 0x%llx sopt %s val %d set %s\n", + mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s val %d set %s\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mptcp_sopt2str(level, optname, buf, - sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ? + mptcp_sopt2str(level, optname), optval, + (mpo->mpo_flags & MPOF_INTERIM) ? "pending" : "successful"), - MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); } else if (error != 0) { - mptcplog((LOG_ERR, "MPTCP Socket: " - "%s: mp_so 0x%llx sopt %s can't be issued " - "error %d\n", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level, - optname, buf, sizeof (buf)), error), + mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s (%d, %d) val %d can't be issued error %d\n", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), + mptcp_sopt2str(level, optname), level, optname, optval, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); } return (error); @@ -1818,7 +1542,7 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) int error = 0, optval; VERIFY(sopt->sopt_dir == SOPT_GET); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ /* * We only handle SOPT_GET for TCP level socket options; we should @@ -1839,7 +1563,10 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: case PERSIST_TIMEOUT: + case TCP_ADAPTIVE_READ_TIMEOUT: + case TCP_ADAPTIVE_WRITE_TIMEOUT: case TCP_NOTSENT_LOWAT: + case MPTCP_SERVICE_TYPE: /* eligible; get the default value just in case */ error = mptcp_default_tcp_optval(mpte, sopt, &optval); break; @@ -1851,11 +1578,14 @@ mptcp_getopt(struct mptses *mpte, struct sockopt *sopt) switch (sopt->sopt_name) { case TCP_NOTSENT_LOWAT: - if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT) + if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) optval = mptcp_get_notsent_lowat(mpte); else optval = 0; goto out; + case MPTCP_SERVICE_TYPE: + optval = mpte->mpte_svctype; + goto out; } /* @@ -1890,7 +1620,7 @@ mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval) VERIFY(sopt->sopt_level == IPPROTO_TCP); VERIFY(sopt->sopt_dir == SOPT_GET); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ /* try to do what tcp_newtcpcb() does */ switch (sopt->sopt_name) { @@ -1901,6 +1631,9 @@ mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval) case TCP_CONNECTIONTIMEOUT: case TCP_RXT_CONNDROPTIME: case TCP_NOTSENT_LOWAT: + case TCP_ADAPTIVE_READ_TIMEOUT: + case TCP_ADAPTIVE_WRITE_TIMEOUT: + case MPTCP_SERVICE_TYPE: *optval = 0; break; @@ -1927,7 +1660,7 @@ mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval) int mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt) { - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; int error = 0; @@ -1936,16 +1669,14 @@ mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt) goto out; } mpte = mptompte(mpp); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ /* we only handle socket and TCP-level socket options for MPTCP */ if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) { - char buf[32]; mptcplog((LOG_DEBUG, "MPTCP Socket: " "%s: mp_so 0x%llx sopt %s level not " "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), - mptcp_sopt2str(sopt->sopt_level, - sopt->sopt_name, buf, sizeof (buf))), + mptcp_sopt2str(sopt->sopt_level, sopt->sopt_name)), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG); error = EINVAL; goto out; @@ -1964,185 +1695,153 @@ out: return (error); } -/* - * Return a string representation of - */ const char * -mptcp_sopt2str(int level, int optname, char *dst, int size) +mptcp_sopt2str(int level, int optname) { - char lbuf[32], obuf[32]; - const char *l = lbuf, *o = obuf; - - (void) snprintf(lbuf, sizeof (lbuf), "0x%x", level); - (void) snprintf(obuf, sizeof (obuf), "0x%x", optname); - switch (level) { case SOL_SOCKET: - l = "SOL_SOCKET"; switch (optname) { case SO_LINGER: - o = "SO_LINGER"; - break; + return ("SO_LINGER"); case SO_LINGER_SEC: - o = "SO_LINGER_SEC"; - break; + return ("SO_LINGER_SEC"); case SO_DEBUG: - o = "SO_DEBUG"; - break; + return ("SO_DEBUG"); case SO_KEEPALIVE: - o = "SO_KEEPALIVE"; - break; + return ("SO_KEEPALIVE"); case SO_USELOOPBACK: - o = "SO_USELOOPBACK"; - break; + return ("SO_USELOOPBACK"); case SO_TYPE: - o = "SO_TYPE"; - break; + return ("SO_TYPE"); case SO_NREAD: - o = "SO_NREAD"; - break; + return ("SO_NREAD"); case SO_NWRITE: - o = "SO_NWRITE"; - break; + return ("SO_NWRITE"); case SO_ERROR: - o = "SO_ERROR"; - break; + return ("SO_ERROR"); case SO_SNDBUF: - o = "SO_SNDBUF"; - break; + return ("SO_SNDBUF"); case SO_RCVBUF: - o = "SO_RCVBUF"; - break; + return ("SO_RCVBUF"); case SO_SNDLOWAT: - o = "SO_SNDLOWAT"; - break; + return ("SO_SNDLOWAT"); case SO_RCVLOWAT: - o = "SO_RCVLOWAT"; - break; + return ("SO_RCVLOWAT"); case SO_SNDTIMEO: - o = "SO_SNDTIMEO"; - break; + return ("SO_SNDTIMEO"); case SO_RCVTIMEO: - o = "SO_RCVTIMEO"; - break; + return ("SO_RCVTIMEO"); case SO_NKE: - o = "SO_NKE"; - break; + return ("SO_NKE"); case SO_NOSIGPIPE: - o = "SO_NOSIGPIPE"; - break; + return ("SO_NOSIGPIPE"); case SO_NOADDRERR: - o = "SO_NOADDRERR"; - break; + return ("SO_NOADDRERR"); case SO_RESTRICTIONS: - o = "SO_RESTRICTIONS"; - break; + return ("SO_RESTRICTIONS"); case SO_LABEL: - o = "SO_LABEL"; - break; + return ("SO_LABEL"); case SO_PEERLABEL: - o = "SO_PEERLABEL"; - break; + return ("SO_PEERLABEL"); case SO_RANDOMPORT: - o = "SO_RANDOMPORT"; - break; + return ("SO_RANDOMPORT"); case SO_TRAFFIC_CLASS: - o = "SO_TRAFFIC_CLASS"; - break; + return ("SO_TRAFFIC_CLASS"); case SO_RECV_TRAFFIC_CLASS: - o = "SO_RECV_TRAFFIC_CLASS"; - break; + return ("SO_RECV_TRAFFIC_CLASS"); case SO_TRAFFIC_CLASS_DBG: - o = "SO_TRAFFIC_CLASS_DBG"; - break; + return ("SO_TRAFFIC_CLASS_DBG"); case SO_PRIVILEGED_TRAFFIC_CLASS: - o = "SO_PRIVILEGED_TRAFFIC_CLASS"; - break; + return ("SO_PRIVILEGED_TRAFFIC_CLASS"); case SO_DEFUNCTOK: - o = "SO_DEFUNCTOK"; - break; + return ("SO_DEFUNCTOK"); case SO_ISDEFUNCT: - o = "SO_ISDEFUNCT"; - break; + return ("SO_ISDEFUNCT"); case SO_OPPORTUNISTIC: - o = "SO_OPPORTUNISTIC"; - break; + return ("SO_OPPORTUNISTIC"); case SO_FLUSH: - o = "SO_FLUSH"; - break; + return ("SO_FLUSH"); case SO_RECV_ANYIF: - o = "SO_RECV_ANYIF"; - break; + return ("SO_RECV_ANYIF"); case SO_NOWAKEFROMSLEEP: - o = "SO_NOWAKEFROMSLEEP"; - break; - case SO_MPTCP_FASTJOIN: - o = "SO_MPTCP_FASTJOIN"; - break; + return ("SO_NOWAKEFROMSLEEP"); case SO_NOAPNFALLBK: - o = "SO_NOAPNFALLBK"; - break; + return ("SO_NOAPNFALLBK"); + case SO_MARK_CELLFALLBACK: + return ("SO_CELLFALLBACK"); + case SO_DELEGATED: + return ("SO_DELEGATED"); + case SO_DELEGATED_UUID: + return ("SO_DELEGATED_UUID"); +#if NECP + case SO_NECP_ATTRIBUTES: + return ("SO_NECP_ATTRIBUTES"); + case SO_NECP_CLIENTUUID: + return ("SO_NECP_CLIENTUUID"); +#endif /* NECP */ } + break; case IPPROTO_TCP: - l = "IPPROTO_TCP"; switch (optname) { + case TCP_NODELAY: + return ("TCP_NODELAY"); case TCP_KEEPALIVE: - o = "TCP_KEEPALIVE"; - break; + return ("TCP_KEEPALIVE"); case TCP_KEEPINTVL: - o = "TCP_KEEPINTVL"; - break; + return ("TCP_KEEPINTVL"); case TCP_KEEPCNT: - o = "TCP_KEEPCNT"; - break; + return ("TCP_KEEPCNT"); case TCP_CONNECTIONTIMEOUT: - o = "TCP_CONNECTIONTIMEOUT"; - break; + return ("TCP_CONNECTIONTIMEOUT"); case TCP_RXT_CONNDROPTIME: - o = "TCP_RXT_CONNDROPTIME"; - break; + return ("TCP_RXT_CONNDROPTIME"); case PERSIST_TIMEOUT: - o = "PERSIST_TIMEOUT"; - break; + return ("PERSIST_TIMEOUT"); + case TCP_NOTSENT_LOWAT: + return ("NOTSENT_LOWAT"); + case TCP_ADAPTIVE_READ_TIMEOUT: + return ("ADAPTIVE_READ_TIMEOUT"); + case TCP_ADAPTIVE_WRITE_TIMEOUT: + return ("ADAPTIVE_WRITE_TIMEOUT"); + case MPTCP_SERVICE_TYPE: + return ("MPTCP_SERVICE_TYPE"); } + break; } - (void) snprintf(dst, size, "<%s,%s>", l, o); - return (dst); + return ("unknown"); } static int mptcp_usr_preconnect(struct socket *mp_so) { struct mptsub *mpts = NULL; - struct mppcb *mpp = sotomppcb(mp_so); + struct mppcb *mpp = mpsotomppcb(mp_so); struct mptses *mpte; struct socket *so; struct tcpcb *tp = NULL; + int error; mpte = mptompte(mpp); VERIFY(mpte != NULL); - MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */ + mpte_lock_assert_held(mpte); /* same as MP socket lock */ mpts = mptcp_get_subflow(mpte, NULL, NULL); if (mpts == NULL) { - mptcplog((LOG_ERR, "MPTCP Socket: " - "%s: mp_so 0x%llx invalid preconnect ", __func__, - (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), - MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); + mptcplog((LOG_ERR, "%s: mp_so 0x%llx invalid preconnect ", + __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)), + MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR); return (EINVAL); } - MPTS_LOCK(mpts); mpts->mpts_flags &= ~MPTSF_TFO_REQD; so = mpts->mpts_socket; - socket_lock(so, 0); tp = intotcpcb(sotoinpcb(so)); tp->t_mpflags &= ~TMPF_TFO_REQUEST; - int error = tcp_output(sototcpcb(so)); - socket_unlock(so, 0); - MPTS_UNLOCK(mpts); - mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA; + error = tcp_output(sototcpcb(so)); + + soclearfastopen(mp_so); + return (error); }