/*
 * Copyright (c) 2012-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>

#include <kern/zalloc.h>
#include <kern/locks.h>

#include <mach/thread_act.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>

#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>

#include <dev/random/randomdev.h>
/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as a <SOCK_STREAM,IPPROTO_TCP> protocol in the
 * PF_MULTIPATH communication domain.  The structure mtcbinfo describes the
 * MPTCP instance of a Multipath protocol in that domain.  It is used to keep
 * track of all MPTCP PCB instances in the system, and is protected by the
 * global lock mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP).  Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB.  All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones.  The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
 * in particular, it holds the list of subflows as well as the MPTCP thread.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets.  Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure.  Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow.  This gets decremented prior to the subflow's destruction.  The
 * subflow lock (mpts_lock) is used to protect accesses to the subflow.
 *
 * To handle events (read, write, control) from the subflows, an MPTCP thread
 * is created; currently, there is one thread per MPTCP Session.  In order to
 * prevent the MPTCP socket from being destroyed while being accessed by the
 * MPTCP thread, we bump up the MPTCP socket's so_usecount for the thread,
 * which will be decremented prior to the thread's termination.  The thread
 * lock (mpte_thread_lock) is used to synchronize its signalling.
 *
 * Lock ordering is defined as follows:
 *
 *      mtcbinfo (mppi_lock)
 *
 * It is not a requirement that all of the above locks need to be acquired
 * in succession, but the correct lock ordering must be followed when more
 * than one lock needs to be held.  The MPTCP thread lock is not constrained
 * by this arrangement, because none of the other locks is ever acquired
 * while holding mpte_thread_lock; it may therefore be taken at any moment
 * to signal the thread.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector.  This process will take place once all
 * of the subflows have been destroyed, and the MPTCP thread has been
 * instructed to self-terminate.
 */
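
/*
 * Illustrative sketch only, not the authoritative definitions (those live
 * in mptcp_var.h).  It shows the single-block allocation described above,
 * using the mpp_ses/mtcb member names implied by the casts in
 * mptcp_sescreate() below; the name of the leading Multipath PCB member is
 * an assumption:
 *
 *      struct mpp_mtp {
 *              struct mppcb    mpp;      (Multipath PCB; mpp_lock)
 *              struct mptses   mpp_ses;  (MPTCP Session)
 *              struct mptcb    mtcb;     (MPTCP PCB)
 *      };
 *
 * A user process obtains one such block indirectly via:
 *
 *      int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 */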
static void mptcp_sesdestroy(struct mptses *);
static void mptcp_thread_signal_locked(struct mptses *);
static void mptcp_thread_terminate_signal(struct mptses *);
static void mptcp_thread_dowork(struct mptses *);
static void mptcp_thread_func(void *, wait_result_t);
static void mptcp_thread_destroy(struct mptses *);
static void mptcp_key_pool_init(void);
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, connid_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);
static void mptcp_conn_properties(struct mptcb *);
static void mptcp_init_statevars(struct mptcb *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_socreate(struct mptses *, struct mptsub *,
    int, struct proc *, struct socket **);
static int mptcp_subflow_soclose(struct mptsub *, struct socket *);
static int mptcp_subflow_soconnectx(struct mptses *, struct mptsub *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static void mptcp_subflow_rupcall(struct socket *, void *, int);
static void mptcp_subflow_input(struct mptses *, struct mptsub *);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct mptsub *, struct socket *);
/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of event processing.
 */
typedef enum {
        MPTS_EVRET_DELETE               = 1,    /* delete this subflow */
        MPTS_EVRET_OK                   = 2,    /* OK */
        MPTS_EVRET_CONNECT_PENDING      = 3,    /* resume pended connects */
        MPTS_EVRET_DISCONNECT_FALLBACK  = 4,    /* abort all but preferred */
        MPTS_EVRET_OK_UPDATE            = 5,    /* OK with conninfo update */
} ev_ret_t;
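
/*
 * Illustrative sketch only (the handler and dispatch shown here are an
 * assumption, not the actual mptcp_subflow_events() code): a caller is
 * expected to stop processing the remaining events for this round as soon
 * as a handler returns a value below MPTS_EVRET_OK, e.g.
 *
 *      ev_ret_t ret = mptcp_subflow_connreset_ev(mpte, mpts);
 *      if (ret < MPTS_EVRET_OK)
 *              return (ret);   (such as MPTS_EVRET_DELETE; act on it now)
 *      (otherwise fall through to the next pending event)
 */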
static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_connreset_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_cantrcvmore_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_cantsendmore_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_timeout_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_suspend_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_resume_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *);
static const char *mptcp_evret2str(ev_ret_t);

static mptcp_key_t *mptcp_reserve_key(void);
static int mptcp_do_sha1(mptcp_key_t *, char *, int);
static int mptcp_init_authparms(struct mptcb *);
static int mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts);
static unsigned int mptsub_zone_size;           /* size of mptsub */
static struct zone *mptsub_zone;                /* zone for mptsub */

static unsigned int mptopt_zone_size;           /* size of mptopt */
static struct zone *mptopt_zone;                /* zone for mptopt */

static unsigned int mpt_subauth_entry_size;     /* size of subf auth entry */
static struct zone *mpt_subauth_zone;           /* zone of subf auth entry */

struct mppcbinfo mtcbinfo;

static struct mptcp_keys_pool_head mptcp_keys_pool;

#define MPTCP_SUBFLOW_WRITELEN  (8 * 1024)      /* bytes to write each time */
#define MPTCP_SUBFLOW_READLEN   (8 * 1024)      /* bytes to read each time */
SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_verbose = 0;             /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, verbose, CTLFLAG_RW|CTLFLAG_LOCKED,
        &mptcp_verbose, 0, "MPTCP verbosity level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
        &mtcbinfo.mppi_count, 0, "Number of active PCBs");
/*
 * Since there is one kernel thread per MPTCP socket, we impose an artificial
 * limit on the number of allowed MPTCP sockets.
 */
uint32_t mptcp_socket_limit = MPPCB_LIMIT;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, sk_lim, CTLFLAG_RW|CTLFLAG_LOCKED,
        &mptcp_socket_limit, 0, "MPTCP socket limit");
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;
/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
        static int mptcp_initialized = 0;
        struct protosw *prp;
        struct ip6protosw *prp6;

        VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);

        /* do this only once */
        if (mptcp_initialized)
                return;
        mptcp_initialized = 1;

        /*
         * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
         * we must be able to find IPPROTO_TCP entries for both.
         */
        prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
        bcopy(prp, &mptcp_subflow_protosw, sizeof (*prp));
        bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
            sizeof (mptcp_subflow_usrreqs));
        mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
        mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
        mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
        mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
        mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
        /*
         * Socket filters shouldn't attach/detach to/from this protosw
         * since pr_protosw is to be used instead, which points to the
         * real protocol; if they do, it is a bug and we should panic.
         */
        mptcp_subflow_protosw.pr_filter_head.tqh_first =
            (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
        mptcp_subflow_protosw.pr_filter_head.tqh_last =
            (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

        prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
            IPPROTO_TCP, SOCK_STREAM);
        VERIFY(prp6 != NULL);
        bcopy(prp6, &mptcp_subflow_protosw6, sizeof (*prp6));
        bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
            sizeof (mptcp_subflow_usrreqs6));
        mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
        mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
        mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
        mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
        mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
        /*
         * Socket filters shouldn't attach/detach to/from this protosw
         * since pr_protosw is to be used instead, which points to the
         * real protocol; if they do, it is a bug and we should panic.
         */
        mptcp_subflow_protosw6.pr_filter_head.tqh_first =
            (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
        mptcp_subflow_protosw6.pr_filter_head.tqh_last =
            (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

        bzero(&mtcbinfo, sizeof (mtcbinfo));
        TAILQ_INIT(&mtcbinfo.mppi_pcbs);
        mtcbinfo.mppi_size = sizeof (struct mpp_mtp);
        if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
            1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
                panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
        }
        zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
        zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

        mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
        mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
            mtcbinfo.mppi_lock_grp_attr);
        mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
        lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
            mtcbinfo.mppi_lock_attr);
        mtcbinfo.mppi_gc = mptcp_gc;

        mtcbinfo.mppi_timer = mptcp_timer;

        /* attach to MP domain for garbage collection to take place */
        mp_pcbinfo_attach(&mtcbinfo);

        mptsub_zone_size = sizeof (struct mptsub);
        if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
            8192, "mptsub")) == NULL) {
                panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
        }
        zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
        zone_change(mptsub_zone, Z_EXPAND, TRUE);

        mptopt_zone_size = sizeof (struct mptopt);
        if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
            1024, "mptopt")) == NULL) {
                panic("%s: unable to allocate MPTCP option zone\n", __func__);
        }
        zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
        zone_change(mptopt_zone, Z_EXPAND, TRUE);

        mpt_subauth_entry_size = sizeof (struct mptcp_subf_auth_entry);
        if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
            1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
                panic("%s: unable to allocate MPTCP address auth zone \n",
                    __func__);
        }
        zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
        zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

        /* Set up a list of unique keys */
        mptcp_key_pool_init();
}
/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
struct mptses *
mptcp_sescreate(struct socket *mp_so, struct mppcb *mpp)
{
        struct mppcbinfo *mppi;
        struct mptses *mpte;
        struct mptcb *mp_tp;
        int error = 0;

        mppi = mpp->mpp_pcbinfo;
        VERIFY(mppi != NULL);

        mpte = &((struct mpp_mtp *)mpp)->mpp_ses;
        mp_tp = &((struct mpp_mtp *)mpp)->mtcb;

        /* MPTCP Multipath PCB Extension */
        bzero(mpte, sizeof (*mpte));
        VERIFY(mpp->mpp_pcbe == NULL);
        mpp->mpp_pcbe = mpte;
        mpte->mpte_mppcb = mpp;
        mpte->mpte_mptcb = mp_tp;

        TAILQ_INIT(&mpte->mpte_sopts);
        TAILQ_INIT(&mpte->mpte_subflows);
        mpte->mpte_associd = ASSOCID_ANY;
        mpte->mpte_connid_last = CONNID_ANY;

        lck_mtx_init(&mpte->mpte_thread_lock, mppi->mppi_lock_grp,
            mppi->mppi_lock_attr);

        /*
         * This can be rather expensive if we have lots of MPTCP sockets,
         * but we need a kernel thread for this model to work.  Perhaps we
         * could amortize the costs by having one worker thread per group
         * of MPTCP sockets.
         */
        if (kernel_thread_start(mptcp_thread_func, mpte,
            &mpte->mpte_thread) != KERN_SUCCESS) {
                error = ENOBUFS;
                goto out;
        }
        mp_so->so_usecount++;           /* for thread */

        /* MPTCP Protocol Control Block */
        bzero(mp_tp, sizeof (*mp_tp));
        lck_mtx_init(&mp_tp->mpt_lock, mppi->mppi_lock_grp,
            mppi->mppi_lock_attr);
        mp_tp->mpt_mpte = mpte;

out:
        if (error != 0)
                lck_mtx_destroy(&mpte->mpte_thread_lock, mppi->mppi_lock_grp);
        DTRACE_MPTCP5(session__create, struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mppcb *, mpp, int, error);

        return ((error != 0) ? NULL : mpte);
}
/*
 * Destroy an MPTCP session.
 */
static void
mptcp_sesdestroy(struct mptses *mpte)
{
        struct mptcb *mp_tp;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */

        mp_tp = mpte->mpte_mptcb;
        VERIFY(mp_tp != NULL);

        /*
         * MPTCP Multipath PCB Extension section
         */
        mptcp_flush_sopts(mpte);
        VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

        lck_mtx_destroy(&mpte->mpte_thread_lock,
            mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp);

        /*
         * MPTCP Protocol Control Block section
         */
        lck_mtx_destroy(&mp_tp->mpt_lock,
            mpte->mpte_mppcb->mpp_pcbinfo->mppi_lock_grp);

        DTRACE_MPTCP2(session__destroy, struct mptses *, mpte,
            struct mptcb *, mp_tp);
}
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
        struct mptopt *mpo;

        mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
            zalloc_noblock(mptopt_zone);
        if (mpo != NULL) {
                bzero(mpo, mptopt_zone_size);
        }

        return (mpo);
}

/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
        VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

        zfree(mptopt_zone, mpo);
}
/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));
        mpo->mpo_flags |= MPOF_ATTACHED;
        TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
        mpo->mpo_flags &= ~MPOF_ATTACHED;
        TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
        struct mptopt *mpo;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */

        TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
                if (mpo->mpo_level == sopt->sopt_level &&
                    mpo->mpo_name == sopt->sopt_name)
                        break;
        }
        VERIFY(mpo == NULL || sopt->sopt_valsize == sizeof (int));

        return (mpo);
}
/*
 * Flushes all recorded socket options from an MP socket.
 */
void
mptcp_flush_sopts(struct mptses *mpte)
{
        struct mptopt *mpo, *tmpo;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */

        TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
                mptcp_sopt_remove(mpte, mpo);
                mptcp_sopt_free(mpo);
        }
        VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Allocate an MPTCP subflow structure.
 */
struct mptsub *
mptcp_subflow_alloc(int how)
{
        struct mptsub *mpts;

        mpts = (how == M_WAITOK) ? zalloc(mptsub_zone) :
            zalloc_noblock(mptsub_zone);
        if (mpts != NULL) {
                bzero(mpts, mptsub_zone_size);
                lck_mtx_init(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp,
                    mtcbinfo.mppi_lock_attr);
        }

        return (mpts);
}
/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released.  This implies that the subflow has been deleted.
 */
void
mptcp_subflow_free(struct mptsub *mpts)
{
        MPTS_LOCK_ASSERT_HELD(mpts);

        VERIFY(mpts->mpts_refcnt == 0);
        VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
        VERIFY(mpts->mpts_mpte == NULL);
        VERIFY(mpts->mpts_socket == NULL);

        if (mpts->mpts_src_sl != NULL) {
                sockaddrlist_free(mpts->mpts_src_sl);
                mpts->mpts_src_sl = NULL;
        }
        if (mpts->mpts_dst_sl != NULL) {
                sockaddrlist_free(mpts->mpts_dst_sl);
                mpts->mpts_dst_sl = NULL;
        }
        lck_mtx_destroy(&mpts->mpts_lock, mtcbinfo.mppi_lock_grp);

        zfree(mptsub_zone, mpts);
}
/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct proc *p, struct socket **so)
{
        struct mptopt smpo, *mpo, *tmpo;
        struct socket *mp_so;
        int error;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;

        /*
         * Create the subflow socket (multipath subflow, non-blocking.)
         *
         * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
         * socket; it will be cleared when the socket is peeled off or closed.
         * It also indicates to the underlying TCP to handle MPTCP options.
         * A multipath subflow socket implies SS_NOFDREF state.
         */
        if ((error = socreate_internal(dom, so, SOCK_STREAM,
            IPPROTO_TCP, p, SOCF_ASYNC | SOCF_MP_SUBFLOW, PROC_NULL)) != 0) {
                mptcplog((LOG_ERR, "MPTCP ERROR %s: mp_so 0x%llx unable to "
                    "create subflow socket error %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error));
                return (error);
        }

        socket_lock(*so, 0);
        VERIFY((*so)->so_flags & SOF_MP_SUBFLOW);
        VERIFY(((*so)->so_state & (SS_NBIO|SS_NOFDREF)) ==
            (SS_NBIO|SS_NOFDREF));

        /* prevent the socket buffers from being compressed */
        (*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
        (*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

        bzero(&smpo, sizeof (smpo));
        smpo.mpo_flags |= MPOF_SUBFLOW_OK;
        smpo.mpo_level = SOL_SOCKET;

        /* disable SIGPIPE */
        smpo.mpo_name = SO_NOSIGPIPE;
        if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
                goto out;

        /* find out if the subflow's source address goes away */
        smpo.mpo_name = SO_NOADDRERR;
        if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
                goto out;

        /* enable keepalive */
        smpo.mpo_name = SO_KEEPALIVE;
        if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
                goto out;

        /*
         * Limit the receive socket buffer size to 64k.
         *
         * We need to take into consideration the window scale option
         * which could be negotiated in one subflow but disabled in
         * another subflow.
         * XXX This can be improved in the future.
         */
        smpo.mpo_name = SO_RCVBUF;
        smpo.mpo_intval = MPTCP_RWIN_MAX;
        if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
                goto out;

        /* N.B.: set by sosetopt */
        VERIFY(!((*so)->so_rcv.sb_flags & SB_AUTOSIZE));
        /* Prevent automatic socket buffer sizing. */
        (*so)->so_snd.sb_flags &= ~SB_AUTOSIZE;

        smpo.mpo_level = IPPROTO_TCP;
        smpo.mpo_intval = mptcp_subflow_keeptime;
        smpo.mpo_name = TCP_KEEPALIVE;
        if ((error = mptcp_subflow_sosetopt(mpte, *so, &smpo)) != 0)
                goto out;

        /* replay setsockopt(2) on the subflow sockets for eligible options */
        TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
                int interim;

                if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK))
                        continue;

                /*
                 * Skip those that are handled internally; these options
                 * should not have been recorded and marked with the
                 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
                 */
                if (mpo->mpo_level == SOL_SOCKET &&
                    (mpo->mpo_name == SO_NOSIGPIPE ||
                    mpo->mpo_name == SO_NOADDRERR ||
                    mpo->mpo_name == SO_KEEPALIVE))
                        continue;

                interim = (mpo->mpo_flags & MPOF_INTERIM);
                if (mptcp_subflow_sosetopt(mpte, *so, mpo) != 0 && interim) {
                        char buf[32];

                        mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s val %d "
                            "interim record removed\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
                            buf, sizeof (buf)), mpo->mpo_intval));
                        mptcp_sopt_remove(mpte, mpo);
                        mptcp_sopt_free(mpo);
                }
        }

        /*
         * We need to receive everything that the subflow socket has,
         * so use a customized socket receive function.  We will undo
         * this when the socket is peeled off or closed.
         */
        mpts->mpts_oprotosw = (*so)->so_proto;
        switch (dom) {
        case PF_INET:
                (*so)->so_proto = &mptcp_subflow_protosw;
                break;
        case PF_INET6:
                (*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
                break;
        }

out:
        socket_unlock(*so, 0);

        DTRACE_MPTCP4(subflow__create, struct mptses *, mpte,
            struct mptsub *, mpts, int, dom, int, error);

        return (error);
}
/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static int
mptcp_subflow_soclose(struct mptsub *mpts, struct socket *so)
{
        MPTS_LOCK_ASSERT_HELD(mpts);

        socket_lock(so, 0);
        VERIFY(so->so_flags & SOF_MP_SUBFLOW);
        VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));

        /* restore protocol-user requests */
        VERIFY(mpts->mpts_oprotosw != NULL);
        so->so_proto = mpts->mpts_oprotosw;
        socket_unlock(so, 0);

        mpts->mpts_socket = NULL;       /* may already be NULL */

        DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
            struct socket *, so,
            struct sockbuf *, &so->so_rcv,
            struct sockbuf *, &so->so_snd,
            struct mptses *, mpts->mpts_mpte);

        return (soclose(so));
}
/*
 * Connect an MPTCP subflow socket.
 *
 * This may be called inline as part of adding a subflow, or asynchronously
 * by the thread (upon progressing to MPTCPF_JOIN_READY).  Note that in the
 * pending connect case, the subflow socket may have been bound to an interface
 * and/or a source IP address which may no longer be around by the time this
 * routine is called; in that case the connect attempt will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *so;
        int af, error;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)) ==
            MPTSF_CONNECTING);
        VERIFY(mpts->mpts_socket != NULL);
        so = mpts->mpts_socket;
        af = mpts->mpts_family;

        if (af == AF_INET || af == AF_INET6) {
                struct sockaddr_entry *dst_se;
                char dbuf[MAX_IPv6_STR_LEN];

                dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
                VERIFY(dst_se != NULL);

                mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
                    "[pended %s]\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
                    inet_ntop(af, ((af == AF_INET) ?
                    (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
                    (void *)&SIN6(dst_se->se_addr)->sin6_addr),
                    dbuf, sizeof (dbuf)), ((af == AF_INET) ?
                    ntohs(SIN(dst_se->se_addr)->sin_port) :
                    ntohs(SIN6(dst_se->se_addr)->sin6_port)),
                    mpts->mpts_connid,
                    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
                    "YES" : "NO")));
        }

        mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

        socket_lock(so, 0);
        mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpts->mpts_connid);
        /* connect the subflow socket */
        error = soconnectxlocked(so, &mpts->mpts_src_sl, &mpts->mpts_dst_sl,
            mpts->mpts_mpcr.mpcr_proc, mpts->mpts_mpcr.mpcr_ifscope,
            mpte->mpte_associd, NULL, TCP_CONNREQF_MPTCP,
            &mpts->mpts_mpcr, sizeof (mpts->mpts_mpcr));
        socket_unlock(so, 0);

        DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
            struct mptsub *, mpts, int, error);

        return (error);
}
/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
        int flags, error = 0;
        struct proc *p = current_proc();
        struct mbuf *m, **mp = mp0;
        struct mbuf *nextrecord;

        socket_lock(so, 1);
        VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
        if (so->so_usecount == 1) {
                panic("%s: so=%x no other reference on socket\n", __func__, so);
        }
#endif
        /*
         * We return all that is there in the subflow's socket receive buffer
         * to the MPTCP layer, so we require that the caller passes in the
         * expected parameters.
         */
        if (mp == NULL || controlp != NULL) {
                socket_unlock(so, 1);
                return (EINVAL);
        }
        if (flagsp != NULL)
                flags = *flagsp &~ MSG_EOR;
        else
                flags = 0;

        if (flags & (MSG_PEEK|MSG_OOB|MSG_NEEDSA|MSG_WAITALL|MSG_WAITSTREAM)) {
                socket_unlock(so, 1);
                return (EOPNOTSUPP);
        }
        flags |= (MSG_DONTWAIT|MSG_NBIO);

        /*
         * If a recv attempt is made on a previously-accepted socket
         * that has been marked as inactive (disconnected), reject
         * the request.
         */
        if (so->so_flags & SOF_DEFUNCT) {
                struct sockbuf *sb = &so->so_rcv;

                error = ENOTCONN;
                SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
                    __func__, proc_pid(p), (uint64_t)VM_KERNEL_ADDRPERM(so),
                    SOCK_DOM(so), SOCK_TYPE(so), error));
                /*
                 * This socket should have been disconnected and flushed
                 * prior to being returned from sodefunct(); there should
                 * be no data on its receive list, so panic otherwise.
                 */
                if (so->so_state & SS_DEFUNCT)
                        sb_empty_assert(sb, __func__);
                socket_unlock(so, 1);
                return (error);
        }

        /*
         * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
         * and if so just return to the caller.  This could happen when
         * soreceive() is called by a socket upcall function during the
         * time the socket is freed.  The socket buffer would have been
         * locked across the upcall, therefore we cannot put this thread
         * to sleep (else we will deadlock) or return EWOULDBLOCK (else
         * we may livelock), because the lock on the socket buffer will
         * only be released when the upcall routine returns to its caller.
         * Because the socket has been officially closed, there can be
         * no further read on it.
         *
         * A multipath subflow socket would have its SS_NOFDREF set by
         * default, so check for SOF_MP_SUBFLOW socket flag; when the
         * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
         */
        if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
            (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
                socket_unlock(so, 1);
                return (0);
        }

        /*
         * For consistency with soreceive() semantics, we need to obey
         * SB_LOCK in case some other code path has locked the buffer.
         */
        error = sblock(&so->so_rcv, 0);
        if (error != 0) {
                socket_unlock(so, 1);
                return (error);
        }

        m = so->so_rcv.sb_mb;
        if (m == NULL) {
                /*
                 * Panic if we notice inconsistencies in the socket's
                 * receive list; both sb_mb and sb_cc should correctly
                 * reflect the contents of the list, otherwise we may
                 * end up with false positives during select() or poll()
                 * which could put the application in a bad state.
                 */
                SB_MB_CHECK(&so->so_rcv);

                if (so->so_error != 0) {
                        error = so->so_error;
                        goto release;
                }

                if (so->so_state & SS_CANTRCVMORE) {
                        goto release;
                }

                if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) {
                        error = ENOTCONN;
                        goto release;
                }

                /*
                 * MSG_DONTWAIT is implicitly defined and this routine will
                 * never block, so return EWOULDBLOCK when there is nothing.
                 */
                error = EWOULDBLOCK;
                goto release;
        }

        OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
        SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
        SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

        while (m != NULL) {
                nextrecord = m->m_nextpkt;
                sbfree(&so->so_rcv, m);

                if (mp != NULL) {
                        *mp = m;
                        mp = &m->m_next;
                        so->so_rcv.sb_mb = m = m->m_next;
                        *mp = NULL;
                }

                if (m != NULL) {
                        m->m_nextpkt = nextrecord;
                        if (nextrecord == NULL)
                                so->so_rcv.sb_lastrecord = m;
                } else {
                        m = so->so_rcv.sb_mb = nextrecord;
                        SB_EMPTY_FIXUP(&so->so_rcv);
                }
                SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
                SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
        }

        DTRACE_MPTCP3(subflow__receive, struct socket *, so,
            struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
        /* notify protocol that we drained all the data */
        if ((so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
                (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);

release:
        sbunlock(&so->so_rcv, FALSE);   /* will unlock socket */

        return (error);
}
/*
 * Prepare an MPTCP subflow socket for peeloff(2); basically undo
 * the work done earlier when the subflow socket was created.
 */
static void
mptcp_subflow_sopeeloff(struct mptses *mpte, struct mptsub *mpts,
    struct socket *so)
{
        struct mptopt smpo;
        struct socket *mp_so;
        int p, c;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;
        MPTS_LOCK_ASSERT_HELD(mpts);

        socket_lock(so, 0);
        VERIFY(so->so_flags & SOF_MP_SUBFLOW);
        VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));

        /* inherit MPTCP socket states */
        if (!(mp_so->so_state & SS_NBIO))
                so->so_state &= ~SS_NBIO;

        /*
         * At this point, the socket is not yet closed, as there is at least
         * one outstanding usecount previously held by mpts_socket from
         * socreate().  Atomically clear SOF_MP_SUBFLOW and SS_NOFDREF here.
         */
        so->so_flags &= ~SOF_MP_SUBFLOW;
        so->so_state &= ~SS_NOFDREF;
        so->so_state &= ~SOF_MPTCP_TRUE;

        /* allow socket buffers to be compressed */
        so->so_rcv.sb_flags &= ~SB_NOCOMPRESS;
        so->so_snd.sb_flags &= ~SB_NOCOMPRESS;

        /*
         * Allow socket buffer auto sizing.
         *
         * This will increase the current 64k buffer size to whatever is best.
         */
        so->so_rcv.sb_flags |= SB_AUTOSIZE;
        so->so_snd.sb_flags |= SB_AUTOSIZE;

        /* restore protocol-user requests */
        VERIFY(mpts->mpts_oprotosw != NULL);
        so->so_proto = mpts->mpts_oprotosw;

        bzero(&smpo, sizeof (smpo));
        smpo.mpo_flags |= MPOF_SUBFLOW_OK;
        smpo.mpo_level = SOL_SOCKET;

        /* inherit SOF_NOSIGPIPE from parent MP socket */
        p = (mp_so->so_flags & SOF_NOSIGPIPE);
        c = (so->so_flags & SOF_NOSIGPIPE);
        smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
        smpo.mpo_name = SO_NOSIGPIPE;
        if ((p - c) != 0)
                (void) mptcp_subflow_sosetopt(mpte, so, &smpo);

        /* inherit SOF_NOADDRAVAIL from parent MP socket */
        p = (mp_so->so_flags & SOF_NOADDRAVAIL);
        c = (so->so_flags & SOF_NOADDRAVAIL);
        smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
        smpo.mpo_name = SO_NOADDRERR;
        if ((p - c) != 0)
                (void) mptcp_subflow_sosetopt(mpte, so, &smpo);

        /* inherit SO_KEEPALIVE from parent MP socket */
        p = (mp_so->so_options & SO_KEEPALIVE);
        c = (so->so_options & SO_KEEPALIVE);
        smpo.mpo_intval = ((p - c) > 0) ? 1 : 0;
        smpo.mpo_name = SO_KEEPALIVE;
        if ((p - c) != 0)
                (void) mptcp_subflow_sosetopt(mpte, so, &smpo);

        /* unset TCP level default keepalive option */
        p = (intotcpcb(sotoinpcb(mp_so)))->t_keepidle;
        c = (intotcpcb(sotoinpcb(so)))->t_keepidle;
        smpo.mpo_level = IPPROTO_TCP;
        smpo.mpo_intval = 0;
        smpo.mpo_name = TCP_KEEPALIVE;
        if ((p - c) != 0)
                (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
        socket_unlock(so, 0);

        DTRACE_MPTCP5(subflow__peeloff, struct mptses *, mpte,
            struct mptsub *, mpts, struct socket *, so,
            struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
}
/*
 * Establish an initial MPTCP connection (if first subflow and not yet
 * connected), or add a subflow to an existing MPTCP connection.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct mptsub *mpts,
    struct proc *p, uint32_t ifscope)
{
        struct sockaddr_entry *se, *src_se = NULL, *dst_se = NULL;
        struct socket *mp_so, *so = NULL;
        struct mptsub_connreq mpcr;
        struct mptcb *mp_tp;
        int af, error = 0;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;

        VERIFY(!(mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)));
        VERIFY(mpts->mpts_mpte == NULL);
        VERIFY(mpts->mpts_socket == NULL);
        VERIFY(mpts->mpts_dst_sl != NULL);
        VERIFY(mpts->mpts_connid == CONNID_ANY);

        /* select source (if specified) and destination addresses */
        if ((error = in_selectaddrs(AF_UNSPEC, &mpts->mpts_src_sl, &src_se,
            &mpts->mpts_dst_sl, &dst_se)) != 0)
                goto out;

        VERIFY(mpts->mpts_dst_sl != NULL && dst_se != NULL);
        VERIFY(src_se == NULL || mpts->mpts_src_sl != NULL);
        af = mpts->mpts_family = dst_se->se_addr->sa_family;
        VERIFY(src_se == NULL || src_se->se_addr->sa_family == af);
        VERIFY(af == AF_INET || af == AF_INET6);

        /*
         * If the source address is not specified, allocate a storage for
         * it, so that later on we can fill it in with the actual source
         * IP address chosen by the underlying layer for the subflow after
         * it is connected.
         */
        if (mpts->mpts_src_sl == NULL) {
                mpts->mpts_src_sl =
                    sockaddrlist_dup(mpts->mpts_dst_sl, M_WAITOK);
                if (mpts->mpts_src_sl == NULL) {
                        error = ENOBUFS;
                        goto out;
                }
                se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
                VERIFY(se != NULL && se->se_addr != NULL &&
                    se->se_addr->sa_len == dst_se->se_addr->sa_len);
                bzero(se->se_addr, se->se_addr->sa_len);
                se->se_addr->sa_len = dst_se->se_addr->sa_len;
                se->se_addr->sa_family = dst_se->se_addr->sa_family;
        }

        /* create the subflow socket */
        if ((error = mptcp_subflow_socreate(mpte, mpts, af, p, &so)) != 0)
                goto out;

        /*
         * XXX: adi@apple.com
         *
         * This probably needs to be made smarter, but for now simply
         * increment the counter, while avoiding 0 (CONNID_ANY) and
         * -1 (CONNID_ALL).  Assume that an MPTCP connection will not
         * live too long with (2^32)-2 subflow connection attempts.
         */
        mpte->mpte_connid_last++;
        if (mpte->mpte_connid_last == CONNID_ALL ||
            mpte->mpte_connid_last == CONNID_ANY)
                mpte->mpte_connid_last++;

        mpts->mpts_connid = mpte->mpte_connid_last;
        VERIFY(mpts->mpts_connid != CONNID_ANY &&
            mpts->mpts_connid != CONNID_ALL);

        /* bind subflow socket to the specified interface */
        if (ifscope != IFSCOPE_NONE) {
                socket_lock(so, 0);
                error = inp_bindif(sotoinpcb(so), ifscope, &mpts->mpts_outif);
                if (error != 0) {
                        socket_unlock(so, 0);
                        (void) mptcp_subflow_soclose(mpts, so);
                        goto out;
                }
                VERIFY(mpts->mpts_outif != NULL);
                mpts->mpts_flags |= MPTSF_BOUND_IF;

                mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindif %s[%d] "
                    "cid %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mpts->mpts_outif->if_xname,
                    ifscope, mpts->mpts_connid));
                socket_unlock(so, 0);
        }

        /* if source address and/or port is specified, bind to it */
        if (src_se != NULL) {
                struct sockaddr *sa = src_se->se_addr;
                uint32_t mpts_flags = 0;
                in_port_t lport;

                switch (af) {
                case AF_INET:
                        if (SIN(sa)->sin_addr.s_addr != INADDR_ANY)
                                mpts_flags |= MPTSF_BOUND_IP;
                        if ((lport = SIN(sa)->sin_port) != 0)
                                mpts_flags |= MPTSF_BOUND_PORT;
                        break;
                case AF_INET6:
                        VERIFY(af == AF_INET6);
                        if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa)->sin6_addr))
                                mpts_flags |= MPTSF_BOUND_IP;
                        if ((lport = SIN6(sa)->sin6_port) != 0)
                                mpts_flags |= MPTSF_BOUND_PORT;
                        break;
                }

                error = sobindlock(so, sa, 1);  /* will lock/unlock socket */
                if (error != 0) {
                        (void) mptcp_subflow_soclose(mpts, so);
                        goto out;
                }
                mpts->mpts_flags |= mpts_flags;

                if (af == AF_INET || af == AF_INET6) {
                        char sbuf[MAX_IPv6_STR_LEN];

                        mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx bindip %s[%d] "
                            "cid %d\n", __func__,
                            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                            inet_ntop(af, ((af == AF_INET) ?
                            (void *)&SIN(sa)->sin_addr.s_addr :
                            (void *)&SIN6(sa)->sin6_addr), sbuf, sizeof (sbuf)),
                            ntohs(lport), mpts->mpts_connid));
                }
        }

        /*
         * Insert the subflow into the list, and associate the MPTCP PCB
         * as well as the subflow socket.  From this point on, removing
         * the subflow needs to be done via mptcp_subflow_del().
         */
        TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
        mpte->mpte_numflows++;

        atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
        mpts->mpts_mpte = mpte;
        mpts->mpts_socket = so;
        MPTS_ADDREF_LOCKED(mpts);       /* for being in MPTCP subflow list */
        MPTS_ADDREF_LOCKED(mpts);       /* for subflow socket */
        mp_so->so_usecount++;           /* for subflow socket */

        /* register for subflow socket read/write events */
        (void) sock_setupcalls(so, mptcp_subflow_rupcall, mpts,
            mptcp_subflow_wupcall, mpts);

        /*
         * Register for subflow socket control events; ignore
         * SO_FILT_HINT_CONNINFO_UPDATED from below since we
         * will generate it here.
         */
        (void) sock_catchevents(so, mptcp_subflow_eupcall, mpts,
            SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
            SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
            SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
            SO_FILT_HINT_SUSPEND | SO_FILT_HINT_RESUME |
            SO_FILT_HINT_CONNECTED | SO_FILT_HINT_DISCONNECTED |
            SO_FILT_HINT_MPFAILOVER | SO_FILT_HINT_MPSTATUS |
            SO_FILT_HINT_MUSTRST);

        VERIFY(!(mpts->mpts_flags &
            (MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING)));

        bzero(&mpcr, sizeof (mpcr));
        mpcr.mpcr_proc = p;
        mpcr.mpcr_ifscope = ifscope;
        /*
         * Indicate to the TCP subflow whether or not it should establish
         * the initial MPTCP connection, or join an existing one.  Fill
         * in the connection request structure with additional info needed
         * by the underlying TCP (to be used in the TCP options, etc.)
         */
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
                if (mp_tp->mpt_state == MPTCPS_CLOSED) {
                        mp_tp->mpt_localkey = mptcp_reserve_key();
                        mptcp_conn_properties(mp_tp);
                }
                soisconnecting(mp_so);
                mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ENABLE;
        } else {
                if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
                        mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
                mpcr.mpcr_type = MPTSUB_CONNREQ_MP_ADD;
        }

        mpts->mpts_mpcr = mpcr;
        mpts->mpts_flags |= MPTSF_CONNECTING;

        if (af == AF_INET || af == AF_INET6) {
                char dbuf[MAX_IPv6_STR_LEN];

                mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx dst %s[%d] cid %d "
                    "[pending %s]\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    inet_ntop(af, ((af == AF_INET) ?
                    (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
                    (void *)&SIN6(dst_se->se_addr)->sin6_addr),
                    dbuf, sizeof (dbuf)), ((af == AF_INET) ?
                    ntohs(SIN(dst_se->se_addr)->sin_port) :
                    ntohs(SIN6(dst_se->se_addr)->sin6_port)),
                    mpts->mpts_connid,
                    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
                    "YES" : "NO")));
        }

        /* connect right away if first attempt, or if join can be done now */
        if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
                error = mptcp_subflow_soconnectx(mpte, mpts);

out:
        if (error == 0) {
                soevent(mp_so, SO_FILT_HINT_LOCKED |
                    SO_FILT_HINT_CONNINFO_UPDATED);
        }
        return (error);
}
static int
mptcp_delete_ok(struct mptses *mpte, struct mptsub *mpts)
{
        int ret = 1;
        struct mptcb *mp_tp = NULL;

        MPTE_LOCK_ASSERT_HELD(mpte);
        mp_tp = mpte->mpte_mptcb;
        VERIFY(mp_tp != NULL);

        if ((mpts->mpts_soerror == 0) &&
            (mpts->mpts_flags & MPTSF_ACTIVE) &&
            (mp_tp->mpt_state != MPTCPS_CLOSED) &&
            (mp_tp->mpt_state <= MPTCPS_TIME_WAIT))
                ret = 0;

        return (ret);
}
/*
 * Delete/remove a subflow from an MPTCP.  The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 * If peeloff(2) is called, leave the socket open.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts, boolean_t close)
{
        struct socket *mp_so, *so;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;

        so = mpts->mpts_socket;

        mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d "
            "[close %s] %d %x\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mp_so->so_usecount,
            mp_so->so_retaincnt, mpts->mpts_connid,
            (close ? "YES" : "NO"), mpts->mpts_soerror,
            mpts->mpts_flags));

        VERIFY(mpts->mpts_mpte == mpte);
        VERIFY(mpts->mpts_connid != CONNID_ANY &&
            mpts->mpts_connid != CONNID_ALL);

        VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
        atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
        TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
        VERIFY(mpte->mpte_numflows != 0);
        mpte->mpte_numflows--;

        /*
         * Drop references held by this subflow socket; there
         * will be no further upcalls made from this point.
         */
        (void) sock_setupcalls(so, NULL, NULL, NULL, NULL);
        (void) sock_catchevents(so, NULL, NULL, 0);
        mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
        if (close)
                (void) mptcp_subflow_soclose(mpts, so);

        VERIFY(mp_so->so_usecount != 0);
        mp_so->so_usecount--;           /* for subflow socket */
        mpts->mpts_mpte = NULL;
        mpts->mpts_socket = NULL;

        MPTS_REMREF(mpts);              /* for MPTCP subflow list */
        MPTS_REMREF(mpts);              /* for subflow socket */

        soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts,
    boolean_t deleteok)
{
        struct socket *so;
        struct mptcb *mp_tp;
        int send_dfin = 0;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        VERIFY(mpts->mpts_mpte == mpte);
        VERIFY(mpts->mpts_socket != NULL);
        VERIFY(mpts->mpts_connid != CONNID_ANY &&
            mpts->mpts_connid != CONNID_ALL);

        if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
                return;

        mpts->mpts_flags |= MPTSF_DISCONNECTING;

        /*
         * If this is coming from disconnectx(2) or issued as part of
         * closing the MPTCP socket, the subflow shouldn't stick around.
         * Otherwise let it linger around in case the upper layers need
         * to retrieve its conninfo.
         */
        if (deleteok)
                mpts->mpts_flags |= MPTSF_DELETEOK;

        so = mpts->mpts_socket;
        mp_tp = mpte->mpte_mptcb;

        if (mp_tp->mpt_state > MPTCPS_ESTABLISHED)
                send_dfin = 1;

        socket_lock(so, 0);
        if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
            (so->so_state & SS_ISCONNECTED)) {
                mptcplog((LOG_DEBUG, "%s: cid %d fin %d [linger %s]\n",
                    __func__, mpts->mpts_connid, send_dfin,
                    (deleteok ? "NO" : "YES")));

                if (send_dfin)
                        mptcp_send_dfin(so);
                (void) soshutdownlock(so, SHUT_RD);
                (void) soshutdownlock(so, SHUT_WR);
                (void) sodisconnectlocked(so);
        }
        socket_unlock(so, 0);
        /*
         * Generate a disconnect event for this subflow socket, in case
         * the lower layer doesn't do it; this is needed because the
         * subflow socket deletion relies on it.  This will also end up
         * generating SO_FILT_HINT_CONNINFO_UPDATED on the MPTCP socket;
         * we cannot do that here because subflow lock is currently held.
         */
        mptcp_subflow_eupcall(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Subflow socket read upcall.
 *
 * Called when the associated subflow socket posted a read event.  The subflow
 * socket lock has been released prior to invoking the callback.  Note that the
 * upcall may occur synchronously as a result of MPTCP performing an action on
 * it, or asynchronously as a result of an event happening at the subflow layer.
 * Therefore, to maintain lock ordering, the only lock that can be acquired
 * here is the thread lock, for signalling purposes.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
        struct mptsub *mpts = arg;
        struct mptses *mpte = mpts->mpts_mpte;

        VERIFY(mpte != NULL);

        lck_mtx_lock(&mpte->mpte_thread_lock);
        mptcp_thread_signal_locked(mpte);
        lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Subflow socket input.
 *
 * Called in the context of the MPTCP thread, for reading data from the
 * underlying subflow socket and delivering it to MPTCP.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
        struct mbuf *m = NULL;
        struct socket *so;
        int error = 0;
        struct mptsub *mpts_alt = NULL;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
            struct mptsub *, mpts);

        if (!(mpts->mpts_flags & MPTSF_CONNECTED))
                return;

        so = mpts->mpts_socket;

        error = sock_receive_internal(so, NULL, &m, 0, NULL);
        if (error != 0 && error != EWOULDBLOCK) {
                mptcplog((LOG_ERR, "%s: cid %d error %d\n",
                    __func__, mpts->mpts_connid, error));
                mpts_alt = mptcp_get_subflow(mpte, mpts);
                if (mpts_alt == NULL) {
                        mptcplog((LOG_ERR, "%s: no alt path cid %d\n",
                            __func__, mpts->mpts_connid));
                        mpte->mpte_mppcb->mpp_socket->so_error = error;
                }
        } else if (error == 0) {
                mptcplog3((LOG_DEBUG, "%s: cid %d \n",
                    __func__, mpts->mpts_connid));
        }

        /* In fallback, make sure to accept data on all but one subflow */
        if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
            (!(mpts->mpts_flags & MPTSF_ACTIVE))) {
                m_freem(m);
                return;
        }

        if (m != NULL) {
                /*
                 * Release subflow lock since this may trigger MPTCP to send,
                 * possibly on a different subflow.  An extra reference has
                 * been held on the subflow by the MPTCP thread before coming
                 * here, so we can be sure that it won't go away, in the event
                 * the MP socket lock gets released.
                 */
                MPTS_UNLOCK(mpts);
                mptcp_input(mpte, m);
                MPTS_LOCK(mpts);
        }
}
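/*
 * Receive-path overview (a summary of the code above, for reference): data
 * arriving on the subflow triggers mptcp_subflow_rupcall(), which only
 * signals the MPTCP thread; the thread then calls mptcp_subflow_input() with
 * the MP socket and subflow locks held, drains the subflow via
 * sock_receive_internal(), and hands the mbuf chain to mptcp_input() for
 * MPTCP-level reassembly.  On a hard receive error, an alternate subflow is
 * looked up with mptcp_get_subflow() before the error is surfaced on the
 * MP socket.
 */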
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.  The subflow
 * socket lock has been released prior to invoking the callback.  Note that the
 * upcall may occur synchronously as a result of MPTCP performing an action on
 * it, or asynchronously as a result of an event happening at the subflow layer.
 * Therefore, to maintain lock ordering, the only lock that can be acquired
 * here is the thread lock, for signalling purposes.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
        struct mptsub *mpts = arg;
        struct mptses *mpte = mpts->mpts_mpte;

        VERIFY(mpte != NULL);

        lck_mtx_lock(&mpte->mpte_thread_lock);
        mptcp_thread_signal_locked(mpte);
        lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
static int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        size_t sb_cc = 0, tot_sent = 0;
        struct mbuf *sb_mb;
        int error = 0;
        u_int64_t mpt_dsn = 0;
        struct mptcb *mp_tp = mpte->mpte_mptcb;
        struct mbuf *mpt_mbuf = NULL;
        unsigned int off = 0;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        so = mpts->mpts_socket;

        DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
            struct mptsub *, mpts);

        /* subflow socket is suspended? */
        if (mpts->mpts_flags & MPTSF_SUSPENDED) {
                mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d is flow "
                    "controlled\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
                goto out;
        }

        /* subflow socket is not MPTCP capable? */
        if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE) &&
            !(mpts->mpts_flags & MPTSF_MP_DEGRADED)) {
                mptcplog((LOG_ERR, "%s: mp_so 0x%llx cid %d not "
                    "MPTCP capable\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpts->mpts_connid));
                goto out;
        }

        /* Remove Addr Option is not sent reliably as per I-D */
        if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
                struct tcpcb *tp = intotcpcb(sotoinpcb(so));
                tp->t_rem_aid = mpte->mpte_lost_aid;
                if (mptcp_remaddr_enable)
                        tp->t_mpflags |= TMPF_SND_REM_ADDR;
                mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
        }

        /*
         * The mbuf chains containing the metadata (as well as pointing to
         * the user data sitting at the MPTCP output queue) would then be
         * sent down to the subflow socket.
         *
         * Some notes on data sequencing:
         *
         *   a. Each mbuf must be a M_PKTHDR.
         *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
         *      in the mbuf pkthdr structure.
         *   c. Each mbuf containing the MPTCP metadata must have its
         *      pkt_flags marked with the PKTF_MPTCP flag.
         */

        /* First, drop acknowledged data */
        sb_mb = mp_so->so_snd.sb_mb;
        if (sb_mb == NULL) {
                goto out;
        }

        VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

        mpt_mbuf = sb_mb;
        while (mpt_mbuf && mpt_mbuf->m_pkthdr.mp_rlen == 0) {
                mpt_mbuf = mpt_mbuf->m_next;
        }
        if (mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
                mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
        } else {
                goto out;
        }

        MPT_LOCK(mp_tp);
        if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
                u_int64_t len = 0;
                len = mp_tp->mpt_snduna - mpt_dsn;
                sbdrop(&mp_so->so_snd, len);
        }

        /*
         * In degraded mode, we don't receive data acks, so force free
         * mbufs less than snd_nxt
         */
        mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
        if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
            MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_sndnxt)) {
                u_int64_t len = 0;
                len = mp_tp->mpt_sndnxt - mpt_dsn;
                sbdrop(&mp_so->so_snd, len);
                mp_tp->mpt_snduna = mp_tp->mpt_sndnxt;
        }

        /*
         * Adjust the subflow's notion of next byte to send based on
         * the last unacknowledged byte
         */
        if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_snduna)) {
                mpts->mpts_sndnxt = mp_tp->mpt_snduna;
        }

        /*
         * Adjust the top level notion of next byte used for retransmissions
         */
        if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
                mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
        }

        /* Now determine the offset from which to start transmitting data */
        sb_mb = mp_so->so_snd.sb_mb;
        sb_cc = mp_so->so_snd.sb_cc;
        if (sb_mb == NULL) {
                MPT_UNLOCK(mp_tp);
                goto out;
        }
        if (MPTCP_SEQ_LT(mpts->mpts_sndnxt, mp_tp->mpt_sndmax)) {
                off = mpts->mpts_sndnxt - mp_tp->mpt_snduna;
                sb_cc -= off;
        } else {
                MPT_UNLOCK(mp_tp);
                goto out;
        }
        MPT_UNLOCK(mp_tp);

        mpt_mbuf = sb_mb;
        mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;

        while (mpt_mbuf && ((mpt_mbuf->m_pkthdr.mp_rlen == 0) ||
            (mpt_mbuf->m_pkthdr.mp_rlen <= off))) {
                off -= mpt_mbuf->m_pkthdr.mp_rlen;
                mpt_mbuf = mpt_mbuf->m_next;
                mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
        }
        if ((mpts->mpts_connid == 2) || (mpts->mpts_flags & MPTSF_MP_DEGRADED))
                mptcplog((LOG_INFO, "%s: snduna = %llu off = %d id = %d"
                    " sndnxt = %llu \n", __func__,
                    mp_tp->mpt_snduna, off, mpts->mpts_connid,
                    mpts->mpts_sndnxt));

        VERIFY(mpt_mbuf && (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

        while (tot_sent < sb_cc) {
                struct mbuf *m;
                size_t mlen, len = 0;

                mlen = mpt_mbuf->m_pkthdr.mp_rlen;
                mlen -= off;
                if (mlen == 0)
                        goto out;

                if (mlen > sb_cc - tot_sent) {
                        panic("%s: unexpected %lu %lu \n", __func__,
                            mlen, sb_cc - tot_sent);
                }

                m = m_copym_mode(mpt_mbuf, off, mlen, M_DONTWAIT,
                    M_COPYM_MUST_COPY_HDR);
                if (m == NULL) {
                        error = ENOBUFS;
                        break;
                }

                /* Create a DSN mapping for the data (m_copym does it) */
                mpt_dsn = mpt_mbuf->m_pkthdr.mp_dsn;
                m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
                m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
                m->m_pkthdr.mp_dsn = mpt_dsn + off;
                m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
                m->m_pkthdr.mp_rlen = mlen;
                mpts->mpts_rel_seq += mlen;
                m->m_pkthdr.len = mlen;

                /* last contiguous mapping is stored for error cases */
                if (mpts->mpts_lastmap.mptsl_dsn +
                    mpts->mpts_lastmap.mptsl_len == mpt_dsn) {
                        mpts->mpts_lastmap.mptsl_len += tot_sent;
                } else if (MPTCP_SEQ_LT((mpts->mpts_lastmap.mptsl_dsn +
                    mpts->mpts_lastmap.mptsl_len), mpt_dsn)) {
                        if (m->m_pkthdr.mp_dsn == 0)
                                panic("%s %llu", __func__, mpt_dsn);
                        mpts->mpts_lastmap.mptsl_dsn = m->m_pkthdr.mp_dsn;
                        mpts->mpts_lastmap.mptsl_sseq = m->m_pkthdr.mp_rseq;
                        mpts->mpts_lastmap.mptsl_len = m->m_pkthdr.mp_rlen;
                }

                error = sock_sendmbuf(so, NULL, m, 0, &len);
                DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
                    struct sockbuf *, &so->so_rcv,
                    struct sockbuf *, &so->so_snd,
                    struct mptses *, mpte, struct mptsub *, mpts,
                    size_t, mlen);
                if (error != 0) {
                        mptcplog((LOG_ERR, "%s: len = %zd error = %d \n",
                            __func__, len, error));
                        break;
                }
                mpts->mpts_sndnxt += mlen;
                MPT_LOCK(mp_tp);
                if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mpts->mpts_sndnxt)) {
                        if (MPTCP_DATASEQ_HIGH32(mpts->mpts_sndnxt) >
                            MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
                                mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
                        mp_tp->mpt_sndnxt = mpts->mpts_sndnxt;
                }
                MPT_UNLOCK(mp_tp);
                if (len != mlen) {
                        mptcplog((LOG_ERR, "%s: cid %d wrote %d "
                            "(expected %d)\n", __func__,
                            mpts->mpts_connid, len, mlen));
                }
                tot_sent += mlen;
                off = 0;
                mpt_mbuf = mpt_mbuf->m_next;
        }

        if (error != 0 && error != EWOULDBLOCK) {
                mptcplog((LOG_ERR, "MPTCP ERROR %s: cid %d error %d\n",
                    __func__, mpts->mpts_connid, error));
        }
        if (error == 0) {
                if ((mpts->mpts_connid == 2) ||
                    (mpts->mpts_flags & MPTSF_MP_DEGRADED))
                        mptcplog((LOG_DEBUG, "%s: cid %d wrote %d %d\n",
                            __func__, mpts->mpts_connid, tot_sent,
                            sb_cc));
                MPT_LOCK(mp_tp);
                mptcp_cancel_timer(mp_tp, MPTT_REXMT);
                MPT_UNLOCK(mp_tp);
        }
out:
        return (error);
}
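/*
 * Illustrative example of the DSN mapping built above (the numbers are made
 * up, for illustration only): suppose mpt_snduna = 1000, the subflow's
 * mpts_sndnxt = 1200, and the first unacked mbuf in the MP send buffer
 * carries mp_dsn = 1000 with mp_rlen = 500.  Then off = 200, and the copy
 * handed to sock_sendmbuf() is stamped with mp_dsn = 1200 (mpt_dsn + off),
 * mp_rseq = the subflow's current relative sequence number, and mp_rlen = the
 * number of bytes actually mapped, so the receiver can place the segment at
 * the right MPTCP-level offset regardless of which subflow delivered it.
 */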
/*
 * Subflow socket control event upcall.
 *
 * Called when the associated subflow socket posted one or more control events.
 * The subflow socket lock has been released prior to invoking the callback.
 * Note that the upcall may occur synchronously as a result of MPTCP performing
 * an action on it, or asynchronously as a result of an event happening at the
 * subflow layer.  Therefore, to maintain lock ordering, the only lock that can
 * be acquired here is the thread lock, for signalling purposes.
 */
static void
mptcp_subflow_eupcall(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
        struct mptsub *mpts = arg;
        struct mptses *mpte = mpts->mpts_mpte;

        VERIFY(mpte != NULL);

        lck_mtx_lock(&mpte->mpte_thread_lock);
        atomic_bitset_32(&mpts->mpts_evctl, events);
        mptcp_thread_signal_locked(mpte);
        lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts)
{
        uint32_t events;
        ev_ret_t ret = MPTS_EVRET_OK;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        /* bail if there's nothing to process */
        if ((events = mpts->mpts_evctl) == 0)
                return (ret);

        if (events & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST|
            SO_FILT_HINT_CANTRCVMORE|SO_FILT_HINT_CANTSENDMORE|
            SO_FILT_HINT_TIMEOUT|SO_FILT_HINT_NOSRCADDR|
            SO_FILT_HINT_IFDENIED|SO_FILT_HINT_SUSPEND|
            SO_FILT_HINT_DISCONNECTED)) {
                events |= SO_FILT_HINT_MPFAILOVER;
        }

        DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
            struct mptsub *, mpts, uint32_t, events);

        mptcplog2((LOG_DEBUG, "%s: cid %d events=%b\n", __func__,
            mpts->mpts_connid, events, SO_FILT_HINT_BITS));

        if ((events & SO_FILT_HINT_MPFAILOVER) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_failover_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_MPFAILOVER;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_CONNRESET) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_connreset_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_CONNRESET;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_MUSTRST) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_mustrst_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_MUSTRST;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_CANTRCVMORE) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_cantrcvmore_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_CANTRCVMORE;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_CANTSENDMORE) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_cantsendmore_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_CANTSENDMORE;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_TIMEOUT) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_timeout_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_TIMEOUT;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_NOSRCADDR) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_nosrcaddr_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_NOSRCADDR;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_IFDENIED) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_ifdenied_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_IFDENIED;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_SUSPEND) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_suspend_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_SUSPEND;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_RESUME) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_resume_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_RESUME;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_CONNECTED) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_connected_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_CONNECTED;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_MPSTATUS) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_mpstatus_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_MPSTATUS;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
        if ((events & SO_FILT_HINT_DISCONNECTED) && (ret >= MPTS_EVRET_OK)) {
                ev_ret_t error = mptcp_subflow_disconnected_ev(mpte, mpts);
                events &= ~SO_FILT_HINT_DISCONNECTED;
                ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }

        /*
         * We should be getting only events specified via sock_catchevents(),
         * so loudly complain if we have any unprocessed one(s).
         */
        if (events != 0 || ret < MPTS_EVRET_OK) {
                mptcplog((LOG_ERR, "%s%s: cid %d evret %s (%d)"
                    " unhandled events=%b\n",
                    (events != 0) ? "MPTCP_ERROR " : "",
                    __func__, mpts->mpts_connid,
                    mptcp_evret2str(ret), ret, events, SO_FILT_HINT_BITS));
        }

        /* clear the ones we've processed */
        atomic_bitclear_32(&mpts->mpts_evctl, ~events);

        return (ret);
}
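/*
 * Note on the return-value folding above: each handler returns an ev_ret_t,
 * and the guard "(ret >= MPTS_EVRET_OK)" combined with "MAX(error, ret)"
 * means that any handler result comparing below MPTS_EVRET_OK is sticky and
 * suppresses the remaining handlers, while among the non-failing handlers the
 * numerically largest outcome (e.g. MPTS_EVRET_CONNECT_PENDING vs. plain
 * MPTS_EVRET_OK) is what gets folded into the value returned to the caller.
 */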
/*
 * Handle SO_FILT_HINT_CONNRESET subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
            mpts->mpts_connid, (linger ? "YES" : "NO")));

        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = ECONNREFUSED;

        /*
         * We got a TCP RST for this subflow connection.
         *
         * Right now, we simply propagate ECONNREFUSED to the MPTCP socket
         * client if the MPTCP connection has not been established. Otherwise
         * we close the socket.
         */
        mptcp_subflow_disconnect(mpte, mpts, !linger);

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                mp_so->so_error = ECONNREFUSED;
        }
        MPT_UNLOCK(mp_tp);

        /*
         * Keep the subflow socket around, unless the MPTCP socket has
         * been detached or the subflow has been disconnected explicitly,
         * in which case it should be deleted right away.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
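/*
 * The "linger" idiom above is shared by most of the *_ev() handlers that
 * follow: the subflow is kept around (MPTS_EVRET_OK) as long as it has not
 * been explicitly marked MPTSF_DELETEOK and the MP socket is not being torn
 * down (SOF_PCBCLEARING), so that the upper layers can still query its
 * conninfo; otherwise MPTS_EVRET_DELETE tells the caller to delete the
 * subflow right away.
 */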
/*
 * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event.
 */
static ev_ret_t
mptcp_subflow_cantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *so;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        so = mpts->mpts_socket;

        mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));

        /*
         * We got a FIN for this subflow connection.  This subflow socket
         * is no longer available for receiving data;
         * The FIN may arrive with data. The data is handed up to the
         * mptcp socket and the subflow is disconnected.
         */

        return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event.
 */
static ev_ret_t
mptcp_subflow_cantsendmore_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *so;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        so = mpts->mpts_socket;

        mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));
        return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_TIMEOUT subflow socket event.
 */
static ev_ret_t
mptcp_subflow_timeout_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        mptcplog((LOG_NOTICE, "%s: cid %d [linger %s]\n", __func__,
            mpts->mpts_connid, (linger ? "YES" : "NO")));

        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = ETIMEDOUT;

        /*
         * The subflow connection has timed out.
         *
         * Right now, we simply propagate ETIMEDOUT to the MPTCP socket
         * client if the MPTCP connection has not been established.
         */
        mptcp_subflow_disconnect(mpte, mpts, !linger);

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                mp_so->so_error = ETIMEDOUT;
        }
        MPT_UNLOCK(mp_tp);

        /*
         * Keep the subflow socket around, unless the MPTCP socket has
         * been detached or the subflow has been disconnected explicitly,
         * in which case it should be deleted right away.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;
        struct tcpcb *tp = NULL;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        /* Not grabbing socket lock as t_local_aid is write once only */
        tp = intotcpcb(sotoinpcb(so));
        /*
         * This overwrites any previous mpte_lost_aid to avoid storing
         * too much state when the typical case has only two subflows.
         */
        mpte->mpte_flags |= MPTE_SND_REM_ADDR;
        mpte->mpte_lost_aid = tp->t_local_aid;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
            mpts->mpts_connid, (linger ? "YES" : "NO")));

        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = EADDRNOTAVAIL;

        /*
         * The subflow connection has lost its source address.
         *
         * Right now, we simply propagate EADDRNOTAVAIL to the MPTCP socket
         * client if the MPTCP connection has not been established.  If it
         * has been established with one subflow, we keep the MPTCP
         * connection valid without any subflows till closed by application.
         * This lets tcp connection manager decide whether to close this or
         * not as it reacts to reachability changes too.
         */
        mptcp_subflow_disconnect(mpte, mpts, !linger);

        MPT_LOCK(mp_tp);
        if ((mp_tp->mpt_state < MPTCPS_ESTABLISHED) &&
            (mp_so->so_flags & SOF_NOADDRAVAIL)) {
                mp_so->so_error = EADDRNOTAVAIL;
        }
        MPT_UNLOCK(mp_tp);

        /*
         * Keep the subflow socket around, unless the MPTCP socket has
         * been detached or the subflow has been disconnected explicitly,
         * in which case it should be deleted right away.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct mptsub *mpts_alt = NULL;
        struct socket *so = NULL;
        struct socket *mp_so;
        int altpath_exists = 0;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mptcplog2((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)));

        MPTS_UNLOCK(mpts);
        mpts_alt = mptcp_get_subflow(mpte, mpts);

        /*
         * If there is no alternate eligible subflow, ignore the
         * failover hint.
         */
        if (mpts_alt == NULL) {
                mptcplog2((LOG_WARNING, "%s: no alternate path\n", __func__));
                MPTS_LOCK(mpts);
                goto done;
        }
        MPTS_LOCK(mpts_alt);
        altpath_exists = 1;
        so = mpts_alt->mpts_socket;
        if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
                socket_lock(so, 1);
                /* All data acknowledged */
                if (so->so_snd.sb_cc == 0) {
                        so->so_flags &= ~SOF_MP_TRYFAILOVER;
                        mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
                } else {
                        /* no alternate path available */
                        altpath_exists = 0;
                }
                socket_unlock(so, 1);
        }
        if (altpath_exists) {
                mpts_alt->mpts_flags |= MPTSF_ACTIVE;
                struct mptcb *mp_tp = mpte->mpte_mptcb;
                /* Bring the subflow's notion of snd_nxt into the send window */
                MPT_LOCK(mp_tp);
                mpts_alt->mpts_sndnxt = mp_tp->mpt_snduna;
                MPT_UNLOCK(mp_tp);
                mpte->mpte_active_sub = mpts_alt;
                socket_lock(so, 1);
                socket_unlock(so, 1);
        }
        MPTS_UNLOCK(mpts_alt);

        if (altpath_exists) {
                soevent(mp_so,
                    SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);
                mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx switched from "
                    "%d to %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mpts->mpts_connid, mpts_alt->mpts_connid));
                tcpstat.tcps_mp_switches++;
        }

        MPTS_LOCK(mpts);
        if (altpath_exists) {
                mpts->mpts_flags |= MPTSF_FAILINGOVER;
                mpts->mpts_flags &= ~MPTSF_ACTIVE;
        } else {
done:
                so = mpts->mpts_socket;
                socket_lock(so, 1);
                so->so_flags &= ~SOF_MP_TRYFAILOVER;
                socket_unlock(so, 1);
        }
        MPTS_LOCK_ASSERT_HELD(mpts);
        return (MPTS_EVRET_OK);
}
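/*
 * Failover, in short: any of the serious subflow events (reset, timeout,
 * source-address loss, etc.) is widened to SO_FILT_HINT_MPFAILOVER in
 * mptcp_subflow_events().  The handler above then asks mptcp_get_subflow()
 * for an eligible alternate; if one exists, its send state is rebased to
 * mpt_snduna, it becomes MPTSF_ACTIVE and mpte_active_sub, and the failing
 * subflow is demoted to MPTSF_FAILINGOVER, with tcps_mp_switches counting
 * the switch.
 */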
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
            mpts->mpts_connid, (linger ? "YES" : "NO")));

        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = EHOSTUNREACH;

        /*
         * The subflow connection cannot use the outgoing interface.
         *
         * Right now, we simply propagate EHOSTUNREACH to the MPTCP socket
         * client if the MPTCP connection has not been established.  If it
         * has been established, let the upper layer call disconnectx.
         */
        mptcp_subflow_disconnect(mpte, mpts, !linger);
        MPTS_UNLOCK(mpts);

        soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED);

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                mp_so->so_error = EHOSTUNREACH;
        }
        MPT_UNLOCK(mp_tp);

        MPTS_LOCK(mpts);
        /*
         * Keep the subflow socket around, unless the MPTCP socket has
         * been detached or the subflow has been disconnected explicitly,
         * in which case it should be deleted right away.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_SUSPEND subflow socket event.
 */
static ev_ret_t
mptcp_subflow_suspend_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *so;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        so = mpts->mpts_socket;

        /* the subflow connection is being flow controlled */
        mpts->mpts_flags |= MPTSF_SUSPENDED;

        mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
            mpts->mpts_connid));

        return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_RESUME subflow socket event.
 */
static ev_ret_t
mptcp_subflow_resume_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *so;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);

        so = mpts->mpts_socket;

        /* the subflow connection is no longer flow controlled */
        mpts->mpts_flags &= ~MPTSF_SUSPENDED;

        mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid));

        return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts)
{
        char buf0[MAX_IPv6_STR_LEN], buf1[MAX_IPv6_STR_LEN];
        struct sockaddr_entry *src_se, *dst_se;
        struct sockaddr_storage src;
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        struct ifnet *outifp;
        int af, error = 0;
        boolean_t mpok = FALSE;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;

        MPTS_LOCK_ASSERT_HELD(mpts);
        so = mpts->mpts_socket;
        af = mpts->mpts_family;

        if (mpts->mpts_flags & MPTSF_CONNECTED)
                return (MPTS_EVRET_OK);

        if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
            (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
                return (MPTS_EVRET_OK);
        }

        /*
         * The subflow connection has been connected.  Find out whether it
         * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
         *
         *   a. If MPTCP connection is not yet established, then this must be
         *      the first subflow connection.  If MPTCP failed to negotiate,
         *      indicate to the MPTCP socket client via EPROTO, that the
         *      underlying TCP connection may be peeled off via peeloff(2).
         *      Otherwise, mark the MPTCP socket as connected.
         *
         *   b. If MPTCP connection has been established, then this must be
         *      one of the subsequent subflow connections. If MPTCP failed
         *      to negotiate, disconnect the connection since peeloff(2)
         *      is no longer possible.
         *
         * Right now, we simply unblock any waiters at the MPTCP socket layer
         * if the MPTCP connection has not been established.
         */
        socket_lock(so, 0);

        if (so->so_state & SS_ISDISCONNECTED) {
                /*
                 * With MPTCP joins, a connection is connected at the subflow
                 * level, but the 4th ACK from the server elevates the MPTCP
                 * subflow to connected state. So there is a small window
                 * where the subflow could get disconnected before the
                 * connected event is processed.
                 */
                socket_unlock(so, 0);
                return (MPTS_EVRET_OK);
        }

        mpts->mpts_soerror = 0;
        mpts->mpts_flags &= ~MPTSF_CONNECTING;
        mpts->mpts_flags |= MPTSF_CONNECTED;
        if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
                mpts->mpts_flags |= MPTSF_MP_CAPABLE;

        VERIFY(mpts->mpts_dst_sl != NULL);
        dst_se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
        VERIFY(dst_se != NULL && dst_se->se_addr != NULL &&
            dst_se->se_addr->sa_family == af);

        VERIFY(mpts->mpts_src_sl != NULL);
        src_se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
        VERIFY(src_se != NULL && src_se->se_addr != NULL &&
            src_se->se_addr->sa_family == af);

        /* get/check source IP address */
        switch (af) {
        case AF_INET: {
                error = in_getsockaddr_s(so, &src);
                if (error == 0) {
                        struct sockaddr_in *ms = SIN(src_se->se_addr);
                        struct sockaddr_in *s = SIN(&src);

                        VERIFY(s->sin_len == ms->sin_len);
                        VERIFY(ms->sin_family == AF_INET);

                        if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
                            bcmp(&ms->sin_addr, &s->sin_addr,
                            sizeof (ms->sin_addr)) != 0) {
                                mptcplog((LOG_ERR, "%s: cid %d local "
                                    "address %s (expected %s)\n", __func__,
                                    mpts->mpts_connid, inet_ntop(AF_INET,
                                    (void *)&s->sin_addr.s_addr, buf0,
                                    sizeof (buf0)), inet_ntop(AF_INET,
                                    (void *)&ms->sin_addr.s_addr, buf1,
                                    sizeof (buf1))));
                        }
                        bcopy(s, ms, sizeof (*s));
                }
                break;
        }
        case AF_INET6: {
                error = in6_getsockaddr_s(so, &src);
                if (error == 0) {
                        struct sockaddr_in6 *ms = SIN6(src_se->se_addr);
                        struct sockaddr_in6 *s = SIN6(&src);

                        VERIFY(s->sin6_len == ms->sin6_len);
                        VERIFY(ms->sin6_family == AF_INET6);

                        if ((mpts->mpts_flags & MPTSF_BOUND_IP) &&
                            bcmp(&ms->sin6_addr, &s->sin6_addr,
                            sizeof (ms->sin6_addr)) != 0) {
                                mptcplog((LOG_ERR, "%s: cid %d local "
                                    "address %s (expected %s)\n", __func__,
                                    mpts->mpts_connid, inet_ntop(AF_INET6,
                                    (void *)&s->sin6_addr, buf0,
                                    sizeof (buf0)), inet_ntop(AF_INET6,
                                    (void *)&ms->sin6_addr, buf1,
                                    sizeof (buf1))));
                        }
                        bcopy(s, ms, sizeof (*s));
                }
                break;
        }
        default:
                VERIFY(0);
                /* NOTREACHED */
        }

        if (error != 0) {
                mptcplog((LOG_ERR, "%s: cid %d getsockaddr failed (%d)\n",
                    __func__, mpts->mpts_connid, error));
        }

        /* get/verify the outbound interface */
        outifp = sotoinpcb(so)->inp_last_outifp;        /* could be NULL */
        if (mpts->mpts_flags & MPTSF_BOUND_IF) {
                VERIFY(mpts->mpts_outif != NULL);
                if (mpts->mpts_outif != outifp) {
                        mptcplog((LOG_ERR, "%s: cid %d outif %s "
                            "(expected %s)\n", __func__, mpts->mpts_connid,
                            ((outifp != NULL) ? outifp->if_xname : "NULL"),
                            mpts->mpts_outif->if_xname));
                        if (outifp == NULL)
                                outifp = mpts->mpts_outif;
                }
        } else {
                mpts->mpts_outif = outifp;
        }

        socket_unlock(so, 0);

        mptcplog((LOG_DEBUG, "%s: cid %d outif %s %s[%d] -> %s[%d] "
            "is %s\n", __func__, mpts->mpts_connid, ((outifp != NULL) ?
            outifp->if_xname : "NULL"), inet_ntop(af, (af == AF_INET) ?
            (void *)&SIN(src_se->se_addr)->sin_addr.s_addr :
            (void *)&SIN6(src_se->se_addr)->sin6_addr, buf0, sizeof (buf0)),
            ((af == AF_INET) ? ntohs(SIN(src_se->se_addr)->sin_port) :
            ntohs(SIN6(src_se->se_addr)->sin6_port)),
            inet_ntop(af, ((af == AF_INET) ?
            (void *)&SIN(dst_se->se_addr)->sin_addr.s_addr :
            (void *)&SIN6(dst_se->se_addr)->sin6_addr), buf1, sizeof (buf1)),
            ((af == AF_INET) ? ntohs(SIN(dst_se->se_addr)->sin_port) :
            ntohs(SIN6(dst_se->se_addr)->sin6_port)),
            ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ?
            "MPTCP capable" : "a regular TCP")));

        mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
        MPTS_UNLOCK(mpts);

        soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                /* case (a) above */
                if (!mpok) {
                        mp_tp->mpt_flags |= MPTCPF_PEEL_OFF;
                        (void) mptcp_drop(mpte, mp_tp, EPROTO);
                        MPT_UNLOCK(mp_tp);
                } else {
                        if (mptcp_init_authparms(mp_tp) != 0) {
                                mp_tp->mpt_flags |= MPTCPF_PEEL_OFF;
                                (void) mptcp_drop(mpte, mp_tp, EPROTO);
                                MPT_UNLOCK(mp_tp);
                                mpok = FALSE;
                        } else {
                                mp_tp->mpt_state = MPTCPS_ESTABLISHED;
                                mpte->mpte_associd = mpts->mpts_connid;
                                DTRACE_MPTCP2(state__change,
                                    struct mptcb *, mp_tp,
                                    uint32_t, 0 /* event */);
                                mptcp_init_statevars(mp_tp);
                                MPT_UNLOCK(mp_tp);

                                (void) mptcp_setconnorder(mpte,
                                    mpts->mpts_connid, 1);
                                soisconnected(mp_so);
                        }
                }
                MPTS_LOCK(mpts);
                if (mpok) {
                        /* Initialize the relative sequence number */
                        mpts->mpts_rel_seq = 1;
                        mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
                        mpte->mpte_nummpcapflows++;
                        MPT_LOCK_SPIN(mp_tp);
                        mpts->mpts_sndnxt = mp_tp->mpt_snduna;
                        MPT_UNLOCK(mp_tp);
                }
        } else if (mpok) {
                MPT_UNLOCK(mp_tp);
                /*
                 * In case of additional flows, the MPTCP socket is not
                 * MPTSF_MP_CAPABLE until an ACK is received from server
                 * for 3-way handshake.  TCP would have guaranteed that this
                 * is an MPTCP subflow.
                 */
                MPTS_LOCK(mpts);
                mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
                mpte->mpte_nummpcapflows++;
                mpts->mpts_rel_seq = 1;
                MPT_LOCK_SPIN(mp_tp);
                mpts->mpts_sndnxt = mp_tp->mpt_snduna;
                MPT_UNLOCK(mp_tp);
        }
        MPTS_LOCK_ASSERT_HELD(mpts);

        return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
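/*
 * To summarize the first-subflow path above: if the peer negotiated MPTCP
 * (mpok), mptcp_init_authparms() and mptcp_init_statevars() seed the key and
 * sequence-number state, the MP connection moves to MPTCPS_ESTABLISHED and
 * soisconnected() unblocks the MPTCP socket; if negotiation failed, the
 * session is dropped with EPROTO and MPTCPF_PEEL_OFF is set so the client can
 * still peel off the underlying TCP connection.  Subsequent subflows only
 * update the per-subflow accounting (MPTSF_MPCAP_CTRSET, mpte_nummpcapflows,
 * mpts_rel_seq, mpts_sndnxt).
 */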
/*
 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        mptcplog2((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
            mpts->mpts_connid, (linger ? "YES" : "NO")));

        if (mpts->mpts_flags & MPTSF_DISCONNECTED)
                return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);

        /*
         * Clear flags that are used by getconninfo to return state.
         * Retain like MPTSF_DELETEOK, MPTSF_ACTIVE for internal purposes.
         */
        mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING|
            MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED|
            MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED|
            MPTSF_SUSPENDED|MPTSF_ACTIVE);
        mpts->mpts_flags |= MPTSF_DISCONNECTED;

        /*
         * The subflow connection has been disconnected.
         *
         * Right now, we simply unblock any waiters at the MPTCP socket layer
         * if the MPTCP connection has not been established.
         */
        soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);

        if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
                mpte->mpte_nummpcapflows--;
                mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
        }

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                MPT_UNLOCK(mp_tp);
                soisdisconnected(mp_so);
        } else {
                MPT_UNLOCK(mp_tp);
        }

        /*
         * The underlying subflow socket has been disconnected;
         * it is no longer useful to us.  Keep the subflow socket
         * around, unless the MPTCP socket has been detached or
         * the subflow has been disconnected explicitly, in which
         * case it should be deleted right away.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        ev_ret_t ret = MPTS_EVRET_OK_UPDATE;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;

        MPTS_LOCK_ASSERT_HELD(mpts);
        so = mpts->mpts_socket;

        socket_lock(so, 0);
        MPT_LOCK(mp_tp);

        if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
                mpts->mpts_flags |= MPTSF_MP_CAPABLE;
        else
                mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;

        if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
                if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
                        goto done;
                mpts->mpts_flags |= MPTSF_MP_DEGRADED;
        } else {
                mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
        }

        if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY)
                mpts->mpts_flags |= MPTSF_MP_READY;
        else
                mpts->mpts_flags &= ~MPTSF_MP_READY;

        if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
                mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
                mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
        }

        if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
                VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
                ret = MPTS_EVRET_DISCONNECT_FALLBACK;
        } else if (mpts->mpts_flags & MPTSF_MP_READY) {
                mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
                ret = MPTS_EVRET_CONNECT_PENDING;
        }

        mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d "
            "mptsf=%b\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mpte->mpte_mppcb->mpp_socket),
            mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
            mpts->mpts_flags, MPTSF_BITS));
done:
        MPT_UNLOCK(mp_tp);
        socket_unlock(so, 0);

        return (ret);
}
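/*
 * The flags manipulated above drive the two special ev_ret_t values: once a
 * subflow reports TMPF_TCP_FALLBACK, the session is marked
 * MPTCPF_FALLBACK_TO_TCP and MPTS_EVRET_DISCONNECT_FALLBACK feeds the
 * disconnect_fallback handling in mptcp_thread_dowork(), whereas
 * TMPF_MPTCP_READY sets MPTCPF_JOIN_READY and MPTS_EVRET_CONNECT_PENDING
 * feeds its connect_pending handling.
 */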
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts)
{
        struct socket *mp_so, *so;
        struct mptcb *mp_tp;
        boolean_t linger;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        MPTS_LOCK_ASSERT_HELD(mpts);
        VERIFY(mpte->mpte_mppcb != NULL);
        mp_so = mpte->mpte_mppcb->mpp_socket;
        mp_tp = mpte->mpte_mptcb;
        so = mpts->mpts_socket;

        linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
            !(mp_so->so_flags & SOF_PCBCLEARING));

        if (mpts->mpts_soerror == 0)
                mpts->mpts_soerror = ECONNABORTED;

        so->so_error = ECONNABORTED;

        /* We got an invalid option or a fast close */
        socket_lock(so, 0);
        struct tcptemp *t_template;
        struct inpcb *inp = sotoinpcb(so);
        struct tcpcb *tp = NULL;

        tp = intotcpcb(inp);

        t_template = tcp_maketemplate(tp);
        if (t_template) {
                unsigned int ifscope, nocell = 0;

                if (inp->inp_flags & INP_BOUND_IF)
                        ifscope = inp->inp_boundifp->if_index;
                else
                        ifscope = IFSCOPE_NONE;

                if (inp->inp_flags & INP_NO_IFT_CELLULAR)
                        nocell = 1;

                tcp_respond(tp, t_template->tt_ipgen,
                    &t_template->tt_t, (struct mbuf *)NULL,
                    tp->rcv_nxt, tp->snd_una, TH_RST, ifscope, nocell);
                (void) m_free(dtom(t_template));
                mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d \n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mpts->mpts_connid));
        }
        socket_unlock(so, 0);
        mptcp_subflow_disconnect(mpte, mpts, !linger);
        MPTS_UNLOCK(mpts);

        soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED);

        MPT_LOCK(mp_tp);
        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
                mp_so->so_error = ECONNABORTED;
        }
        MPT_UNLOCK(mp_tp);

        MPTS_LOCK(mpts);
        /*
         * Keep the subflow socket around unless the subflow has been
         * disconnected explicitly.
         */
        return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
static const char *
mptcp_evret2str(ev_ret_t ret)
{
        const char *c = "UNKNOWN";

        switch (ret) {
        case MPTS_EVRET_DELETE:
                c = "MPTS_EVRET_DELETE";
                break;
        case MPTS_EVRET_CONNECT_PENDING:
                c = "MPTS_EVRET_CONNECT_PENDING";
                break;
        case MPTS_EVRET_DISCONNECT_FALLBACK:
                c = "MPTS_EVRET_DISCONNECT_FALLBACK";
                break;
        case MPTS_EVRET_OK:
                c = "MPTS_EVRET_OK";
                break;
        case MPTS_EVRET_OK_UPDATE:
                c = "MPTS_EVRET_OK_UPDATE";
                break;
        }
        return (c);
}
/*
 * Add a reference to a subflow structure; used by MPTS_ADDREF().
 */
void
mptcp_subflow_addref(struct mptsub *mpts, int locked)
{
        if (!locked)
                MPTS_LOCK(mpts);
        else
                MPTS_LOCK_ASSERT_HELD(mpts);

        if (++mpts->mpts_refcnt == 0) {
                panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
                /* NOTREACHED */
        }
        if (!locked)
                MPTS_UNLOCK(mpts);
}
/*
 * Remove a reference held on a subflow structure; used by MPTS_REMREF();
 */
void
mptcp_subflow_remref(struct mptsub *mpts)
{
        MPTS_LOCK(mpts);
        if (mpts->mpts_refcnt == 0) {
                panic("%s: mpts %p negative refcnt\n", __func__, mpts);
                /* NOTREACHED */
        }
        if (--mpts->mpts_refcnt > 0) {
                MPTS_UNLOCK(mpts);
                return;
        }
        /* callee will unlock and destroy lock */
        mptcp_subflow_free(mpts);
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
        struct socket *mp_so;
        struct sockopt sopt;
        char buf[32];
        int error;

        VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
        mpo->mpo_flags &= ~MPOF_INTERIM;

        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;

        bzero(&sopt, sizeof (sopt));
        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = mpo->mpo_level;
        sopt.sopt_name = mpo->mpo_name;
        sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
        sopt.sopt_valsize = sizeof (int);
        sopt.sopt_p = kernproc;

        error = sosetoptlock(so, &sopt, 0);     /* already locked */
        if (error == 0) {
                mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
                    "val %d set successful\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
                    buf, sizeof (buf)), mpo->mpo_intval));
        } else {
                mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s "
                    "val %d set error %d\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
                    buf, sizeof (buf)), mpo->mpo_intval, error));
        }
        return (error);
}
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
        struct socket *mp_so;
        struct sockopt sopt;
        char buf[32];
        int error;

        VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
        MPTE_LOCK_ASSERT_HELD(mpte);    /* same as MP socket lock */
        mp_so = mpte->mpte_mppcb->mpp_socket;

        bzero(&sopt, sizeof (sopt));
        sopt.sopt_dir = SOPT_GET;
        sopt.sopt_level = mpo->mpo_level;
        sopt.sopt_name = mpo->mpo_name;
        sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
        sopt.sopt_valsize = sizeof (int);
        sopt.sopt_p = kernproc;

        error = sogetoptlock(so, &sopt, 0);     /* already locked */
        if (error == 0) {
                mptcplog2((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s "
                    "val %d get successful\n", __func__,
                    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name,
                    buf, sizeof (buf)), mpo->mpo_intval));
        } else {
                mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s get error %d\n",
                    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                    mptcp_sopt2str(mpo->mpo_level,
                    mpo->mpo_name, buf, sizeof (buf)), error));
        }
        return (error);
}
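/*
 * Sketch of how these helpers are typically driven (hypothetical values, for
 * illustration only): an MPTCP-level option recorded with MPOF_SUBFLOW_OK is
 * replayed onto a subflow socket roughly as
 *
 *      struct mptopt smpo;
 *
 *      bzero(&smpo, sizeof (smpo));
 *      smpo.mpo_flags |= MPOF_SUBFLOW_OK;
 *      smpo.mpo_level = SOL_SOCKET;
 *      smpo.mpo_name = SO_KEEPALIVE;
 *      smpo.mpo_intval = 1;
 *      (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
 *
 * with the subflow socket already locked, as required by the comments above.
 */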
2950  * MPTCP garbage collector. 
2952  * This routine is called by the MP domain on-demand, periodic callout, 
2953  * which is triggered when a MPTCP socket is closed.  The callout will 
2954  * repeat as long as this routine returns a non-zero value. 
2957 mptcp_gc(struct mppcbinfo 
*mppi
) 
2959         struct mppcb 
*mpp
, *tmpp
; 
2960         uint32_t active 
= 0; 
2962         lck_mtx_assert(&mppi
->mppi_lock
, LCK_MTX_ASSERT_OWNED
); 
2964         mptcplog3((LOG_DEBUG
, "%s: running\n", __func__
)); 
2966         TAILQ_FOREACH_SAFE(mpp
, &mppi
->mppi_pcbs
, mpp_entry
, tmpp
) { 
2967                 struct socket 
*mp_so
; 
2968                 struct mptses 
*mpte
; 
2969                 struct mptcb 
*mp_tp
; 
2971                 VERIFY(mpp
->mpp_flags 
& MPP_ATTACHED
); 
2972                 mp_so 
= mpp
->mpp_socket
; 
2973                 VERIFY(mp_so 
!= NULL
); 
2974                 mpte 
= mptompte(mpp
); 
2975                 VERIFY(mpte 
!= NULL
); 
2976                 mp_tp 
= mpte
->mpte_mptcb
; 
2977                 VERIFY(mp_tp 
!= NULL
); 
2979                 mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx found " 
2980                     "(u=%d,r=%d,s=%d)\n", __func__
, 
2981                     (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mp_so
->so_usecount
, 
2982                     mp_so
->so_retaincnt
, mpp
->mpp_state
)); 
2984                 if (!lck_mtx_try_lock(&mpp
->mpp_lock
)) { 
2985                         mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped " 
2986                             "(u=%d,r=%d)\n", __func__
, 
2987                             (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), 
2988                             mp_so
->so_usecount
, mp_so
->so_retaincnt
)); 
2993                 /* check again under the lock */ 
2994                 if (mp_so
->so_usecount 
> 1) { 
2995                         boolean_t wakeup 
= FALSE
; 
2996                         struct mptsub 
*mpts
, *tmpts
; 
2998                         mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped " 
2999                             "[u=%d,r=%d] %d %d\n", __func__
, 
3000                             (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), 
3001                             mp_so
->so_usecount
, mp_so
->so_retaincnt
, 
3002                             mp_tp
->mpt_gc_ticks
, 
3005                         if (mp_tp
->mpt_state 
>= MPTCPS_FIN_WAIT_1
) { 
3006                                 if (mp_tp
->mpt_gc_ticks 
> 0) 
3007                                         mp_tp
->mpt_gc_ticks
--; 
3008                                 if (mp_tp
->mpt_gc_ticks 
== 0) { 
3010                                         if (mp_tp
->mpt_localkey 
!= NULL
) { 
3012                                                     mp_tp
->mpt_localkey
); 
3013                                                 mp_tp
->mpt_localkey 
= NULL
; 
3019                                 TAILQ_FOREACH_SAFE(mpts
, 
3020                                     &mpte
->mpte_subflows
, mpts_entry
, tmpts
) { 
3022                                         mpts
->mpts_flags 
|= MPTSF_DELETEOK
; 
3023                                         if (mpts
->mpts_soerror 
== 0) 
3024                                                 mpts
->mpts_soerror 
= ETIMEDOUT
; 
3025                                         mptcp_subflow_eupcall(mpts
->mpts_socket
, 
3026                                             mpts
, SO_FILT_HINT_DISCONNECTED
); 
3030                         lck_mtx_unlock(&mpp
->mpp_lock
); 
3035                 if (mpp
->mpp_state 
!= MPPCB_STATE_DEAD
) { 
3036                         mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped " 
3037                             "[u=%d,r=%d,s=%d]\n", __func__
, 
3038                             (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), 
3039                             mp_so
->so_usecount
, mp_so
->so_retaincnt
, 
3041                         lck_mtx_unlock(&mpp
->mpp_lock
); 
3047                  * The PCB has been detached, and there is exactly 1 refnct 
3048                  * held by the MPTCP thread.  Signal that thread to terminate, 
3049                  * after which the last refcnt will be released.  That will 
3050                  * allow it to be destroyed below during the next round. 
3052                 if (mp_so
->so_usecount 
== 1) { 
3053                         mptcplog2((LOG_DEBUG
, "%s: mp_so 0x%llx scheduled for " 
3054                             "termination [u=%d,r=%d]\n", __func__
, 
3055                             (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), 
3056                             mp_so
->so_usecount
, mp_so
->so_retaincnt
)); 
3057                         /* signal MPTCP thread to terminate */ 
3058                         mptcp_thread_terminate_signal(mpte
); 
3059                         lck_mtx_unlock(&mpp
->mpp_lock
); 
		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mp_so->so_usecount, mp_so->so_retaincnt));
		DTRACE_MPTCP4(dispose, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mppcb *, mpp);
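		/*
		 * To summarize the checks above: each garbage-collector pass
		 * over a detached MP socket takes one of the following paths
		 * under mpp_lock (a condensed view of the code above, not a
		 * separate code path):
		 *
		 *	so_usecount > 1		skip; once the FIN_WAIT_1 grace
		 *				ticks expire, mark every subflow
		 *				MPTSF_DELETEOK and post
		 *				SO_FILT_HINT_DISCONNECTED
		 *	mpp_state != DEAD	skip; the PCB is not yet detached
		 *	so_usecount == 1	signal the MPTCP thread to
		 *				terminate; it drops the last ref
		 *	otherwise		dispose of the socket on this pass
		 */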
/*
 * Drop an MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	MPT_LOCK_ASSERT_HELD(mp_tp);
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mpte->mpte_mppcb->mpp_socket;

	mp_tp->mpt_state = MPTCPS_CLOSED;
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 /* event */);

	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0)
		errno = mp_tp->mpt_softerror;
	mp_so->so_error = errno;

	return (mptcp_close(mpte, mp_tp));
}
/*
 * Close an MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct socket *mp_so;
	struct mptsub *mpts, *tmpts;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	MPT_LOCK_ASSERT_HELD(mp_tp);
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mpte->mpte_mppcb->mpp_socket;
	if (mp_tp->mpt_localkey != NULL) {
		mptcp_free_key(mp_tp->mpt_localkey);
		mp_tp->mpt_localkey = NULL;
	}

	soisdisconnected(mp_so);

	/* a peeled-off MP socket skips the subflow cleanup below */
	if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) {
		return (NULL);
	}

	/* Clean up all subflows */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		mptcp_subflow_disconnect(mpte, mpts, TRUE);
		mptcp_subflow_del(mpte, mpts, TRUE);
	}

	return (NULL);
}
void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * Signal MPTCP thread to wake up.
 */
void
mptcp_thread_signal(struct mptses *mpte)
{
	lck_mtx_lock(&mpte->mpte_thread_lock);
	mptcp_thread_signal_locked(mpte);
	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
/*
 * Signal MPTCP thread to wake up (locked version).
 */
void
mptcp_thread_signal_locked(struct mptses *mpte)
{
	lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED);

	mpte->mpte_thread_reqs++;
	if (!mpte->mpte_thread_active && mpte->mpte_thread != THREAD_NULL)
		wakeup_one((caddr_t)&mpte->mpte_thread);
}
/*
 * Signal MPTCP thread to terminate.
 */
static void
mptcp_thread_terminate_signal(struct mptses *mpte)
{
	lck_mtx_lock(&mpte->mpte_thread_lock);
	if (mpte->mpte_thread != THREAD_NULL) {
		mpte->mpte_thread = THREAD_NULL;
		mpte->mpte_thread_reqs++;
		if (!mpte->mpte_thread_active)
			wakeup_one((caddr_t)&mpte->mpte_thread);
	}
	lck_mtx_unlock(&mpte->mpte_thread_lock);
}
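/*
 * The three signalling routines above and the workloop below cooperate
 * through a simple request counter (a sketch of the protocol, derived from
 * the code rather than a separate mechanism): a producer bumps
 * mpte_thread_reqs and issues wakeup_one() if the thread is parked; the
 * workloop repeats mptcp_thread_dowork() until the counter stops changing,
 * then clears it and sleeps again.  Termination is requested by clearing
 * mpte_thread to THREAD_NULL before the wakeup.
 */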
/*
 * MPTCP thread workloop.
 */
static void
mptcp_thread_dowork(struct mptses *mpte)
{
	struct socket *mp_so;
	struct mptsub *mpts, *tmpts;
	boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
	boolean_t conninfo_update = FALSE;

	MPTE_LOCK(mpte);		/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mpte->mpte_mppcb->mpp_socket;
	VERIFY(mp_so != NULL);
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		ev_ret_t ret;

		MPTS_ADDREF_LOCKED(mpts);	/* for us */

		/* Update process ownership based on parent mptcp socket */
		mptcp_update_last_owner(mpts, mp_so);

		mptcp_subflow_input(mpte, mpts);
		ret = mptcp_subflow_events(mpte, mpts);

		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			mptcplog3((LOG_INFO, "%s: cid %d \n", __func__,
			    mpts->mpts_connid));
			(void) mptcp_subflow_output(mpte, mpts);
		}

		/*
		 * If MPTCP socket is closed, disconnect all subflows.
		 * This will generate a disconnect event which will
		 * be handled during the next iteration, causing a
		 * non-zero error to be returned above.
		 */
		if (mp_so->so_flags & SOF_PCBCLEARING)
			mptcp_subflow_disconnect(mpte, mpts, FALSE);

		switch (ret) {
		case MPTS_EVRET_OK_UPDATE:
			conninfo_update = TRUE;
			break;
		case MPTS_EVRET_DELETE:
			if (mptcp_delete_ok(mpte, mpts)) {
				mptcp_subflow_del(mpte, mpts, TRUE);
			}
			break;
		case MPTS_EVRET_CONNECT_PENDING:
			connect_pending = TRUE;
			break;
		case MPTS_EVRET_DISCONNECT_FALLBACK:
			disconnect_fallback = TRUE;
			break;
		default:
			break;
		}
		MPTS_REMREF(mpts);		/* ours */
	}
	if (conninfo_update) {
		soevent(mp_so, SO_FILT_HINT_LOCKED |
		    SO_FILT_HINT_CONNINFO_UPDATED);
	}

	if (!connect_pending && !disconnect_fallback) {
		/* nothing else to do on this pass */
		MPTE_UNLOCK(mpte);
		return;
	}
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (disconnect_fallback) {
			struct socket *so = NULL;
			struct inpcb *inp = NULL;
			struct tcpcb *tp = NULL;

			if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
				continue;
			}

			mpts->mpts_flags |= MPTSF_MP_DEGRADED;

			if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
			    MPTSF_DISCONNECTED)) {
				continue;
			}
			so = mpts->mpts_socket;

			/*
			 * The MPTCP connection has degraded to a fallback
			 * mode, so there is no point in keeping this subflow
			 * regardless of its MPTCP-readiness state, unless it
			 * is the primary one which we use for fallback.  This
			 * assumes that the subflow used for fallback is the
			 * active one.
			 */
			socket_lock(so, 1);
			inp = sotoinpcb(so);
			tp = intotcpcb(inp);
			tp->t_mpflags &=
			    ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
			tp->t_mpflags |= TMPF_TCP_FALLBACK;
			if (mpts->mpts_flags & MPTSF_ACTIVE) {
				socket_unlock(so, 1);
				continue;
			}
			tp->t_mpflags |= TMPF_RESET;
			soevent(so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			socket_unlock(so, 1);
		} else if (connect_pending) {
			/*
			 * The MPTCP connection has progressed to a state
			 * where it supports full multipath semantics; allow
			 * additional joins to be attempted for all subflows
			 * that are in the PENDING state.
			 */
			if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
				(void) mptcp_subflow_soconnectx(mpte, mpts);
			}
		}
	}

	MPTE_UNLOCK(mpte);
}
static void
mptcp_thread_func(void *v, wait_result_t w)
{
#pragma unused(w)
	struct mptses *mpte = v;
	struct timespec *ts = NULL;

	VERIFY(mpte != NULL);

	lck_mtx_lock_spin(&mpte->mpte_thread_lock);

	for (;;) {
		lck_mtx_assert(&mpte->mpte_thread_lock, LCK_MTX_ASSERT_OWNED);

		if (mpte->mpte_thread != THREAD_NULL) {
			(void) msleep(&mpte->mpte_thread,
			    &mpte->mpte_thread_lock, (PZERO - 1) | PSPIN,
			    __func__, ts);
		}

		/* MPTCP socket is closed? */
		if (mpte->mpte_thread == THREAD_NULL) {
			lck_mtx_unlock(&mpte->mpte_thread_lock);
			/* callee will destroy thread lock */
			mptcp_thread_destroy(mpte);
			/* NOTREACHED */
		}

		mpte->mpte_thread_active = 1;
		for (;;) {
			uint32_t reqs = mpte->mpte_thread_reqs;

			lck_mtx_unlock(&mpte->mpte_thread_lock);
			mptcp_thread_dowork(mpte);
			lck_mtx_lock_spin(&mpte->mpte_thread_lock);

			/* if there's no pending request, we're done */
			if (reqs == mpte->mpte_thread_reqs ||
			    mpte->mpte_thread == THREAD_NULL)
				break;
		}
		mpte->mpte_thread_reqs = 0;
		mpte->mpte_thread_active = 0;
	}
}
/*
 * Destroy an MPTCP thread, to be called in the MPTCP thread context
 * upon receiving an indication to self-terminate.  This routine
 * will not return, as the current thread is terminated at the end.
 */
static void
mptcp_thread_destroy(struct mptses *mpte)
{
	struct socket *mp_so;

	MPTE_LOCK(mpte);		/* same as MP socket lock */
	VERIFY(mpte->mpte_thread == THREAD_NULL);
	VERIFY(mpte->mpte_mppcb != NULL);

	mptcp_sesdestroy(mpte);

	mp_so = mpte->mpte_mppcb->mpp_socket;
	VERIFY(mp_so != NULL);
	VERIFY(mp_so->so_usecount != 0);
	mp_so->so_usecount--;		/* for thread */
	mpte->mpte_mppcb->mpp_flags |= MPP_DEFUNCT;
	MPTE_UNLOCK(mpte);

	/* for the extra refcnt from kernel_thread_start() */
	thread_deallocate(current_thread());
	/* this is the end */
	thread_terminate(current_thread());
	/* NOTREACHED */
}
/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = sotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	lck_mtx_lock(&mpp->mpp_lock);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0)
		mp_so->so_usecount++;
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return (0);
}
/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = sotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	lck_mtx_assert(&mpp->mpp_lock, LCK_MTX_ASSERT_OWNED);

	if (refcount != 0)
		mp_so->so_usecount--;

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	lck_mtx_unlock(&mpp->mpp_lock);

	return (0);
}
/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int locktype)
{
#pragma unused(locktype)
	struct mppcb *mpp = sotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return (&mpp->mpp_lock);
}
/*
 * Key generation functions
 */
static void
mptcp_generate_unique_key(struct mptcp_key_entry *key_entry)
{
	struct mptcp_key_entry *key_elm;
try_again:
	read_random(&key_entry->mkey_value, sizeof (key_entry->mkey_value));
	if (key_entry->mkey_value == 0)
		goto try_again;
	mptcp_do_sha1(&key_entry->mkey_value, key_entry->mkey_digest,
	    sizeof (key_entry->mkey_digest));

	LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
		if (key_elm->mkey_value == key_entry->mkey_value) {
			goto try_again;
		}
		if (bcmp(key_elm->mkey_digest, key_entry->mkey_digest, 4) ==
		    0) {
			goto try_again;
		}
	}
}
static mptcp_key_t *
mptcp_reserve_key(void)
{
	struct mptcp_key_entry *key_elm;
	struct mptcp_key_entry *found_elm = NULL;

	lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
	LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
		if (key_elm->mkey_flags == MKEYF_FREE) {
			key_elm->mkey_flags = MKEYF_INUSE;
			found_elm = key_elm;
			break;
		}
	}
	lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);

	if (found_elm != NULL) {
		return (&found_elm->mkey_value);
	}

	key_elm = (struct mptcp_key_entry *)
	    zalloc(mptcp_keys_pool.mkph_key_entry_zone);
	key_elm->mkey_flags = MKEYF_INUSE;

	lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
	mptcp_generate_unique_key(key_elm);
	LIST_INSERT_HEAD(&mptcp_keys_pool, key_elm, mkey_next);
	mptcp_keys_pool.mkph_count += 1;
	lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
	return (&key_elm->mkey_value);
}
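/*
 * Typical key lifecycle, assuming the usage seen elsewhere in this file
 * (sketch only; locking and error handling omitted): a local key is
 * reserved when the MPTCP control block is set up, its cached SHA1 digest
 * is consulted while deriving the token/IDSN, and the key is returned to
 * the pool when the control block is torn down.
 *
 *	mptcp_key_t *key = mptcp_reserve_key();
 *	caddr_t digest = mptcp_get_stored_digest(key);
 *	...
 *	mptcp_free_key(key);
 *	key = NULL;
 */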
caddr_t
mptcp_get_stored_digest(mptcp_key_t *key)
{
	struct mptcp_key_entry *key_holder;
	caddr_t digest = NULL;

	lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
	key_holder = (struct mptcp_key_entry *)(void *)((caddr_t)key -
	    offsetof(struct mptcp_key_entry, mkey_value));
	if (key_holder->mkey_flags != MKEYF_INUSE)
		panic_plain("%s", __func__);
	digest = &key_holder->mkey_digest[0];
	lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
	return (digest);
}
void
mptcp_free_key(mptcp_key_t *key)
{
	struct mptcp_key_entry *key_holder;
	struct mptcp_key_entry *key_elm;
	int pt = RandomULong();

	mptcplog((LOG_INFO, "%s\n", __func__));

	lck_mtx_lock(&mptcp_keys_pool.mkph_lock);
	key_holder = (struct mptcp_key_entry *)(void*)((caddr_t)key -
	    offsetof(struct mptcp_key_entry, mkey_value));
	key_holder->mkey_flags = MKEYF_FREE;

	LIST_REMOVE(key_holder, mkey_next);
	mptcp_keys_pool.mkph_count -= 1;

	/* Free half the time */
	if (pt & 0x01) {
		zfree(mptcp_keys_pool.mkph_key_entry_zone, key_holder);
	} else {
		/* Insert it at random point to avoid early reuse */
		int i = 0;
		if (mptcp_keys_pool.mkph_count > 1) {
			pt = pt % (mptcp_keys_pool.mkph_count - 1);
			LIST_FOREACH(key_elm, &mptcp_keys_pool, mkey_next) {
				if (++i >= pt) {
					LIST_INSERT_AFTER(key_elm, key_holder,
					    mkey_next);
					break;
				}
			}
			if (i < pt)
				panic("missed insertion");
		} else {
			LIST_INSERT_HEAD(&mptcp_keys_pool, key_holder,
			    mkey_next);
		}
		mptcp_keys_pool.mkph_count += 1;
	}
	lck_mtx_unlock(&mptcp_keys_pool.mkph_lock);
}
void
mptcp_key_pool_init(void)
{
	int i;
	struct mptcp_key_entry *key_entry;

	LIST_INIT(&mptcp_keys_pool);
	mptcp_keys_pool.mkph_count = 0;

	mptcp_keys_pool.mkph_key_elm_sz = (vm_size_t)
	    (sizeof (struct mptcp_key_entry));
	mptcp_keys_pool.mkph_key_entry_zone = zinit(
	    mptcp_keys_pool.mkph_key_elm_sz,
	    MPTCP_MX_KEY_ALLOCS * mptcp_keys_pool.mkph_key_elm_sz,
	    MPTCP_MX_PREALLOC_ZONE_SZ, "mptkeys");
	if (mptcp_keys_pool.mkph_key_entry_zone == NULL) {
		panic("%s: unable to allocate MPTCP keys zone \n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_CALLERACCT, FALSE);
	zone_change(mptcp_keys_pool.mkph_key_entry_zone, Z_EXPAND, TRUE);

	for (i = 0; i < MPTCP_KEY_PREALLOCS_MX; i++) {
		key_entry = (struct mptcp_key_entry *)
		    zalloc(mptcp_keys_pool.mkph_key_entry_zone);
		key_entry->mkey_flags = MKEYF_FREE;
		mptcp_generate_unique_key(key_entry);
		LIST_INSERT_HEAD(&mptcp_keys_pool, key_entry, mkey_next);
		mptcp_keys_pool.mkph_count += 1;
	}
	lck_mtx_init(&mptcp_keys_pool.mkph_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);
}
/*
 * MPTCP Join support
 */
void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    connid_t conn_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;
	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK_SPIN(mp_tp);
	tp->t_mptcb = mp_tp;
	/*
	 * As long as the mpts_connid is unique it can be used as the
	 * address ID for additional subflows.
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = conn_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	MPT_UNLOCK(mp_tp);
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0)
		goto try_again;
	MPT_LOCK_SPIN(mp_tp);
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
	MPT_UNLOCK(mp_tp);
}
void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = sototcpcb(so);
	int found = 0;

	MPT_LOCK(mp_tp);
	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
	MPT_UNLOCK(mp_tp);
}
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK(mp_tp);
	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand)
				*lrand = sauth_entry->msae_laddr_rand;
			if (rrand)
				*rrand = sauth_entry->msae_raddr_rand;
			break;
		}
	}
	MPT_UNLOCK(mp_tp);
}
void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	MPT_LOCK(mp_tp);
	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				mptcplog((LOG_ERR, "MPTCP ERROR %s: mismatched"
				    " address ids %d %d \n", __func__, raddr_id,
				    sauth_entry->msae_raddr_id));
				MPT_UNLOCK(mp_tp);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				mptcplog((LOG_ERR, "%s: dup SYN_ACK %d %d \n",
				    __func__, raddr_rand,
				    sauth_entry->msae_raddr_rand));
				MPT_UNLOCK(mp_tp);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			break;
		}
	}
	MPT_UNLOCK(mp_tp);
}
/*
 * SHA1 support for MPTCP
 */
int
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest, int digest_len)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	if (digest_len != SHA1_RESULTLEN) {
		return (FALSE);
	}

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof (mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
	return (TRUE);
}
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest, int digest_len)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};	/* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};	/* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, digest_len);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is within one 512-bit block, so no need to compute hash of it */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof (key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof (data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof (key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
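/*
 * The routine above is HMAC-SHA1 (RFC 2104) specialized for MPTCP's
 * fixed-size inputs: the key is the 16-byte concatenation of the two
 * 64-bit MPTCP keys (zero-padded to the 64-byte SHA1 block) and the
 * message is the 8-byte concatenation of the two 32-bit subflow randoms.
 * In other words (a sketch of the computation performed above):
 *
 *	K      = key1 || key2 || 0x00...00		(64 bytes)
 *	msg    = rand1 || rand2				(8 bytes)
 *	digest = SHA1((K ^ opad) || SHA1((K ^ ipad) || msg))
 *
 * where ipad is 0x36 repeated and opad is 0x5c repeated, matching the
 * key_ipad/key_opad arrays above.
 */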
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest,
    int digest_len)
{
	uint32_t lrand, rrand;
	mptcp_key_t localkey, remotekey;
	MPT_LOCK_ASSERT_NOTHELD(mp_tp);

	if (digest_len != SHA1_RESULTLEN)
		return;

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	MPT_LOCK_SPIN(mp_tp);
	localkey = *mp_tp->mpt_localkey;
	remotekey = mp_tp->mpt_remotekey;
	MPT_UNLOCK(mp_tp);
	mptcp_hmac_sha1(localkey, remotekey, lrand, rrand, digest,
	    digest_len);
}
u_int64_t
mptcp_get_trunced_hmac(mptcp_addr_id aid, struct mptcb *mp_tp)
{
	u_char digest[SHA1_RESULTLEN];
	u_int64_t trunced_digest;

	mptcp_get_hmac(aid, mp_tp, &digest[0], sizeof (digest));
	bcopy(digest, &trunced_digest, 8);
	return (trunced_digest);
}
/*
 * Authentication data generation
 */
int
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof (u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof (u_int32_t));
	return (TRUE);
}
int
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof (u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */
	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
	return (TRUE);
}
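/*
 * Putting the two helpers together: each endpoint hashes its 64-bit key
 * with SHA1, takes the most significant 32 bits of the digest as its
 * token and the least significant 64 bits as its initial DSN, which is
 * what mptcp_init_authparms() below does for both the local and remote
 * keys.  A condensed sketch for one key:
 *
 *	char digest[SHA1_RESULTLEN];
 *	u_int32_t token;
 *	u_int64_t idsn;
 *
 *	mptcp_do_sha1(&key, digest, sizeof (digest));
 *	mptcp_generate_token(digest, SHA1_RESULTLEN,
 *	    (caddr_t)&token, sizeof (token));
 *	mptcp_generate_idsn(digest, SHA1_RESULTLEN,
 *	    (caddr_t)&idsn, sizeof (idsn));
 */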
void
mptcp_init_authparms(struct mptcb *mp_tp)
{
	caddr_t local_digest = NULL;
	char remote_digest[MPTCP_SHA1_RESULTLEN];
	MPT_LOCK_ASSERT_HELD(mp_tp);

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MP_DRAFT_VERSION_12)
		return;

	/* Setup local and remote tokens and Initial DSNs */
	local_digest = mptcp_get_stored_digest(mp_tp->mpt_localkey);
	mptcp_generate_token(local_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
	mptcp_generate_idsn(local_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));

	if (!mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest,
	    SHA1_RESULTLEN)) {
		mptcplog((LOG_ERR, "MPTCP ERROR %s: unexpected failure",
		    __func__));
		return;
	}
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof (mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));
}
void
mptcp_init_statevars(struct mptcb *mp_tp)
{
	MPT_LOCK_ASSERT_HELD(mp_tp);

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mp_tp->mpt_rcvatmark = mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
}
void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MP_DRAFT_VERSION_12;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum)
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
u_int32_t
mptcp_get_localtoken(void* mptcb_arg)
{
	struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
	return (mp_tp->mpt_localtoken);
}

u_int32_t
mptcp_get_remotetoken(void* mptcb_arg)
{
	struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
	return (mp_tp->mpt_remotetoken);
}

u_int64_t
mptcp_get_localkey(void* mptcb_arg)
{
	struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
	if (mp_tp->mpt_localkey != NULL)
		return (*mp_tp->mpt_localkey);
	else
		return (0);
}

u_int64_t
mptcp_get_remotekey(void* mptcb_arg)
{
	struct mptcb *mp_tp = (struct mptcb *)mptcb_arg;
	return (mp_tp->mpt_remotekey);
}
void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp)
		return;

	tp = intotcpcb(inp);
	if (!tp)
		return;

	if (!(tp->t_mpflags & TMPF_RESET))
		tp->t_mpflags |= TMPF_SEND_DFIN;
}
/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL)
		return;

	mp_tp = &((struct mpp_mtp *)mpp)->mtcb;
	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		MPT_UNLOCK(mp_tp);
		panic("%s: data write before establishment.",
		    __func__);
		/* NOTREACHED */
	}

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
	MPT_UNLOCK(mp_tp);
}
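/*
 * A condensed view of the mapping stored by mptcp_insert_dsn() above:
 * every mbuf written on the MP socket carries its data-level position in
 * the packet header, which the subflow later turns into a DSS option.
 * For example, a 100-byte write issued while mpt_sndmax is 1000 leaves:
 *
 *	m->m_pkthdr.pkt_flags |= PKTF_MPTCP | PKTF_MPSO;
 *	m->m_pkthdr.mp_dsn  == 1000	(DSN of the first byte)
 *	m->m_pkthdr.mp_rlen == 100	(length covered by the mapping)
 *
 * and advances mpt_sndmax to 1100 for the next write.
 */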
void
mptcp_preproc_sbdrop(struct mbuf *m, unsigned int len)
{
	u_int32_t sub_len = 0;

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
			sub_len = m->m_pkthdr.mp_rlen;

			if (sub_len < len) {
				m->m_pkthdr.mp_dsn += sub_len;
				if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
					m->m_pkthdr.mp_rseq += sub_len;
				}
				m->m_pkthdr.mp_rlen = 0;
				len -= sub_len;
			} else {
				/* sub_len >= len */
				m->m_pkthdr.mp_dsn += len;
				if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
					m->m_pkthdr.mp_rseq += len;
				}
				mptcplog3((LOG_INFO,
				    "%s: %llu %u %d %d\n", __func__,
				    m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rseq,
				    m->m_pkthdr.mp_rlen, len));
				m->m_pkthdr.mp_rlen -= len;
				return;
			}
		} else {
			panic("%s: MPTCP tag not set", __func__);
			/* NOTREACHED */
		}
		m = m->m_next;
	}
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off, uint32_t datalen,
    u_int32_t *dsn, u_int32_t *relseq, u_int16_t *data_len, u_int64_t *dsn64p)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, datalen, &dsn64, relseq, data_len);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
	*dsn64p = dsn64;
}
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint32_t datalen,
    u_int64_t *dsn, u_int32_t *relseq, u_int16_t *data_len)
{
	struct mbuf *m = so->so_snd.sb_mb;
	struct mbuf *mnext = NULL;
	uint32_t runlen = 0;
	u_int64_t dsn64;
	uint32_t contig_len = 0;

	if (m == NULL)
		return;

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */
	while (m) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);

		if ((unsigned int)off >= m->m_pkthdr.mp_rlen) {
			off -= m->m_pkthdr.mp_rlen;
			m = m->m_next;
		} else {
			break;
		}
	}

	if (m == NULL) {
		panic("%s: bad offset", __func__);
		/* NOTREACHED */
	}

	dsn64 = m->m_pkthdr.mp_dsn + off;
	*dsn = dsn64;
	*relseq = m->m_pkthdr.mp_rseq + off;

	/*
	 * Now find the last contiguous byte and its length from
	 * start.
	 */
	runlen = m->m_pkthdr.mp_rlen - off;
	contig_len = runlen;

	/* If datalen does not span multiple mbufs, return */
	if (datalen <= runlen) {
		*data_len = min(datalen, UINT16_MAX);
		return;
	}

	mnext = m->m_next;
	while (datalen > runlen) {
		if (mnext == NULL) {
			panic("%s: bad datalen = %d, %d %d", __func__, datalen,
			    runlen, off);
			/* NOTREACHED */
		}
		VERIFY(mnext->m_flags & M_PKTHDR);
		VERIFY(mnext->m_pkthdr.pkt_flags & PKTF_MPTCP);

		/*
		 * case A. contiguous DSN stream
		 * case B. discontiguous DSN stream
		 */
		if (mnext->m_pkthdr.mp_dsn == (dsn64 + runlen)) {
			/* case A */
			runlen += mnext->m_pkthdr.mp_rlen;
			contig_len += mnext->m_pkthdr.mp_rlen;
			mptcplog3((LOG_INFO, "%s: contig \n",
			    __func__));
		} else {
			/* case B */
			mptcplog((LOG_INFO, "%s: discontig %d %d \n",
			    __func__, datalen, contig_len));
			break;
		}
		mnext = mnext->m_next;
	}
	datalen = min(datalen, UINT16_MAX);
	*data_len = min(datalen, contig_len);
	mptcplog3((LOG_INFO, "%s: %llu %u %d %d \n", __func__,
	    *dsn, *relseq, *data_len, off));
}
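/*
 * Example of the contiguity rule implemented above (a worked sketch, not
 * an additional code path): with three mappings queued in the subflow
 * send buffer, (dsn 1000, len 100), (dsn 1100, len 100) and (dsn 5000,
 * len 100), a request for off = 50 and datalen = 300 lands in the first
 * mbuf, reports *dsn = 1050 with the matching *relseq, and clamps
 * *data_len to 150 because the run of contiguous DSNs ends at 1200.
 */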
/*
 * MPTCP's notion of the next insequence Data Sequence number is adjusted
 * here. It must be called from mptcp_adj_rmap() which is called only after
 * reassembly of out of order data. The rcvnxt variable must
 * be updated only when at least some insequence new data is received.
 */
static void
mptcp_adj_rcvnxt(struct tcpcb *tp, struct mbuf *m)
{
	struct mptcb *mp_tp = tptomptp(tp);

	if (mp_tp == NULL)
		return;
	MPT_LOCK(mp_tp);
	if ((MPTCP_SEQ_GEQ(mp_tp->mpt_rcvnxt, m->m_pkthdr.mp_dsn)) &&
	    (MPTCP_SEQ_LEQ(mp_tp->mpt_rcvnxt, (m->m_pkthdr.mp_dsn +
	    m->m_pkthdr.mp_rlen)))) {
		mp_tp->mpt_rcvnxt = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
	}
	MPT_UNLOCK(mp_tp);
}
/*
 * Note that this is called only from tcp_input() which may trim data
 * after the dsn mapping is inserted into the mbuf. When it trims data
 * tcp_input calls m_adj() which does not remove the m_pkthdr even if the
 * m_len becomes 0 as a result of trimming the mbuf. The dsn map insertion
 * cannot be delayed after trim, because data can be in the reassembly
 * queue for a while and the DSN option info in tp will be overwritten for
 * every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap()
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m)
{
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	}
}
void
mptcp_adj_rmap(struct socket *so, struct mbuf *m)
{
	u_int64_t dsn;
	u_int32_t sseq, datalen;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	u_int32_t old_rcvnxt = 0;

	if (m_pktlen(m) == 0)
		return;

	if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
		VERIFY(m->m_flags & M_PKTHDR);

		dsn = m->m_pkthdr.mp_dsn;
		sseq = m->m_pkthdr.mp_rseq + tp->irs;
		datalen = m->m_pkthdr.mp_rlen;
	} else {
		/* data arrived without a DSS option mapping */
		mptcp_notify_mpfail(so);
		return;
	}

	/* In the common case, data is in window and in sequence */
	if (m->m_pkthdr.len == (int)datalen) {
		mptcp_adj_rcvnxt(tp, m);
		return;
	}

	if (m->m_pkthdr.len > (int)datalen) {
		panic("%s: mbuf len = %d expected = %d", __func__,
		    m->m_pkthdr.len, datalen);
		/* NOTREACHED */
	}

	old_rcvnxt = tp->rcv_nxt - m->m_pkthdr.len;
	if (SEQ_GT(old_rcvnxt, sseq)) {
		/* data trimmed from the left */
		int off = old_rcvnxt - sseq;
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen -= off;
	} else if (old_rcvnxt == sseq) {
		/*
		 * Data was trimmed from the right
		 */
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else {
		/* XXX handle gracefully with reass or fallback in January */
		panic("%s: partial map %u %u", __func__, old_rcvnxt, sseq);
		/* NOTREACHED */
	}
	mptcp_adj_rcvnxt(tp, m);
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL)
		return;

	tp = intotcpcb(inp);
	if (tp == NULL)
		return;

	if (tp->t_state != TCPS_ESTABLISHED)
		mptcplog((LOG_INFO, "%s: state = %d \n", __func__,
		    tp->t_state));

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	u_int64_t dsn;
	int off = 0;
	u_int32_t datalen;

	if (m == NULL)
		return (-1);

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = dsn_fail - dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			return (0);
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */
	mptcplog((LOG_ERR, "%s: %llu not found \n", __func__, dsn_fail));
	return (-1);
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off, int32_t len)
{
	u_int64_t	mdss_dsn = 0;
	u_int32_t	mdss_subflow_seq = 0;
	u_int16_t	mdss_data_len = 0;

	mptcp_output_getm_dsnmap64(so, off, (u_int32_t)len,
	    &mdss_dsn, &mdss_subflow_seq, &mdss_data_len);

	return (mdss_data_len);
}
int32_t
mptcp_sbspace(struct mptcb *mpt)
{
	struct sockbuf *sb;
	uint32_t rcvbuf;
	int32_t space;

	MPT_LOCK_ASSERT_HELD(mpt);
	MPTE_LOCK_ASSERT_HELD(mpt->mpt_mpte);

	sb = &mpt->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	rcvbuf = sb->sb_hiwat;
	space = ((int32_t)imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0)
		space = 0;
	/* XXX check if it's too small? */

	return (space);
}
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE))
		return;

	if (tp->t_mpflags & TMPF_MPTCP_READY)
		return;

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
		return;

	tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;
	VERIFY(mp_tp != NULL);
	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	MPT_UNLOCK(mp_tp);
	return (ret);
}
/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define	MPTCP_COMPUTE_LEN {				\
	mss_lower = sizeof (struct mptcp_dss_ack_opt);	\
	MPT_LOCK(mp_tp);				\
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)		\
		mss_lower += 2;				\
	else						\
		/* adjust to 32-bit boundary + EOL */	\
		mss_lower += 2;				\
	MPT_UNLOCK(mp_tp);				\
}
	if (mp_tp == NULL)
		return (0);

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
	    (!(tp->t_mpflags & TMPF_JOINED_FLOW))) {
		MPTCP_COMPUTE_LEN;
	}

	if ((tp->t_mpflags & TMPF_PREESTABLISHED) &&
	    (tp->t_mpflags & TMPF_SENT_JOIN)) {
		MPTCP_COMPUTE_LEN;
	}

	if ((mtudisc) && (tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		MPTCP_COMPUTE_LEN;
	}

	return (mss_lower);
}
/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct mptsub *mpts, struct socket *parent_mpso)
{
	struct socket *subflow_so = mpts->mpts_socket;

	MPTS_LOCK_ASSERT_HELD(mpts);

	socket_lock(subflow_so, 0);
	if ((subflow_so->last_pid != parent_mpso->last_pid) ||
	    (subflow_so->last_upid != parent_mpso->last_upid)) {
		subflow_so->last_upid = parent_mpso->last_upid;
		subflow_so->last_pid = parent_mpso->last_pid;
		uuid_copy(subflow_so->last_uuid, parent_mpso->last_uuid);
	}
	so_update_policy(subflow_so);
	socket_unlock(subflow_so, 0);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t n, len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows;

	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	n = mtcbinfo.mppi_count;
	if (req->oldptr == USER_ADDR_NULL) {
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n/8) * sizeof(mptcp_flow_t);
		return (0);
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		bzero(&mptcpci, sizeof(mptcpci));
		lck_mtx_lock(&mpp->mpp_lock);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);
		len = sizeof(*flows) * mpte->mpte_numflows;
		flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
		if (flows == NULL) {
			lck_mtx_unlock(&mpp->mpp_lock);
			break;
		}
		/* N.B. we don't take the mpt_lock just for the state. */
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_len = sizeof(mptcpci) +
		    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
		error = SYSCTL_OUT(req, &mptcpci,
		    sizeof(mptcpci) - sizeof(*flows));
		if (error) {
			lck_mtx_unlock(&mpp->mpp_lock);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			socket_lock(so, 0);
			fill_mptcp_subflow(so, &flows[f], mpts);
			socket_unlock(so, 0);
			f++;
		}
		lck_mtx_unlock(&mpp->mpp_lock);
		error = SYSCTL_OUT(req, flows, len);
		FREE(flows, M_TEMP);
		if (error)
			break;
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return (error);
}
SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
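/*
 * The pcblist node is meant to be read programmatically (a sketch of a
 * userland caller, with error handling omitted): size the buffer with a
 * NULL oldp first, then parse one conninfo_mptcp_t header followed by
 * mptcpci_nflows mptcp_flow_t records per connection.
 *
 *	size_t len = 0;
 *	sysctlbyname("net.inet.mptcp.pcblist", NULL, &len, NULL, 0);
 *	void *buf = malloc(len);
 *	sysctlbyname("net.inet.mptcp.pcblist", buf, &len, NULL, 0);
 */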