2 * Copyright (c) 2012-2014 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
34 #include <sys/mcache.h>
35 #include <sys/resourcevar.h>
36 #include <sys/socket.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/sysctl.h>
43 #include <kern/zalloc.h>
44 #include <kern/locks.h>
46 #include <mach/thread_act.h>
50 #include <netinet/in.h>
51 #include <netinet/in_pcb.h>
52 #include <netinet/in_var.h>
53 #include <netinet/tcp.h>
54 #include <netinet/tcp_fsm.h>
55 #include <netinet/tcp_seq.h>
56 #include <netinet/tcp_var.h>
57 #include <netinet/mptcp_var.h>
58 #include <netinet/mptcp.h>
59 #include <netinet/mptcp_seq.h>
60 #include <netinet/mptcp_timer.h>
61 #include <libkern/crypto/sha1.h>
63 #include <netinet6/in6_pcb.h>
64 #include <netinet6/ip6protosw.h>
66 #include <dev/random/randomdev.h>
69 * Notes on MPTCP implementation.
71 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
72 * communication domain. The structure mtcbinfo describes the MPTCP instance
73 * of a Multipath protocol in that domain. It is used to keep track of all
74 * MPTCP PCB instances in the system, and is protected by the global lock
77 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
78 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
79 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
80 * allocated from the same memory block, and each structure has a pointer
81 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
82 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
83 * PCB (mppcb) as well as the MPTCP Session (mptses).
85 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
86 * in particular, the list of subflows as well as the MPTCP thread.
88 * A functioning MPTCP Session consists of one or more subflow sockets. Each
89 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
90 * represented by the mptsub structure. Because each subflow requires access
91 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
92 * subflow. This gets decremented prior to the subflow's destruction. The
93 * subflow lock (mpts_lock) is used to protect accesses to the subflow.
95 * To handle events (read, write, control) from the subflows, an MPTCP thread
96 * is created; currently, there is one thread per MPTCP Session. In order to
97 * prevent the MPTCP socket from being destroyed while being accessed by the
98 * MPTCP thread, we bump up the MPTCP socket's so_usecount for the thread,
99 * which will be decremented prior to the thread's termination. The thread
100 * lock (mpte_thread_lock) is used to synchronize its signalling.
102 * Lock ordering is defined as follows:
104 * mtcbinfo (mppi_lock)
110 * It is not a requirement that all of the above locks need to be acquired
111 * in succession, but the correct lock ordering must be followed when there
 * are more than one locks that need to be held.  The MPTCP thread lock
 * is not constrained by this arrangement, because none of the other locks
114 * is ever acquired while holding mpte_thread_lock; therefore it may be called
115 * at any moment to signal the thread.
117 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
118 * work is done by the MPTCP garbage collector which is invoked on demand by
119 * the PF_MULTIPATH garbage collector. This process will take place once all
120 * of the subflows have been destroyed, and the MPTCP thread be instructed to
124 static void mptcp_sesdestroy(struct mptses
*);
125 static void mptcp_thread_signal_locked(struct mptses
*);
126 static void mptcp_thread_terminate_signal(struct mptses
*);
127 static void mptcp_thread_dowork(struct mptses
*);
128 static void mptcp_thread_func(void *, wait_result_t
);
129 static void mptcp_thread_destroy(struct mptses
*);
130 static void mptcp_key_pool_init(void);
131 static void mptcp_attach_to_subf(struct socket
*, struct mptcb
*, uint8_t);
132 static void mptcp_detach_mptcb_from_subf(struct mptcb
*, struct socket
*);
133 static void mptcp_conn_properties(struct mptcb
*);
134 static void mptcp_init_statevars(struct mptcb
*);
136 static uint32_t mptcp_gc(struct mppcbinfo
*);
137 static int mptcp_subflow_socreate(struct mptses
*, struct mptsub
*,
138 int, struct proc
*, struct socket
**);
139 static int mptcp_subflow_soclose(struct mptsub
*, struct socket
*);
140 static int mptcp_subflow_soconnectx(struct mptses
*, struct mptsub
*);
141 static int mptcp_subflow_soreceive(struct socket
*, struct sockaddr
**,
142 struct uio
*, struct mbuf
**, struct mbuf
**, int *);
143 static void mptcp_subflow_rupcall(struct socket
*, void *, int);
144 static void mptcp_subflow_input(struct mptses
*, struct mptsub
*);
145 static void mptcp_subflow_wupcall(struct socket
*, void *, int);
146 static void mptcp_subflow_eupcall(struct socket
*, void *, uint32_t);
147 static void mptcp_update_last_owner(struct mptsub
*, struct socket
*);
148 static void mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
);
151 * Possible return values for subflow event handlers. Note that success
152 * values must be greater or equal than MPTS_EVRET_OK. Values less than that
153 * indicate errors or actions which require immediate attention; they will
154 * prevent the rest of the handlers from processing their respective events
155 * until the next round of events processing.
158 MPTS_EVRET_DELETE
= 1, /* delete this subflow */
159 MPTS_EVRET_OK
= 2, /* OK */
160 MPTS_EVRET_CONNECT_PENDING
= 3, /* resume pended connects */
161 MPTS_EVRET_DISCONNECT_FALLBACK
= 4, /* abort all but preferred */
162 MPTS_EVRET_OK_UPDATE
= 5, /* OK with conninfo update */
165 static ev_ret_t
mptcp_subflow_events(struct mptses
*, struct mptsub
*);
166 static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses
*, struct mptsub
*);
167 static ev_ret_t
mptcp_subflow_cantrcvmore_ev(struct mptses
*, struct mptsub
*);
168 static ev_ret_t
mptcp_subflow_cantsendmore_ev(struct mptses
*, struct mptsub
*);
169 static ev_ret_t
mptcp_subflow_timeout_ev(struct mptses
*, struct mptsub
*);
170 static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses
*, struct mptsub
*);
171 static ev_ret_t
mptcp_subflow_failover_ev(struct mptses
*, struct mptsub
*);
172 static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses
*, struct mptsub
*);
173 static ev_ret_t
mptcp_subflow_suspend_ev(struct mptses
*, struct mptsub
*);
174 static ev_ret_t
mptcp_subflow_resume_ev(struct mptses
*, struct mptsub
*);
175 static ev_ret_t
mptcp_subflow_connected_ev(struct mptses
*, struct mptsub
*);
176 static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses
*, struct mptsub
*);
177 static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses
*, struct mptsub
*);
178 static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses
*, struct mptsub
*);
179 static ev_ret_t
mptcp_fastjoin_ev(struct mptses
*, struct mptsub
*);
180 static ev_ret_t
mptcp_deleteok_ev(struct mptses
*, struct mptsub
*);
181 static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses
*, struct mptsub
*);
183 static const char *mptcp_evret2str(ev_ret_t
);
185 static mptcp_key_t
*mptcp_reserve_key(void);
186 static int mptcp_do_sha1(mptcp_key_t
*, char *, int);
187 static int mptcp_init_authparms(struct mptcb
*);
189 static unsigned int mptsub_zone_size
; /* size of mptsub */
190 static struct zone
*mptsub_zone
; /* zone for mptsub */
192 static unsigned int mptopt_zone_size
; /* size of mptopt */
193 static struct zone
*mptopt_zone
; /* zone for mptopt */
195 static unsigned int mpt_subauth_entry_size
; /* size of subf auth entry */
196 static struct zone
*mpt_subauth_zone
; /* zone of subf auth entry */
198 struct mppcbinfo mtcbinfo
;
200 static struct mptcp_keys_pool_head mptcp_keys_pool
;
202 #define MPTCP_SUBFLOW_WRITELEN (8 * 1024) /* bytes to write each time */
203 #define MPTCP_SUBFLOW_READLEN (8 * 1024) /* bytes to read each time */
205 SYSCTL_DECL(_net_inet
);
207 SYSCTL_NODE(_net_inet
, OID_AUTO
, mptcp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "MPTCP");
209 uint32_t mptcp_verbose
= 0; /* more noise if greater than 1 */
210 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, verbose
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
211 &mptcp_verbose
, 0, "MPTCP verbosity level");
213 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
214 &mtcbinfo
.mppi_count
, 0, "Number of active PCBs");
217 * Since there is one kernel thread per mptcp socket, imposing an artificial
218 * limit on number of allowed mptcp sockets.
220 uint32_t mptcp_socket_limit
= MPPCB_LIMIT
;
221 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, sk_lim
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
222 &mptcp_socket_limit
, 0, "MPTCP socket limit");
225 * SYSCTL to turn on delayed cellular subflow start.
227 uint32_t mptcp_delayed_subf_start
= 0;
228 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, delayed
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
229 &mptcp_delayed_subf_start
, 0, "MPTCP Delayed Subflow start");
232 * SYSCTL for RTT spike measurement threshold in msecs.
234 int32_t mptcp_rto_spike_thresh
= 3000;
235 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, rto_spikethresh
,
236 CTLFLAG_RW
|CTLFLAG_LOCKED
, &mptcp_rto_spike_thresh
, 0,
237 "MPTCP RTT spike thresh");
239 static struct protosw mptcp_subflow_protosw
;
240 static struct pr_usrreqs mptcp_subflow_usrreqs
;
242 static struct ip6protosw mptcp_subflow_protosw6
;
243 static struct pr_usrreqs mptcp_subflow_usrreqs6
;
247 * Protocol pr_init callback.
250 mptcp_init(struct protosw
*pp
, struct domain
*dp
)
253 static int mptcp_initialized
= 0;
256 struct ip6protosw
*prp6
;
259 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
|PR_ATTACHED
)) == PR_ATTACHED
);
261 /* do this only once */
262 if (mptcp_initialized
)
264 mptcp_initialized
= 1;
267 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
268 * we must be able to find IPPROTO_TCP entries for both.
270 prp
= pffindproto_locked(PF_INET
, IPPROTO_TCP
, SOCK_STREAM
);
272 bcopy(prp
, &mptcp_subflow_protosw
, sizeof (*prp
));
273 bcopy(prp
->pr_usrreqs
, &mptcp_subflow_usrreqs
,
274 sizeof (mptcp_subflow_usrreqs
));
275 mptcp_subflow_protosw
.pr_entry
.tqe_next
= NULL
;
276 mptcp_subflow_protosw
.pr_entry
.tqe_prev
= NULL
;
277 mptcp_subflow_protosw
.pr_usrreqs
= &mptcp_subflow_usrreqs
;
278 mptcp_subflow_usrreqs
.pru_soreceive
= mptcp_subflow_soreceive
;
279 mptcp_subflow_usrreqs
.pru_rcvoob
= pru_rcvoob_notsupp
;
281 * Socket filters shouldn't attach/detach to/from this protosw
282 * since pr_protosw is to be used instead, which points to the
283 * real protocol; if they do, it is a bug and we should panic.
285 mptcp_subflow_protosw
.pr_filter_head
.tqh_first
=
286 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
287 mptcp_subflow_protosw
.pr_filter_head
.tqh_last
=
288 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
291 prp6
= (struct ip6protosw
*)pffindproto_locked(PF_INET6
,
292 IPPROTO_TCP
, SOCK_STREAM
);
293 VERIFY(prp6
!= NULL
);
294 bcopy(prp6
, &mptcp_subflow_protosw6
, sizeof (*prp6
));
295 bcopy(prp6
->pr_usrreqs
, &mptcp_subflow_usrreqs6
,
296 sizeof (mptcp_subflow_usrreqs6
));
297 mptcp_subflow_protosw6
.pr_entry
.tqe_next
= NULL
;
298 mptcp_subflow_protosw6
.pr_entry
.tqe_prev
= NULL
;
299 mptcp_subflow_protosw6
.pr_usrreqs
= &mptcp_subflow_usrreqs6
;
300 mptcp_subflow_usrreqs6
.pru_soreceive
= mptcp_subflow_soreceive
;
301 mptcp_subflow_usrreqs6
.pru_rcvoob
= pru_rcvoob_notsupp
;
303 * Socket filters shouldn't attach/detach to/from this protosw
304 * since pr_protosw is to be used instead, which points to the
305 * real protocol; if they do, it is a bug and we should panic.
307 mptcp_subflow_protosw6
.pr_filter_head
.tqh_first
=
308 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
309 mptcp_subflow_protosw6
.pr_filter_head
.tqh_last
=
310 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
313 bzero(&mtcbinfo
, sizeof (mtcbinfo
));
314 TAILQ_INIT(&mtcbinfo
.mppi_pcbs
);
315 mtcbinfo
.mppi_size
= sizeof (struct mpp_mtp
);
316 if ((mtcbinfo
.mppi_zone
= zinit(mtcbinfo
.mppi_size
,
317 1024 * mtcbinfo
.mppi_size
, 8192, "mptcb")) == NULL
) {
318 panic("%s: unable to allocate MPTCP PCB zone\n", __func__
);
321 zone_change(mtcbinfo
.mppi_zone
, Z_CALLERACCT
, FALSE
);
322 zone_change(mtcbinfo
.mppi_zone
, Z_EXPAND
, TRUE
);
324 mtcbinfo
.mppi_lock_grp_attr
= lck_grp_attr_alloc_init();
325 mtcbinfo
.mppi_lock_grp
= lck_grp_alloc_init("mppcb",
326 mtcbinfo
.mppi_lock_grp_attr
);
327 mtcbinfo
.mppi_lock_attr
= lck_attr_alloc_init();
328 lck_mtx_init(&mtcbinfo
.mppi_lock
, mtcbinfo
.mppi_lock_grp
,
329 mtcbinfo
.mppi_lock_attr
);
330 mtcbinfo
.mppi_gc
= mptcp_gc
;
332 mtcbinfo
.mppi_timer
= mptcp_timer
;
334 /* attach to MP domain for garbage collection to take place */
335 mp_pcbinfo_attach(&mtcbinfo
);
337 mptsub_zone_size
= sizeof (struct mptsub
);
338 if ((mptsub_zone
= zinit(mptsub_zone_size
, 1024 * mptsub_zone_size
,
339 8192, "mptsub")) == NULL
) {
340 panic("%s: unable to allocate MPTCP subflow zone\n", __func__
);
343 zone_change(mptsub_zone
, Z_CALLERACCT
, FALSE
);
344 zone_change(mptsub_zone
, Z_EXPAND
, TRUE
);
346 mptopt_zone_size
= sizeof (struct mptopt
);
347 if ((mptopt_zone
= zinit(mptopt_zone_size
, 128 * mptopt_zone_size
,
348 1024, "mptopt")) == NULL
) {
349 panic("%s: unable to allocate MPTCP option zone\n", __func__
);
352 zone_change(mptopt_zone
, Z_CALLERACCT
, FALSE
);
353 zone_change(mptopt_zone
, Z_EXPAND
, TRUE
);
355 mpt_subauth_entry_size
= sizeof (struct mptcp_subf_auth_entry
);
356 if ((mpt_subauth_zone
= zinit(mpt_subauth_entry_size
,
357 1024 * mpt_subauth_entry_size
, 8192, "mptauth")) == NULL
) {
358 panic("%s: unable to allocate MPTCP address auth zone \n",
362 zone_change(mpt_subauth_zone
, Z_CALLERACCT
, FALSE
);
363 zone_change(mpt_subauth_zone
, Z_EXPAND
, TRUE
);
365 /* Set up a list of unique keys */
366 mptcp_key_pool_init();
371 * Create an MPTCP session, called as a result of opening a MPTCP socket.
374 mptcp_sescreate(struct socket
*mp_so
, struct mppcb
*mpp
)
376 struct mppcbinfo
*mppi
;
382 mppi
= mpp
->mpp_pcbinfo
;
383 VERIFY(mppi
!= NULL
);
385 mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
;
386 mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
;
388 /* MPTCP Multipath PCB Extension */
389 bzero(mpte
, sizeof (*mpte
));
390 VERIFY(mpp
->mpp_pcbe
== NULL
);
391 mpp
->mpp_pcbe
= mpte
;
392 mpte
->mpte_mppcb
= mpp
;
393 mpte
->mpte_mptcb
= mp_tp
;
395 TAILQ_INIT(&mpte
->mpte_sopts
);
396 TAILQ_INIT(&mpte
->mpte_subflows
);
397 mpte
->mpte_associd
= ASSOCID_ANY
;
398 mpte
->mpte_connid_last
= CONNID_ANY
;
400 lck_mtx_init(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
,
401 mppi
->mppi_lock_attr
);
406 * This can be rather expensive if we have lots of MPTCP sockets,
407 * but we need a kernel thread for this model to work. Perhaps we
408 * could amortize the costs by having one worker thread per a group
411 if (kernel_thread_start(mptcp_thread_func
, mpte
,
412 &mpte
->mpte_thread
) != KERN_SUCCESS
) {
416 mp_so
->so_usecount
++; /* for thread */
418 /* MPTCP Protocol Control Block */
419 bzero(mp_tp
, sizeof (*mp_tp
));
420 lck_mtx_init(&mp_tp
->mpt_lock
, mppi
->mppi_lock_grp
,
421 mppi
->mppi_lock_attr
);
422 mp_tp
->mpt_mpte
= mpte
;
426 lck_mtx_destroy(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
);
427 DTRACE_MPTCP5(session__create
, struct socket
*, mp_so
,
428 struct sockbuf
*, &mp_so
->so_rcv
,
429 struct sockbuf
*, &mp_so
->so_snd
,
430 struct mppcb
*, mpp
, int, error
);
432 return ((error
!= 0) ? NULL
: mpte
);
436 * Destroy an MPTCP session.
439 mptcp_sesdestroy(struct mptses
*mpte
)
443 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
445 mp_tp
= mpte
->mpte_mptcb
;
446 VERIFY(mp_tp
!= NULL
);
449 * MPTCP Multipath PCB Extension section
451 mptcp_flush_sopts(mpte
);
452 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
) && mpte
->mpte_numflows
== 0);
454 lck_mtx_destroy(&mpte
->mpte_thread_lock
,
455 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
458 * MPTCP Protocol Control Block section
460 lck_mtx_destroy(&mp_tp
->mpt_lock
,
461 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
463 DTRACE_MPTCP2(session__destroy
, struct mptses
*, mpte
,
464 struct mptcb
*, mp_tp
);
468 * Allocate an MPTCP socket option structure.
471 mptcp_sopt_alloc(int how
)
475 mpo
= (how
== M_WAITOK
) ? zalloc(mptopt_zone
) :
476 zalloc_noblock(mptopt_zone
);
478 bzero(mpo
, mptopt_zone_size
);
485 * Free an MPTCP socket option structure.
488 mptcp_sopt_free(struct mptopt
*mpo
)
490 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
492 zfree(mptopt_zone
, mpo
);
496 * Add a socket option to the MPTCP socket option list.
499 mptcp_sopt_insert(struct mptses
*mpte
, struct mptopt
*mpo
)
501 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
502 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
503 mpo
->mpo_flags
|= MPOF_ATTACHED
;
504 TAILQ_INSERT_TAIL(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
508 * Remove a socket option from the MPTCP socket option list.
511 mptcp_sopt_remove(struct mptses
*mpte
, struct mptopt
*mpo
)
513 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
514 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
515 mpo
->mpo_flags
&= ~MPOF_ATTACHED
;
516 TAILQ_REMOVE(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
520 * Search for an existing <sopt_level,sopt_name> socket option.
523 mptcp_sopt_find(struct mptses
*mpte
, struct sockopt
*sopt
)
527 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
529 TAILQ_FOREACH(mpo
, &mpte
->mpte_sopts
, mpo_entry
) {
530 if (mpo
->mpo_level
== sopt
->sopt_level
&&
531 mpo
->mpo_name
== sopt
->sopt_name
)
534 VERIFY(mpo
== NULL
|| sopt
->sopt_valsize
== sizeof (int));
540 * Flushes all recorded socket options from an MP socket.
543 mptcp_flush_sopts(struct mptses
*mpte
)
545 struct mptopt
*mpo
, *tmpo
;
547 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
549 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
550 mptcp_sopt_remove(mpte
, mpo
);
551 mptcp_sopt_free(mpo
);
553 VERIFY(TAILQ_EMPTY(&mpte
->mpte_sopts
));
557 * Allocate a MPTCP subflow structure.
560 mptcp_subflow_alloc(int how
)
564 mpts
= (how
== M_WAITOK
) ? zalloc(mptsub_zone
) :
565 zalloc_noblock(mptsub_zone
);
567 bzero(mpts
, mptsub_zone_size
);
568 lck_mtx_init(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
,
569 mtcbinfo
.mppi_lock_attr
);
576 * Deallocate a subflow structure, called when all of the references held
577 * on it have been released. This implies that the subflow has been deleted.
580 mptcp_subflow_free(struct mptsub
*mpts
)
582 MPTS_LOCK_ASSERT_HELD(mpts
);
584 VERIFY(mpts
->mpts_refcnt
== 0);
585 VERIFY(!(mpts
->mpts_flags
& MPTSF_ATTACHED
));
586 VERIFY(mpts
->mpts_mpte
== NULL
);
587 VERIFY(mpts
->mpts_socket
== NULL
);
589 if (mpts
->mpts_src_sl
!= NULL
) {
590 sockaddrlist_free(mpts
->mpts_src_sl
);
591 mpts
->mpts_src_sl
= NULL
;
593 if (mpts
->mpts_dst_sl
!= NULL
) {
594 sockaddrlist_free(mpts
->mpts_dst_sl
);
595 mpts
->mpts_dst_sl
= NULL
;
598 lck_mtx_destroy(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
);
600 zfree(mptsub_zone
, mpts
);
604 * Create an MPTCP subflow socket.
607 mptcp_subflow_socreate(struct mptses
*mpte
, struct mptsub
*mpts
, int dom
,
608 struct proc
*p
, struct socket
**so
)
610 struct mptopt smpo
, *mpo
, *tmpo
;
611 struct socket
*mp_so
;
615 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
616 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
619 * Create the subflow socket (multipath subflow, non-blocking.)
621 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
622 * socket; it will be cleared when the socket is peeled off or closed.
623 * It also indicates to the underlying TCP to handle MPTCP options.
624 * A multipath subflow socket implies SS_NOFDREF state.
626 if ((error
= socreate_internal(dom
, so
, SOCK_STREAM
,
627 IPPROTO_TCP
, p
, SOCF_ASYNC
| SOCF_MP_SUBFLOW
, PROC_NULL
)) != 0) {
628 mptcplog((LOG_ERR
, "MPTCP ERROR %s: mp_so 0x%llx unable to "
629 "create subflow socket error %d\n", __func__
,
630 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), error
));
635 VERIFY((*so
)->so_flags
& SOF_MP_SUBFLOW
);
636 VERIFY(((*so
)->so_state
& (SS_NBIO
|SS_NOFDREF
)) ==
637 (SS_NBIO
|SS_NOFDREF
));
639 /* prevent the socket buffers from being compressed */
640 (*so
)->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
641 (*so
)->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
643 bzero(&smpo
, sizeof (smpo
));
644 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
645 smpo
.mpo_level
= SOL_SOCKET
;
648 /* disable SIGPIPE */
649 smpo
.mpo_name
= SO_NOSIGPIPE
;
650 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
653 /* find out if the subflow's source address goes away */
654 smpo
.mpo_name
= SO_NOADDRERR
;
655 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
658 /* enable keepalive */
659 smpo
.mpo_name
= SO_KEEPALIVE
;
660 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
664 * Limit the receive socket buffer size to 64k.
666 * We need to take into consideration the window scale option
667 * which could be negotiated in one subflow but disabled in
669 * XXX This can be improved in the future.
671 smpo
.mpo_name
= SO_RCVBUF
;
672 smpo
.mpo_intval
= MPTCP_RWIN_MAX
;
673 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
676 /* N.B.: set by sosetopt */
677 VERIFY(!((*so
)->so_rcv
.sb_flags
& SB_AUTOSIZE
));
678 /* Prevent automatic socket buffer sizing. */
679 (*so
)->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
681 smpo
.mpo_level
= IPPROTO_TCP
;
682 smpo
.mpo_intval
= mptcp_subflow_keeptime
;
683 smpo
.mpo_name
= TCP_KEEPALIVE
;
684 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
687 /* replay setsockopt(2) on the subflow sockets for eligible options */
688 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
691 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
))
695 * Skip those that are handled internally; these options
696 * should not have been recorded and marked with the
697 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
699 if (mpo
->mpo_level
== SOL_SOCKET
&&
700 (mpo
->mpo_name
== SO_NOSIGPIPE
||
701 mpo
->mpo_name
== SO_NOADDRERR
||
702 mpo
->mpo_name
== SO_KEEPALIVE
))
705 interim
= (mpo
->mpo_flags
& MPOF_INTERIM
);
706 if (mptcp_subflow_sosetopt(mpte
, *so
, mpo
) != 0 && interim
) {
708 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx sopt %s val %d "
709 "interim record removed\n", __func__
,
710 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
711 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
712 buf
, sizeof (buf
)), mpo
->mpo_intval
));
713 mptcp_sopt_remove(mpte
, mpo
);
714 mptcp_sopt_free(mpo
);
720 * We need to receive everything that the subflow socket has,
721 * so use a customized socket receive function. We will undo
722 * this when the socket is peeled off or closed.
724 mpts
->mpts_oprotosw
= (*so
)->so_proto
;
727 (*so
)->so_proto
= &mptcp_subflow_protosw
;
731 (*so
)->so_proto
= (struct protosw
*)&mptcp_subflow_protosw6
;
740 socket_unlock(*so
, 0);
742 DTRACE_MPTCP4(subflow__create
, struct mptses
*, mpte
,
743 struct mptsub
*, mpts
, int, dom
, int, error
);
749 * Close an MPTCP subflow socket.
751 * Note that this may be called on an embryonic subflow, and the only
752 * thing that is guaranteed valid is the protocol-user request.
755 mptcp_subflow_soclose(struct mptsub
*mpts
, struct socket
*so
)
757 MPTS_LOCK_ASSERT_HELD(mpts
);
760 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
761 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
763 /* restore protocol-user requests */
764 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
765 so
->so_proto
= mpts
->mpts_oprotosw
;
766 socket_unlock(so
, 0);
768 mpts
->mpts_socket
= NULL
; /* may already be NULL */
770 DTRACE_MPTCP5(subflow__close
, struct mptsub
*, mpts
,
772 struct sockbuf
*, &so
->so_rcv
,
773 struct sockbuf
*, &so
->so_snd
,
774 struct mptses
*, mpts
->mpts_mpte
);
776 return (soclose(so
));
780 * Connect an MPTCP subflow socket.
782 * This may be called inline as part of adding a subflow, or asynchronously
783 * by the thread (upon progressing to MPTCPF_JOIN_READY). Note that in the
784 * pending connect case, the subflow socket may have been bound to an interface
785 * and/or a source IP address which may no longer be around by the time this
786 * routine is called; in that case the connect attempt will most likely fail.
789 mptcp_subflow_soconnectx(struct mptses
*mpte
, struct mptsub
*mpts
)
794 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
795 MPTS_LOCK_ASSERT_HELD(mpts
);
797 VERIFY((mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)) ==
799 VERIFY(mpts
->mpts_socket
!= NULL
);
800 so
= mpts
->mpts_socket
;
801 af
= mpts
->mpts_family
;
803 if (af
== AF_INET
|| af
== AF_INET6
) {
804 struct sockaddr_entry
*dst_se
;
805 char dbuf
[MAX_IPv6_STR_LEN
];
807 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
808 VERIFY(dst_se
!= NULL
);
810 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx dst %s[%d] cid %d "
811 "[pended %s]\n", __func__
,
812 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
813 inet_ntop(af
, ((af
== AF_INET
) ?
814 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
815 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
816 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
817 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
818 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
820 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
824 mpts
->mpts_flags
&= ~MPTSF_CONNECT_PENDING
;
827 mptcp_attach_to_subf(so
, mpte
->mpte_mptcb
, mpte
->mpte_addrid_last
);
829 /* connect the subflow socket */
830 error
= soconnectxlocked(so
, &mpts
->mpts_src_sl
, &mpts
->mpts_dst_sl
,
831 mpts
->mpts_mpcr
.mpcr_proc
, mpts
->mpts_mpcr
.mpcr_ifscope
,
832 mpte
->mpte_associd
, NULL
, TCP_CONNREQF_MPTCP
,
833 &mpts
->mpts_mpcr
, sizeof (mpts
->mpts_mpcr
));
834 socket_unlock(so
, 0);
836 /* Allocate a unique address id per subflow */
837 mpte
->mpte_addrid_last
++;
838 if (mpte
->mpte_addrid_last
== 0)
839 mpte
->mpte_addrid_last
++;
841 DTRACE_MPTCP3(subflow__connect
, struct mptses
*, mpte
,
842 struct mptsub
*, mpts
, int, error
);
848 * MPTCP subflow socket receive routine, derived from soreceive().
851 mptcp_subflow_soreceive(struct socket
*so
, struct sockaddr
**psa
,
852 struct uio
*uio
, struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
855 int flags
, error
= 0;
856 struct proc
*p
= current_proc();
857 struct mbuf
*m
, **mp
= mp0
;
858 struct mbuf
*nextrecord
;
861 VERIFY(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
);
863 #ifdef MORE_LOCKING_DEBUG
864 if (so
->so_usecount
== 1) {
865 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
870 * We return all that is there in the subflow's socket receive buffer
871 * to the MPTCP layer, so we require that the caller passes in the
872 * expected parameters.
874 if (mp
== NULL
|| controlp
!= NULL
) {
875 socket_unlock(so
, 1);
882 flags
= *flagsp
&~ MSG_EOR
;
886 if (flags
& (MSG_PEEK
|MSG_OOB
|MSG_NEEDSA
|MSG_WAITALL
|MSG_WAITSTREAM
)) {
887 socket_unlock(so
, 1);
890 flags
|= (MSG_DONTWAIT
|MSG_NBIO
);
893 * If a recv attempt is made on a previously-accepted socket
894 * that has been marked as inactive (disconnected), reject
897 if (so
->so_flags
& SOF_DEFUNCT
) {
898 struct sockbuf
*sb
= &so
->so_rcv
;
901 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
902 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
903 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
905 * This socket should have been disconnected and flushed
906 * prior to being returned from sodefunct(); there should
907 * be no data on its receive list, so panic otherwise.
909 if (so
->so_state
& SS_DEFUNCT
)
910 sb_empty_assert(sb
, __func__
);
911 socket_unlock(so
, 1);
916 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
917 * and if so just return to the caller. This could happen when
918 * soreceive() is called by a socket upcall function during the
919 * time the socket is freed. The socket buffer would have been
920 * locked across the upcall, therefore we cannot put this thread
921 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
922 * we may livelock), because the lock on the socket buffer will
923 * only be released when the upcall routine returns to its caller.
924 * Because the socket has been officially closed, there can be
925 * no further read on it.
927 * A multipath subflow socket would have its SS_NOFDREF set by
928 * default, so check for SOF_MP_SUBFLOW socket flag; when the
929 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
931 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
932 (SS_NOFDREF
| SS_CANTRCVMORE
) && !(so
->so_flags
& SOF_MP_SUBFLOW
)) {
933 socket_unlock(so
, 1);
938 * For consistency with soreceive() semantics, we need to obey
939 * SB_LOCK in case some other code path has locked the buffer.
941 error
= sblock(&so
->so_rcv
, 0);
943 socket_unlock(so
, 1);
947 m
= so
->so_rcv
.sb_mb
;
950 * Panic if we notice inconsistencies in the socket's
951 * receive list; both sb_mb and sb_cc should correctly
952 * reflect the contents of the list, otherwise we may
953 * end up with false positives during select() or poll()
954 * which could put the application in a bad state.
956 SB_MB_CHECK(&so
->so_rcv
);
958 if (so
->so_error
!= 0) {
959 error
= so
->so_error
;
964 if (so
->so_state
& SS_CANTRCVMORE
) {
968 if (!(so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
))) {
974 * MSG_DONTWAIT is implicitly defined and this routine will
975 * never block, so return EWOULDBLOCK when there is nothing.
981 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
982 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
983 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
986 nextrecord
= m
->m_nextpkt
;
987 sbfree(&so
->so_rcv
, m
);
992 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
997 m
->m_nextpkt
= nextrecord
;
998 if (nextrecord
== NULL
)
999 so
->so_rcv
.sb_lastrecord
= m
;
1001 m
= so
->so_rcv
.sb_mb
= nextrecord
;
1002 SB_EMPTY_FIXUP(&so
->so_rcv
);
1004 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1005 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1008 DTRACE_MPTCP3(subflow__receive
, struct socket
*, so
,
1009 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1010 /* notify protocol that we drained all the data */
1011 if ((so
->so_proto
->pr_flags
& PR_WANTRCVD
) && so
->so_pcb
!= NULL
)
1012 (*so
->so_proto
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
1018 sbunlock(&so
->so_rcv
, FALSE
); /* will unlock socket */
1025 * Prepare an MPTCP subflow socket for peeloff(2); basically undo
1026 * the work done earlier when the subflow socket was created.
1029 mptcp_subflow_sopeeloff(struct mptses
*mpte
, struct mptsub
*mpts
,
1033 struct socket
*mp_so
;
1036 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1037 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1038 MPTS_LOCK_ASSERT_HELD(mpts
);
1041 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
1042 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
1044 /* inherit MPTCP socket states */
1045 if (!(mp_so
->so_state
& SS_NBIO
))
1046 so
->so_state
&= ~SS_NBIO
;
1049 * At this point, the socket is not yet closed, as there is at least
1050 * one outstanding usecount previously held by mpts_socket from
1051 * socreate(). Atomically clear SOF_MP_SUBFLOW and SS_NOFDREF here.
1053 so
->so_flags
&= ~SOF_MP_SUBFLOW
;
1054 so
->so_state
&= ~SS_NOFDREF
;
1055 so
->so_flags
&= ~SOF_MPTCP_TRUE
;
1057 /* allow socket buffers to be compressed */
1058 so
->so_rcv
.sb_flags
&= ~SB_NOCOMPRESS
;
1059 so
->so_snd
.sb_flags
&= ~SB_NOCOMPRESS
;
1062 * Allow socket buffer auto sizing.
1064 * This will increase the current 64k buffer size to whatever is best.
1066 if (!(so
->so_rcv
.sb_flags
& SB_USRSIZE
))
1067 so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
1068 if (!(so
->so_snd
.sb_flags
& SB_USRSIZE
))
1069 so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
1071 /* restore protocol-user requests */
1072 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
1073 so
->so_proto
= mpts
->mpts_oprotosw
;
1075 bzero(&smpo
, sizeof (smpo
));
1076 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1077 smpo
.mpo_level
= SOL_SOCKET
;
1079 /* inherit SOF_NOSIGPIPE from parent MP socket */
1080 p
= (mp_so
->so_flags
& SOF_NOSIGPIPE
);
1081 c
= (so
->so_flags
& SOF_NOSIGPIPE
);
1082 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1083 smpo
.mpo_name
= SO_NOSIGPIPE
;
1085 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1087 /* inherit SOF_NOADDRAVAIL from parent MP socket */
1088 p
= (mp_so
->so_flags
& SOF_NOADDRAVAIL
);
1089 c
= (so
->so_flags
& SOF_NOADDRAVAIL
);
1090 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1091 smpo
.mpo_name
= SO_NOADDRERR
;
1093 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1095 /* inherit SO_KEEPALIVE from parent MP socket */
1096 p
= (mp_so
->so_options
& SO_KEEPALIVE
);
1097 c
= (so
->so_options
& SO_KEEPALIVE
);
1098 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1099 smpo
.mpo_name
= SO_KEEPALIVE
;
1101 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1103 /* unset TCP level default keepalive option */
1104 p
= (intotcpcb(sotoinpcb(mp_so
)))->t_keepidle
;
1105 c
= (intotcpcb(sotoinpcb(so
)))->t_keepidle
;
1106 smpo
.mpo_level
= IPPROTO_TCP
;
1107 smpo
.mpo_intval
= 0;
1108 smpo
.mpo_name
= TCP_KEEPALIVE
;
1110 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1111 socket_unlock(so
, 0);
1113 DTRACE_MPTCP5(subflow__peeloff
, struct mptses
*, mpte
,
1114 struct mptsub
*, mpts
, struct socket
*, so
,
1115 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1119 * Establish an initial MPTCP connection (if first subflow and not yet
1120 * connected), or add a subflow to an existing MPTCP connection.
1123 mptcp_subflow_add(struct mptses
*mpte
, struct mptsub
*mpts
,
1124 struct proc
*p
, uint32_t ifscope
)
1126 struct sockaddr_entry
*se
, *src_se
= NULL
, *dst_se
= NULL
;
1127 struct socket
*mp_so
, *so
= NULL
;
1128 struct mptsub_connreq mpcr
;
1129 struct mptcb
*mp_tp
;
1132 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1133 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1134 mp_tp
= mpte
->mpte_mptcb
;
1137 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
1138 /* If the remote end sends Data FIN, refuse subflow adds */
1146 VERIFY(!(mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)));
1147 VERIFY(mpts
->mpts_mpte
== NULL
);
1148 VERIFY(mpts
->mpts_socket
== NULL
);
1149 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
1150 VERIFY(mpts
->mpts_connid
== CONNID_ANY
);
1152 /* select source (if specified) and destination addresses */
1153 if ((error
= in_selectaddrs(AF_UNSPEC
, &mpts
->mpts_src_sl
, &src_se
,
1154 &mpts
->mpts_dst_sl
, &dst_se
)) != 0)
1157 VERIFY(mpts
->mpts_dst_sl
!= NULL
&& dst_se
!= NULL
);
1158 VERIFY(src_se
== NULL
|| mpts
->mpts_src_sl
!= NULL
);
1159 af
= mpts
->mpts_family
= dst_se
->se_addr
->sa_family
;
1160 VERIFY(src_se
== NULL
|| src_se
->se_addr
->sa_family
== af
);
1161 VERIFY(af
== AF_INET
|| af
== AF_INET6
);
1164 * If the source address is not specified, allocate a storage for
1165 * it, so that later on we can fill it in with the actual source
1166 * IP address chosen by the underlying layer for the subflow after
1169 if (mpts
->mpts_src_sl
== NULL
) {
1171 sockaddrlist_dup(mpts
->mpts_dst_sl
, M_WAITOK
);
1172 if (mpts
->mpts_src_sl
== NULL
) {
1176 se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
1177 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
&&
1178 se
->se_addr
->sa_len
== dst_se
->se_addr
->sa_len
);
1179 bzero(se
->se_addr
, se
->se_addr
->sa_len
);
1180 se
->se_addr
->sa_len
= dst_se
->se_addr
->sa_len
;
1181 se
->se_addr
->sa_family
= dst_se
->se_addr
->sa_family
;
1184 /* create the subflow socket */
1185 if ((error
= mptcp_subflow_socreate(mpte
, mpts
, af
, p
, &so
)) != 0)
1188 /* If fastjoin is requested, set state in mpts */
1189 if ((so
->so_flags
& SOF_MPTCP_FASTJOIN
) &&
1190 (mp_tp
->mpt_state
== MPTCPS_ESTABLISHED
) &&
1191 (mpte
->mpte_nummpcapflows
== 0)) {
1192 mpts
->mpts_flags
|= MPTSF_FASTJ_REQD
;
1193 mpts
->mpts_rel_seq
= 1;
1195 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1200 * Increment the counter, while avoiding 0 (CONNID_ANY) and
1203 mpte
->mpte_connid_last
++;
1204 if (mpte
->mpte_connid_last
== CONNID_ALL
||
1205 mpte
->mpte_connid_last
== CONNID_ANY
)
1206 mpte
->mpte_connid_last
++;
1208 mpts
->mpts_connid
= mpte
->mpte_connid_last
;
1209 VERIFY(mpts
->mpts_connid
!= CONNID_ANY
&&
1210 mpts
->mpts_connid
!= CONNID_ALL
);
1212 /* Allocate a unique address id per subflow */
1213 mpte
->mpte_addrid_last
++;
1214 if (mpte
->mpte_addrid_last
== 0)
1215 mpte
->mpte_addrid_last
++;
1217 /* bind subflow socket to the specified interface */
1218 if (ifscope
!= IFSCOPE_NONE
) {
1220 error
= inp_bindif(sotoinpcb(so
), ifscope
, &mpts
->mpts_outif
);
1222 socket_unlock(so
, 0);
1223 (void) mptcp_subflow_soclose(mpts
, so
);
1226 VERIFY(mpts
->mpts_outif
!= NULL
);
1227 mpts
->mpts_flags
|= MPTSF_BOUND_IF
;
1229 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx bindif %s[%d] "
1230 "cid %d\n", __func__
,
1231 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1232 mpts
->mpts_outif
->if_xname
,
1233 ifscope
, mpts
->mpts_connid
));
1234 socket_unlock(so
, 0);
1237 /* if source address and/or port is specified, bind to it */
1238 if (src_se
!= NULL
) {
1239 struct sockaddr
*sa
= src_se
->se_addr
;
1240 uint32_t mpts_flags
= 0;
1245 if (SIN(sa
)->sin_addr
.s_addr
!= INADDR_ANY
)
1246 mpts_flags
|= MPTSF_BOUND_IP
;
1247 if ((lport
= SIN(sa
)->sin_port
) != 0)
1248 mpts_flags
|= MPTSF_BOUND_PORT
;
1252 VERIFY(af
== AF_INET6
);
1253 if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa
)->sin6_addr
))
1254 mpts_flags
|= MPTSF_BOUND_IP
;
1255 if ((lport
= SIN6(sa
)->sin6_port
) != 0)
1256 mpts_flags
|= MPTSF_BOUND_PORT
;
1261 error
= sobindlock(so
, sa
, 1); /* will lock/unlock socket */
1263 (void) mptcp_subflow_soclose(mpts
, so
);
1266 mpts
->mpts_flags
|= mpts_flags
;
1268 if (af
== AF_INET
|| af
== AF_INET6
) {
1269 char sbuf
[MAX_IPv6_STR_LEN
];
1271 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx bindip %s[%d] "
1272 "cid %d\n", __func__
,
1273 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1274 inet_ntop(af
, ((af
== AF_INET
) ?
1275 (void *)&SIN(sa
)->sin_addr
.s_addr
:
1276 (void *)&SIN6(sa
)->sin6_addr
), sbuf
, sizeof (sbuf
)),
1277 ntohs(lport
), mpts
->mpts_connid
));
1282 * Insert the subflow into the list, and associate the MPTCP PCB
1283 * as well as the the subflow socket. From this point on, removing
1284 * the subflow needs to be done via mptcp_subflow_del().
1286 TAILQ_INSERT_TAIL(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1287 mpte
->mpte_numflows
++;
1289 atomic_bitset_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1290 mpts
->mpts_mpte
= mpte
;
1291 mpts
->mpts_socket
= so
;
1292 MPTS_ADDREF_LOCKED(mpts
); /* for being in MPTCP subflow list */
1293 MPTS_ADDREF_LOCKED(mpts
); /* for subflow socket */
1294 mp_so
->so_usecount
++; /* for subflow socket */
1296 /* register for subflow socket read/write events */
1297 (void) sock_setupcalls(so
, mptcp_subflow_rupcall
, mpts
,
1298 mptcp_subflow_wupcall
, mpts
);
1301 * Register for subflow socket control events; ignore
1302 * SO_FILT_HINT_CONNINFO_UPDATED from below since we
1303 * will generate it here.
1305 (void) sock_catchevents(so
, mptcp_subflow_eupcall
, mpts
,
1306 SO_FILT_HINT_CONNRESET
| SO_FILT_HINT_CANTRCVMORE
|
1307 SO_FILT_HINT_CANTSENDMORE
| SO_FILT_HINT_TIMEOUT
|
1308 SO_FILT_HINT_NOSRCADDR
| SO_FILT_HINT_IFDENIED
|
1309 SO_FILT_HINT_SUSPEND
| SO_FILT_HINT_RESUME
|
1310 SO_FILT_HINT_CONNECTED
| SO_FILT_HINT_DISCONNECTED
|
1311 SO_FILT_HINT_MPFAILOVER
| SO_FILT_HINT_MPSTATUS
|
1312 SO_FILT_HINT_MUSTRST
| SO_FILT_HINT_MPFASTJ
|
1313 SO_FILT_HINT_DELETEOK
| SO_FILT_HINT_MPCANTRCVMORE
);
1316 VERIFY(!(mpts
->mpts_flags
&
1317 (MPTSF_CONNECTING
|MPTSF_CONNECTED
|MPTSF_CONNECT_PENDING
)));
1319 bzero(&mpcr
, sizeof (mpcr
));
1321 mpcr
.mpcr_ifscope
= ifscope
;
1323 * Indicate to the TCP subflow whether or not it should establish
1324 * the initial MPTCP connection, or join an existing one. Fill
1325 * in the connection request structure with additional info needed
1326 * by the underlying TCP (to be used in the TCP options, etc.)
1329 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
&& mpte
->mpte_numflows
== 1) {
1330 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
1331 mp_tp
->mpt_localkey
= mptcp_reserve_key();
1332 mptcp_conn_properties(mp_tp
);
1335 soisconnecting(mp_so
);
1336 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ENABLE
;
1338 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
))
1339 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1341 /* avoid starting up cellular subflow unless required */
1342 if ((mptcp_delayed_subf_start
) &&
1343 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
1344 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1347 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ADD
;
1350 mpts
->mpts_mpcr
= mpcr
;
1351 mpts
->mpts_flags
|= MPTSF_CONNECTING
;
1353 if (af
== AF_INET
|| af
== AF_INET6
) {
1354 char dbuf
[MAX_IPv6_STR_LEN
];
1356 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx dst %s[%d] cid %d "
1357 "[pending %s]\n", __func__
,
1358 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1359 inet_ntop(af
, ((af
== AF_INET
) ?
1360 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
1361 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
1362 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
1363 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
1364 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
1366 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
1370 /* connect right away if first attempt, or if join can be done now */
1371 if (!(mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
))
1372 error
= mptcp_subflow_soconnectx(mpte
, mpts
);
1377 soevent(mp_so
, SO_FILT_HINT_LOCKED
|
1378 SO_FILT_HINT_CONNINFO_UPDATED
);
1384 * Delete/remove a subflow from an MPTCP. The underlying subflow socket
1385 * will no longer be accessible after a subflow is deleted, thus this
1386 * should occur only after the subflow socket has been disconnected.
1387 * If peeloff(2) is called, leave the socket open.
1390 mptcp_subflow_del(struct mptses
*mpte
, struct mptsub
*mpts
, boolean_t close
)
1392 struct socket
*mp_so
, *so
;
1394 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1395 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1398 so
= mpts
->mpts_socket
;
1401 if (close
&& !((mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
1402 (mpts
->mpts_flags
& MPTSF_USER_DISCONNECT
))) {
1404 mptcplog((LOG_DEBUG
, "%s: %d %x\n", __func__
,
1405 mpts
->mpts_soerror
, mpts
->mpts_flags
));
1409 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d "
1410 "[close %s] %d %x\n", __func__
,
1411 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1413 mp_so
->so_retaincnt
, mpts
->mpts_connid
,
1414 (close
? "YES" : "NO"), mpts
->mpts_soerror
,
1417 VERIFY(mpts
->mpts_mpte
== mpte
);
1418 VERIFY(mpts
->mpts_connid
!= CONNID_ANY
&&
1419 mpts
->mpts_connid
!= CONNID_ALL
);
1421 VERIFY(mpts
->mpts_flags
& MPTSF_ATTACHED
);
1422 atomic_bitclear_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1423 TAILQ_REMOVE(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1424 VERIFY(mpte
->mpte_numflows
!= 0);
1425 mpte
->mpte_numflows
--;
1426 if (mpte
->mpte_active_sub
== mpts
)
1427 mpte
->mpte_active_sub
= NULL
;
1430 * Drop references held by this subflow socket; there
1431 * will be no further upcalls made from this point.
1433 (void) sock_setupcalls(so
, NULL
, NULL
, NULL
, NULL
);
1434 (void) sock_catchevents(so
, NULL
, NULL
, 0);
1436 mptcp_detach_mptcb_from_subf(mpte
->mpte_mptcb
, so
);
1439 (void) mptcp_subflow_soclose(mpts
, so
);
1441 VERIFY(mp_so
->so_usecount
!= 0);
1442 mp_so
->so_usecount
--; /* for subflow socket */
1443 mpts
->mpts_mpte
= NULL
;
1444 mpts
->mpts_socket
= NULL
;
1447 MPTS_REMREF(mpts
); /* for MPTCP subflow list */
1448 MPTS_REMREF(mpts
); /* for subflow socket */
1450 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
1454 * Disconnect a subflow socket.
1457 mptcp_subflow_disconnect(struct mptses
*mpte
, struct mptsub
*mpts
,
1461 struct mptcb
*mp_tp
;
1464 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1465 MPTS_LOCK_ASSERT_HELD(mpts
);
1467 VERIFY(mpts
->mpts_mpte
== mpte
);
1468 VERIFY(mpts
->mpts_socket
!= NULL
);
1469 VERIFY(mpts
->mpts_connid
!= CONNID_ANY
&&
1470 mpts
->mpts_connid
!= CONNID_ALL
);
1472 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|MPTSF_DISCONNECTED
))
1475 mpts
->mpts_flags
|= MPTSF_DISCONNECTING
;
1478 * If this is coming from disconnectx(2) or issued as part of
1479 * closing the MPTCP socket, the subflow shouldn't stick around.
1480 * Otherwise let it linger around in case the upper layers need
1481 * to retrieve its conninfo.
1484 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
1486 so
= mpts
->mpts_socket
;
1487 mp_tp
= mpte
->mpte_mptcb
;
1489 if (mp_tp
->mpt_state
> MPTCPS_ESTABLISHED
)
1494 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
1495 (so
->so_state
& SS_ISCONNECTED
)) {
1496 mptcplog((LOG_DEBUG
, "%s: cid %d fin %d [linger %s]\n",
1497 __func__
, mpts
->mpts_connid
, send_dfin
,
1498 (deleteok
? "NO" : "YES")));
1501 mptcp_send_dfin(so
);
1502 (void) soshutdownlock(so
, SHUT_RD
);
1503 (void) soshutdownlock(so
, SHUT_WR
);
1504 (void) sodisconnectlocked(so
);
1506 socket_unlock(so
, 0);
1508 * Generate a disconnect event for this subflow socket, in case
1509 * the lower layer doesn't do it; this is needed because the
1510 * subflow socket deletion relies on it. This will also end up
1511 * generating SO_FILT_HINT_CONNINFO_UPDATED on the MPTCP socket;
1512 * we cannot do that here because subflow lock is currently held.
1514 mptcp_subflow_eupcall(so
, mpts
, SO_FILT_HINT_DISCONNECTED
);
1518 * Subflow socket read upcall.
1520 * Called when the associated subflow socket posted a read event. The subflow
1521 * socket lock has been released prior to invoking the callback. Note that the
1522 * upcall may occur synchronously as a result of MPTCP performing an action on
1523 * it, or asynchronously as a result of an event happening at the subflow layer.
1524 * Therefore, to maintain lock ordering, the only lock that can be acquired
1525 * here is the thread lock, for signalling purposes.
/*
 * NOTE(review): this chunk is a lossy line-wrapped extraction; statements are
 * split across lines, stray original line numbers are fused into the text, and
 * the function's return type, braces and some interior lines are elided.
 * Comments below annotate intent only; all code tokens are left untouched.
 *
 * Subflow socket read upcall: runs when the subflow socket posts a read
 * event. Per the header comment above (orig. lines 1524-1525), only the
 * MPTCP thread lock may be taken here, for signalling purposes.
 */
1528 mptcp_subflow_rupcall(struct socket
*so
, void *arg
, int waitf
)
1530 #pragma unused(so, waitf)
/* Recover the subflow from the opaque upcall argument registered via
 * sock_setupcalls() (see mptcp_subflow_add), then its owning session. */
1531 struct mptsub
*mpts
= arg
;
1532 struct mptses
*mpte
= mpts
->mpts_mpte
;
1535 * mpte should never be NULL, except in a race with
/*
 * Wake the MPTCP thread (which will call mptcp_subflow_input) under the
 * session's thread lock; no other lock is acquired in this upcall.
 */
1541 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1542 mptcp_thread_signal_locked(mpte
);
1543 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1547 * Subflow socket input.
1549 * Called in the context of the MPTCP thread, for reading data from the
1550 * underlying subflow socket and delivering it to MPTCP.
1553 mptcp_subflow_input(struct mptses
*mpte
, struct mptsub
*mpts
)
1555 struct mbuf
*m
= NULL
;
1558 struct mptsub
*mpts_alt
= NULL
;
1560 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1561 MPTS_LOCK_ASSERT_HELD(mpts
);
1563 DTRACE_MPTCP2(subflow__input
, struct mptses
*, mpte
,
1564 struct mptsub
*, mpts
);
1566 if (!(mpts
->mpts_flags
& MPTSF_CONNECTED
))
1569 so
= mpts
->mpts_socket
;
1571 error
= sock_receive_internal(so
, NULL
, &m
, 0, NULL
);
1572 if (error
!= 0 && error
!= EWOULDBLOCK
) {
1573 mptcplog((LOG_ERR
, "%s: cid %d error %d\n",
1574 __func__
, mpts
->mpts_connid
, error
));
1576 mpts_alt
= mptcp_get_subflow(mpte
, mpts
);
1577 if (mpts_alt
== NULL
) {
1578 if (mptcp_delayed_subf_start
) {
1579 mpts_alt
= mptcp_get_pending_subflow(mpte
,
1582 mptcplog((LOG_INFO
,"%s: pending %d\n",
1583 __func__
, mpts_alt
->mpts_connid
));
1585 mptcplog((LOG_ERR
, "%s: no pending",
1587 mpts
->mpts_connid
));
1588 mpte
->mpte_mppcb
->mpp_socket
->so_error
=
1592 mptcplog((LOG_ERR
, "%s: no alt path cid %d\n",
1593 __func__
, mpts
->mpts_connid
));
1594 mpte
->mpte_mppcb
->mpp_socket
->so_error
= error
;
1598 } else if (error
== 0) {
1599 mptcplog3((LOG_DEBUG
, "%s: cid %d \n",
1600 __func__
, mpts
->mpts_connid
));
1603 /* In fallback, make sure to accept data on all but one subflow */
1604 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1605 (!(mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
1612 * Release subflow lock since this may trigger MPTCP to send,
1613 * possibly on a different subflow. An extra reference has
1614 * been held on the subflow by the MPTCP thread before coming
1615 * here, so we can be sure that it won't go away, in the event
1616 * the MP socket lock gets released.
1619 mptcp_input(mpte
, m
);
1625 * Subflow socket write upcall.
1627 * Called when the associated subflow socket posted a read event. The subflow
1628 * socket lock has been released prior to invoking the callback. Note that the
1629 * upcall may occur synchronously as a result of MPTCP performing an action on
1630 * it, or asynchronously as a result of an event happening at the subflow layer.
1631 * Therefore, to maintain lock ordering, the only lock that can be acquired
1632 * here is the thread lock, for signalling purposes.
/*
 * NOTE(review): lossy extraction — statements split across lines, stray
 * original line numbers embedded, return type/braces elided. Comments
 * annotate intent only; code tokens are untouched.
 *
 * Subflow socket write upcall: runs when the subflow socket posts a write
 * event. As with the read upcall, lock ordering permits only the MPTCP
 * thread lock here (see header comment at orig. lines 1631-1632).
 */
1635 mptcp_subflow_wupcall(struct socket
*so
, void *arg
, int waitf
)
1637 #pragma unused(so, waitf)
/* Recover subflow and owning MPTCP session from the upcall argument. */
1638 struct mptsub
*mpts
= arg
;
1639 struct mptses
*mpte
= mpts
->mpts_mpte
;
1642 * mpte should never be NULL except in a race with
1643 * mptcp_subflow_del which doesn't hold socket lock across critical
1644 * section. This upcall is made after releasing the socket lock.
1645 * Interleaving of socket operations becomes possible therefore.
/* Signal the MPTCP worker thread (which drives mptcp_subflow_output). */
1650 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1651 mptcp_thread_signal_locked(mpte
);
1652 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1656 * Subflow socket output.
1658 * Called for sending data from MPTCP to the underlying subflow socket.
1661 mptcp_subflow_output(struct mptses
*mpte
, struct mptsub
*mpts
)
1663 struct socket
*mp_so
, *so
;
1664 size_t sb_cc
= 0, tot_sent
= 0;
1667 u_int64_t mpt_dsn
= 0;
1668 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1669 struct mbuf
*mpt_mbuf
= NULL
;
1671 struct mbuf
*head
, *tail
;
1673 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1674 MPTS_LOCK_ASSERT_HELD(mpts
);
1675 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1676 so
= mpts
->mpts_socket
;
1678 DTRACE_MPTCP2(subflow__output
, struct mptses
*, mpte
,
1679 struct mptsub
*, mpts
);
1681 /* subflow socket is suspended? */
1682 if (mpts
->mpts_flags
& MPTSF_SUSPENDED
) {
1683 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx cid %d is flow "
1684 "controlled\n", __func__
,
1685 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
));
1689 /* subflow socket is not MPTCP capable? */
1690 if (!(mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) &&
1691 !(mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1692 !(mpts
->mpts_flags
& MPTSF_FASTJ_SEND
)) {
1693 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx cid %d not "
1694 "MPTCP capable\n", __func__
,
1695 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
));
1699 /* Remove Addr Option is not sent reliably as per I-D */
1700 if (mpte
->mpte_flags
& MPTE_SND_REM_ADDR
) {
1701 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
1702 tp
->t_rem_aid
= mpte
->mpte_lost_aid
;
1703 if (mptcp_remaddr_enable
)
1704 tp
->t_mpflags
|= TMPF_SND_REM_ADDR
;
1705 mpte
->mpte_flags
&= ~MPTE_SND_REM_ADDR
;
1709 * The mbuf chains containing the metadata (as well as pointing to
1710 * the user data sitting at the MPTCP output queue) would then be
1711 * sent down to the subflow socket.
1713 * Some notes on data sequencing:
1715 * a. Each mbuf must be a M_PKTHDR.
1716 * b. MPTCP metadata is stored in the mptcp_pktinfo structure
1717 * in the mbuf pkthdr structure.
1718 * c. Each mbuf containing the MPTCP metadata must have its
1719 * pkt_flags marked with the PKTF_MPTCP flag.
1722 /* First, drop acknowledged data */
1723 sb_mb
= mp_so
->so_snd
.sb_mb
;
1724 if (sb_mb
== NULL
) {
1728 VERIFY(sb_mb
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
1731 while (mpt_mbuf
&& mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) {
1732 mpt_mbuf
= mpt_mbuf
->m_next
;
1734 if (mpt_mbuf
&& (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
)) {
1735 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1741 if (MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_snduna
)) {
1743 len
= mp_tp
->mpt_snduna
- mpt_dsn
;
1744 sbdrop(&mp_so
->so_snd
, (int)len
);
1749 * In degraded mode, we don't receive data acks, so force free
1750 * mbufs less than snd_nxt
1752 if (mp_so
->so_snd
.sb_mb
== NULL
) {
1757 mpt_dsn
= mp_so
->so_snd
.sb_mb
->m_pkthdr
.mp_dsn
;
1758 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1759 (mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
) &&
1760 MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_sndnxt
)) {
1762 len
= mp_tp
->mpt_sndnxt
- mpt_dsn
;
1763 sbdrop(&mp_so
->so_snd
, (int)len
);
1764 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndnxt
;
1767 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1768 !(mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
)) {
1769 mp_tp
->mpt_flags
|= MPTCPF_POST_FALLBACK_SYNC
;
1770 so
->so_flags1
|= SOF1_POST_FALLBACK_SYNC
;
1771 if (mp_tp
->mpt_flags
& MPTCPF_RECVD_MPFAIL
)
1772 mpts
->mpts_sndnxt
= mp_tp
->mpt_dsn_at_csum_fail
;
1776 * Adjust the subflow's notion of next byte to send based on
1777 * the last unacknowledged byte
1779 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_snduna
)) {
1780 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1782 * With FastJoin, a write before the fastjoin event will use
1783 * an uninitialized relative sequence number.
1785 if (mpts
->mpts_rel_seq
== 0)
1786 mpts
->mpts_rel_seq
= 1;
1790 * Adjust the top level notion of next byte used for retransmissions
1793 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mp_tp
->mpt_snduna
)) {
1794 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
1798 /* Now determine the offset from which to start transmitting data */
1799 sb_mb
= mp_so
->so_snd
.sb_mb
;
1800 sb_cc
= mp_so
->so_snd
.sb_cc
;
1801 if (sb_mb
== NULL
) {
1805 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_sndmax
)) {
1806 off
= mpts
->mpts_sndnxt
- mp_tp
->mpt_snduna
;
1807 sb_cc
-= (size_t)off
;
1815 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1817 while (mpt_mbuf
&& ((mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) ||
1818 (mpt_mbuf
->m_pkthdr
.mp_rlen
<= (u_int32_t
)off
))) {
1819 off
-= mpt_mbuf
->m_pkthdr
.mp_rlen
;
1820 mpt_mbuf
= mpt_mbuf
->m_next
;
1821 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1823 if ((mpts
->mpts_connid
== 2) || (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
))
1824 mptcplog2((LOG_INFO
, "%s: snduna = %llu off = %lld id = %d"
1827 mp_tp
->mpt_snduna
, off
, mpts
->mpts_connid
,
1828 mpts
->mpts_sndnxt
));
1830 VERIFY(mpt_mbuf
&& (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
1834 while (tot_sent
< sb_cc
) {
1838 mlen
= mpt_mbuf
->m_pkthdr
.mp_rlen
;
1844 panic("%s: unexpected %lu %lu \n", __func__
,
1848 m
= m_copym_mode(mpt_mbuf
, (int)off
, mlen
, M_DONTWAIT
,
1849 M_COPYM_MUST_COPY_HDR
);
1855 /* Create a DSN mapping for the data (m_copym does it) */
1856 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1857 VERIFY(m
->m_flags
& M_PKTHDR
);
1858 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
1859 m
->m_pkthdr
.pkt_flags
&= ~PKTF_MPSO
;
1860 m
->m_pkthdr
.mp_dsn
= mpt_dsn
+ off
;
1861 m
->m_pkthdr
.mp_rseq
= mpts
->mpts_rel_seq
;
1862 m
->m_pkthdr
.mp_rlen
= mlen
;
1863 mpts
->mpts_rel_seq
+= mlen
;
1864 m
->m_pkthdr
.len
= mlen
;
1873 /* last contiguous mapping is stored for error cases */
1874 if (mpts
->mpts_lastmap
.mptsl_dsn
+
1875 mpts
->mpts_lastmap
.mptsl_len
== mpt_dsn
) {
1876 mpts
->mpts_lastmap
.mptsl_len
+= tot_sent
;
1877 } else if (MPTCP_SEQ_LT((mpts
->mpts_lastmap
.mptsl_dsn
+
1878 mpts
->mpts_lastmap
.mptsl_len
), mpt_dsn
)) {
1879 if (m
->m_pkthdr
.mp_dsn
== 0)
1880 panic("%s %llu", __func__
, mpt_dsn
);
1881 mpts
->mpts_lastmap
.mptsl_dsn
= m
->m_pkthdr
.mp_dsn
;
1882 mpts
->mpts_lastmap
.mptsl_sseq
= m
->m_pkthdr
.mp_rseq
;
1883 mpts
->mpts_lastmap
.mptsl_len
= m
->m_pkthdr
.mp_rlen
;
1888 mpt_mbuf
= mpt_mbuf
->m_next
;
1893 if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
1894 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
1895 tp
->t_mpflags
|= TMPF_FASTJOIN_SEND
;
1898 error
= sock_sendmbuf(so
, NULL
, head
, 0, NULL
);
1900 DTRACE_MPTCP7(send
, struct mbuf
*, head
, struct socket
*, so
,
1901 struct sockbuf
*, &so
->so_rcv
,
1902 struct sockbuf
*, &so
->so_snd
,
1903 struct mptses
*, mpte
, struct mptsub
*, mpts
,
1908 mpts
->mpts_sndnxt
+= tot_sent
;
1910 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mpts
->mpts_sndnxt
)) {
1911 if (MPTCP_DATASEQ_HIGH32(mpts
->mpts_sndnxt
) >
1912 MPTCP_DATASEQ_HIGH32(mp_tp
->mpt_sndnxt
))
1913 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITDSN
;
1914 mp_tp
->mpt_sndnxt
= mpts
->mpts_sndnxt
;
1916 mptcp_cancel_timer(mp_tp
, MPTT_REXMT
);
1919 /* Send once in SYN_SENT state to avoid sending SYN spam */
1920 if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
1921 so
->so_flags
&= ~SOF_MPTCP_FASTJOIN
;
1922 mpts
->mpts_flags
&= ~MPTSF_FASTJ_SEND
;
1925 if ((mpts
->mpts_connid
>= 2) ||
1926 (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
))
1927 mptcplog2((LOG_DEBUG
, "%s: cid %d wrote %d %d\n",
1928 __func__
, mpts
->mpts_connid
, (int)tot_sent
,
1931 mptcplog((LOG_ERR
, "MPTCP ERROR %s: cid %d error %d len %zd\n",
1932 __func__
, mpts
->mpts_connid
, error
, tot_sent
));
1939 * Subflow socket control event upcall.
1941 * Called when the associated subflow socket posted one or more control events.
1942 * The subflow socket lock has been released prior to invoking the callback.
1943 * Note that the upcall may occur synchronously as a result of MPTCP performing
1944 * an action on it, or asynchronously as a result of an event happening at the
1945 * subflow layer. Therefore, to maintain lock ordering, the only lock that can
1946 * be acquired here is the thread lock, for signalling purposes.
/*
 * NOTE(review): lossy extraction — statements split across lines, stray
 * original line numbers embedded, return type/braces elided. Comments
 * annotate intent only; code tokens are untouched.
 *
 * Subflow socket control-event upcall: called with a bitmask of
 * SO_FILT_HINT_* events (the set registered via sock_catchevents() in
 * mptcp_subflow_add). Unlike the read/write upcalls, this may also be
 * invoked directly (e.g. from mptcp_subflow_disconnect) to synthesize
 * an event such as SO_FILT_HINT_DISCONNECTED.
 */
1949 mptcp_subflow_eupcall(struct socket
*so
, void *arg
, uint32_t events
)
1952 struct mptsub
*mpts
= arg
;
1953 struct mptses
*mpte
= mpts
->mpts_mpte
;
/* Callers guarantee a live session here (contrast the r/w upcalls,
 * which tolerate a NULL mpte race). */
1955 VERIFY(mpte
!= NULL
);
/*
 * Record the pending events atomically in mpts_evctl (consumed and
 * cleared by mptcp_subflow_events), then wake the MPTCP thread.
 * Only the thread lock is taken, preserving lock ordering.
 */
1957 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1958 atomic_bitset_32(&mpts
->mpts_evctl
, events
);
1959 mptcp_thread_signal_locked(mpte
);
1960 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1964 * Subflow socket control events.
1966 * Called for handling events related to the underlying subflow socket.
1969 mptcp_subflow_events(struct mptses
*mpte
, struct mptsub
*mpts
)
1971 uint32_t events
, save_events
;
1972 ev_ret_t ret
= MPTS_EVRET_OK
;
1974 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1975 MPTS_LOCK_ASSERT_HELD(mpts
);
1977 /* bail if there's nothing to process */
1978 if ((events
= mpts
->mpts_evctl
) == 0)
1981 if (events
& (SO_FILT_HINT_CONNRESET
|SO_FILT_HINT_MUSTRST
|
1982 SO_FILT_HINT_CANTRCVMORE
|SO_FILT_HINT_CANTSENDMORE
|
1983 SO_FILT_HINT_TIMEOUT
|SO_FILT_HINT_NOSRCADDR
|
1984 SO_FILT_HINT_IFDENIED
|SO_FILT_HINT_SUSPEND
|
1985 SO_FILT_HINT_DISCONNECTED
)) {
1986 events
|= SO_FILT_HINT_MPFAILOVER
;
1989 save_events
= events
;
1991 DTRACE_MPTCP3(subflow__events
, struct mptses
*, mpte
,
1992 struct mptsub
*, mpts
, uint32_t, events
);
1994 mptcplog2((LOG_DEBUG
, "%s: cid %d events=%b\n", __func__
,
1995 mpts
->mpts_connid
, events
, SO_FILT_HINT_BITS
));
1997 if ((events
& SO_FILT_HINT_MPCANTRCVMORE
) && (ret
>= MPTS_EVRET_OK
)) {
1998 ev_ret_t error
= mptcp_subflow_mpcantrcvmore_ev(mpte
, mpts
);
1999 events
&= ~SO_FILT_HINT_MPCANTRCVMORE
;
2000 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2002 if ((events
& SO_FILT_HINT_MPFAILOVER
) && (ret
>= MPTS_EVRET_OK
)) {
2003 ev_ret_t error
= mptcp_subflow_failover_ev(mpte
, mpts
);
2004 events
&= ~SO_FILT_HINT_MPFAILOVER
;
2005 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2007 if ((events
& SO_FILT_HINT_CONNRESET
) && (ret
>= MPTS_EVRET_OK
)) {
2008 ev_ret_t error
= mptcp_subflow_connreset_ev(mpte
, mpts
);
2009 events
&= ~SO_FILT_HINT_CONNRESET
;
2010 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2012 if ((events
& SO_FILT_HINT_MUSTRST
) && (ret
>= MPTS_EVRET_OK
)) {
2013 ev_ret_t error
= mptcp_subflow_mustrst_ev(mpte
, mpts
);
2014 events
&= ~SO_FILT_HINT_MUSTRST
;
2015 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2017 if ((events
& SO_FILT_HINT_CANTRCVMORE
) && (ret
>= MPTS_EVRET_OK
)) {
2018 ev_ret_t error
= mptcp_subflow_cantrcvmore_ev(mpte
, mpts
);
2019 events
&= ~SO_FILT_HINT_CANTRCVMORE
;
2020 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2022 if ((events
& SO_FILT_HINT_CANTSENDMORE
) && (ret
>= MPTS_EVRET_OK
)) {
2023 ev_ret_t error
= mptcp_subflow_cantsendmore_ev(mpte
, mpts
);
2024 events
&= ~SO_FILT_HINT_CANTSENDMORE
;
2025 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2027 if ((events
& SO_FILT_HINT_TIMEOUT
) && (ret
>= MPTS_EVRET_OK
)) {
2028 ev_ret_t error
= mptcp_subflow_timeout_ev(mpte
, mpts
);
2029 events
&= ~SO_FILT_HINT_TIMEOUT
;
2030 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2032 if ((events
& SO_FILT_HINT_NOSRCADDR
) && (ret
>= MPTS_EVRET_OK
)) {
2033 ev_ret_t error
= mptcp_subflow_nosrcaddr_ev(mpte
, mpts
);
2034 events
&= ~SO_FILT_HINT_NOSRCADDR
;
2035 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2037 if ((events
& SO_FILT_HINT_IFDENIED
) && (ret
>= MPTS_EVRET_OK
)) {
2038 ev_ret_t error
= mptcp_subflow_ifdenied_ev(mpte
, mpts
);
2039 events
&= ~SO_FILT_HINT_IFDENIED
;
2040 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2042 if ((events
& SO_FILT_HINT_SUSPEND
) && (ret
>= MPTS_EVRET_OK
)) {
2043 ev_ret_t error
= mptcp_subflow_suspend_ev(mpte
, mpts
);
2044 events
&= ~SO_FILT_HINT_SUSPEND
;
2045 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2047 if ((events
& SO_FILT_HINT_RESUME
) && (ret
>= MPTS_EVRET_OK
)) {
2048 ev_ret_t error
= mptcp_subflow_resume_ev(mpte
, mpts
);
2049 events
&= ~SO_FILT_HINT_RESUME
;
2050 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2052 if ((events
& SO_FILT_HINT_CONNECTED
) && (ret
>= MPTS_EVRET_OK
)) {
2053 ev_ret_t error
= mptcp_subflow_connected_ev(mpte
, mpts
);
2054 events
&= ~SO_FILT_HINT_CONNECTED
;
2055 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2057 if ((events
& SO_FILT_HINT_MPSTATUS
) && (ret
>= MPTS_EVRET_OK
)) {
2058 ev_ret_t error
= mptcp_subflow_mpstatus_ev(mpte
, mpts
);
2059 events
&= ~SO_FILT_HINT_MPSTATUS
;
2060 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2062 if ((events
& SO_FILT_HINT_DELETEOK
) && (ret
>= MPTS_EVRET_OK
)) {
2063 ev_ret_t error
= mptcp_deleteok_ev(mpte
, mpts
);
2064 events
&= ~SO_FILT_HINT_DELETEOK
;
2065 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2067 if ((events
& SO_FILT_HINT_DISCONNECTED
) && (ret
>= MPTS_EVRET_OK
)) {
2068 ev_ret_t error
= mptcp_subflow_disconnected_ev(mpte
, mpts
);
2069 events
&= ~SO_FILT_HINT_DISCONNECTED
;
2070 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2072 if ((events
& SO_FILT_HINT_MPFASTJ
) && (ret
>= MPTS_EVRET_OK
)) {
2073 ev_ret_t error
= mptcp_fastjoin_ev(mpte
, mpts
);
2074 events
&= ~SO_FILT_HINT_MPFASTJ
;
2075 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2079 * We should be getting only events specified via sock_catchevents(),
2080 * so loudly complain if we have any unprocessed one(s).
2082 if (events
!= 0 || ret
< MPTS_EVRET_OK
) {
2083 mptcplog((LOG_ERR
, "%s%s: cid %d evret %s (%d)"
2084 " unhandled events=%b\n",
2085 (events
!= 0) ? "MPTCP_ERROR " : "",
2086 __func__
, mpts
->mpts_connid
,
2087 mptcp_evret2str(ret
), ret
, events
, SO_FILT_HINT_BITS
));
2090 /* clear the ones we've processed */
2091 atomic_bitclear_32(&mpts
->mpts_evctl
, save_events
);
2097 * Handle SO_FILT_HINT_CONNRESET subflow socket event.
/*
 * Handle SO_FILT_HINT_CONNRESET subflow socket event: the subflow's TCP
 * connection received an RST from the peer.
 */
static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	boolean_t linger;

	MPTE_LOCK_ASSERT_HELD(mpte);	/* same as MP socket lock */
	MPTS_LOCK_ASSERT_HELD(mpts);
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mpte->mpte_mppcb->mpp_socket;
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/* keep the subflow around unless it is deletable or the PCB is clearing */
	linger = (!(mpts->mpts_flags & MPTSF_DELETEOK) &&
	    !(mp_so->so_flags & SOF_PCBCLEARING));

	mptcplog((LOG_DEBUG, "%s: cid %d [linger %s]\n", __func__,
	    mpts->mpts_connid, (linger ? "YES" : "NO")));

	/*
	 * We got a TCP RST for this subflow connection.
	 *
	 * Right now, we simply propagate ECONNREFUSED to the MPTCP socket
	 * client if the MPTCP connection has not been established or
	 * if the connection has only one subflow and is a connection being
	 * resumed. Otherwise we close the socket.
	 */
	mptcp_subflow_disconnect(mpte, mpts, !linger);

	MPT_LOCK(mp_tp);
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mpts->mpts_soerror = mp_so->so_error = ECONNREFUSED;
	} else if (mpte->mpte_nummpcapflows < 1) {
		mpts->mpts_soerror = mp_so->so_error = ECONNRESET;
		/* drop locks before delivering the event upward */
		MPT_UNLOCK(mp_tp);
		MPTS_UNLOCK(mpts);
		soevent(mp_so, SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET);
		MPTS_LOCK(mpts);
		MPT_LOCK(mp_tp);
	}
	MPT_UNLOCK(mp_tp);

	/*
	 * Keep the subflow socket around, unless the MPTCP socket has
	 * been detached or the subflow has been disconnected explicitly,
	 * in which case it should be deleted right away.
	 */
	return (linger ? MPTS_EVRET_OK : MPTS_EVRET_DELETE);
}
2151 * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event.
2154 mptcp_subflow_cantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2158 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2159 MPTS_LOCK_ASSERT_HELD(mpts
);
2161 so
= mpts
->mpts_socket
;
2163 mptcplog((LOG_DEBUG
, "%s: cid %d\n", __func__
, mpts
->mpts_connid
));
2166 * We got a FIN for this subflow connection. This subflow socket
2167 * is no longer available for receiving data;
2168 * The FIN may arrive with data. The data is handed up to the
2169 * mptcp socket and the subflow is disconnected.
2172 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2176 * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event.
2179 mptcp_subflow_cantsendmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2183 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2184 MPTS_LOCK_ASSERT_HELD(mpts
);
2186 so
= mpts
->mpts_socket
;
2188 mptcplog((LOG_DEBUG
, "%s: cid %d\n", __func__
, mpts
->mpts_connid
));
2189 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2193 * Handle SO_FILT_HINT_TIMEOUT subflow socket event.
2196 mptcp_subflow_timeout_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2198 struct socket
*mp_so
, *so
;
2199 struct mptcb
*mp_tp
;
2202 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2203 MPTS_LOCK_ASSERT_HELD(mpts
);
2204 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2205 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2206 mp_tp
= mpte
->mpte_mptcb
;
2207 so
= mpts
->mpts_socket
;
2209 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2210 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2212 mptcplog((LOG_NOTICE
, "%s: cid %d [linger %s]\n", __func__
,
2213 mpts
->mpts_connid
, (linger
? "YES" : "NO")));
2215 if (mpts
->mpts_soerror
== 0)
2216 mpts
->mpts_soerror
= ETIMEDOUT
;
2219 * The subflow connection has timed out.
2221 * Right now, we simply propagate ETIMEDOUT to the MPTCP socket
2222 * client if the MPTCP connection has not been established. Otherwise
2225 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2228 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2229 mp_so
->so_error
= ETIMEDOUT
;
2234 * Keep the subflow socket around, unless the MPTCP socket has
2235 * been detached or the subflow has been disconnected explicitly,
2236 * in which case it should be deleted right away.
2238 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2242 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
2245 mptcp_subflow_nosrcaddr_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2247 struct socket
*mp_so
, *so
;
2248 struct mptcb
*mp_tp
;
2250 struct tcpcb
*tp
= NULL
;
2252 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2253 MPTS_LOCK_ASSERT_HELD(mpts
);
2255 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2256 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2257 mp_tp
= mpte
->mpte_mptcb
;
2258 so
= mpts
->mpts_socket
;
2260 /* Not grabbing socket lock as t_local_aid is write once only */
2261 tp
= intotcpcb(sotoinpcb(so
));
2263 * This overwrites any previous mpte_lost_aid to avoid storing
2264 * too much state when the typical case has only two subflows.
2266 mpte
->mpte_flags
|= MPTE_SND_REM_ADDR
;
2267 mpte
->mpte_lost_aid
= tp
->t_local_aid
;
2269 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2270 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2272 mptcplog((LOG_DEBUG
, "%s: cid %d [linger %s]\n", __func__
,
2273 mpts
->mpts_connid
, (linger
? "YES" : "NO")));
2275 if (mpts
->mpts_soerror
== 0)
2276 mpts
->mpts_soerror
= EADDRNOTAVAIL
;
2279 * The subflow connection has lost its source address.
2281 * Right now, we simply propagate EADDRNOTAVAIL to the MPTCP socket
2282 * client if the MPTCP connection has not been established. If it
2283 * has been established with one subflow , we keep the MPTCP
2284 * connection valid without any subflows till closed by application.
2285 * This lets tcp connection manager decide whether to close this or
2286 * not as it reacts to reachability changes too.
2288 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2291 if ((mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) &&
2292 (mp_so
->so_flags
& SOF_NOADDRAVAIL
)) {
2293 mp_so
->so_error
= EADDRNOTAVAIL
;
2298 * Keep the subflow socket around, unless the MPTCP socket has
2299 * been detached or the subflow has been disconnected explicitly,
2300 * in which case it should be deleted right away.
2302 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2306 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
2307 * indicates that the remote side sent a Data FIN
2310 mptcp_subflow_mpcantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2312 struct socket
*so
, *mp_so
;
2313 struct mptcb
*mp_tp
;
2315 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2316 MPTS_LOCK_ASSERT_HELD(mpts
);
2317 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2318 so
= mpts
->mpts_socket
;
2319 mp_tp
= mpte
->mpte_mptcb
;
2321 mptcplog((LOG_DEBUG
, "%s: cid %d\n", __func__
, mpts
->mpts_connid
));
2324 * We got a Data FIN for the MPTCP connection.
2325 * The FIN may arrive with data. The data is handed up to the
2326 * mptcp socket and the user is notified so that it may close
2327 * the socket if needed.
2330 if (mp_tp
->mpt_state
== MPTCPS_CLOSE_WAIT
) {
2333 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTRCVMORE
);
2338 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2342 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
2345 mptcp_subflow_failover_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2347 struct mptsub
*mpts_alt
= NULL
;
2348 struct socket
*so
= NULL
;
2349 struct socket
*mp_so
;
2350 int altpath_exists
= 0;
2352 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2353 MPTS_LOCK_ASSERT_HELD(mpts
);
2354 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2355 mptcplog2((LOG_NOTICE
, "%s: mp_so 0x%llx\n", __func__
,
2356 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)));
2359 mpts_alt
= mptcp_get_subflow(mpte
, mpts
);
2362 * If there is no alternate eligible subflow, ignore the
2365 if (mpts_alt
== NULL
) {
2366 mptcplog2((LOG_WARNING
, "%s: no alternate path\n", __func__
));
2367 if (mptcp_delayed_subf_start
) {
2368 mpts_alt
= mptcp_get_pending_subflow(mpte
, mpts
);
2369 if (mpts_alt
!= NULL
) {
2370 MPTS_LOCK(mpts_alt
);
2371 (void) mptcp_subflow_soconnectx(mpte
,
2373 MPTS_UNLOCK(mpts_alt
);
2379 MPTS_LOCK(mpts_alt
);
2381 so
= mpts_alt
->mpts_socket
;
2382 if (mpts_alt
->mpts_flags
& MPTSF_FAILINGOVER
) {
2384 /* All data acknowledged and no RTT spike */
2385 if ((so
->so_snd
.sb_cc
== 0) &&
2386 (mptcp_no_rto_spike(so
))) {
2387 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2388 mpts_alt
->mpts_flags
&= ~MPTSF_FAILINGOVER
;
2390 /* no alternate path available */
2393 socket_unlock(so
, 1);
2395 if (altpath_exists
) {
2396 mptcplog2((LOG_INFO
, "%s: cid = %d\n",
2397 __func__
, mpts_alt
->mpts_connid
));
2398 mpts_alt
->mpts_flags
|= MPTSF_ACTIVE
;
2399 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
2400 /* Bring the subflow's notion of snd_nxt into the send window */
2402 mpts_alt
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2404 mpte
->mpte_active_sub
= mpts_alt
;
2407 socket_unlock(so
, 1);
2409 MPTS_UNLOCK(mpts_alt
);
2411 if (altpath_exists
) {
2413 SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
2414 mptcplog((LOG_NOTICE
, "%s: mp_so 0x%llx switched from "
2415 "%d to %d\n", __func__
,
2416 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
2417 mpts
->mpts_connid
, mpts_alt
->mpts_connid
));
2418 tcpstat
.tcps_mp_switches
++;
2422 if (altpath_exists
) {
2423 mpts
->mpts_flags
|= MPTSF_FAILINGOVER
;
2424 mpts
->mpts_flags
&= ~MPTSF_ACTIVE
;
2426 mptcplog2((LOG_INFO
, "%s: no alt cid = %d\n",
2427 __func__
, mpts
->mpts_connid
));
2429 so
= mpts
->mpts_socket
;
2431 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2432 socket_unlock(so
, 1);
2434 MPTS_LOCK_ASSERT_HELD(mpts
);
2435 return (MPTS_EVRET_OK
);
2439 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
2442 mptcp_subflow_ifdenied_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2444 struct socket
*mp_so
, *so
;
2445 struct mptcb
*mp_tp
;
2448 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2449 MPTS_LOCK_ASSERT_HELD(mpts
);
2450 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2451 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2452 mp_tp
= mpte
->mpte_mptcb
;
2453 so
= mpts
->mpts_socket
;
2455 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2456 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2458 mptcplog((LOG_DEBUG
, "%s: cid %d [linger %s]\n", __func__
,
2459 mpts
->mpts_connid
, (linger
? "YES" : "NO")));
2461 if (mpts
->mpts_soerror
== 0)
2462 mpts
->mpts_soerror
= EHOSTUNREACH
;
2465 * The subflow connection cannot use the outgoing interface.
2467 * Right now, we simply propagate EHOSTUNREACH to the MPTCP socket
2468 * client if the MPTCP connection has not been established. If it
2469 * has been established, let the upper layer call disconnectx.
2471 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2474 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_IFDENIED
);
2477 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2478 mp_so
->so_error
= EHOSTUNREACH
;
2484 * Keep the subflow socket around, unless the MPTCP socket has
2485 * been detached or the subflow has been disconnected explicitly,
2486 * in which case it should be deleted right away.
2488 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2492 * Handle SO_FILT_HINT_SUSPEND subflow socket event.
2495 mptcp_subflow_suspend_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2499 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2500 MPTS_LOCK_ASSERT_HELD(mpts
);
2502 so
= mpts
->mpts_socket
;
2504 /* the subflow connection is being flow controlled */
2505 mpts
->mpts_flags
|= MPTSF_SUSPENDED
;
2507 mptcplog((LOG_DEBUG
, "%s: cid %d\n", __func__
,
2508 mpts
->mpts_connid
));
2510 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2514 * Handle SO_FILT_HINT_RESUME subflow socket event.
2517 mptcp_subflow_resume_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2521 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2522 MPTS_LOCK_ASSERT_HELD(mpts
);
2524 so
= mpts
->mpts_socket
;
2526 /* the subflow connection is no longer flow controlled */
2527 mpts
->mpts_flags
&= ~MPTSF_SUSPENDED
;
2529 mptcplog((LOG_DEBUG
, "%s: cid %d\n", __func__
, mpts
->mpts_connid
));
2531 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2535 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
2538 mptcp_subflow_connected_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2540 char buf0
[MAX_IPv6_STR_LEN
], buf1
[MAX_IPv6_STR_LEN
];
2541 struct sockaddr_entry
*src_se
, *dst_se
;
2542 struct sockaddr_storage src
;
2543 struct socket
*mp_so
, *so
;
2544 struct mptcb
*mp_tp
;
2545 struct ifnet
*outifp
;
2547 boolean_t mpok
= FALSE
;
2549 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2550 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2551 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2552 mp_tp
= mpte
->mpte_mptcb
;
2554 MPTS_LOCK_ASSERT_HELD(mpts
);
2555 so
= mpts
->mpts_socket
;
2556 af
= mpts
->mpts_family
;
2558 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
2559 return (MPTS_EVRET_OK
);
2561 if ((mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
2562 (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
2564 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
2565 (so
->so_state
& SS_ISCONNECTED
)) {
2566 mptcplog((LOG_DEBUG
, "%s: cid %d disconnect before tcp connect\n",
2567 __func__
, mpts
->mpts_connid
));
2568 (void) soshutdownlock(so
, SHUT_RD
);
2569 (void) soshutdownlock(so
, SHUT_WR
);
2570 (void) sodisconnectlocked(so
);
2572 socket_unlock(so
, 0);
2573 return (MPTS_EVRET_OK
);
2577 * The subflow connection has been connected. Find out whether it
2578 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
2580 * a. If MPTCP connection is not yet established, then this must be
2581 * the first subflow connection. If MPTCP failed to negotiate,
2582 * indicate to the MPTCP socket client via EPROTO, that the
2583 * underlying TCP connection may be peeled off via peeloff(2).
2584 * Otherwise, mark the MPTCP socket as connected.
2586 * b. If MPTCP connection has been established, then this must be
2587 * one of the subsequent subflow connections. If MPTCP failed
2588 * to negotiate, disconnect the connection since peeloff(2)
2589 * is no longer possible.
2591 * Right now, we simply unblock any waiters at the MPTCP socket layer
2592 * if the MPTCP connection has not been established.
2596 if (so
->so_state
& SS_ISDISCONNECTED
) {
2598 * With MPTCP joins, a connection is connected at the subflow
2599 * level, but the 4th ACK from the server elevates the MPTCP
2600 * subflow to connected state. So there is a small window
2601 * where the subflow could get disconnected before the
2602 * connected event is processed.
2604 socket_unlock(so
, 0);
2605 return (MPTS_EVRET_OK
);
2608 mpts
->mpts_soerror
= 0;
2609 mpts
->mpts_flags
&= ~MPTSF_CONNECTING
;
2610 mpts
->mpts_flags
|= MPTSF_CONNECTED
;
2611 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_TRUE
)
2612 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
2614 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
2615 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
2616 VERIFY(dst_se
!= NULL
&& dst_se
->se_addr
!= NULL
&&
2617 dst_se
->se_addr
->sa_family
== af
);
2619 VERIFY(mpts
->mpts_src_sl
!= NULL
);
2620 src_se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
2621 VERIFY(src_se
!= NULL
&& src_se
->se_addr
!= NULL
&&
2622 src_se
->se_addr
->sa_family
== af
);
2624 /* get/check source IP address */
2627 error
= in_getsockaddr_s(so
, &src
);
2629 struct sockaddr_in
*ms
= SIN(src_se
->se_addr
);
2630 struct sockaddr_in
*s
= SIN(&src
);
2632 VERIFY(s
->sin_len
== ms
->sin_len
);
2633 VERIFY(ms
->sin_family
== AF_INET
);
2635 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2636 bcmp(&ms
->sin_addr
, &s
->sin_addr
,
2637 sizeof (ms
->sin_addr
)) != 0) {
2638 mptcplog((LOG_ERR
, "%s: cid %d local "
2639 "address %s (expected %s)\n", __func__
,
2640 mpts
->mpts_connid
, inet_ntop(AF_INET
,
2641 (void *)&s
->sin_addr
.s_addr
, buf0
,
2642 sizeof (buf0
)), inet_ntop(AF_INET
,
2643 (void *)&ms
->sin_addr
.s_addr
, buf1
,
2646 bcopy(s
, ms
, sizeof (*s
));
2652 error
= in6_getsockaddr_s(so
, &src
);
2654 struct sockaddr_in6
*ms
= SIN6(src_se
->se_addr
);
2655 struct sockaddr_in6
*s
= SIN6(&src
);
2657 VERIFY(s
->sin6_len
== ms
->sin6_len
);
2658 VERIFY(ms
->sin6_family
== AF_INET6
);
2660 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2661 bcmp(&ms
->sin6_addr
, &s
->sin6_addr
,
2662 sizeof (ms
->sin6_addr
)) != 0) {
2663 mptcplog((LOG_ERR
, "%s: cid %d local "
2664 "address %s (expected %s)\n", __func__
,
2665 mpts
->mpts_connid
, inet_ntop(AF_INET6
,
2666 (void *)&s
->sin6_addr
, buf0
,
2667 sizeof (buf0
)), inet_ntop(AF_INET6
,
2668 (void *)&ms
->sin6_addr
, buf1
,
2671 bcopy(s
, ms
, sizeof (*s
));
2682 mptcplog((LOG_ERR
, "%s: cid %d getsockaddr failed (%d)\n",
2683 __func__
, mpts
->mpts_connid
, error
));
2686 /* get/verify the outbound interface */
2687 outifp
= sotoinpcb(so
)->inp_last_outifp
; /* could be NULL */
2688 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
) {
2689 VERIFY(mpts
->mpts_outif
!= NULL
);
2690 if (mpts
->mpts_outif
!= outifp
) {
2691 mptcplog((LOG_ERR
, "%s: cid %d outif %s "
2692 "(expected %s)\n", __func__
, mpts
->mpts_connid
,
2693 ((outifp
!= NULL
) ? outifp
->if_xname
: "NULL"),
2694 mpts
->mpts_outif
->if_xname
));
2696 outifp
= mpts
->mpts_outif
;
2699 mpts
->mpts_outif
= outifp
;
2702 socket_unlock(so
, 0);
2704 mptcplog((LOG_DEBUG
, "%s: cid %d outif %s %s[%d] -> %s[%d] "
2705 "is %s\n", __func__
, mpts
->mpts_connid
, ((outifp
!= NULL
) ?
2706 outifp
->if_xname
: "NULL"), inet_ntop(af
, (af
== AF_INET
) ?
2707 (void *)&SIN(src_se
->se_addr
)->sin_addr
.s_addr
:
2708 (void *)&SIN6(src_se
->se_addr
)->sin6_addr
, buf0
, sizeof (buf0
)),
2709 ((af
== AF_INET
) ? ntohs(SIN(src_se
->se_addr
)->sin_port
) :
2710 ntohs(SIN6(src_se
->se_addr
)->sin6_port
)),
2711 inet_ntop(af
, ((af
== AF_INET
) ?
2712 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
2713 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
), buf1
, sizeof (buf1
)),
2714 ((af
== AF_INET
) ? ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
2715 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
2716 ((mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) ?
2717 "MPTCP capable" : "a regular TCP")));
2719 mpok
= (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
);
2722 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
2725 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2726 /* case (a) above */
2728 mp_tp
->mpt_flags
|= MPTCPF_PEEL_OFF
;
2729 (void) mptcp_drop(mpte
, mp_tp
, EPROTO
);
2732 if (mptcp_init_authparms(mp_tp
) != 0) {
2733 mp_tp
->mpt_flags
|= MPTCPF_PEEL_OFF
;
2734 (void) mptcp_drop(mpte
, mp_tp
, EPROTO
);
2738 mp_tp
->mpt_state
= MPTCPS_ESTABLISHED
;
2739 mpte
->mpte_associd
= mpts
->mpts_connid
;
2740 DTRACE_MPTCP2(state__change
,
2741 struct mptcb
*, mp_tp
,
2742 uint32_t, 0 /* event */);
2743 mptcp_init_statevars(mp_tp
);
2746 (void) mptcp_setconnorder(mpte
,
2747 mpts
->mpts_connid
, 1);
2748 soisconnected(mp_so
);
2753 /* Initialize the relative sequence number */
2754 mpts
->mpts_rel_seq
= 1;
2755 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2756 mpte
->mpte_nummpcapflows
++;
2757 MPT_LOCK_SPIN(mp_tp
);
2758 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2763 if (mptcp_rwnotify
&& (mpte
->mpte_nummpcapflows
== 0)) {
2764 /* Experimental code, disabled by default. */
2770 * In case of additional flows, the MPTCP socket is not
2771 * MPTSF_MP_CAPABLE until an ACK is received from server
2772 * for 3-way handshake. TCP would have guaranteed that this
2773 * is an MPTCP subflow.
2776 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2777 mpts
->mpts_flags
&= ~MPTSF_FASTJ_REQD
;
2778 mpte
->mpte_nummpcapflows
++;
2779 /* With Fastjoin, rel sequence will be nonzero */
2780 if (mpts
->mpts_rel_seq
== 0)
2781 mpts
->mpts_rel_seq
= 1;
2782 MPT_LOCK_SPIN(mp_tp
);
2783 /* With Fastjoin, sndnxt is updated before connected_ev */
2784 if (mpts
->mpts_sndnxt
== 0) {
2785 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2788 mptcp_output_needed(mpte
, mpts
);
2794 MPTS_LOCK_ASSERT_HELD(mpts
);
2796 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2800 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
2803 mptcp_subflow_disconnected_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2805 struct socket
*mp_so
, *so
;
2806 struct mptcb
*mp_tp
;
2809 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2810 MPTS_LOCK_ASSERT_HELD(mpts
);
2811 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2812 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2813 mp_tp
= mpte
->mpte_mptcb
;
2814 so
= mpts
->mpts_socket
;
2816 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2817 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2819 mptcplog2((LOG_DEBUG
, "%s: cid %d [linger %s]\n", __func__
,
2820 mpts
->mpts_connid
, (linger
? "YES" : "NO")));
2822 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
2823 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2826 * Clear flags that are used by getconninfo to return state.
2827 * Retain like MPTSF_DELETEOK for internal purposes.
2829 mpts
->mpts_flags
&= ~(MPTSF_CONNECTING
|MPTSF_CONNECT_PENDING
|
2830 MPTSF_CONNECTED
|MPTSF_DISCONNECTING
|MPTSF_PREFERRED
|
2831 MPTSF_MP_CAPABLE
|MPTSF_MP_READY
|MPTSF_MP_DEGRADED
|
2832 MPTSF_SUSPENDED
|MPTSF_ACTIVE
);
2833 mpts
->mpts_flags
|= MPTSF_DISCONNECTED
;
2836 * The subflow connection has been disconnected.
2838 * Right now, we simply unblock any waiters at the MPTCP socket layer
2839 * if the MPTCP connection has not been established.
2843 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
2845 if (mpts
->mpts_flags
& MPTSF_MPCAP_CTRSET
) {
2846 mpte
->mpte_nummpcapflows
--;
2847 if (mpte
->mpte_active_sub
== mpts
) {
2848 mpte
->mpte_active_sub
= NULL
;
2849 mptcplog((LOG_DEBUG
, "%s: resetting active subflow \n",
2852 mpts
->mpts_flags
&= ~MPTSF_MPCAP_CTRSET
;
2856 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2858 soisdisconnected(mp_so
);
2865 * The underlying subflow socket has been disconnected;
2866 * it is no longer useful to us. Keep the subflow socket
2867 * around, unless the MPTCP socket has been detached or
2868 * the subflow has been disconnected explicitly, in which
2869 * case it should be deleted right away.
2871 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2875 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
2878 mptcp_subflow_mpstatus_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2880 struct socket
*mp_so
, *so
;
2881 struct mptcb
*mp_tp
;
2882 ev_ret_t ret
= MPTS_EVRET_OK_UPDATE
;
2884 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2885 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2886 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2887 mp_tp
= mpte
->mpte_mptcb
;
2889 MPTS_LOCK_ASSERT_HELD(mpts
);
2890 so
= mpts
->mpts_socket
;
2895 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_TRUE
)
2896 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
2898 mpts
->mpts_flags
&= ~MPTSF_MP_CAPABLE
;
2900 if (sototcpcb(so
)->t_mpflags
& TMPF_TCP_FALLBACK
) {
2901 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
2903 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
2906 mpts
->mpts_flags
&= ~MPTSF_MP_DEGRADED
;
2908 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_READY
)
2909 mpts
->mpts_flags
|= MPTSF_MP_READY
;
2911 mpts
->mpts_flags
&= ~MPTSF_MP_READY
;
2913 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
2914 mp_tp
->mpt_flags
|= MPTCPF_FALLBACK_TO_TCP
;
2915 mp_tp
->mpt_flags
&= ~MPTCPF_JOIN_READY
;
2918 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
2919 VERIFY(!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
));
2920 ret
= MPTS_EVRET_DISCONNECT_FALLBACK
;
2921 } else if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
2922 mp_tp
->mpt_flags
|= MPTCPF_JOIN_READY
;
2923 ret
= MPTS_EVRET_CONNECT_PENDING
;
2926 mptcplog2((LOG_DEBUG
, "%s: mp_so 0x%llx mpt_flags=%b cid %d "
2927 "mptsf=%b\n", __func__
,
2928 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
2929 mp_tp
->mpt_flags
, MPTCPF_BITS
, mpts
->mpts_connid
,
2930 mpts
->mpts_flags
, MPTSF_BITS
));
2933 socket_unlock(so
, 0);
2938 * Handle SO_FILT_HINT_MUSTRST subflow socket event
2941 mptcp_subflow_mustrst_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
2943 struct socket
*mp_so
, *so
;
2944 struct mptcb
*mp_tp
;
2948 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2949 MPTS_LOCK_ASSERT_HELD(mpts
);
2950 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2951 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2952 mp_tp
= mpte
->mpte_mptcb
;
2953 so
= mpts
->mpts_socket
;
2955 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2956 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2958 if (mpts
->mpts_soerror
== 0)
2959 mpts
->mpts_soerror
= ECONNABORTED
;
2961 /* We got an invalid option or a fast close */
2963 struct tcptemp
*t_template
;
2964 struct inpcb
*inp
= sotoinpcb(so
);
2965 struct tcpcb
*tp
= NULL
;
2967 tp
= intotcpcb(inp
);
2968 so
->so_error
= ECONNABORTED
;
2970 t_template
= tcp_maketemplate(tp
);
2972 struct tcp_respond_args tra
;
2974 bzero(&tra
, sizeof(tra
));
2975 if (inp
->inp_flags
& INP_BOUND_IF
)
2976 tra
.ifscope
= inp
->inp_boundifp
->if_index
;
2978 tra
.ifscope
= IFSCOPE_NONE
;
2979 tra
.awdl_unrestricted
= 1;
2981 tcp_respond(tp
, t_template
->tt_ipgen
,
2982 &t_template
->tt_t
, (struct mbuf
*)NULL
,
2983 tp
->rcv_nxt
, tp
->snd_una
, TH_RST
, &tra
);
2984 (void) m_free(dtom(t_template
));
2985 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx cid %d \n",
2986 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
2987 so
, mpts
->mpts_connid
));
2989 socket_unlock(so
, 0);
2990 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2993 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
|
2994 SO_FILT_HINT_CONNRESET
);
2997 if ((mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) ||
2998 (mp_tp
->mpt_state
== MPTCPS_FASTCLOSE_WAIT
)) {
2999 mp_so
->so_error
= ECONNABORTED
;
3005 * Keep the subflow socket around unless the subflow has been
3006 * disconnected explicitly.
3008 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
3012 mptcp_fastjoin_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
3014 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3015 MPTS_LOCK_ASSERT_HELD(mpts
);
3016 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3018 if (mpte
->mpte_nummpcapflows
== 0) {
3019 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
3020 mptcplog((LOG_DEBUG
,"%s %llx %llx \n",
3021 __func__
, mp_tp
->mpt_snduna
, mpts
->mpts_sndnxt
));
3022 mpte
->mpte_active_sub
= mpts
;
3023 mpts
->mpts_flags
|= (MPTSF_FASTJ_SEND
| MPTSF_ACTIVE
);
3026 * If mptcp_subflow_output is called before fastjoin_ev
3027 * then mpts->mpts_sndnxt is initialized to mp_tp->mpt_snduna
3028 * and further mpts->mpts_sndnxt is incremented by len copied.
3030 if (mpts
->mpts_sndnxt
== 0) {
3031 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
3032 mpts
->mpts_rel_seq
= 1;
3037 return (MPTS_EVRET_OK
);
3041 mptcp_deleteok_ev(struct mptses
*mpte
, struct mptsub
*mpts
)
3043 MPTE_LOCK_ASSERT_HELD(mpte
);
3044 MPTS_LOCK_ASSERT_HELD(mpts
);
3045 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3046 mptcplog((LOG_DEBUG
, "%s cid %d\n", __func__
, mpts
->mpts_connid
));
3048 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3049 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
3050 return (MPTS_EVRET_DELETE
);
3052 return (MPTS_EVRET_OK
);
3056 mptcp_evret2str(ev_ret_t ret
)
3058 const char *c
= "UNKNOWN";
3061 case MPTS_EVRET_DELETE
:
3062 c
= "MPTS_EVRET_DELETE";
3064 case MPTS_EVRET_CONNECT_PENDING
:
3065 c
= "MPTS_EVRET_CONNECT_PENDING";
3067 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3068 c
= "MPTS_EVRET_DISCONNECT_FALLBACK";
3071 c
= "MPTS_EVRET_OK";
3073 case MPTS_EVRET_OK_UPDATE
:
3074 c
= "MPTS_EVRET_OK_UPDATE";
3081 * Add a reference to a subflow structure; used by MPTS_ADDREF().
3084 mptcp_subflow_addref(struct mptsub
*mpts
, int locked
)
3089 MPTS_LOCK_ASSERT_HELD(mpts
);
3091 if (++mpts
->mpts_refcnt
== 0) {
3092 panic("%s: mpts %p wraparound refcnt\n", __func__
, mpts
);
3100 * Remove a reference held on a subflow structure; used by MPTS_REMREF();
3103 mptcp_subflow_remref(struct mptsub
*mpts
)
3106 if (mpts
->mpts_refcnt
== 0) {
3107 panic("%s: mpts %p negative refcnt\n", __func__
, mpts
);
3110 if (--mpts
->mpts_refcnt
> 0) {
3114 /* callee will unlock and destroy lock */
3115 mptcp_subflow_free(mpts
);
3119 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
3120 * caller must ensure that the option can be issued on subflow sockets, via
3121 * MPOF_SUBFLOW_OK flag.
3124 mptcp_subflow_sosetopt(struct mptses
*mpte
, struct socket
*so
,
3127 struct socket
*mp_so
;
3128 struct sockopt sopt
;
3132 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3133 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
3135 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3136 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3138 bzero(&sopt
, sizeof (sopt
));
3139 sopt
.sopt_dir
= SOPT_SET
;
3140 sopt
.sopt_level
= mpo
->mpo_level
;
3141 sopt
.sopt_name
= mpo
->mpo_name
;
3142 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3143 sopt
.sopt_valsize
= sizeof (int);
3144 sopt
.sopt_p
= kernproc
;
3146 error
= sosetoptlock(so
, &sopt
, 0); /* already locked */
3148 mptcplog2((LOG_DEBUG
, "%s: mp_so 0x%llx sopt %s "
3149 "val %d set successful\n", __func__
,
3150 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3151 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3152 buf
, sizeof (buf
)), mpo
->mpo_intval
));
3154 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx sopt %s "
3155 "val %d set error %d\n", __func__
,
3156 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3157 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3158 buf
, sizeof (buf
)), mpo
->mpo_intval
, error
));
3164 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
3165 * caller must ensure that the option can be issued on subflow sockets, via
3166 * MPOF_SUBFLOW_OK flag.
3169 mptcp_subflow_sogetopt(struct mptses
*mpte
, struct socket
*so
,
3172 struct socket
*mp_so
;
3173 struct sockopt sopt
;
3177 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3178 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3179 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3181 bzero(&sopt
, sizeof (sopt
));
3182 sopt
.sopt_dir
= SOPT_GET
;
3183 sopt
.sopt_level
= mpo
->mpo_level
;
3184 sopt
.sopt_name
= mpo
->mpo_name
;
3185 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3186 sopt
.sopt_valsize
= sizeof (int);
3187 sopt
.sopt_p
= kernproc
;
3189 error
= sogetoptlock(so
, &sopt
, 0); /* already locked */
3191 mptcplog2((LOG_DEBUG
, "%s: mp_so 0x%llx sopt %s "
3192 "val %d get successful\n", __func__
,
3193 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3194 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3195 buf
, sizeof (buf
)), mpo
->mpo_intval
));
3197 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx sopt %s get error %d\n",
3198 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3199 mptcp_sopt2str(mpo
->mpo_level
,
3200 mpo
->mpo_name
, buf
, sizeof (buf
)), error
));
3207 * MPTCP garbage collector.
3209 * This routine is called by the MP domain on-demand, periodic callout,
3210 * which is triggered when a MPTCP socket is closed. The callout will
3211 * repeat as long as this routine returns a non-zero value.
3214 mptcp_gc(struct mppcbinfo
*mppi
)
3216 struct mppcb
*mpp
, *tmpp
;
3217 uint32_t active
= 0;
3219 lck_mtx_assert(&mppi
->mppi_lock
, LCK_MTX_ASSERT_OWNED
);
3221 mptcplog3((LOG_DEBUG
, "%s: running\n", __func__
));
3223 TAILQ_FOREACH_SAFE(mpp
, &mppi
->mppi_pcbs
, mpp_entry
, tmpp
) {
3224 struct socket
*mp_so
;
3225 struct mptses
*mpte
;
3226 struct mptcb
*mp_tp
;
3228 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
3229 mp_so
= mpp
->mpp_socket
;
3230 VERIFY(mp_so
!= NULL
);
3231 mpte
= mptompte(mpp
);
3232 VERIFY(mpte
!= NULL
);
3233 mp_tp
= mpte
->mpte_mptcb
;
3234 VERIFY(mp_tp
!= NULL
);
3236 mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx found "
3237 "(u=%d,r=%d,s=%d)\n", __func__
,
3238 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mp_so
->so_usecount
,
3239 mp_so
->so_retaincnt
, mpp
->mpp_state
));
3241 if (!lck_mtx_try_lock(&mpp
->mpp_lock
)) {
3242 mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped "
3243 "(u=%d,r=%d)\n", __func__
,
3244 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3245 mp_so
->so_usecount
, mp_so
->so_retaincnt
));
3250 /* check again under the lock */
3251 if (mp_so
->so_usecount
> 1) {
3252 boolean_t wakeup
= FALSE
;
3253 struct mptsub
*mpts
, *tmpts
;
3255 mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped "
3256 "[u=%d,r=%d] %d %d\n", __func__
,
3257 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3258 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3259 mp_tp
->mpt_gc_ticks
,
3262 if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_1
) {
3263 if (mp_tp
->mpt_gc_ticks
> 0)
3264 mp_tp
->mpt_gc_ticks
--;
3265 if (mp_tp
->mpt_gc_ticks
== 0) {
3267 if (mp_tp
->mpt_localkey
!= NULL
) {
3269 mp_tp
->mpt_localkey
);
3270 mp_tp
->mpt_localkey
= NULL
;
3276 TAILQ_FOREACH_SAFE(mpts
,
3277 &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3279 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3280 if (mpts
->mpts_soerror
== 0)
3281 mpts
->mpts_soerror
= ETIMEDOUT
;
3282 mptcp_subflow_eupcall(mpts
->mpts_socket
,
3283 mpts
, SO_FILT_HINT_DISCONNECTED
);
3287 lck_mtx_unlock(&mpp
->mpp_lock
);
3292 if (mpp
->mpp_state
!= MPPCB_STATE_DEAD
) {
3293 mptcplog3((LOG_DEBUG
, "%s: mp_so 0x%llx skipped "
3294 "[u=%d,r=%d,s=%d]\n", __func__
,
3295 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3296 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3298 lck_mtx_unlock(&mpp
->mpp_lock
);
3304 * The PCB has been detached, and there is exactly 1 refnct
3305 * held by the MPTCP thread. Signal that thread to terminate,
3306 * after which the last refcnt will be released. That will
3307 * allow it to be destroyed below during the next round.
3309 if (mp_so
->so_usecount
== 1) {
3310 mptcplog2((LOG_DEBUG
, "%s: mp_so 0x%llx scheduled for "
3311 "termination [u=%d,r=%d]\n", __func__
,
3312 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3313 mp_so
->so_usecount
, mp_so
->so_retaincnt
));
3314 /* signal MPTCP thread to terminate */
3315 mptcp_thread_terminate_signal(mpte
);
3316 lck_mtx_unlock(&mpp
->mpp_lock
);
3321 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
3322 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3323 mp_so
->so_usecount
, mp_so
->so_retaincnt
));
3324 DTRACE_MPTCP4(dispose
, struct socket
*, mp_so
,
3325 struct sockbuf
*, &mp_so
->so_rcv
,
3326 struct sockbuf
*, &mp_so
->so_snd
,
3327 struct mppcb
*, mpp
);
3336 * Drop a MPTCP connection, reporting the specified error.
3339 mptcp_drop(struct mptses
*mpte
, struct mptcb
*mp_tp
, int errno
)
3341 struct socket
*mp_so
;
3343 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3344 MPT_LOCK_ASSERT_HELD(mp_tp
);
3345 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3346 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3348 mp_tp
->mpt_state
= MPTCPS_TERMINATE
;
3349 DTRACE_MPTCP2(state__change
, struct mptcb
*, mp_tp
,
3350 uint32_t, 0 /* event */);
3352 if (errno
== ETIMEDOUT
&& mp_tp
->mpt_softerror
!= 0)
3353 errno
= mp_tp
->mpt_softerror
;
3354 mp_so
->so_error
= errno
;
3356 return (mptcp_close(mpte
, mp_tp
));
3360 * Close a MPTCP control block.
3363 mptcp_close(struct mptses
*mpte
, struct mptcb
*mp_tp
)
3365 struct socket
*mp_so
;
3366 struct mptsub
*mpts
, *tmpts
;
3368 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3369 MPT_LOCK_ASSERT_HELD(mp_tp
);
3370 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3371 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3372 if (mp_tp
->mpt_localkey
!= NULL
) {
3373 mptcp_free_key(mp_tp
->mpt_localkey
);
3374 mp_tp
->mpt_localkey
= NULL
;
3378 soisdisconnected(mp_so
);
3381 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
3386 /* Clean up all subflows */
3387 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3389 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
3390 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
3392 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3400 mptcp_notify_close(struct socket
*so
)
3402 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_DISCONNECTED
));
3406 * Signal MPTCP thread to wake up.
3409 mptcp_thread_signal(struct mptses
*mpte
)
3411 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3412 mptcp_thread_signal_locked(mpte
);
3413 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3417 * Signal MPTCP thread to wake up (locked version)
3420 mptcp_thread_signal_locked(struct mptses
*mpte
)
3422 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3424 mpte
->mpte_thread_reqs
++;
3425 if (!mpte
->mpte_thread_active
&& mpte
->mpte_thread
!= THREAD_NULL
)
3426 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3430 * Signal MPTCP thread to terminate.
3433 mptcp_thread_terminate_signal(struct mptses
*mpte
)
3435 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3436 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3437 mpte
->mpte_thread
= THREAD_NULL
;
3438 mpte
->mpte_thread_reqs
++;
3439 if (!mpte
->mpte_thread_active
)
3440 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3442 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3446 * MPTCP thread workloop.
3449 mptcp_thread_dowork(struct mptses
*mpte
)
3451 struct socket
*mp_so
;
3452 struct mptsub
*mpts
, *tmpts
;
3453 boolean_t connect_pending
= FALSE
, disconnect_fallback
= FALSE
;
3454 boolean_t conninfo_update
= FALSE
;
3456 MPTE_LOCK(mpte
); /* same as MP socket lock */
3457 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3458 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3459 VERIFY(mp_so
!= NULL
);
3461 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3465 MPTS_ADDREF_LOCKED(mpts
); /* for us */
3467 /* Update process ownership based on parent mptcp socket */
3468 mptcp_update_last_owner(mpts
, mp_so
);
3470 mptcp_subflow_input(mpte
, mpts
);
3471 ret
= mptcp_subflow_events(mpte
, mpts
);
3473 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3474 mptcplog3((LOG_INFO
, "%s: cid %d \n", __func__
,
3475 mpts
->mpts_connid
));
3476 (void) mptcp_subflow_output(mpte
, mpts
);
3480 * If MPTCP socket is closed, disconnect all subflows.
3481 * This will generate a disconnect event which will
3482 * be handled during the next iteration, causing a
3483 * non-zero error to be returned above.
3485 if (mp_so
->so_flags
& SOF_PCBCLEARING
)
3486 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
3490 case MPTS_EVRET_OK_UPDATE
:
3491 conninfo_update
= TRUE
;
3496 case MPTS_EVRET_DELETE
:
3497 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3499 case MPTS_EVRET_CONNECT_PENDING
:
3500 connect_pending
= TRUE
;
3502 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3503 disconnect_fallback
= TRUE
;
3506 MPTS_REMREF(mpts
); /* ours */
3509 if (conninfo_update
) {
3510 soevent(mp_so
, SO_FILT_HINT_LOCKED
|
3511 SO_FILT_HINT_CONNINFO_UPDATED
);
3514 if (!connect_pending
&& !disconnect_fallback
) {
3519 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3521 if (disconnect_fallback
) {
3522 struct socket
*so
= NULL
;
3523 struct inpcb
*inp
= NULL
;
3524 struct tcpcb
*tp
= NULL
;
3526 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
3531 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
3533 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|
3534 MPTSF_DISCONNECTED
)) {
3538 so
= mpts
->mpts_socket
;
3541 * The MPTCP connection has degraded to a fallback
3542 * mode, so there is no point in keeping this subflow
3543 * regardless of its MPTCP-readiness state, unless it
3544 * is the primary one which we use for fallback. This
3545 * assumes that the subflow used for fallback is the
3550 inp
= sotoinpcb(so
);
3551 tp
= intotcpcb(inp
);
3553 ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
3554 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
3555 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3556 socket_unlock(so
, 1);
3560 tp
->t_mpflags
|= TMPF_RESET
;
3561 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
3562 socket_unlock(so
, 1);
3564 } else if (connect_pending
) {
3566 * If delayed subflow start is set and cellular,
3567 * delay the connect till a retransmission timeout
3570 if ((mptcp_delayed_subf_start
) &&
3571 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
3577 * The MPTCP connection has progressed to a state
3578 * where it supports full multipath semantics; allow
3579 * additional joins to be attempted for all subflows
3580 * that are in the PENDING state.
3582 if (mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) {
3583 (void) mptcp_subflow_soconnectx(mpte
, mpts
);
3596 mptcp_thread_func(void *v
, wait_result_t w
)
3599 struct mptses
*mpte
= v
;
3600 struct timespec
*ts
= NULL
;
3602 VERIFY(mpte
!= NULL
);
3604 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3607 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3609 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3610 (void) msleep(&mpte
->mpte_thread
,
3611 &mpte
->mpte_thread_lock
, (PZERO
- 1) | PSPIN
,
3615 /* MPTCP socket is closed? */
3616 if (mpte
->mpte_thread
== THREAD_NULL
) {
3617 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3618 /* callee will destroy thread lock */
3619 mptcp_thread_destroy(mpte
);
3624 mpte
->mpte_thread_active
= 1;
3626 uint32_t reqs
= mpte
->mpte_thread_reqs
;
3628 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3629 mptcp_thread_dowork(mpte
);
3630 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3632 /* if there's no pending request, we're done */
3633 if (reqs
== mpte
->mpte_thread_reqs
||
3634 mpte
->mpte_thread
== THREAD_NULL
)
3637 mpte
->mpte_thread_reqs
= 0;
3638 mpte
->mpte_thread_active
= 0;
3643 * Destroy a MTCP thread, to be called in the MPTCP thread context
3644 * upon receiving an indication to self-terminate. This routine
3645 * will not return, as the current thread is terminated at the end.
3648 mptcp_thread_destroy(struct mptses
*mpte
)
3650 struct socket
*mp_so
;
3652 MPTE_LOCK(mpte
); /* same as MP socket lock */
3653 VERIFY(mpte
->mpte_thread
== THREAD_NULL
);
3654 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3656 mptcp_sesdestroy(mpte
);
3658 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3659 VERIFY(mp_so
!= NULL
);
3660 VERIFY(mp_so
->so_usecount
!= 0);
3661 mp_so
->so_usecount
--; /* for thread */
3662 mpte
->mpte_mppcb
->mpp_flags
|= MPP_DEFUNCT
;
3665 /* for the extra refcnt from kernel_thread_start() */
3666 thread_deallocate(current_thread());
3667 /* this is the end */
3668 thread_terminate(current_thread());
3673 * Protocol pr_lock callback.
3676 mptcp_lock(struct socket
*mp_so
, int refcount
, void *lr
)
3678 struct mppcb
*mpp
= sotomppcb(mp_so
);
3682 lr_saved
= __builtin_return_address(0);
3687 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
3688 mp_so
, lr_saved
, solockhistory_nr(mp_so
));
3691 lck_mtx_lock(&mpp
->mpp_lock
);
3693 if (mp_so
->so_usecount
< 0) {
3694 panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__
,
3695 mp_so
, mp_so
->so_pcb
, lr_saved
, mp_so
->so_usecount
,
3696 solockhistory_nr(mp_so
));
3700 mp_so
->so_usecount
++;
3701 mp_so
->lock_lr
[mp_so
->next_lock_lr
] = lr_saved
;
3702 mp_so
->next_lock_lr
= (mp_so
->next_lock_lr
+ 1) % SO_LCKDBG_MAX
;
3708 * Protocol pr_unlock callback.
3711 mptcp_unlock(struct socket
*mp_so
, int refcount
, void *lr
)
3713 struct mppcb
*mpp
= sotomppcb(mp_so
);
3717 lr_saved
= __builtin_return_address(0);
3722 panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__
,
3723 mp_so
, mp_so
->so_usecount
, lr_saved
,
3724 solockhistory_nr(mp_so
));
3727 lck_mtx_assert(&mpp
->mpp_lock
, LCK_MTX_ASSERT_OWNED
);
3730 mp_so
->so_usecount
--;
3732 if (mp_so
->so_usecount
< 0) {
3733 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
3734 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
3737 mp_so
->unlock_lr
[mp_so
->next_unlock_lr
] = lr_saved
;
3738 mp_so
->next_unlock_lr
= (mp_so
->next_unlock_lr
+ 1) % SO_LCKDBG_MAX
;
3739 lck_mtx_unlock(&mpp
->mpp_lock
);
3745 * Protocol pr_getlock callback.
3748 mptcp_getlock(struct socket
*mp_so
, int locktype
)
3750 #pragma unused(locktype)
3751 struct mppcb
*mpp
= sotomppcb(mp_so
);
3754 panic("%s: so=%p NULL so_pcb %s\n", __func__
, mp_so
,
3755 solockhistory_nr(mp_so
));
3758 if (mp_so
->so_usecount
< 0) {
3759 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
3760 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
3763 return (&mpp
->mpp_lock
);
3767 * Key generation functions
3770 mptcp_generate_unique_key(struct mptcp_key_entry
*key_entry
)
3772 struct mptcp_key_entry
*key_elm
;
3774 read_random(&key_entry
->mkey_value
, sizeof (key_entry
->mkey_value
));
3775 if (key_entry
->mkey_value
== 0)
3777 mptcp_do_sha1(&key_entry
->mkey_value
, key_entry
->mkey_digest
,
3778 sizeof (key_entry
->mkey_digest
));
3780 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
3781 if (key_elm
->mkey_value
== key_entry
->mkey_value
) {
3784 if (bcmp(key_elm
->mkey_digest
, key_entry
->mkey_digest
, 4) ==
3791 static mptcp_key_t
*
3792 mptcp_reserve_key(void)
3794 struct mptcp_key_entry
*key_elm
;
3795 struct mptcp_key_entry
*found_elm
= NULL
;
3797 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3798 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
3799 if (key_elm
->mkey_flags
== MKEYF_FREE
) {
3800 key_elm
->mkey_flags
= MKEYF_INUSE
;
3801 found_elm
= key_elm
;
3805 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
3808 return (&found_elm
->mkey_value
);
3811 key_elm
= (struct mptcp_key_entry
*)
3812 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
3813 key_elm
->mkey_flags
= MKEYF_INUSE
;
3815 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3816 mptcp_generate_unique_key(key_elm
);
3817 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_elm
, mkey_next
);
3818 mptcp_keys_pool
.mkph_count
+= 1;
3819 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
3820 return (&key_elm
->mkey_value
);
3824 mptcp_get_stored_digest(mptcp_key_t
*key
)
3826 struct mptcp_key_entry
*key_holder
;
3827 caddr_t digest
= NULL
;
3829 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3830 key_holder
= (struct mptcp_key_entry
*)(void *)((caddr_t
)key
-
3831 offsetof(struct mptcp_key_entry
, mkey_value
));
3832 if (key_holder
->mkey_flags
!= MKEYF_INUSE
)
3833 panic_plain("%s", __func__
);
3834 digest
= &key_holder
->mkey_digest
[0];
3835 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
3840 mptcp_free_key(mptcp_key_t
*key
)
3842 struct mptcp_key_entry
*key_holder
;
3843 struct mptcp_key_entry
*key_elm
;
3844 int pt
= RandomULong();
3846 mptcplog((LOG_INFO
, "%s\n", __func__
));
3848 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3849 key_holder
= (struct mptcp_key_entry
*)(void*)((caddr_t
)key
-
3850 offsetof(struct mptcp_key_entry
, mkey_value
));
3851 key_holder
->mkey_flags
= MKEYF_FREE
;
3853 LIST_REMOVE(key_holder
, mkey_next
);
3854 mptcp_keys_pool
.mkph_count
-= 1;
3856 /* Free half the time */
3858 zfree(mptcp_keys_pool
.mkph_key_entry_zone
, key_holder
);
3860 /* Insert it at random point to avoid early reuse */
3862 if (mptcp_keys_pool
.mkph_count
> 1) {
3863 pt
= pt
% (mptcp_keys_pool
.mkph_count
- 1);
3864 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
3866 LIST_INSERT_AFTER(key_elm
, key_holder
,
3872 panic("missed insertion");
3874 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_holder
,
3877 mptcp_keys_pool
.mkph_count
+= 1;
3879 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
3883 mptcp_key_pool_init(void)
3886 struct mptcp_key_entry
*key_entry
;
3888 LIST_INIT(&mptcp_keys_pool
);
3889 mptcp_keys_pool
.mkph_count
= 0;
3891 mptcp_keys_pool
.mkph_key_elm_sz
= (vm_size_t
)
3892 (sizeof (struct mptcp_key_entry
));
3893 mptcp_keys_pool
.mkph_key_entry_zone
= zinit(
3894 mptcp_keys_pool
.mkph_key_elm_sz
,
3895 MPTCP_MX_KEY_ALLOCS
* mptcp_keys_pool
.mkph_key_elm_sz
,
3896 MPTCP_MX_PREALLOC_ZONE_SZ
, "mptkeys");
3897 if (mptcp_keys_pool
.mkph_key_entry_zone
== NULL
) {
3898 panic("%s: unable to allocate MPTCP keys zone \n", __func__
);
3901 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_CALLERACCT
, FALSE
);
3902 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_EXPAND
, TRUE
);
3904 for (i
= 0; i
< MPTCP_KEY_PREALLOCS_MX
; i
++) {
3905 key_entry
= (struct mptcp_key_entry
*)
3906 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
3907 key_entry
->mkey_flags
= MKEYF_FREE
;
3908 mptcp_generate_unique_key(key_entry
);
3909 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_entry
, mkey_next
);
3910 mptcp_keys_pool
.mkph_count
+= 1;
3912 lck_mtx_init(&mptcp_keys_pool
.mkph_lock
, mtcbinfo
.mppi_lock_grp
,
3913 mtcbinfo
.mppi_lock_attr
);
3917 * MPTCP Join support
3921 mptcp_attach_to_subf(struct socket
*so
, struct mptcb
*mp_tp
,
3924 struct tcpcb
*tp
= sototcpcb(so
);
3925 struct mptcp_subf_auth_entry
*sauth_entry
;
3926 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
3928 MPT_LOCK_SPIN(mp_tp
);
3929 tp
->t_mptcb
= mp_tp
;
3931 * The address ID of the first flow is implicitly 0.
3933 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
3934 tp
->t_local_aid
= 0;
3936 tp
->t_local_aid
= addr_id
;
3937 tp
->t_mpflags
|= (TMPF_PREESTABLISHED
| TMPF_JOINED_FLOW
);
3938 so
->so_flags
|= SOF_MP_SEC_SUBFLOW
;
3941 sauth_entry
= zalloc(mpt_subauth_zone
);
3942 sauth_entry
->msae_laddr_id
= tp
->t_local_aid
;
3943 sauth_entry
->msae_raddr_id
= 0;
3944 sauth_entry
->msae_raddr_rand
= 0;
3946 sauth_entry
->msae_laddr_rand
= RandomULong();
3947 if (sauth_entry
->msae_laddr_rand
== 0)
3949 MPT_LOCK_SPIN(mp_tp
);
3950 LIST_INSERT_HEAD(&mp_tp
->mpt_subauth_list
, sauth_entry
, msae_next
);
3955 mptcp_detach_mptcb_from_subf(struct mptcb
*mp_tp
, struct socket
*so
)
3957 struct mptcp_subf_auth_entry
*sauth_entry
;
3958 struct tcpcb
*tp
= NULL
;
3964 socket_unlock(so
, 0);
3969 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
3970 if (sauth_entry
->msae_laddr_id
== tp
->t_local_aid
) {
3976 LIST_REMOVE(sauth_entry
, msae_next
);
3977 zfree(mpt_subauth_zone
, sauth_entry
);
3982 socket_unlock(so
, 0);
3986 mptcp_get_rands(mptcp_addr_id addr_id
, struct mptcb
*mp_tp
, u_int32_t
*lrand
,
3989 struct mptcp_subf_auth_entry
*sauth_entry
;
3990 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
3993 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
3994 if (sauth_entry
->msae_laddr_id
== addr_id
) {
3996 *lrand
= sauth_entry
->msae_laddr_rand
;
3998 *rrand
= sauth_entry
->msae_raddr_rand
;
4006 mptcp_set_raddr_rand(mptcp_addr_id laddr_id
, struct mptcb
*mp_tp
,
4007 mptcp_addr_id raddr_id
, u_int32_t raddr_rand
)
4009 struct mptcp_subf_auth_entry
*sauth_entry
;
4010 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4013 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4014 if (sauth_entry
->msae_laddr_id
== laddr_id
) {
4015 if ((sauth_entry
->msae_raddr_id
!= 0) &&
4016 (sauth_entry
->msae_raddr_id
!= raddr_id
)) {
4017 mptcplog((LOG_ERR
, "MPTCP ERROR %s: mismatched"
4018 " address ids %d %d \n", __func__
, raddr_id
,
4019 sauth_entry
->msae_raddr_id
));
4023 sauth_entry
->msae_raddr_id
= raddr_id
;
4024 if ((sauth_entry
->msae_raddr_rand
!= 0) &&
4025 (sauth_entry
->msae_raddr_rand
!= raddr_rand
)) {
4026 mptcplog((LOG_ERR
, "%s: dup SYN_ACK %d %d \n",
4027 __func__
, raddr_rand
,
4028 sauth_entry
->msae_raddr_rand
));
4032 sauth_entry
->msae_raddr_rand
= raddr_rand
;
4041 * SHA1 support for MPTCP
4044 mptcp_do_sha1(mptcp_key_t
*key
, char *sha_digest
, int digest_len
)
4047 const unsigned char *sha1_base
;
4050 if (digest_len
!= SHA1_RESULTLEN
) {
4054 sha1_base
= (const unsigned char *) key
;
4055 sha1_size
= sizeof (mptcp_key_t
);
4056 SHA1Init(&sha1ctxt
);
4057 SHA1Update(&sha1ctxt
, sha1_base
, sha1_size
);
4058 SHA1Final(sha_digest
, &sha1ctxt
);
4063 mptcp_hmac_sha1(mptcp_key_t key1
, mptcp_key_t key2
,
4064 u_int32_t rand1
, u_int32_t rand2
, u_char
*digest
, int digest_len
)
4067 mptcp_key_t key_ipad
[8] = {0}; /* key XOR'd with inner pad */
4068 mptcp_key_t key_opad
[8] = {0}; /* key XOR'd with outer pad */
4072 bzero(digest
, digest_len
);
4074 /* Set up the Key for HMAC */
4081 /* Set up the message for HMAC */
4085 /* Key is 512 block length, so no need to compute hash */
4087 /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
4089 for (i
= 0; i
< 8; i
++) {
4090 key_ipad
[i
] ^= 0x3636363636363636;
4091 key_opad
[i
] ^= 0x5c5c5c5c5c5c5c5c;
4094 /* Perform inner SHA1 */
4095 SHA1Init(&sha1ctxt
);
4096 SHA1Update(&sha1ctxt
, (unsigned char *)key_ipad
, sizeof (key_ipad
));
4097 SHA1Update(&sha1ctxt
, (unsigned char *)data
, sizeof (data
));
4098 SHA1Final(digest
, &sha1ctxt
);
4100 /* Perform outer SHA1 */
4101 SHA1Init(&sha1ctxt
);
4102 SHA1Update(&sha1ctxt
, (unsigned char *)key_opad
, sizeof (key_opad
));
4103 SHA1Update(&sha1ctxt
, (unsigned char *)digest
, SHA1_RESULTLEN
);
4104 SHA1Final(digest
, &sha1ctxt
);
4108 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
4109 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
4112 mptcp_get_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
, u_char
*digest
,
4115 uint32_t lrand
, rrand
;
4116 mptcp_key_t localkey
, remotekey
;
4117 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4119 if (digest_len
!= SHA1_RESULTLEN
)
4123 mptcp_get_rands(aid
, mp_tp
, &lrand
, &rrand
);
4124 MPT_LOCK_SPIN(mp_tp
);
4125 localkey
= *mp_tp
->mpt_localkey
;
4126 remotekey
= mp_tp
->mpt_remotekey
;
4128 mptcp_hmac_sha1(localkey
, remotekey
, lrand
, rrand
, digest
,
4133 mptcp_get_trunced_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
)
4135 u_char digest
[SHA1_RESULTLEN
];
4136 u_int64_t trunced_digest
;
4138 mptcp_get_hmac(aid
, mp_tp
, &digest
[0], sizeof (digest
));
4139 bcopy(digest
, &trunced_digest
, 8);
4140 return (trunced_digest
);
4144 * Authentication data generation
4147 mptcp_generate_token(char *sha_digest
, int sha_digest_len
, caddr_t token
,
4150 VERIFY(token_len
== sizeof (u_int32_t
));
4151 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4153 /* Most significant 32 bits of the SHA1 hash */
4154 bcopy(sha_digest
, token
, sizeof (u_int32_t
));
4159 mptcp_generate_idsn(char *sha_digest
, int sha_digest_len
, caddr_t idsn
,
4162 VERIFY(idsn_len
== sizeof (u_int64_t
));
4163 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4166 * Least significant 64 bits of the SHA1 hash
4169 idsn
[7] = sha_digest
[12];
4170 idsn
[6] = sha_digest
[13];
4171 idsn
[5] = sha_digest
[14];
4172 idsn
[4] = sha_digest
[15];
4173 idsn
[3] = sha_digest
[16];
4174 idsn
[2] = sha_digest
[17];
4175 idsn
[1] = sha_digest
[18];
4176 idsn
[0] = sha_digest
[19];
4181 mptcp_init_authparms(struct mptcb
*mp_tp
)
4183 caddr_t local_digest
= NULL
;
4184 char remote_digest
[MPTCP_SHA1_RESULTLEN
];
4185 MPT_LOCK_ASSERT_HELD(mp_tp
);
4187 /* Only Version 0 is supported for auth purposes */
4188 if (mp_tp
->mpt_version
!= MP_DRAFT_VERSION_12
)
4191 /* Setup local and remote tokens and Initial DSNs */
4192 local_digest
= mptcp_get_stored_digest(mp_tp
->mpt_localkey
);
4193 mptcp_generate_token(local_digest
, SHA1_RESULTLEN
,
4194 (caddr_t
)&mp_tp
->mpt_localtoken
, sizeof (mp_tp
->mpt_localtoken
));
4195 mptcp_generate_idsn(local_digest
, SHA1_RESULTLEN
,
4196 (caddr_t
)&mp_tp
->mpt_local_idsn
, sizeof (u_int64_t
));
4198 if (!mptcp_do_sha1(&mp_tp
->mpt_remotekey
, remote_digest
,
4200 mptcplog((LOG_ERR
, "MPTCP ERROR %s: unexpected failure",
4204 mptcp_generate_token(remote_digest
, SHA1_RESULTLEN
,
4205 (caddr_t
)&mp_tp
->mpt_remotetoken
, sizeof (mp_tp
->mpt_localtoken
));
4206 mptcp_generate_idsn(remote_digest
, SHA1_RESULTLEN
,
4207 (caddr_t
)&mp_tp
->mpt_remote_idsn
, sizeof (u_int64_t
));
4212 mptcp_init_statevars(struct mptcb
*mp_tp
)
4214 MPT_LOCK_ASSERT_HELD(mp_tp
);
4216 /* The subflow SYN is also first MPTCP byte */
4217 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndmax
= mp_tp
->mpt_local_idsn
+ 1;
4218 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
4220 mp_tp
->mpt_rcvatmark
= mp_tp
->mpt_rcvnxt
= mp_tp
->mpt_remote_idsn
+ 1;
4224 mptcp_conn_properties(struct mptcb
*mp_tp
)
4226 /* There is only Version 0 at this time */
4227 mp_tp
->mpt_version
= MP_DRAFT_VERSION_12
;
4229 /* Set DSS checksum flag */
4231 mp_tp
->mpt_flags
|= MPTCPF_CHECKSUM
;
4233 /* Set up receive window */
4234 mp_tp
->mpt_rcvwnd
= mptcp_sbspace(mp_tp
);
4236 /* Set up gc ticks */
4237 mp_tp
->mpt_gc_ticks
= MPT_GC_TICKS
;
4244 mptcp_get_localtoken(void* mptcb_arg
)
4246 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4247 return (mp_tp
->mpt_localtoken
);
4251 mptcp_get_remotetoken(void* mptcb_arg
)
4253 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4254 return (mp_tp
->mpt_remotetoken
);
4258 mptcp_get_localkey(void* mptcb_arg
)
4260 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4261 if (mp_tp
->mpt_localkey
!= NULL
)
4262 return (*mp_tp
->mpt_localkey
);
4268 mptcp_get_remotekey(void* mptcb_arg
)
4270 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4271 return (mp_tp
->mpt_remotekey
);
4275 mptcp_send_dfin(struct socket
*so
)
4277 struct tcpcb
*tp
= NULL
;
4278 struct inpcb
*inp
= NULL
;
4280 inp
= sotoinpcb(so
);
4284 tp
= intotcpcb(inp
);
4288 if (!(tp
->t_mpflags
& TMPF_RESET
))
4289 tp
->t_mpflags
|= TMPF_SEND_DFIN
;
4293 * Data Sequence Mapping routines
4296 mptcp_insert_dsn(struct mppcb
*mpp
, struct mbuf
*m
)
4298 struct mptcb
*mp_tp
;
4303 mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
;
4305 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
4307 panic("%s: data write before establishment.",
4313 VERIFY(m
->m_flags
& M_PKTHDR
);
4314 m
->m_pkthdr
.pkt_flags
|= (PKTF_MPTCP
| PKTF_MPSO
);
4315 m
->m_pkthdr
.mp_dsn
= mp_tp
->mpt_sndmax
;
4316 m
->m_pkthdr
.mp_rlen
= m_pktlen(m
);
4317 mp_tp
->mpt_sndmax
+= m_pktlen(m
);
4324 mptcp_preproc_sbdrop(struct mbuf
*m
, unsigned int len
)
4326 u_int32_t sub_len
= 0;
4329 VERIFY(m
->m_flags
& M_PKTHDR
);
4331 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4332 sub_len
= m
->m_pkthdr
.mp_rlen
;
4334 if (sub_len
< len
) {
4335 m
->m_pkthdr
.mp_dsn
+= sub_len
;
4336 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4337 m
->m_pkthdr
.mp_rseq
+= sub_len
;
4339 m
->m_pkthdr
.mp_rlen
= 0;
4342 /* sub_len >= len */
4343 m
->m_pkthdr
.mp_dsn
+= len
;
4344 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4345 m
->m_pkthdr
.mp_rseq
+= len
;
4347 mptcplog3((LOG_INFO
,
4348 "%s: %llu %u %d %d\n", __func__
,
4349 m
->m_pkthdr
.mp_dsn
, m
->m_pkthdr
.mp_rseq
,
4350 m
->m_pkthdr
.mp_rlen
, len
));
4351 m
->m_pkthdr
.mp_rlen
-= len
;
4355 panic("%s: MPTCP tag not set", __func__
);
4362 /* Obtain the DSN mapping stored in the mbuf */
4364 mptcp_output_getm_dsnmap32(struct socket
*so
, int off
, uint32_t datalen
,
4365 u_int32_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
, u_int64_t
*dsn64p
)
4369 mptcp_output_getm_dsnmap64(so
, off
, datalen
, &dsn64
, relseq
, data_len
);
4370 *dsn
= (u_int32_t
)MPTCP_DATASEQ_LOW32(dsn64
);
4375 mptcp_output_getm_dsnmap64(struct socket
*so
, int off
, uint32_t datalen
,
4376 u_int64_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
)
4378 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4379 struct mbuf
*mnext
= NULL
;
4380 uint32_t runlen
= 0;
4382 uint32_t contig_len
= 0;
4390 * In the subflow socket, the DSN sequencing can be discontiguous,
4391 * but the subflow sequence mapping is contiguous. Use the subflow
4392 * sequence property to find the right mbuf and corresponding dsn
4397 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4398 VERIFY(m
->m_flags
& M_PKTHDR
);
4400 if ((unsigned int)off
>= m
->m_pkthdr
.mp_rlen
) {
4401 off
-= m
->m_pkthdr
.mp_rlen
;
4409 panic("%s: bad offset", __func__
);
4413 dsn64
= m
->m_pkthdr
.mp_dsn
+ off
;
4415 *relseq
= m
->m_pkthdr
.mp_rseq
+ off
;
4418 * Now find the last contiguous byte and its length from
4421 runlen
= m
->m_pkthdr
.mp_rlen
- off
;
4422 contig_len
= runlen
;
4424 /* If datalen does not span multiple mbufs, return */
4425 if (datalen
<= runlen
) {
4426 *data_len
= min(datalen
, UINT16_MAX
);
4431 while (datalen
> runlen
) {
4432 if (mnext
== NULL
) {
4433 panic("%s: bad datalen = %d, %d %d", __func__
, datalen
,
4437 VERIFY(mnext
->m_flags
& M_PKTHDR
);
4438 VERIFY(mnext
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4441 * case A. contiguous DSN stream
4442 * case B. discontiguous DSN stream
4444 if (mnext
->m_pkthdr
.mp_dsn
== (dsn64
+ runlen
)) {
4446 runlen
+= mnext
->m_pkthdr
.mp_rlen
;
4447 contig_len
+= mnext
->m_pkthdr
.mp_rlen
;
4448 mptcplog3((LOG_INFO
, "%s: contig \n",
4453 "%s: discontig datalen %d contig_len %d cc %d \n",
4454 __func__
, datalen
, contig_len
, so
->so_snd
.sb_cc
));
4457 mnext
= mnext
->m_next
;
4459 datalen
= min(datalen
, UINT16_MAX
);
4460 *data_len
= min(datalen
, contig_len
);
4461 mptcplog3((LOG_INFO
, "%s: %llu %u %d %d \n", __func__
,
4462 *dsn
, *relseq
, *data_len
, off
));
4466 * MPTCP's notion of the next insequence Data Sequence number is adjusted
4467 * here. It must be called from mptcp_adj_rmap() which is called only after
4468 * reassembly of out of order data. The rcvnxt variable must
4469 * be updated only when atleast some insequence new data is received.
4472 mptcp_adj_rcvnxt(struct tcpcb
*tp
, struct mbuf
*m
)
4474 struct mptcb
*mp_tp
= tptomptp(tp
);
4479 if ((MPTCP_SEQ_GEQ(mp_tp
->mpt_rcvnxt
, m
->m_pkthdr
.mp_dsn
)) &&
4480 (MPTCP_SEQ_LEQ(mp_tp
->mpt_rcvnxt
, (m
->m_pkthdr
.mp_dsn
+
4481 m
->m_pkthdr
.mp_rlen
)))) {
4482 mp_tp
->mpt_rcvnxt
= m
->m_pkthdr
.mp_dsn
+ m
->m_pkthdr
.mp_rlen
;
4488 * Note that this is called only from tcp_input() which may trim data
4489 * after the dsn mapping is inserted into the mbuf. When it trims data
4490 * tcp_input calls m_adj() which does not remove the m_pkthdr even if the
4491 * m_len becomes 0 as a result of trimming the mbuf. The dsn map insertion
4492 * cannot be delayed after trim, because data can be in the reassembly
4493 * queue for a while and the DSN option info in tp will be overwritten for
4494 * every new packet received.
4495 * The dsn map will be adjusted just prior to appending to subflow sockbuf
4496 * with mptcp_adj_rmap()
4499 mptcp_insert_rmap(struct tcpcb
*tp
, struct mbuf
*m
)
4501 VERIFY(!(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
4503 if (tp
->t_mpflags
& TMPF_EMBED_DSN
) {
4504 VERIFY(m
->m_flags
& M_PKTHDR
);
4505 m
->m_pkthdr
.mp_dsn
= tp
->t_rcv_map
.mpt_dsn
;
4506 m
->m_pkthdr
.mp_rseq
= tp
->t_rcv_map
.mpt_sseq
;
4507 m
->m_pkthdr
.mp_rlen
= tp
->t_rcv_map
.mpt_len
;
4508 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
4509 tp
->t_mpflags
&= ~TMPF_EMBED_DSN
;
4510 tp
->t_mpflags
|= TMPF_MPTCP_ACKNOW
;
4515 mptcp_adj_rmap(struct socket
*so
, struct mbuf
*m
)
4518 u_int32_t sseq
, datalen
;
4519 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
4520 u_int32_t old_rcvnxt
= 0;
4522 if (m_pktlen(m
) == 0)
4525 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4526 VERIFY(m
->m_flags
& M_PKTHDR
);
4528 dsn
= m
->m_pkthdr
.mp_dsn
;
4529 sseq
= m
->m_pkthdr
.mp_rseq
+ tp
->irs
;
4530 datalen
= m
->m_pkthdr
.mp_rlen
;
4532 /* data arrived without an DSS option mapping */
4534 /* initial subflow can fallback right after SYN handshake */
4535 mptcp_notify_mpfail(so
);
4539 /* In the common case, data is in window and in sequence */
4540 if (m
->m_pkthdr
.len
== (int)datalen
) {
4541 mptcp_adj_rcvnxt(tp
, m
);
4545 if (m
->m_pkthdr
.len
> (int)datalen
) {
4546 panic("%s: mbuf len = %d expected = %d", __func__
,
4547 m
->m_pkthdr
.len
, datalen
);
4550 old_rcvnxt
= tp
->rcv_nxt
- m
->m_pkthdr
.len
;
4551 if (SEQ_GT(old_rcvnxt
, sseq
)) {
4552 /* data trimmed from the left */
4553 int off
= old_rcvnxt
- sseq
;
4554 m
->m_pkthdr
.mp_dsn
+= off
;
4555 m
->m_pkthdr
.mp_rseq
+= off
;
4556 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4557 } else if (old_rcvnxt
== sseq
) {
4559 * Data was trimmed from the right
4561 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4563 /* handle gracefully with reass or fallback */
4564 mptcp_notify_mpfail(so
);
4565 m
->m_pkthdr
.pkt_flags
&= ~PKTF_MPTCP
;
4569 mptcp_adj_rcvnxt(tp
, m
);
4574 * Following routines help with failure detection and failover of data
4575 * transfer from one subflow to another.
4578 mptcp_act_on_txfail(struct socket
*so
)
4580 struct tcpcb
*tp
= NULL
;
4581 struct inpcb
*inp
= sotoinpcb(so
);
4586 tp
= intotcpcb(inp
);
4590 if (tp
->t_state
!= TCPS_ESTABLISHED
)
4591 mptcplog((LOG_INFO
, "%s: state = %d \n", __func__
,
4594 mptcplog((LOG_INFO
, "%s: Failover = %d \n", __func__
,
4595 (so
->so_flags
& SOF_MP_TRYFAILOVER
) ? 1 : 0));
4597 if (so
->so_flags
& SOF_MP_TRYFAILOVER
) {
4601 so
->so_flags
|= SOF_MP_TRYFAILOVER
;
4602 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPFAILOVER
));
4606 * Support for MP_FAIL option
4609 mptcp_get_map_for_dsn(struct socket
*so
, u_int64_t dsn_fail
, u_int32_t
*tcp_seq
)
4611 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4620 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4621 VERIFY(m
->m_flags
& M_PKTHDR
);
4622 dsn
= m
->m_pkthdr
.mp_dsn
;
4623 datalen
= m
->m_pkthdr
.mp_rlen
;
4624 if (MPTCP_SEQ_LEQ(dsn
, dsn_fail
) &&
4625 (MPTCP_SEQ_GEQ(dsn
+ datalen
, dsn_fail
))) {
4626 off
= dsn_fail
- dsn
;
4627 *tcp_seq
= m
->m_pkthdr
.mp_rseq
+ off
;
4628 mptcplog((LOG_INFO
, "%s: %llu %llu \n",
4629 __func__
, dsn
, dsn_fail
));
4637 * If there was no mbuf data and a fallback to TCP occurred, there's
4638 * not much else to do.
4641 mptcplog((LOG_ERR
, "%s: %llu not found \n", __func__
, dsn_fail
));
4646 * Support for sending contiguous MPTCP bytes in subflow
4647 * Also for preventing sending data with ACK in 3-way handshake
4650 mptcp_adj_sendlen(struct socket
*so
, int32_t off
, int32_t len
)
4652 u_int64_t mdss_dsn
= 0;
4653 u_int32_t mdss_subflow_seq
= 0;
4654 u_int16_t mdss_data_len
= 0;
4659 mptcp_output_getm_dsnmap64(so
, off
, (u_int32_t
)len
,
4660 &mdss_dsn
, &mdss_subflow_seq
, &mdss_data_len
);
4663 * Special case handling for Fast Join. We want to send data right
4664 * after ACK of the 3-way handshake, but not piggyback the data
4665 * with the 3rd ACK of the 3WHS. TMPF_FASTJOINBY2_SEND and
4666 * mdss_data_len control this.
4668 struct tcpcb
*tp
= NULL
;
4669 tp
= intotcpcb(sotoinpcb(so
));
4670 if ((tp
->t_mpflags
& TMPF_JOINED_FLOW
) &&
4671 (tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4672 (!(tp
->t_mpflags
& TMPF_RECVD_JOIN
)) &&
4673 (tp
->t_mpflags
& TMPF_SENT_JOIN
) &&
4674 (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) &&
4675 (!(tp
->t_mpflags
& TMPF_FASTJOINBY2_SEND
))) {
4677 tp
->t_mpflags
|= TMPF_FASTJOINBY2_SEND
;
4679 return (mdss_data_len
);
4683 mptcp_sbspace(struct mptcb
*mpt
)
4689 MPT_LOCK_ASSERT_HELD(mpt
);
4690 MPTE_LOCK_ASSERT_HELD(mpt
->mpt_mpte
);
4692 sb
= &mpt
->mpt_mpte
->mpte_mppcb
->mpp_socket
->so_rcv
;
4693 rcvbuf
= sb
->sb_hiwat
;
4694 space
= ((int32_t)imin((rcvbuf
- sb
->sb_cc
),
4695 (sb
->sb_mbmax
- sb
->sb_mbcnt
)));
4698 /* XXX check if it's too small? */
4704 * Support Fallback to Regular TCP
4707 mptcp_notify_mpready(struct socket
*so
)
4709 struct tcpcb
*tp
= NULL
;
4714 tp
= intotcpcb(sotoinpcb(so
));
4719 DTRACE_MPTCP4(multipath__ready
, struct socket
*, so
,
4720 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
4721 struct tcpcb
*, tp
);
4723 if (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
))
4726 if (tp
->t_mpflags
& TMPF_MPTCP_READY
)
4729 tp
->t_mpflags
&= ~TMPF_TCP_FALLBACK
;
4730 tp
->t_mpflags
|= TMPF_MPTCP_READY
;
4732 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
4736 mptcp_notify_mpfail(struct socket
*so
)
4738 struct tcpcb
*tp
= NULL
;
4743 tp
= intotcpcb(sotoinpcb(so
));
4748 DTRACE_MPTCP4(multipath__failed
, struct socket
*, so
,
4749 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
4750 struct tcpcb
*, tp
);
4752 if (tp
->t_mpflags
& TMPF_TCP_FALLBACK
)
4755 tp
->t_mpflags
&= ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
4756 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
4758 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
4762 * Keepalive helper function
4765 mptcp_ok_to_keepalive(struct mptcb
*mp_tp
)
4768 VERIFY(mp_tp
!= NULL
);
4770 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
4778 * MPTCP t_maxseg adjustment function
4781 mptcp_adj_mss(struct tcpcb
*tp
, boolean_t mtudisc
)
4784 struct mptcb
*mp_tp
= tptomptp(tp
);
4786 #define MPTCP_COMPUTE_LEN { \
4787 mss_lower = sizeof (struct mptcp_dss_ack_opt); \
4789 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
4792 /* adjust to 32-bit boundary + EOL */ \
4794 MPT_UNLOCK(mp_tp); \
4800 * For the first subflow and subsequent subflows, adjust mss for
4801 * most common MPTCP option size, for case where tcp_mss is called
4802 * during option processing and MTU discovery.
4804 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4805 (!(tp
->t_mpflags
& TMPF_JOINED_FLOW
))) {
4809 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4810 (tp
->t_mpflags
& TMPF_SENT_JOIN
)) {
4814 if ((mtudisc
) && (tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) {
4822 * Update the pid, upid, uuid of the subflow so, based on parent so
4825 mptcp_update_last_owner(struct mptsub
*mpts
, struct socket
*parent_mpso
)
4827 struct socket
*subflow_so
= mpts
->mpts_socket
;
4829 MPTS_LOCK_ASSERT_HELD(mpts
);
4831 socket_lock(subflow_so
, 0);
4832 if ((subflow_so
->last_pid
!= parent_mpso
->last_pid
) ||
4833 (subflow_so
->last_upid
!= parent_mpso
->last_upid
)) {
4834 subflow_so
->last_upid
= parent_mpso
->last_upid
;
4835 subflow_so
->last_pid
= parent_mpso
->last_pid
;
4836 uuid_copy(subflow_so
->last_uuid
, parent_mpso
->last_uuid
);
4838 so_update_policy(subflow_so
);
4839 socket_unlock(subflow_so
, 0);
4843 fill_mptcp_subflow(struct socket
*so
, mptcp_flow_t
*flow
, struct mptsub
*mpts
)
4847 tcp_getconninfo(so
, &flow
->flow_ci
);
4848 inp
= sotoinpcb(so
);
4850 if ((inp
->inp_vflag
& INP_IPV6
) != 0) {
4851 flow
->flow_src
.ss_family
= AF_INET6
;
4852 flow
->flow_dst
.ss_family
= AF_INET6
;
4853 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in6
);
4854 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in6
);
4855 SIN6(&flow
->flow_src
)->sin6_port
= inp
->in6p_lport
;
4856 SIN6(&flow
->flow_dst
)->sin6_port
= inp
->in6p_fport
;
4857 SIN6(&flow
->flow_src
)->sin6_addr
= inp
->in6p_laddr
;
4858 SIN6(&flow
->flow_dst
)->sin6_addr
= inp
->in6p_faddr
;
4862 flow
->flow_src
.ss_family
= AF_INET
;
4863 flow
->flow_dst
.ss_family
= AF_INET
;
4864 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in
);
4865 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in
);
4866 SIN(&flow
->flow_src
)->sin_port
= inp
->inp_lport
;
4867 SIN(&flow
->flow_dst
)->sin_port
= inp
->inp_fport
;
4868 SIN(&flow
->flow_src
)->sin_addr
= inp
->inp_laddr
;
4869 SIN(&flow
->flow_dst
)->sin_addr
= inp
->inp_faddr
;
4871 flow
->flow_flags
= mpts
->mpts_flags
;
4872 flow
->flow_cid
= mpts
->mpts_connid
;
4876 mptcp_pcblist SYSCTL_HANDLER_ARGS
4878 #pragma unused(oidp, arg1, arg2)
4882 struct mptses
*mpte
;
4883 struct mptcb
*mp_tp
;
4884 struct mptsub
*mpts
;
4886 conninfo_mptcp_t mptcpci
;
4887 mptcp_flow_t
*flows
= NULL
;
4889 if (req
->newptr
!= USER_ADDR_NULL
)
4892 lck_mtx_lock(&mtcbinfo
.mppi_lock
);
4893 n
= mtcbinfo
.mppi_count
;
4894 if (req
->oldptr
== USER_ADDR_NULL
) {
4895 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
4896 req
->oldidx
= (n
+ n
/8) * sizeof(conninfo_mptcp_t
) +
4897 4 * (n
+ n
/8) * sizeof(mptcp_flow_t
);
4900 TAILQ_FOREACH(mpp
, &mtcbinfo
.mppi_pcbs
, mpp_entry
) {
4902 bzero(&mptcpci
, sizeof(mptcpci
));
4903 lck_mtx_lock(&mpp
->mpp_lock
);
4904 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
4905 mpte
= mptompte(mpp
);
4906 VERIFY(mpte
!= NULL
);
4907 mp_tp
= mpte
->mpte_mptcb
;
4908 VERIFY(mp_tp
!= NULL
);
4909 /* N.B. we don't take the mpt_lock just for the state. */
4910 mptcpci
.mptcpci_state
= mp_tp
->mpt_state
;
4911 mptcpci
.mptcpci_nflows
= mpte
->mpte_numflows
;
4912 len
= sizeof(*flows
) * mpte
->mpte_numflows
;
4913 if (mpte
->mpte_numflows
!= 0) {
4914 flows
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
4915 if (flows
== NULL
) {
4916 lck_mtx_unlock(&mpp
->mpp_lock
);
4919 mptcpci
.mptcpci_len
= sizeof(mptcpci
) +
4920 sizeof(*flows
) * (mptcpci
.mptcpci_nflows
- 1);
4921 error
= SYSCTL_OUT(req
, &mptcpci
,
4922 sizeof(mptcpci
) - sizeof(mptcp_flow_t
));
4924 mptcpci
.mptcpci_len
= sizeof(mptcpci
);
4925 error
= SYSCTL_OUT(req
, &mptcpci
,
4929 lck_mtx_unlock(&mpp
->mpp_lock
);
4930 FREE(flows
, M_TEMP
);
4934 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
4936 so
= mpts
->mpts_socket
;
4938 fill_mptcp_subflow(so
, &flows
[f
], mpts
);
4939 socket_unlock(so
, 0);
4943 lck_mtx_unlock(&mpp
->mpp_lock
);
4945 error
= SYSCTL_OUT(req
, flows
, len
);
4946 FREE(flows
, M_TEMP
);
4951 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
4956 SYSCTL_PROC(_net_inet_mptcp
, OID_AUTO
, pcblist
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
4957 0, 0, mptcp_pcblist
, "S,conninfo_mptcp_t",
4958 "List of active MPTCP connections");
4961 * Check the health of the other subflows and do an mptcp_output if
4962 * there is no other active or functional subflow at the time of
4963 * call of this function.
4966 mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
)
4968 struct mptsub
*from_mpts
= NULL
;
4970 MPTE_LOCK_ASSERT_HELD(mpte
);
4972 MPTS_UNLOCK(to_mpts
);
4974 from_mpts
= mpte
->mpte_active_sub
;
4976 if (from_mpts
== NULL
)
4979 MPTS_LOCK(from_mpts
);
4981 if ((from_mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
4982 (from_mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
4983 MPTS_UNLOCK(from_mpts
);
4987 MPTS_UNLOCK(from_mpts
);
4998 * When WiFi signal starts fading, there's more loss and RTT spikes.
4999 * Check if there has been a large spike by comparing against
5000 * a tolerable RTT spike threshold.
5003 mptcp_no_rto_spike(struct socket
*so
)
5005 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
5008 if (tp
->t_rxtcur
> mptcp_rto_spike_thresh
) {
5009 spike
= tp
->t_rxtcur
- mptcp_rto_spike_thresh
;
5011 mptcplog2((LOG_INFO
, "%s: spike = %d rto = %d",
5012 "best = %d cur = %d\n", __func__
, spike
,
5013 tp
->t_rxtcur
, tp
->t_rttbest
>> TCP_RTT_SHIFT
,
5026 * Set notsent lowat mark on the MPTCB
5029 mptcp_set_notsent_lowat(struct mptses
*mpte
, int optval
)
5031 struct mptcb
*mp_tp
= NULL
;
5034 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5035 mp_tp
= mpte
->mpte_mptcb
;
5038 mp_tp
->mpt_notsent_lowat
= optval
;
5046 mptcp_get_notsent_lowat(struct mptses
*mpte
)
5048 struct mptcb
*mp_tp
= NULL
;
5050 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5051 mp_tp
= mpte
->mpte_mptcb
;
5054 return mp_tp
->mpt_notsent_lowat
;
5060 mptcp_notsent_lowat_check(struct socket
*so
) {
5061 struct mptses
*mpte
;
5063 struct mptcb
*mp_tp
;
5064 struct mptsub
*mpts
;
5068 mpp
= sotomppcb(so
);
5069 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
5073 mpte
= mptompte(mpp
);
5074 mp_tp
= mpte
->mpte_mptcb
;
5077 notsent
= so
->so_snd
.sb_cc
;
5079 if ((notsent
== 0) ||
5080 ((notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)) <=
5081 mp_tp
->mpt_notsent_lowat
)) {
5082 mptcplog3((LOG_INFO
, "%s: lowat %d notsent %d actual %d \n",
5083 __func__
, mp_tp
->mpt_notsent_lowat
, notsent
,
5084 notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)));
5090 /* When Nagle's algorithm is not disabled, it is better
5091 * to wakeup the client even before there is atleast one
5092 * maxseg of data to write.
5094 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
5097 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
5098 struct socket
*subf_so
= mpts
->mpts_socket
;
5099 socket_lock(subf_so
, 0);
5100 struct tcpcb
*tp
= intotcpcb(sotoinpcb(subf_so
));
5102 notsent
= so
->so_snd
.sb_cc
-
5103 (tp
->snd_nxt
- tp
->snd_una
);
5105 if ((tp
->t_flags
& TF_NODELAY
) == 0 &&
5106 notsent
> 0 && (notsent
<= (int)tp
->t_maxseg
)) {
5109 mptcplog3((LOG_INFO
, "%s: lowat %d notsent %d"
5110 " nodelay false \n",
5111 __func__
, mp_tp
->mpt_notsent_lowat
, notsent
));
5112 socket_unlock(subf_so
, 0);