2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
34 #include <sys/mcache.h>
35 #include <sys/resourcevar.h>
36 #include <sys/socket.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/sysctl.h>
43 #include <kern/zalloc.h>
44 #include <kern/locks.h>
46 #include <mach/thread_act.h>
50 #include <net/if_var.h>
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_var.h>
54 #include <netinet/tcp.h>
55 #include <netinet/tcp_fsm.h>
56 #include <netinet/tcp_seq.h>
57 #include <netinet/tcp_var.h>
58 #include <netinet/mptcp_var.h>
59 #include <netinet/mptcp.h>
60 #include <netinet/mptcp_seq.h>
61 #include <netinet/mptcp_timer.h>
62 #include <libkern/crypto/sha1.h>
64 #include <netinet6/in6_pcb.h>
65 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
70 * Notes on MPTCP implementation.
72 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
73 * communication domain. The structure mtcbinfo describes the MPTCP instance
74 * of a Multipath protocol in that domain. It is used to keep track of all
75 * MPTCP PCB instances in the system, and is protected by the global lock
78 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
79 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
80 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
81 * allocated from the same memory block, and each structure has a pointer
82 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
83 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
84 * PCB (mppcb) as well as the MPTCP Session (mptses).
86 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
87 * in particular, the list of subflows as well as the MPTCP thread.
89 * A functioning MPTCP Session consists of one or more subflow sockets. Each
90 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
91 * represented by the mptsub structure. Because each subflow requires access
92 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
93 * subflow. This gets decremented prior to the subflow's destruction. The
94 * subflow lock (mpts_lock) is used to protect accesses to the subflow.
96 * To handle events (read, write, control) from the subflows, an MPTCP thread
97 * is created; currently, there is one thread per MPTCP Session. In order to
98 * prevent the MPTCP socket from being destroyed while being accessed by the
99 * MPTCP thread, we bump up the MPTCP socket's so_usecount for the thread,
100 * which will be decremented prior to the thread's termination. The thread
101 * lock (mpte_thread_lock) is used to synchronize its signalling.
103 * Lock ordering is defined as follows:
105 * mtcbinfo (mppi_lock)
111 * It is not a requirement that all of the above locks need to be acquired
112 * in succession, but the correct lock ordering must be followed when there
113 * are more than one locks that need to be held. The MPTCP thread lock is
114 * is not constrained by this arrangement, because none of the other locks
115 * is ever acquired while holding mpte_thread_lock; therefore it may be called
116 * at any moment to signal the thread.
118 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
119 * work is done by the MPTCP garbage collector which is invoked on demand by
120 * the PF_MULTIPATH garbage collector. This process will take place once all
121 * of the subflows have been destroyed, and the MPTCP thread be instructed to
125 static void mptcp_sesdestroy(struct mptses
*);
126 static void mptcp_thread_signal_locked(struct mptses
*);
127 static void mptcp_thread_terminate_signal(struct mptses
*);
128 static void mptcp_thread_dowork(struct mptses
*);
129 static void mptcp_thread_func(void *, wait_result_t
);
130 static void mptcp_thread_destroy(struct mptses
*);
131 static void mptcp_key_pool_init(void);
132 static void mptcp_attach_to_subf(struct socket
*, struct mptcb
*, uint8_t);
133 static void mptcp_detach_mptcb_from_subf(struct mptcb
*, struct socket
*);
134 static void mptcp_conn_properties(struct mptcb
*);
135 static void mptcp_init_statevars(struct mptcb
*);
137 static uint32_t mptcp_gc(struct mppcbinfo
*);
138 static int mptcp_subflow_socreate(struct mptses
*, struct mptsub
*,
139 int, struct proc
*, struct socket
**);
140 static int mptcp_subflow_soclose(struct mptsub
*, struct socket
*);
141 static int mptcp_subflow_soconnectx(struct mptses
*, struct mptsub
*);
142 static int mptcp_subflow_soreceive(struct socket
*, struct sockaddr
**,
143 struct uio
*, struct mbuf
**, struct mbuf
**, int *);
144 static void mptcp_subflow_rupcall(struct socket
*, void *, int);
145 static void mptcp_subflow_input(struct mptses
*, struct mptsub
*);
146 static void mptcp_subflow_wupcall(struct socket
*, void *, int);
147 static void mptcp_subflow_eupcall(struct socket
*, void *, uint32_t);
148 static void mptcp_update_last_owner(struct mptsub
*, struct socket
*);
149 static void mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
);
150 static void mptcp_get_rtt_measurement(struct mptsub
*, struct mptses
*);
153 * Possible return values for subflow event handlers. Note that success
154 * values must be greater or equal than MPTS_EVRET_OK. Values less than that
155 * indicate errors or actions which require immediate attention; they will
156 * prevent the rest of the handlers from processing their respective events
157 * until the next round of events processing.
160 MPTS_EVRET_DELETE
= 1, /* delete this subflow */
161 MPTS_EVRET_OK
= 2, /* OK */
162 MPTS_EVRET_CONNECT_PENDING
= 3, /* resume pended connects */
163 MPTS_EVRET_DISCONNECT_FALLBACK
= 4, /* abort all but preferred */
166 static ev_ret_t
mptcp_subflow_events(struct mptses
*, struct mptsub
*, uint64_t *);
167 static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses
*, struct mptsub
*, uint64_t *);
168 static ev_ret_t
mptcp_subflow_cantrcvmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
169 static ev_ret_t
mptcp_subflow_cantsendmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
170 static ev_ret_t
mptcp_subflow_timeout_ev(struct mptses
*, struct mptsub
*, uint64_t *);
171 static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses
*, struct mptsub
*, uint64_t *);
172 static ev_ret_t
mptcp_subflow_failover_ev(struct mptses
*, struct mptsub
*, uint64_t *);
173 static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses
*, struct mptsub
*, uint64_t *);
174 static ev_ret_t
mptcp_subflow_suspend_ev(struct mptses
*, struct mptsub
*, uint64_t *);
175 static ev_ret_t
mptcp_subflow_resume_ev(struct mptses
*, struct mptsub
*, uint64_t *);
176 static ev_ret_t
mptcp_subflow_connected_ev(struct mptses
*, struct mptsub
*, uint64_t *);
177 static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses
*, struct mptsub
*, uint64_t *);
178 static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses
*, struct mptsub
*, uint64_t *);
179 static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses
*, struct mptsub
*, uint64_t *);
180 static ev_ret_t
mptcp_fastjoin_ev(struct mptses
*, struct mptsub
*, uint64_t *);
181 static ev_ret_t
mptcp_deleteok_ev(struct mptses
*, struct mptsub
*, uint64_t *);
182 static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
184 static const char *mptcp_evret2str(ev_ret_t
);
186 static mptcp_key_t
*mptcp_reserve_key(void);
187 static int mptcp_do_sha1(mptcp_key_t
*, char *, int);
188 static int mptcp_init_authparms(struct mptcb
*);
190 static unsigned int mptsub_zone_size
; /* size of mptsub */
191 static struct zone
*mptsub_zone
; /* zone for mptsub */
193 static unsigned int mptopt_zone_size
; /* size of mptopt */
194 static struct zone
*mptopt_zone
; /* zone for mptopt */
196 static unsigned int mpt_subauth_entry_size
; /* size of subf auth entry */
197 static struct zone
*mpt_subauth_zone
; /* zone of subf auth entry */
199 struct mppcbinfo mtcbinfo
;
201 static struct mptcp_keys_pool_head mptcp_keys_pool
;
203 #define MPTCP_SUBFLOW_WRITELEN (8 * 1024) /* bytes to write each time */
204 #define MPTCP_SUBFLOW_READLEN (8 * 1024) /* bytes to read each time */
206 SYSCTL_DECL(_net_inet
);
208 SYSCTL_NODE(_net_inet
, OID_AUTO
, mptcp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "MPTCP");
210 uint32_t mptcp_dbg_area
= 0; /* more noise if greater than 1 */
211 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, dbg_area
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
212 &mptcp_dbg_area
, 0, "MPTCP debug area");
214 uint32_t mptcp_dbg_level
= 0;
215 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, dbg_level
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
216 &mptcp_dbg_level
, 0, "MPTCP debug level");
219 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
220 &mtcbinfo
.mppi_count
, 0, "Number of active PCBs");
223 * Since there is one kernel thread per mptcp socket, imposing an artificial
224 * limit on number of allowed mptcp sockets.
226 uint32_t mptcp_socket_limit
= MPPCB_LIMIT
;
227 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, sk_lim
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
228 &mptcp_socket_limit
, 0, "MPTCP socket limit");
231 * SYSCTL to turn on delayed cellular subflow start.
233 uint32_t mptcp_delayed_subf_start
= 0;
234 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, delayed
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
235 &mptcp_delayed_subf_start
, 0, "MPTCP Delayed Subflow start");
238 * sysctl to use network status hints from symptomsd
240 uint32_t mptcp_use_symptomsd
= 1;
241 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, usesymptoms
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
242 &mptcp_use_symptomsd
, 0, "MPTCP Use SymptomsD");
244 static struct protosw mptcp_subflow_protosw
;
245 static struct pr_usrreqs mptcp_subflow_usrreqs
;
247 static struct ip6protosw mptcp_subflow_protosw6
;
248 static struct pr_usrreqs mptcp_subflow_usrreqs6
;
251 typedef struct mptcp_subflow_event_entry
{
252 uint64_t sofilt_hint_mask
;
253 ev_ret_t (*sofilt_hint_ev_hdlr
)(
256 uint64_t *p_mpsofilt_hint
);
259 static mptsub_ev_entry_t mpsub_ev_entry_tbl
[] = {
261 .sofilt_hint_mask
= SO_FILT_HINT_MPCANTRCVMORE
,
262 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpcantrcvmore_ev
,
265 .sofilt_hint_mask
= SO_FILT_HINT_MPFAILOVER
,
266 .sofilt_hint_ev_hdlr
= mptcp_subflow_failover_ev
,
269 .sofilt_hint_mask
= SO_FILT_HINT_CONNRESET
,
270 .sofilt_hint_ev_hdlr
= mptcp_subflow_connreset_ev
,
273 .sofilt_hint_mask
= SO_FILT_HINT_MUSTRST
,
274 .sofilt_hint_ev_hdlr
= mptcp_subflow_mustrst_ev
,
277 .sofilt_hint_mask
= SO_FILT_HINT_CANTRCVMORE
,
278 .sofilt_hint_ev_hdlr
= mptcp_subflow_cantrcvmore_ev
,
280 { .sofilt_hint_mask
= SO_FILT_HINT_CANTSENDMORE
,
281 .sofilt_hint_ev_hdlr
= mptcp_subflow_cantsendmore_ev
,
284 .sofilt_hint_mask
= SO_FILT_HINT_TIMEOUT
,
285 .sofilt_hint_ev_hdlr
= mptcp_subflow_timeout_ev
,
288 .sofilt_hint_mask
= SO_FILT_HINT_NOSRCADDR
,
289 .sofilt_hint_ev_hdlr
= mptcp_subflow_nosrcaddr_ev
,
292 .sofilt_hint_mask
= SO_FILT_HINT_IFDENIED
,
293 .sofilt_hint_ev_hdlr
= mptcp_subflow_ifdenied_ev
,
296 .sofilt_hint_mask
= SO_FILT_HINT_SUSPEND
,
297 .sofilt_hint_ev_hdlr
= mptcp_subflow_suspend_ev
,
300 .sofilt_hint_mask
= SO_FILT_HINT_RESUME
,
301 .sofilt_hint_ev_hdlr
= mptcp_subflow_resume_ev
,
304 .sofilt_hint_mask
= SO_FILT_HINT_CONNECTED
,
305 .sofilt_hint_ev_hdlr
= mptcp_subflow_connected_ev
,
308 .sofilt_hint_mask
= SO_FILT_HINT_MPSTATUS
,
309 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpstatus_ev
,
312 .sofilt_hint_mask
= SO_FILT_HINT_DELETEOK
,
313 .sofilt_hint_ev_hdlr
= mptcp_deleteok_ev
,
316 .sofilt_hint_mask
= SO_FILT_HINT_DISCONNECTED
,
317 .sofilt_hint_ev_hdlr
= mptcp_subflow_disconnected_ev
,
320 .sofilt_hint_mask
= SO_FILT_HINT_MPFASTJ
,
321 .sofilt_hint_ev_hdlr
= mptcp_fastjoin_ev
,
326 * Protocol pr_init callback.
329 mptcp_init(struct protosw
*pp
, struct domain
*dp
)
332 static int mptcp_initialized
= 0;
335 struct ip6protosw
*prp6
;
338 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
|PR_ATTACHED
)) == PR_ATTACHED
);
340 /* do this only once */
341 if (mptcp_initialized
)
343 mptcp_initialized
= 1;
346 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
347 * we must be able to find IPPROTO_TCP entries for both.
349 prp
= pffindproto_locked(PF_INET
, IPPROTO_TCP
, SOCK_STREAM
);
351 bcopy(prp
, &mptcp_subflow_protosw
, sizeof (*prp
));
352 bcopy(prp
->pr_usrreqs
, &mptcp_subflow_usrreqs
,
353 sizeof (mptcp_subflow_usrreqs
));
354 mptcp_subflow_protosw
.pr_entry
.tqe_next
= NULL
;
355 mptcp_subflow_protosw
.pr_entry
.tqe_prev
= NULL
;
356 mptcp_subflow_protosw
.pr_usrreqs
= &mptcp_subflow_usrreqs
;
357 mptcp_subflow_usrreqs
.pru_soreceive
= mptcp_subflow_soreceive
;
358 mptcp_subflow_usrreqs
.pru_rcvoob
= pru_rcvoob_notsupp
;
360 * Socket filters shouldn't attach/detach to/from this protosw
361 * since pr_protosw is to be used instead, which points to the
362 * real protocol; if they do, it is a bug and we should panic.
364 mptcp_subflow_protosw
.pr_filter_head
.tqh_first
=
365 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
366 mptcp_subflow_protosw
.pr_filter_head
.tqh_last
=
367 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
370 prp6
= (struct ip6protosw
*)pffindproto_locked(PF_INET6
,
371 IPPROTO_TCP
, SOCK_STREAM
);
372 VERIFY(prp6
!= NULL
);
373 bcopy(prp6
, &mptcp_subflow_protosw6
, sizeof (*prp6
));
374 bcopy(prp6
->pr_usrreqs
, &mptcp_subflow_usrreqs6
,
375 sizeof (mptcp_subflow_usrreqs6
));
376 mptcp_subflow_protosw6
.pr_entry
.tqe_next
= NULL
;
377 mptcp_subflow_protosw6
.pr_entry
.tqe_prev
= NULL
;
378 mptcp_subflow_protosw6
.pr_usrreqs
= &mptcp_subflow_usrreqs6
;
379 mptcp_subflow_usrreqs6
.pru_soreceive
= mptcp_subflow_soreceive
;
380 mptcp_subflow_usrreqs6
.pru_rcvoob
= pru_rcvoob_notsupp
;
382 * Socket filters shouldn't attach/detach to/from this protosw
383 * since pr_protosw is to be used instead, which points to the
384 * real protocol; if they do, it is a bug and we should panic.
386 mptcp_subflow_protosw6
.pr_filter_head
.tqh_first
=
387 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
388 mptcp_subflow_protosw6
.pr_filter_head
.tqh_last
=
389 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
392 bzero(&mtcbinfo
, sizeof (mtcbinfo
));
393 TAILQ_INIT(&mtcbinfo
.mppi_pcbs
);
394 mtcbinfo
.mppi_size
= sizeof (struct mpp_mtp
);
395 if ((mtcbinfo
.mppi_zone
= zinit(mtcbinfo
.mppi_size
,
396 1024 * mtcbinfo
.mppi_size
, 8192, "mptcb")) == NULL
) {
397 panic("%s: unable to allocate MPTCP PCB zone\n", __func__
);
400 zone_change(mtcbinfo
.mppi_zone
, Z_CALLERACCT
, FALSE
);
401 zone_change(mtcbinfo
.mppi_zone
, Z_EXPAND
, TRUE
);
403 mtcbinfo
.mppi_lock_grp_attr
= lck_grp_attr_alloc_init();
404 mtcbinfo
.mppi_lock_grp
= lck_grp_alloc_init("mppcb",
405 mtcbinfo
.mppi_lock_grp_attr
);
406 mtcbinfo
.mppi_lock_attr
= lck_attr_alloc_init();
407 lck_mtx_init(&mtcbinfo
.mppi_lock
, mtcbinfo
.mppi_lock_grp
,
408 mtcbinfo
.mppi_lock_attr
);
410 mtcbinfo
.mppi_gc
= mptcp_gc
;
411 mtcbinfo
.mppi_timer
= mptcp_timer
;
412 mtcbinfo
.mppi_pcbe_create
= mptcp_sescreate
;
414 /* attach to MP domain for garbage collection to take place */
415 mp_pcbinfo_attach(&mtcbinfo
);
417 mptsub_zone_size
= sizeof (struct mptsub
);
418 if ((mptsub_zone
= zinit(mptsub_zone_size
, 1024 * mptsub_zone_size
,
419 8192, "mptsub")) == NULL
) {
420 panic("%s: unable to allocate MPTCP subflow zone\n", __func__
);
423 zone_change(mptsub_zone
, Z_CALLERACCT
, FALSE
);
424 zone_change(mptsub_zone
, Z_EXPAND
, TRUE
);
426 mptopt_zone_size
= sizeof (struct mptopt
);
427 if ((mptopt_zone
= zinit(mptopt_zone_size
, 128 * mptopt_zone_size
,
428 1024, "mptopt")) == NULL
) {
429 panic("%s: unable to allocate MPTCP option zone\n", __func__
);
432 zone_change(mptopt_zone
, Z_CALLERACCT
, FALSE
);
433 zone_change(mptopt_zone
, Z_EXPAND
, TRUE
);
435 mpt_subauth_entry_size
= sizeof (struct mptcp_subf_auth_entry
);
436 if ((mpt_subauth_zone
= zinit(mpt_subauth_entry_size
,
437 1024 * mpt_subauth_entry_size
, 8192, "mptauth")) == NULL
) {
438 panic("%s: unable to allocate MPTCP address auth zone \n",
442 zone_change(mpt_subauth_zone
, Z_CALLERACCT
, FALSE
);
443 zone_change(mpt_subauth_zone
, Z_EXPAND
, TRUE
);
445 /* Set up a list of unique keys */
446 mptcp_key_pool_init();
450 * Create an MPTCP session, called as a result of opening a MPTCP socket.
453 mptcp_sescreate(struct socket
*mp_so
, struct mppcb
*mpp
)
455 struct mppcbinfo
*mppi
;
461 mppi
= mpp
->mpp_pcbinfo
;
462 VERIFY(mppi
!= NULL
);
464 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
465 __IGNORE_WCASTALIGN(mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
);
467 /* MPTCP Multipath PCB Extension */
468 bzero(mpte
, sizeof (*mpte
));
469 VERIFY(mpp
->mpp_pcbe
== NULL
);
470 mpp
->mpp_pcbe
= mpte
;
471 mpte
->mpte_mppcb
= mpp
;
472 mpte
->mpte_mptcb
= mp_tp
;
474 TAILQ_INIT(&mpte
->mpte_sopts
);
475 TAILQ_INIT(&mpte
->mpte_subflows
);
476 mpte
->mpte_associd
= SAE_ASSOCID_ANY
;
477 mpte
->mpte_connid_last
= SAE_CONNID_ANY
;
479 lck_mtx_init(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
,
480 mppi
->mppi_lock_attr
);
485 * This can be rather expensive if we have lots of MPTCP sockets,
486 * but we need a kernel thread for this model to work. Perhaps we
487 * could amortize the costs by having one worker thread per a group
490 if (kernel_thread_start(mptcp_thread_func
, mpte
,
491 &mpte
->mpte_thread
) != KERN_SUCCESS
) {
495 mp_so
->so_usecount
++; /* for thread */
497 /* MPTCP Protocol Control Block */
498 bzero(mp_tp
, sizeof (*mp_tp
));
499 lck_mtx_init(&mp_tp
->mpt_lock
, mppi
->mppi_lock_grp
,
500 mppi
->mppi_lock_attr
);
501 mp_tp
->mpt_mpte
= mpte
;
502 mp_tp
->mpt_state
= MPTCPS_CLOSED
;
505 lck_mtx_destroy(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
);
506 DTRACE_MPTCP5(session__create
, struct socket
*, mp_so
,
507 struct sockbuf
*, &mp_so
->so_rcv
,
508 struct sockbuf
*, &mp_so
->so_snd
,
509 struct mppcb
*, mpp
, int, error
);
511 return ((error
!= 0) ? NULL
: mpte
);
515 * Destroy an MPTCP session.
518 mptcp_sesdestroy(struct mptses
*mpte
)
522 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
524 mp_tp
= mpte
->mpte_mptcb
;
525 VERIFY(mp_tp
!= NULL
);
528 * MPTCP Multipath PCB Extension section
530 mptcp_flush_sopts(mpte
);
531 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
) && mpte
->mpte_numflows
== 0);
533 lck_mtx_destroy(&mpte
->mpte_thread_lock
,
534 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
537 * MPTCP Protocol Control Block section
539 lck_mtx_destroy(&mp_tp
->mpt_lock
,
540 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
542 DTRACE_MPTCP2(session__destroy
, struct mptses
*, mpte
,
543 struct mptcb
*, mp_tp
);
547 * Allocate an MPTCP socket option structure.
550 mptcp_sopt_alloc(int how
)
554 mpo
= (how
== M_WAITOK
) ? zalloc(mptopt_zone
) :
555 zalloc_noblock(mptopt_zone
);
557 bzero(mpo
, mptopt_zone_size
);
564 * Free an MPTCP socket option structure.
567 mptcp_sopt_free(struct mptopt
*mpo
)
569 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
571 zfree(mptopt_zone
, mpo
);
575 * Add a socket option to the MPTCP socket option list.
578 mptcp_sopt_insert(struct mptses
*mpte
, struct mptopt
*mpo
)
580 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
581 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
582 mpo
->mpo_flags
|= MPOF_ATTACHED
;
583 TAILQ_INSERT_TAIL(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
587 * Remove a socket option from the MPTCP socket option list.
590 mptcp_sopt_remove(struct mptses
*mpte
, struct mptopt
*mpo
)
592 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
593 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
594 mpo
->mpo_flags
&= ~MPOF_ATTACHED
;
595 TAILQ_REMOVE(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
599 * Search for an existing <sopt_level,sopt_name> socket option.
602 mptcp_sopt_find(struct mptses
*mpte
, struct sockopt
*sopt
)
606 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
608 TAILQ_FOREACH(mpo
, &mpte
->mpte_sopts
, mpo_entry
) {
609 if (mpo
->mpo_level
== sopt
->sopt_level
&&
610 mpo
->mpo_name
== sopt
->sopt_name
)
613 VERIFY(mpo
== NULL
|| sopt
->sopt_valsize
== sizeof (int));
619 * Flushes all recorded socket options from an MP socket.
622 mptcp_flush_sopts(struct mptses
*mpte
)
624 struct mptopt
*mpo
, *tmpo
;
626 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
628 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
629 mptcp_sopt_remove(mpte
, mpo
);
630 mptcp_sopt_free(mpo
);
632 VERIFY(TAILQ_EMPTY(&mpte
->mpte_sopts
));
636 * Allocate a MPTCP subflow structure.
639 mptcp_subflow_alloc(int how
)
643 mpts
= (how
== M_WAITOK
) ? zalloc(mptsub_zone
) :
644 zalloc_noblock(mptsub_zone
);
646 bzero(mpts
, mptsub_zone_size
);
647 lck_mtx_init(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
,
648 mtcbinfo
.mppi_lock_attr
);
655 * Deallocate a subflow structure, called when all of the references held
656 * on it have been released. This implies that the subflow has been deleted.
659 mptcp_subflow_free(struct mptsub
*mpts
)
661 MPTS_LOCK_ASSERT_HELD(mpts
);
663 VERIFY(mpts
->mpts_refcnt
== 0);
664 VERIFY(!(mpts
->mpts_flags
& MPTSF_ATTACHED
));
665 VERIFY(mpts
->mpts_mpte
== NULL
);
666 VERIFY(mpts
->mpts_socket
== NULL
);
668 if (mpts
->mpts_src_sl
!= NULL
) {
669 sockaddrlist_free(mpts
->mpts_src_sl
);
670 mpts
->mpts_src_sl
= NULL
;
672 if (mpts
->mpts_dst_sl
!= NULL
) {
673 sockaddrlist_free(mpts
->mpts_dst_sl
);
674 mpts
->mpts_dst_sl
= NULL
;
677 lck_mtx_destroy(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
);
679 zfree(mptsub_zone
, mpts
);
683 * Create an MPTCP subflow socket.
686 mptcp_subflow_socreate(struct mptses
*mpte
, struct mptsub
*mpts
, int dom
,
687 struct proc
*p
, struct socket
**so
)
689 struct mptopt smpo
, *mpo
, *tmpo
;
690 struct socket
*mp_so
;
694 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
695 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
698 * Create the subflow socket (multipath subflow, non-blocking.)
700 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
701 * socket; it will be cleared when the socket is peeled off or closed.
702 * It also indicates to the underlying TCP to handle MPTCP options.
703 * A multipath subflow socket implies SS_NOFDREF state.
705 if ((error
= socreate_internal(dom
, so
, SOCK_STREAM
,
706 IPPROTO_TCP
, p
, SOCF_ASYNC
| SOCF_MP_SUBFLOW
, PROC_NULL
)) != 0) {
707 mptcplog((LOG_ERR
, "MPTCP Socket: subflow socreate mp_so 0x%llx"
708 " unable to create subflow socket error %d\n",
709 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), error
),
710 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
715 VERIFY((*so
)->so_flags
& SOF_MP_SUBFLOW
);
716 VERIFY(((*so
)->so_state
& (SS_NBIO
|SS_NOFDREF
)) ==
717 (SS_NBIO
|SS_NOFDREF
));
719 /* prevent the socket buffers from being compressed */
720 (*so
)->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
721 (*so
)->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
723 bzero(&smpo
, sizeof (smpo
));
724 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
725 smpo
.mpo_level
= SOL_SOCKET
;
728 /* disable SIGPIPE */
729 smpo
.mpo_name
= SO_NOSIGPIPE
;
730 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
733 /* find out if the subflow's source address goes away */
734 smpo
.mpo_name
= SO_NOADDRERR
;
735 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
738 /* enable keepalive */
739 smpo
.mpo_name
= SO_KEEPALIVE
;
740 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
744 * Limit the receive socket buffer size to 64k.
746 * We need to take into consideration the window scale option
747 * which could be negotiated in one subflow but disabled in
749 * XXX This can be improved in the future.
751 smpo
.mpo_name
= SO_RCVBUF
;
752 smpo
.mpo_intval
= MPTCP_RWIN_MAX
;
753 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
756 /* N.B.: set by sosetopt */
757 VERIFY(!((*so
)->so_rcv
.sb_flags
& SB_AUTOSIZE
));
758 /* Prevent automatic socket buffer sizing. */
759 (*so
)->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
761 smpo
.mpo_level
= IPPROTO_TCP
;
762 smpo
.mpo_intval
= mptcp_subflow_keeptime
;
763 smpo
.mpo_name
= TCP_KEEPALIVE
;
764 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
767 /* replay setsockopt(2) on the subflow sockets for eligible options */
768 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
771 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
))
775 * Skip those that are handled internally; these options
776 * should not have been recorded and marked with the
777 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
779 if (mpo
->mpo_level
== SOL_SOCKET
&&
780 (mpo
->mpo_name
== SO_NOSIGPIPE
||
781 mpo
->mpo_name
== SO_NOADDRERR
||
782 mpo
->mpo_name
== SO_KEEPALIVE
))
785 interim
= (mpo
->mpo_flags
& MPOF_INTERIM
);
786 if (mptcp_subflow_sosetopt(mpte
, *so
, mpo
) != 0 && interim
) {
788 mptcplog((LOG_ERR
, "MPTCP Socket: subflow socreate"
790 " sopt %s val %d interim record removed\n",
791 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
792 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
793 buf
, sizeof (buf
)), mpo
->mpo_intval
),
794 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
795 mptcp_sopt_remove(mpte
, mpo
);
796 mptcp_sopt_free(mpo
);
802 * We need to receive everything that the subflow socket has,
803 * so use a customized socket receive function. We will undo
804 * this when the socket is peeled off or closed.
806 mpts
->mpts_oprotosw
= (*so
)->so_proto
;
809 (*so
)->so_proto
= &mptcp_subflow_protosw
;
813 (*so
)->so_proto
= (struct protosw
*)&mptcp_subflow_protosw6
;
822 socket_unlock(*so
, 0);
824 DTRACE_MPTCP4(subflow__create
, struct mptses
*, mpte
,
825 struct mptsub
*, mpts
, int, dom
, int, error
);
831 * Close an MPTCP subflow socket.
833 * Note that this may be called on an embryonic subflow, and the only
834 * thing that is guaranteed valid is the protocol-user request.
837 mptcp_subflow_soclose(struct mptsub
*mpts
, struct socket
*so
)
839 MPTS_LOCK_ASSERT_HELD(mpts
);
842 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
843 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
845 /* restore protocol-user requests */
846 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
847 so
->so_proto
= mpts
->mpts_oprotosw
;
848 socket_unlock(so
, 0);
850 mpts
->mpts_socket
= NULL
; /* may already be NULL */
852 DTRACE_MPTCP5(subflow__close
, struct mptsub
*, mpts
,
854 struct sockbuf
*, &so
->so_rcv
,
855 struct sockbuf
*, &so
->so_snd
,
856 struct mptses
*, mpts
->mpts_mpte
);
858 return (soclose(so
));
862 * Connect an MPTCP subflow socket.
864 * This may be called inline as part of adding a subflow, or asynchronously
865 * by the thread (upon progressing to MPTCPF_JOIN_READY). Note that in the
866 * pending connect case, the subflow socket may have been bound to an interface
867 * and/or a source IP address which may no longer be around by the time this
868 * routine is called; in that case the connect attempt will most likely fail.
871 mptcp_subflow_soconnectx(struct mptses
*mpte
, struct mptsub
*mpts
)
876 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
877 MPTS_LOCK_ASSERT_HELD(mpts
);
879 VERIFY((mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)) ==
881 VERIFY(mpts
->mpts_socket
!= NULL
);
882 so
= mpts
->mpts_socket
;
883 af
= mpts
->mpts_family
;
885 if (af
== AF_INET
|| af
== AF_INET6
) {
886 struct sockaddr_entry
*dst_se
;
887 char dbuf
[MAX_IPv6_STR_LEN
];
889 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
890 VERIFY(dst_se
!= NULL
);
892 mptcplog((LOG_DEBUG
, "MPTCP Socket: connectx mp_so 0x%llx "
893 "dst %s[%d] cid %d [pended %s]\n",
894 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
895 inet_ntop(af
, ((af
== AF_INET
) ?
896 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
897 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
898 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
899 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
900 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
902 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
904 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
907 mpts
->mpts_flags
&= ~MPTSF_CONNECT_PENDING
;
910 mptcp_attach_to_subf(so
, mpte
->mpte_mptcb
, mpte
->mpte_addrid_last
);
912 /* connect the subflow socket */
913 error
= soconnectxlocked(so
, &mpts
->mpts_src_sl
, &mpts
->mpts_dst_sl
,
914 mpts
->mpts_mpcr
.mpcr_proc
, mpts
->mpts_mpcr
.mpcr_ifscope
,
915 mpte
->mpte_associd
, NULL
, CONNREQF_MPTCP
,
916 &mpts
->mpts_mpcr
, sizeof (mpts
->mpts_mpcr
), NULL
, NULL
);
917 socket_unlock(so
, 0);
919 /* Allocate a unique address id per subflow */
920 mpte
->mpte_addrid_last
++;
921 if (mpte
->mpte_addrid_last
== 0)
922 mpte
->mpte_addrid_last
++;
924 DTRACE_MPTCP3(subflow__connect
, struct mptses
*, mpte
,
925 struct mptsub
*, mpts
, int, error
);
931 * MPTCP subflow socket receive routine, derived from soreceive().
934 mptcp_subflow_soreceive(struct socket
*so
, struct sockaddr
**psa
,
935 struct uio
*uio
, struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
938 int flags
, error
= 0;
939 struct proc
*p
= current_proc();
940 struct mbuf
*m
, **mp
= mp0
;
941 struct mbuf
*nextrecord
;
944 VERIFY(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
);
946 #ifdef MORE_LOCKING_DEBUG
947 if (so
->so_usecount
== 1) {
948 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
953 * We return all that is there in the subflow's socket receive buffer
954 * to the MPTCP layer, so we require that the caller passes in the
955 * expected parameters.
957 if (mp
== NULL
|| controlp
!= NULL
) {
958 socket_unlock(so
, 1);
965 flags
= *flagsp
&~ MSG_EOR
;
969 if (flags
& (MSG_PEEK
|MSG_OOB
|MSG_NEEDSA
|MSG_WAITALL
|MSG_WAITSTREAM
)) {
970 socket_unlock(so
, 1);
973 flags
|= (MSG_DONTWAIT
|MSG_NBIO
);
976 * If a recv attempt is made on a previously-accepted socket
977 * that has been marked as inactive (disconnected), reject
980 if (so
->so_flags
& SOF_DEFUNCT
) {
981 struct sockbuf
*sb
= &so
->so_rcv
;
984 SODEFUNCTLOG(("%s[%d]: defunct so 0x%llx [%d,%d] (%d)\n",
985 __func__
, proc_pid(p
), (uint64_t)VM_KERNEL_ADDRPERM(so
),
986 SOCK_DOM(so
), SOCK_TYPE(so
), error
));
988 * This socket should have been disconnected and flushed
989 * prior to being returned from sodefunct(); there should
990 * be no data on its receive list, so panic otherwise.
992 if (so
->so_state
& SS_DEFUNCT
)
993 sb_empty_assert(sb
, __func__
);
994 socket_unlock(so
, 1);
999 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
1000 * and if so just return to the caller. This could happen when
1001 * soreceive() is called by a socket upcall function during the
1002 * time the socket is freed. The socket buffer would have been
1003 * locked across the upcall, therefore we cannot put this thread
1004 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
1005 * we may livelock), because the lock on the socket buffer will
1006 * only be released when the upcall routine returns to its caller.
1007 * Because the socket has been officially closed, there can be
1008 * no further read on it.
1010 * A multipath subflow socket would have its SS_NOFDREF set by
1011 * default, so check for SOF_MP_SUBFLOW socket flag; when the
1012 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1014 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
1015 (SS_NOFDREF
| SS_CANTRCVMORE
) && !(so
->so_flags
& SOF_MP_SUBFLOW
)) {
1016 socket_unlock(so
, 1);
1021 * For consistency with soreceive() semantics, we need to obey
1022 * SB_LOCK in case some other code path has locked the buffer.
1024 error
= sblock(&so
->so_rcv
, 0);
1026 socket_unlock(so
, 1);
1030 m
= so
->so_rcv
.sb_mb
;
1033 * Panic if we notice inconsistencies in the socket's
1034 * receive list; both sb_mb and sb_cc should correctly
1035 * reflect the contents of the list, otherwise we may
1036 * end up with false positives during select() or poll()
1037 * which could put the application in a bad state.
1039 SB_MB_CHECK(&so
->so_rcv
);
1041 if (so
->so_error
!= 0) {
1042 error
= so
->so_error
;
1047 if (so
->so_state
& SS_CANTRCVMORE
) {
1051 if (!(so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
))) {
1057 * MSG_DONTWAIT is implicitly defined and this routine will
1058 * never block, so return EWOULDBLOCK when there is nothing.
1060 error
= EWOULDBLOCK
;
1064 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
1065 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1066 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1069 nextrecord
= m
->m_nextpkt
;
1070 sbfree(&so
->so_rcv
, m
);
1075 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
1080 m
->m_nextpkt
= nextrecord
;
1081 if (nextrecord
== NULL
)
1082 so
->so_rcv
.sb_lastrecord
= m
;
1084 m
= so
->so_rcv
.sb_mb
= nextrecord
;
1085 SB_EMPTY_FIXUP(&so
->so_rcv
);
1087 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1088 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1091 DTRACE_MPTCP3(subflow__receive
, struct socket
*, so
,
1092 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1093 /* notify protocol that we drained all the data */
1094 if ((so
->so_proto
->pr_flags
& PR_WANTRCVD
) && so
->so_pcb
!= NULL
)
1095 (*so
->so_proto
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
1101 sbunlock(&so
->so_rcv
, FALSE
); /* will unlock socket */
1108 * Prepare an MPTCP subflow socket for peeloff(2); basically undo
1109 * the work done earlier when the subflow socket was created.
1112 mptcp_subflow_sopeeloff(struct mptses
*mpte
, struct mptsub
*mpts
,
1116 struct socket
*mp_so
;
1119 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1120 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1121 MPTS_LOCK_ASSERT_HELD(mpts
);
1124 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
1125 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
1127 /* inherit MPTCP socket states */
1128 if (!(mp_so
->so_state
& SS_NBIO
))
1129 so
->so_state
&= ~SS_NBIO
;
1132 * At this point, the socket is not yet closed, as there is at least
1133 * one outstanding usecount previously held by mpts_socket from
1134 * socreate(). Atomically clear SOF_MP_SUBFLOW and SS_NOFDREF here.
1136 so
->so_flags
&= ~SOF_MP_SUBFLOW
;
1137 so
->so_state
&= ~SS_NOFDREF
;
1138 so
->so_flags
&= ~SOF_MPTCP_TRUE
;
1140 /* allow socket buffers to be compressed */
1141 so
->so_rcv
.sb_flags
&= ~SB_NOCOMPRESS
;
1142 so
->so_snd
.sb_flags
&= ~SB_NOCOMPRESS
;
1145 * Allow socket buffer auto sizing.
1147 * This will increase the current 64k buffer size to whatever is best.
1149 if (!(so
->so_rcv
.sb_flags
& SB_USRSIZE
))
1150 so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
1151 if (!(so
->so_snd
.sb_flags
& SB_USRSIZE
))
1152 so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
1154 /* restore protocol-user requests */
1155 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
1156 so
->so_proto
= mpts
->mpts_oprotosw
;
1158 bzero(&smpo
, sizeof (smpo
));
1159 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1160 smpo
.mpo_level
= SOL_SOCKET
;
1162 /* inherit SOF_NOSIGPIPE from parent MP socket */
1163 p
= (mp_so
->so_flags
& SOF_NOSIGPIPE
);
1164 c
= (so
->so_flags
& SOF_NOSIGPIPE
);
1165 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1166 smpo
.mpo_name
= SO_NOSIGPIPE
;
1168 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1170 /* inherit SOF_NOADDRAVAIL from parent MP socket */
1171 p
= (mp_so
->so_flags
& SOF_NOADDRAVAIL
);
1172 c
= (so
->so_flags
& SOF_NOADDRAVAIL
);
1173 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1174 smpo
.mpo_name
= SO_NOADDRERR
;
1176 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1178 /* inherit SO_KEEPALIVE from parent MP socket */
1179 p
= (mp_so
->so_options
& SO_KEEPALIVE
);
1180 c
= (so
->so_options
& SO_KEEPALIVE
);
1181 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1182 smpo
.mpo_name
= SO_KEEPALIVE
;
1184 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1186 /* unset TCP level default keepalive option */
1187 p
= (intotcpcb(sotoinpcb(mp_so
)))->t_keepidle
;
1188 c
= (intotcpcb(sotoinpcb(so
)))->t_keepidle
;
1189 smpo
.mpo_level
= IPPROTO_TCP
;
1190 smpo
.mpo_intval
= 0;
1191 smpo
.mpo_name
= TCP_KEEPALIVE
;
1193 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1194 socket_unlock(so
, 0);
1196 DTRACE_MPTCP5(subflow__peeloff
, struct mptses
*, mpte
,
1197 struct mptsub
*, mpts
, struct socket
*, so
,
1198 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1202 * Establish an initial MPTCP connection (if first subflow and not yet
1203 * connected), or add a subflow to an existing MPTCP connection.
1206 mptcp_subflow_add(struct mptses
*mpte
, struct mptsub
*mpts
,
1207 struct proc
*p
, uint32_t ifscope
)
1209 struct sockaddr_entry
*se
, *src_se
= NULL
, *dst_se
= NULL
;
1210 struct socket
*mp_so
, *so
= NULL
;
1211 struct mptsub_connreq mpcr
;
1212 struct mptcb
*mp_tp
;
1215 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1216 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1217 mp_tp
= mpte
->mpte_mptcb
;
1220 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
1221 /* If the remote end sends Data FIN, refuse subflow adds */
1229 VERIFY(!(mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)));
1230 VERIFY(mpts
->mpts_mpte
== NULL
);
1231 VERIFY(mpts
->mpts_socket
== NULL
);
1232 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
1233 VERIFY(mpts
->mpts_connid
== SAE_CONNID_ANY
);
1235 /* select source (if specified) and destination addresses */
1236 if ((error
= in_selectaddrs(AF_UNSPEC
, &mpts
->mpts_src_sl
, &src_se
,
1237 &mpts
->mpts_dst_sl
, &dst_se
)) != 0)
1240 VERIFY(mpts
->mpts_dst_sl
!= NULL
&& dst_se
!= NULL
);
1241 VERIFY(src_se
== NULL
|| mpts
->mpts_src_sl
!= NULL
);
1242 af
= mpts
->mpts_family
= dst_se
->se_addr
->sa_family
;
1243 VERIFY(src_se
== NULL
|| src_se
->se_addr
->sa_family
== af
);
1244 VERIFY(af
== AF_INET
|| af
== AF_INET6
);
1247 * If the source address is not specified, allocate a storage for
1248 * it, so that later on we can fill it in with the actual source
1249 * IP address chosen by the underlying layer for the subflow after
1252 if (mpts
->mpts_src_sl
== NULL
) {
1254 sockaddrlist_dup(mpts
->mpts_dst_sl
, M_WAITOK
);
1255 if (mpts
->mpts_src_sl
== NULL
) {
1259 se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
1260 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
&&
1261 se
->se_addr
->sa_len
== dst_se
->se_addr
->sa_len
);
1262 bzero(se
->se_addr
, se
->se_addr
->sa_len
);
1263 se
->se_addr
->sa_len
= dst_se
->se_addr
->sa_len
;
1264 se
->se_addr
->sa_family
= dst_se
->se_addr
->sa_family
;
1267 /* create the subflow socket */
1268 if ((error
= mptcp_subflow_socreate(mpte
, mpts
, af
, p
, &so
)) != 0)
1271 /* If fastjoin is requested, set state in mpts */
1272 if ((so
->so_flags
& SOF_MPTCP_FASTJOIN
) &&
1273 (mp_tp
->mpt_state
== MPTCPS_ESTABLISHED
) &&
1274 (mpte
->mpte_nummpcapflows
== 0)) {
1275 mpts
->mpts_flags
|= MPTSF_FASTJ_REQD
;
1276 mpts
->mpts_rel_seq
= 1;
1278 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1283 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
1284 * -1 (SAE_CONNID_ALL).
1286 mpte
->mpte_connid_last
++;
1287 if (mpte
->mpte_connid_last
== SAE_CONNID_ALL
||
1288 mpte
->mpte_connid_last
== SAE_CONNID_ANY
)
1289 mpte
->mpte_connid_last
++;
1291 mpts
->mpts_connid
= mpte
->mpte_connid_last
;
1292 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1293 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1295 /* Allocate a unique address id per subflow */
1296 mpte
->mpte_addrid_last
++;
1297 if (mpte
->mpte_addrid_last
== 0)
1298 mpte
->mpte_addrid_last
++;
1300 /* bind subflow socket to the specified interface */
1301 if (ifscope
!= IFSCOPE_NONE
) {
1303 error
= inp_bindif(sotoinpcb(so
), ifscope
, &mpts
->mpts_outif
);
1305 socket_unlock(so
, 0);
1306 (void) mptcp_subflow_soclose(mpts
, so
);
1309 VERIFY(mpts
->mpts_outif
!= NULL
);
1310 mpts
->mpts_flags
|= MPTSF_BOUND_IF
;
1312 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_add mp_so 0x%llx "
1313 "bindif %s[%d] cid d\n",
1314 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1315 mpts
->mpts_outif
->if_xname
,
1316 ifscope
, mpts
->mpts_connid
),
1317 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
1318 socket_unlock(so
, 0);
1321 /* if source address and/or port is specified, bind to it */
1322 if (src_se
!= NULL
) {
1323 struct sockaddr
*sa
= src_se
->se_addr
;
1324 uint32_t mpts_flags
= 0;
1329 if (SIN(sa
)->sin_addr
.s_addr
!= INADDR_ANY
)
1330 mpts_flags
|= MPTSF_BOUND_IP
;
1331 if ((lport
= SIN(sa
)->sin_port
) != 0)
1332 mpts_flags
|= MPTSF_BOUND_PORT
;
1336 VERIFY(af
== AF_INET6
);
1337 if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa
)->sin6_addr
))
1338 mpts_flags
|= MPTSF_BOUND_IP
;
1339 if ((lport
= SIN6(sa
)->sin6_port
) != 0)
1340 mpts_flags
|= MPTSF_BOUND_PORT
;
1345 error
= sobindlock(so
, sa
, 1); /* will lock/unlock socket */
1347 (void) mptcp_subflow_soclose(mpts
, so
);
1350 mpts
->mpts_flags
|= mpts_flags
;
1352 if (af
== AF_INET
|| af
== AF_INET6
) {
1353 char sbuf
[MAX_IPv6_STR_LEN
];
1355 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_add "
1356 "mp_so 0x%llx bindip %s[%d] cid %d\n",
1357 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1358 inet_ntop(af
, ((af
== AF_INET
) ?
1359 (void *)&SIN(sa
)->sin_addr
.s_addr
:
1360 (void *)&SIN6(sa
)->sin6_addr
), sbuf
, sizeof (sbuf
)),
1361 ntohs(lport
), mpts
->mpts_connid
),
1362 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1367 * Insert the subflow into the list, and associate the MPTCP PCB
1368 * as well as the the subflow socket. From this point on, removing
1369 * the subflow needs to be done via mptcp_subflow_del().
1371 TAILQ_INSERT_TAIL(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1372 mpte
->mpte_numflows
++;
1374 atomic_bitset_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1375 mpts
->mpts_mpte
= mpte
;
1376 mpts
->mpts_socket
= so
;
1377 MPTS_ADDREF_LOCKED(mpts
); /* for being in MPTCP subflow list */
1378 MPTS_ADDREF_LOCKED(mpts
); /* for subflow socket */
1379 mp_so
->so_usecount
++; /* for subflow socket */
1381 /* register for subflow socket read/write events */
1382 (void) sock_setupcalls(so
, mptcp_subflow_rupcall
, mpts
,
1383 mptcp_subflow_wupcall
, mpts
);
1386 * Register for subflow socket control events; ignore
1387 * SO_FILT_HINT_CONNINFO_UPDATED from below since we
1388 * will generate it here.
1390 (void) sock_catchevents(so
, mptcp_subflow_eupcall
, mpts
,
1391 SO_FILT_HINT_CONNRESET
| SO_FILT_HINT_CANTRCVMORE
|
1392 SO_FILT_HINT_CANTSENDMORE
| SO_FILT_HINT_TIMEOUT
|
1393 SO_FILT_HINT_NOSRCADDR
| SO_FILT_HINT_IFDENIED
|
1394 SO_FILT_HINT_SUSPEND
| SO_FILT_HINT_RESUME
|
1395 SO_FILT_HINT_CONNECTED
| SO_FILT_HINT_DISCONNECTED
|
1396 SO_FILT_HINT_MPFAILOVER
| SO_FILT_HINT_MPSTATUS
|
1397 SO_FILT_HINT_MUSTRST
| SO_FILT_HINT_MPFASTJ
|
1398 SO_FILT_HINT_DELETEOK
| SO_FILT_HINT_MPCANTRCVMORE
);
1401 VERIFY(!(mpts
->mpts_flags
&
1402 (MPTSF_CONNECTING
|MPTSF_CONNECTED
|MPTSF_CONNECT_PENDING
)));
1404 bzero(&mpcr
, sizeof (mpcr
));
1406 mpcr
.mpcr_ifscope
= ifscope
;
1408 * Indicate to the TCP subflow whether or not it should establish
1409 * the initial MPTCP connection, or join an existing one. Fill
1410 * in the connection request structure with additional info needed
1411 * by the underlying TCP (to be used in the TCP options, etc.)
1414 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
&& mpte
->mpte_numflows
== 1) {
1415 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
1416 mp_tp
->mpt_localkey
= mptcp_reserve_key();
1417 mptcp_conn_properties(mp_tp
);
1420 soisconnecting(mp_so
);
1421 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ENABLE
;
1423 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
))
1424 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1426 /* avoid starting up cellular subflow unless required */
1427 if ((mptcp_delayed_subf_start
) &&
1428 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
1429 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1432 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ADD
;
1435 mpts
->mpts_mpcr
= mpcr
;
1436 mpts
->mpts_flags
|= MPTSF_CONNECTING
;
1438 if (af
== AF_INET
|| af
== AF_INET6
) {
1439 char dbuf
[MAX_IPv6_STR_LEN
];
1441 mptcplog((LOG_DEBUG
, "MPTCP Socket: %s "
1442 "mp_so 0x%llx dst %s[%d] cid %d "
1443 "[pending %s]\n", __func__
,
1444 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1445 inet_ntop(af
, ((af
== AF_INET
) ?
1446 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
1447 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
1448 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
1449 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
1450 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
1452 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
1454 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1457 /* connect right away if first attempt, or if join can be done now */
1458 if (!(mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
))
1459 error
= mptcp_subflow_soconnectx(mpte
, mpts
);
1464 soevent(mp_so
, SO_FILT_HINT_LOCKED
|
1465 SO_FILT_HINT_CONNINFO_UPDATED
);
1471 * Delete/remove a subflow from an MPTCP. The underlying subflow socket
1472 * will no longer be accessible after a subflow is deleted, thus this
1473 * should occur only after the subflow socket has been disconnected.
1474 * If peeloff(2) is called, leave the socket open.
1477 mptcp_subflow_del(struct mptses
*mpte
, struct mptsub
*mpts
, boolean_t close
)
1479 struct socket
*mp_so
, *so
;
1481 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1482 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1485 so
= mpts
->mpts_socket
;
1488 if (close
&& !((mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
1489 (mpts
->mpts_flags
& MPTSF_USER_DISCONNECT
))) {
1491 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_del returning"
1492 " mp_so 0x%llx flags %x\n",
1493 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_flags
),
1494 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1498 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_del mp_so 0x%llx "
1499 "[u=%d,r=%d] cid %d [close %s] %d %x error %d\n",
1500 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1502 mp_so
->so_retaincnt
, mpts
->mpts_connid
,
1503 (close
? "YES" : "NO"), mpts
->mpts_soerror
,
1506 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1508 VERIFY(mpts
->mpts_mpte
== mpte
);
1509 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1510 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1512 VERIFY(mpts
->mpts_flags
& MPTSF_ATTACHED
);
1513 atomic_bitclear_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1514 TAILQ_REMOVE(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1515 VERIFY(mpte
->mpte_numflows
!= 0);
1516 mpte
->mpte_numflows
--;
1517 if (mpte
->mpte_active_sub
== mpts
)
1518 mpte
->mpte_active_sub
= NULL
;
1521 * Drop references held by this subflow socket; there
1522 * will be no further upcalls made from this point.
1524 (void) sock_setupcalls(so
, NULL
, NULL
, NULL
, NULL
);
1525 (void) sock_catchevents(so
, NULL
, NULL
, 0);
1527 mptcp_detach_mptcb_from_subf(mpte
->mpte_mptcb
, so
);
1530 (void) mptcp_subflow_soclose(mpts
, so
);
1532 VERIFY(mp_so
->so_usecount
!= 0);
1533 mp_so
->so_usecount
--; /* for subflow socket */
1534 mpts
->mpts_mpte
= NULL
;
1535 mpts
->mpts_socket
= NULL
;
1538 MPTS_REMREF(mpts
); /* for MPTCP subflow list */
1539 MPTS_REMREF(mpts
); /* for subflow socket */
1541 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
1545 * Disconnect a subflow socket.
1548 mptcp_subflow_disconnect(struct mptses
*mpte
, struct mptsub
*mpts
,
1552 struct mptcb
*mp_tp
;
1555 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1556 MPTS_LOCK_ASSERT_HELD(mpts
);
1558 VERIFY(mpts
->mpts_mpte
== mpte
);
1559 VERIFY(mpts
->mpts_socket
!= NULL
);
1560 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1561 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1563 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|MPTSF_DISCONNECTED
))
1566 mpts
->mpts_flags
|= MPTSF_DISCONNECTING
;
1569 * If this is coming from disconnectx(2) or issued as part of
1570 * closing the MPTCP socket, the subflow shouldn't stick around.
1571 * Otherwise let it linger around in case the upper layers need
1572 * to retrieve its conninfo.
1575 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
1577 so
= mpts
->mpts_socket
;
1578 mp_tp
= mpte
->mpte_mptcb
;
1580 if (mp_tp
->mpt_state
> MPTCPS_ESTABLISHED
)
1585 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
1586 (so
->so_state
& SS_ISCONNECTED
)) {
1587 mptcplog((LOG_DEBUG
, "MPTCP Socket %s: cid %d fin %d "
1588 "[linger %s]\n", __func__
, mpts
->mpts_connid
, send_dfin
,
1589 (deleteok
? "NO" : "YES")),
1590 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1593 mptcp_send_dfin(so
);
1594 (void) soshutdownlock(so
, SHUT_RD
);
1595 (void) soshutdownlock(so
, SHUT_WR
);
1596 (void) sodisconnectlocked(so
);
1598 socket_unlock(so
, 0);
1600 * Generate a disconnect event for this subflow socket, in case
1601 * the lower layer doesn't do it; this is needed because the
1602 * subflow socket deletion relies on it. This will also end up
1603 * generating SO_FILT_HINT_CONNINFO_UPDATED on the MPTCP socket;
1604 * we cannot do that here because subflow lock is currently held.
1606 mptcp_subflow_eupcall(so
, mpts
, SO_FILT_HINT_DISCONNECTED
);
1610 * Subflow socket read upcall.
1612 * Called when the associated subflow socket posted a read event. The subflow
1613 * socket lock has been released prior to invoking the callback. Note that the
1614 * upcall may occur synchronously as a result of MPTCP performing an action on
1615 * it, or asynchronously as a result of an event happening at the subflow layer.
1616 * Therefore, to maintain lock ordering, the only lock that can be acquired
1617 * here is the thread lock, for signalling purposes.
1620 mptcp_subflow_rupcall(struct socket
*so
, void *arg
, int waitf
)
1622 #pragma unused(so, waitf)
1623 struct mptsub
*mpts
= arg
;
1624 struct mptses
*mpte
= mpts
->mpts_mpte
;
1627 * mpte should never be NULL, except in a race with
1633 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1634 mptcp_thread_signal_locked(mpte
);
1635 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1639 * Subflow socket input.
1641 * Called in the context of the MPTCP thread, for reading data from the
1642 * underlying subflow socket and delivering it to MPTCP.
1645 mptcp_subflow_input(struct mptses
*mpte
, struct mptsub
*mpts
)
1647 struct mbuf
*m
= NULL
;
1650 struct mptsub
*mpts_alt
= NULL
;
1652 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1653 MPTS_LOCK_ASSERT_HELD(mpts
);
1655 DTRACE_MPTCP2(subflow__input
, struct mptses
*, mpte
,
1656 struct mptsub
*, mpts
);
1658 if (!(mpts
->mpts_flags
& MPTSF_CONNECTED
))
1661 so
= mpts
->mpts_socket
;
1663 error
= sock_receive_internal(so
, NULL
, &m
, 0, NULL
);
1664 if (error
!= 0 && error
!= EWOULDBLOCK
) {
1665 mptcplog((LOG_ERR
, "MPTCP Receiver: %s cid %d error %d\n",
1666 __func__
, mpts
->mpts_connid
, error
),
1667 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1669 mpts_alt
= mptcp_get_subflow(mpte
, mpts
, NULL
);
1670 if (mpts_alt
== NULL
) {
1671 if (mptcp_delayed_subf_start
) {
1672 mpts_alt
= mptcp_get_pending_subflow(mpte
,
1675 mptcplog((LOG_DEBUG
,"MPTCP Receiver:"
1676 " %s: pending %d\n",
1677 __func__
, mpts_alt
->mpts_connid
),
1678 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1680 mptcplog((LOG_ERR
, "MPTCP Receiver:"
1681 " %s: no pending flow for cid %d",
1682 __func__
, mpts
->mpts_connid
),
1683 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1686 mptcplog((LOG_ERR
, "MPTCP Receiver: %s: no alt"
1687 " path for cid %d\n", __func__
,
1689 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1693 } else if (error
== 0) {
1694 mptcplog((LOG_DEBUG
, "MPTCP Receiver: %s: cid %d \n",
1695 __func__
, mpts
->mpts_connid
),
1696 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_VERBOSE
);
1699 /* In fallback, make sure to accept data on all but one subflow */
1700 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1701 (!(mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
1708 /* Did we receive data on the backup subflow? */
1709 if (!(mpts
->mpts_flags
& MPTSF_ACTIVE
))
1710 mpts
->mpts_peerswitch
++;
1712 mpts
->mpts_peerswitch
= 0;
1715 * Release subflow lock since this may trigger MPTCP to send,
1716 * possibly on a different subflow. An extra reference has
1717 * been held on the subflow by the MPTCP thread before coming
1718 * here, so we can be sure that it won't go away, in the event
1719 * the MP socket lock gets released.
1722 mptcp_input(mpte
, m
);
1728 * Subflow socket write upcall.
1730 * Called when the associated subflow socket posted a read event. The subflow
1731 * socket lock has been released prior to invoking the callback. Note that the
1732 * upcall may occur synchronously as a result of MPTCP performing an action on
1733 * it, or asynchronously as a result of an event happening at the subflow layer.
1734 * Therefore, to maintain lock ordering, the only lock that can be acquired
1735 * here is the thread lock, for signalling purposes.
1738 mptcp_subflow_wupcall(struct socket
*so
, void *arg
, int waitf
)
1740 #pragma unused(so, waitf)
1741 struct mptsub
*mpts
= arg
;
1742 struct mptses
*mpte
= mpts
->mpts_mpte
;
1745 * mpte should never be NULL except in a race with
1746 * mptcp_subflow_del which doesn't hold socket lock across critical
1747 * section. This upcall is made after releasing the socket lock.
1748 * Interleaving of socket operations becomes possible therefore.
1753 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1754 mptcp_thread_signal_locked(mpte
);
1755 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1759 * Subflow socket output.
1761 * Called for sending data from MPTCP to the underlying subflow socket.
1764 mptcp_subflow_output(struct mptses
*mpte
, struct mptsub
*mpts
)
1766 struct socket
*mp_so
, *so
;
1767 size_t sb_cc
= 0, tot_sent
= 0;
1770 u_int64_t mpt_dsn
= 0;
1771 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1772 struct mbuf
*mpt_mbuf
= NULL
;
1774 struct mbuf
*head
, *tail
;
1776 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1777 MPTS_LOCK_ASSERT_HELD(mpts
);
1778 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1779 so
= mpts
->mpts_socket
;
1781 DTRACE_MPTCP2(subflow__output
, struct mptses
*, mpte
,
1782 struct mptsub
*, mpts
);
1784 /* subflow socket is suspended? */
1785 if (mpts
->mpts_flags
& MPTSF_SUSPENDED
) {
1786 mptcplog((LOG_ERR
, "MPTCP Sender: %s mp_so 0x%llx cid %d is "
1787 "flow controlled\n", __func__
,
1788 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
),
1789 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
1793 /* subflow socket is not MPTCP capable? */
1794 if (!(mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) &&
1795 !(mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1796 !(mpts
->mpts_flags
& MPTSF_FASTJ_SEND
)) {
1797 mptcplog((LOG_ERR
, "MPTCP Sender: %s mp_so 0x%llx cid %d not "
1798 "MPTCP capable\n", __func__
,
1799 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
),
1800 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
1804 /* Remove Addr Option is not sent reliably as per I-D */
1805 if (mpte
->mpte_flags
& MPTE_SND_REM_ADDR
) {
1806 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
1807 tp
->t_rem_aid
= mpte
->mpte_lost_aid
;
1808 if (mptcp_remaddr_enable
)
1809 tp
->t_mpflags
|= TMPF_SND_REM_ADDR
;
1810 mpte
->mpte_flags
&= ~MPTE_SND_REM_ADDR
;
1814 * The mbuf chains containing the metadata (as well as pointing to
1815 * the user data sitting at the MPTCP output queue) would then be
1816 * sent down to the subflow socket.
1818 * Some notes on data sequencing:
1820 * a. Each mbuf must be a M_PKTHDR.
1821 * b. MPTCP metadata is stored in the mptcp_pktinfo structure
1822 * in the mbuf pkthdr structure.
1823 * c. Each mbuf containing the MPTCP metadata must have its
1824 * pkt_flags marked with the PKTF_MPTCP flag.
1827 /* First, drop acknowledged data */
1828 sb_mb
= mp_so
->so_snd
.sb_mb
;
1829 if (sb_mb
== NULL
) {
1833 VERIFY(sb_mb
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
1836 while (mpt_mbuf
&& mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) {
1837 mpt_mbuf
= mpt_mbuf
->m_next
;
1839 if (mpt_mbuf
&& (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
)) {
1840 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1846 if (MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_snduna
)) {
1848 len
= mp_tp
->mpt_snduna
- mpt_dsn
;
1850 sbdrop(&mp_so
->so_snd
, (int)len
);
1855 * In degraded mode, we don't receive data acks, so force free
1856 * mbufs less than snd_nxt
1858 if (mp_so
->so_snd
.sb_mb
== NULL
) {
1863 mpt_dsn
= mp_so
->so_snd
.sb_mb
->m_pkthdr
.mp_dsn
;
1864 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1865 (mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
) &&
1866 MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_sndnxt
)) {
1868 len
= mp_tp
->mpt_sndnxt
- mpt_dsn
;
1869 sbdrop(&mp_so
->so_snd
, (int)len
);
1870 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndnxt
;
1873 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1874 !(mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
)) {
1875 mp_tp
->mpt_flags
|= MPTCPF_POST_FALLBACK_SYNC
;
1876 so
->so_flags1
|= SOF1_POST_FALLBACK_SYNC
;
1877 if (mp_tp
->mpt_flags
& MPTCPF_RECVD_MPFAIL
)
1878 mpts
->mpts_sndnxt
= mp_tp
->mpt_dsn_at_csum_fail
;
1882 * Adjust the subflow's notion of next byte to send based on
1883 * the last unacknowledged byte
1885 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_snduna
)) {
1886 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1888 * With FastJoin, a write before the fastjoin event will use
1889 * an uninitialized relative sequence number.
1891 if (mpts
->mpts_rel_seq
== 0)
1892 mpts
->mpts_rel_seq
= 1;
1896 * Adjust the top level notion of next byte used for retransmissions
1899 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mp_tp
->mpt_snduna
)) {
1900 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
1904 /* Now determine the offset from which to start transmitting data */
1905 sb_mb
= mp_so
->so_snd
.sb_mb
;
1906 sb_cc
= mp_so
->so_snd
.sb_cc
;
1907 if (sb_mb
== NULL
) {
1911 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_sndmax
)) {
1912 off
= mpts
->mpts_sndnxt
- mp_tp
->mpt_snduna
;
1913 sb_cc
-= (size_t)off
;
1921 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1923 while (mpt_mbuf
&& ((mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) ||
1924 (mpt_mbuf
->m_pkthdr
.mp_rlen
<= (u_int32_t
)off
))) {
1925 off
-= mpt_mbuf
->m_pkthdr
.mp_rlen
;
1926 mpt_mbuf
= mpt_mbuf
->m_next
;
1927 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1929 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
1930 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s cid = %d "
1931 "snduna = %llu sndnxt = %llu probe %d\n",
1932 __func__
, mpts
->mpts_connid
,
1933 mp_tp
->mpt_snduna
, mpts
->mpts_sndnxt
,
1934 mpts
->mpts_probecnt
),
1935 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
1937 VERIFY(mpt_mbuf
&& (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
1941 while (tot_sent
< sb_cc
) {
1945 mlen
= mpt_mbuf
->m_pkthdr
.mp_rlen
;
1951 panic("%s: unexpected %lu %lu \n", __func__
,
1955 m
= m_copym_mode(mpt_mbuf
, (int)off
, mlen
, M_DONTWAIT
,
1956 M_COPYM_MUST_COPY_HDR
);
1962 /* Create a DSN mapping for the data (m_copym does it) */
1963 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1964 VERIFY(m
->m_flags
& M_PKTHDR
);
1965 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
1966 m
->m_pkthdr
.pkt_flags
&= ~PKTF_MPSO
;
1967 m
->m_pkthdr
.mp_dsn
= mpt_dsn
+ off
;
1968 m
->m_pkthdr
.mp_rseq
= mpts
->mpts_rel_seq
;
1969 m
->m_pkthdr
.mp_rlen
= mlen
;
1970 mpts
->mpts_rel_seq
+= mlen
;
1971 m
->m_pkthdr
.len
= mlen
;
1982 mpt_mbuf
= mpt_mbuf
->m_next
;
1987 if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
1988 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
1989 tp
->t_mpflags
|= TMPF_FASTJOIN_SEND
;
1992 error
= sock_sendmbuf(so
, NULL
, head
, 0, NULL
);
1994 DTRACE_MPTCP7(send
, struct mbuf
*, head
, struct socket
*, so
,
1995 struct sockbuf
*, &so
->so_rcv
,
1996 struct sockbuf
*, &so
->so_snd
,
1997 struct mptses
*, mpte
, struct mptsub
*, mpts
,
2002 mpts
->mpts_sndnxt
+= tot_sent
;
2004 if (mpts
->mpts_probesoon
&& mpts
->mpts_maxseg
&& tot_sent
) {
2005 tcpstat
.tcps_mp_num_probes
++;
2006 if (tot_sent
< mpts
->mpts_maxseg
)
2007 mpts
->mpts_probecnt
+= 1;
2009 mpts
->mpts_probecnt
+=
2010 tot_sent
/mpts
->mpts_maxseg
;
2015 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mpts
->mpts_sndnxt
)) {
2016 if (MPTCP_DATASEQ_HIGH32(mpts
->mpts_sndnxt
) >
2017 MPTCP_DATASEQ_HIGH32(mp_tp
->mpt_sndnxt
))
2018 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITDSN
;
2019 mp_tp
->mpt_sndnxt
= mpts
->mpts_sndnxt
;
2021 mptcp_cancel_timer(mp_tp
, MPTT_REXMT
);
2024 /* Send once in SYN_SENT state to avoid sending SYN spam */
2025 if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
2026 so
->so_flags
&= ~SOF_MPTCP_FASTJOIN
;
2027 mpts
->mpts_flags
&= ~MPTSF_FASTJ_SEND
;
2030 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) ||
2031 (mpts
->mpts_probesoon
!= 0))
2032 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s cid %d "
2033 "wrote %d %d probe %d probedelta %d\n",
2034 __func__
, mpts
->mpts_connid
, (int)tot_sent
,
2035 (int) sb_cc
, mpts
->mpts_probecnt
,
2036 (tcp_now
- mpts
->mpts_probesoon
)),
2037 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
2039 mptcplog((LOG_ERR
, "MPTCP Sender: %s cid %d error %d len %zd\n",
2040 __func__
, mpts
->mpts_connid
, error
, tot_sent
),
2041 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
2048 * Subflow socket control event upcall.
2050 * Called when the associated subflow socket posted one or more control events.
2051 * The subflow socket lock has been released prior to invoking the callback.
2052 * Note that the upcall may occur synchronously as a result of MPTCP performing
2053 * an action on it, or asynchronously as a result of an event happening at the
2054 * subflow layer. Therefore, to maintain lock ordering, the only lock that can
2055 * be acquired here is the thread lock, for signalling purposes.
2058 mptcp_subflow_eupcall(struct socket
*so
, void *arg
, uint32_t events
)
2061 struct mptsub
*mpts
= arg
;
2062 struct mptses
*mpte
= mpts
->mpts_mpte
;
2064 VERIFY(mpte
!= NULL
);
2066 lck_mtx_lock(&mpte
->mpte_thread_lock
);
2067 atomic_bitset_32(&mpts
->mpts_evctl
, events
);
2068 mptcp_thread_signal_locked(mpte
);
2069 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
2073 * Subflow socket control events.
2075 * Called for handling events related to the underlying subflow socket.
2078 mptcp_subflow_events(struct mptses
*mpte
, struct mptsub
*mpts
,
2079 uint64_t *p_mpsofilt_hint
)
2081 uint32_t events
, save_events
;
2082 ev_ret_t ret
= MPTS_EVRET_OK
;
2084 int mpsub_ev_entry_count
= sizeof(mpsub_ev_entry_tbl
)/
2085 sizeof(mpsub_ev_entry_tbl
[0]);
2086 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2087 MPTS_LOCK_ASSERT_HELD(mpts
);
2089 /* bail if there's nothing to process */
2090 if ((events
= mpts
->mpts_evctl
) == 0)
2093 if (events
& (SO_FILT_HINT_CONNRESET
|SO_FILT_HINT_MUSTRST
|
2094 SO_FILT_HINT_CANTRCVMORE
|SO_FILT_HINT_CANTSENDMORE
|
2095 SO_FILT_HINT_TIMEOUT
|SO_FILT_HINT_NOSRCADDR
|
2096 SO_FILT_HINT_IFDENIED
|SO_FILT_HINT_SUSPEND
|
2097 SO_FILT_HINT_DISCONNECTED
)) {
2098 events
|= SO_FILT_HINT_MPFAILOVER
;
2101 save_events
= events
;
2103 DTRACE_MPTCP3(subflow__events
, struct mptses
*, mpte
,
2104 struct mptsub
*, mpts
, uint32_t, events
);
2106 mptcplog((LOG_DEBUG
, "MPTCP Events: %s cid %d events=%b\n", __func__
,
2107 mpts
->mpts_connid
, events
, SO_FILT_HINT_BITS
),
2108 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_VERBOSE
);
2111 * Process all the socket filter hints and reset the hint
2112 * once it is handled
2114 for (i
= 0; (i
< mpsub_ev_entry_count
) && events
; i
++) {
2115 if ((events
& mpsub_ev_entry_tbl
[i
].sofilt_hint_mask
) &&
2116 (ret
>= MPTS_EVRET_OK
)) {
2118 mpsub_ev_entry_tbl
[i
].sofilt_hint_ev_hdlr(mpte
, mpts
, p_mpsofilt_hint
);
2119 events
&= ~mpsub_ev_entry_tbl
[i
].sofilt_hint_mask
;
2120 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2125 * We should be getting only events specified via sock_catchevents(),
2126 * so loudly complain if we have any unprocessed one(s).
2128 if (events
!= 0 || ret
< MPTS_EVRET_OK
) {
2129 mptcplog((LOG_ERR
, "MPTCP Events %s%s: cid %d evret %s (%d)"
2130 " unhandled events=%b\n",
2131 (events
!= 0) && (ret
== MPTS_EVRET_OK
) ? "MPTCP_ERROR " : "",
2132 __func__
, mpts
->mpts_connid
,
2133 mptcp_evret2str(ret
), ret
, events
, SO_FILT_HINT_BITS
),
2134 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2137 /* clear the ones we've processed */
2138 atomic_bitclear_32(&mpts
->mpts_evctl
, save_events
);
2143 * Handle SO_FILT_HINT_CONNRESET subflow socket event.
2146 mptcp_subflow_connreset_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2147 uint64_t *p_mpsofilt_hint
)
2149 struct socket
*mp_so
, *so
;
2150 struct mptcb
*mp_tp
;
2153 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2154 MPTS_LOCK_ASSERT_HELD(mpts
);
2155 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2156 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2157 mp_tp
= mpte
->mpte_mptcb
;
2158 so
= mpts
->mpts_socket
;
2160 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2161 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2163 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2164 "%s: cid %d [linger %s]\n", __func__
,
2165 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2166 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2169 * We got a TCP RST for this subflow connection.
2171 * Right now, we simply propagate ECONNREFUSED to the MPTCP socket
2172 * client if the MPTCP connection has not been established or
2173 * if the connection has only one subflow and is a connection being
2174 * resumed. Otherwise we close the socket.
2176 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2179 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2180 mpts
->mpts_soerror
= mp_so
->so_error
= ECONNREFUSED
;
2181 } else if (mpte
->mpte_nummpcapflows
< 1) {
2182 mpts
->mpts_soerror
= mp_so
->so_error
= ECONNRESET
;
2183 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNRESET
;
2188 * Keep the subflow socket around, unless the MPTCP socket has
2189 * been detached or the subflow has been disconnected explicitly,
2190 * in which case it should be deleted right away.
2192 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2196 * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event.
2199 mptcp_subflow_cantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2200 uint64_t *p_mpsofilt_hint
)
2202 #pragma unused(p_mpsofilt_hint)
2205 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2206 MPTS_LOCK_ASSERT_HELD(mpts
);
2208 so
= mpts
->mpts_socket
;
2210 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2211 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2212 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2215 * We got a FIN for this subflow connection. This subflow socket
2216 * is no longer available for receiving data;
2217 * The FIN may arrive with data. The data is handed up to the
2218 * mptcp socket and the subflow is disconnected.
2221 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2225 * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event.
2228 mptcp_subflow_cantsendmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2229 uint64_t *p_mpsofilt_hint
)
2231 #pragma unused(p_mpsofilt_hint)
2234 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2235 MPTS_LOCK_ASSERT_HELD(mpts
);
2237 so
= mpts
->mpts_socket
;
2239 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2240 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2241 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2243 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2247 * Handle SO_FILT_HINT_TIMEOUT subflow socket event.
2250 mptcp_subflow_timeout_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2251 uint64_t *p_mpsofilt_hint
)
2253 #pragma unused(p_mpsofilt_hint)
2254 struct socket
*mp_so
, *so
;
2255 struct mptcb
*mp_tp
;
2258 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2259 MPTS_LOCK_ASSERT_HELD(mpts
);
2260 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2261 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2262 mp_tp
= mpte
->mpte_mptcb
;
2263 so
= mpts
->mpts_socket
;
2265 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2266 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2268 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2269 "%s: cid %d [linger %s]\n", __func__
,
2270 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2271 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2273 if (mpts
->mpts_soerror
== 0)
2274 mpts
->mpts_soerror
= ETIMEDOUT
;
2277 * The subflow connection has timed out.
2279 * Right now, we simply propagate ETIMEDOUT to the MPTCP socket
2280 * client if the MPTCP connection has not been established. Otherwise
2283 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2286 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2287 mp_so
->so_error
= ETIMEDOUT
;
2292 * Keep the subflow socket around, unless the MPTCP socket has
2293 * been detached or the subflow has been disconnected explicitly,
2294 * in which case it should be deleted right away.
2296 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2300 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
2303 mptcp_subflow_nosrcaddr_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2304 uint64_t *p_mpsofilt_hint
)
2306 #pragma unused(p_mpsofilt_hint)
2307 struct socket
*mp_so
, *so
;
2308 struct mptcb
*mp_tp
;
2310 struct tcpcb
*tp
= NULL
;
2312 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2313 MPTS_LOCK_ASSERT_HELD(mpts
);
2315 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2316 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2317 mp_tp
= mpte
->mpte_mptcb
;
2318 so
= mpts
->mpts_socket
;
2320 /* Not grabbing socket lock as t_local_aid is write once only */
2321 tp
= intotcpcb(sotoinpcb(so
));
2323 * This overwrites any previous mpte_lost_aid to avoid storing
2324 * too much state when the typical case has only two subflows.
2326 mpte
->mpte_flags
|= MPTE_SND_REM_ADDR
;
2327 mpte
->mpte_lost_aid
= tp
->t_local_aid
;
2329 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2330 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2332 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2333 "%s cid %d [linger %s]\n", __func__
,
2334 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2335 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2337 if (mpts
->mpts_soerror
== 0)
2338 mpts
->mpts_soerror
= EADDRNOTAVAIL
;
2341 * The subflow connection has lost its source address.
2343 * Right now, we simply propagate EADDRNOTAVAIL to the MPTCP socket
2344 * client if the MPTCP connection has not been established. If it
2345 * has been established with one subflow , we keep the MPTCP
2346 * connection valid without any subflows till closed by application.
2347 * This lets tcp connection manager decide whether to close this or
2348 * not as it reacts to reachability changes too.
2350 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2353 if ((mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) &&
2354 (mp_so
->so_flags
& SOF_NOADDRAVAIL
)) {
2355 mp_so
->so_error
= EADDRNOTAVAIL
;
2360 * Keep the subflow socket around, unless the MPTCP socket has
2361 * been detached or the subflow has been disconnected explicitly,
2362 * in which case it should be deleted right away.
2364 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2368 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
2369 * indicates that the remote side sent a Data FIN
2372 mptcp_subflow_mpcantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2373 uint64_t *p_mpsofilt_hint
)
2375 struct socket
*so
, *mp_so
;
2376 struct mptcb
*mp_tp
;
2378 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2379 MPTS_LOCK_ASSERT_HELD(mpts
);
2380 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2381 so
= mpts
->mpts_socket
;
2382 mp_tp
= mpte
->mpte_mptcb
;
2384 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2385 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2386 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2389 * We got a Data FIN for the MPTCP connection.
2390 * The FIN may arrive with data. The data is handed up to the
2391 * mptcp socket and the user is notified so that it may close
2392 * the socket if needed.
2395 if (mp_tp
->mpt_state
== MPTCPS_CLOSE_WAIT
) {
2396 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTRCVMORE
;
2399 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2403 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
2406 mptcp_subflow_failover_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2407 uint64_t *p_mpsofilt_hint
)
2409 struct mptsub
*mpts_alt
= NULL
;
2410 struct socket
*so
= NULL
;
2411 struct socket
*mp_so
;
2412 int altpath_exists
= 0;
2414 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2415 MPTS_LOCK_ASSERT_HELD(mpts
);
2416 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2417 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2418 "%s: mp_so 0x%llx\n", __func__
,
2419 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2420 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2423 mpts_alt
= mptcp_get_subflow(mpte
, mpts
, NULL
);
2426 * If there is no alternate eligible subflow, ignore the
2429 if (mpts_alt
== NULL
) {
2430 mptcplog((LOG_WARNING
, "MPTCP Events: "
2431 "%s: no alternate path\n", __func__
),
2432 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2434 if (mptcp_delayed_subf_start
) {
2435 mpts_alt
= mptcp_get_pending_subflow(mpte
, mpts
);
2436 if (mpts_alt
!= NULL
) {
2437 MPTS_LOCK(mpts_alt
);
2438 (void) mptcp_subflow_soconnectx(mpte
,
2440 MPTS_UNLOCK(mpts_alt
);
2446 MPTS_LOCK(mpts_alt
);
2448 so
= mpts_alt
->mpts_socket
;
2449 if (mpts_alt
->mpts_flags
& MPTSF_FAILINGOVER
) {
2451 /* All data acknowledged and no RTT spike */
2452 if ((so
->so_snd
.sb_cc
== 0) &&
2453 (mptcp_no_rto_spike(so
))) {
2454 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2455 mpts_alt
->mpts_flags
&= ~MPTSF_FAILINGOVER
;
2457 /* no alternate path available */
2460 socket_unlock(so
, 1);
2462 if (altpath_exists
) {
2463 mptcplog((LOG_INFO
, "MPTCP Events: "
2465 __func__
, mpts_alt
->mpts_connid
),
2466 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2467 mpts_alt
->mpts_flags
|= MPTSF_ACTIVE
;
2468 mpts_alt
->mpts_peerswitch
= 0;
2469 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
2470 /* Bring the subflow's notion of snd_nxt into the send window */
2472 mpts_alt
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2474 mpte
->mpte_active_sub
= mpts_alt
;
2477 socket_unlock(so
, 1);
2479 MPTS_UNLOCK(mpts_alt
);
2481 if (altpath_exists
) {
2482 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
2483 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2484 "%s: mp_so 0x%llx switched from "
2485 "%d to %d\n", __func__
,
2486 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
2487 mpts
->mpts_connid
, mpts_alt
->mpts_connid
),
2488 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2489 tcpstat
.tcps_mp_switches
++;
2493 if (altpath_exists
) {
2494 mpts
->mpts_flags
|= MPTSF_FAILINGOVER
;
2495 mpts
->mpts_flags
&= ~MPTSF_ACTIVE
;
2497 mptcplog((LOG_DEBUG
, "MPTCP Events %s: no alt cid = %d\n",
2498 __func__
, mpts
->mpts_connid
),
2499 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2501 so
= mpts
->mpts_socket
;
2503 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2504 socket_unlock(so
, 1);
2506 MPTS_LOCK_ASSERT_HELD(mpts
);
2507 return (MPTS_EVRET_OK
);
2511 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
2514 mptcp_subflow_ifdenied_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2515 uint64_t *p_mpsofilt_hint
)
2517 struct socket
*mp_so
, *so
;
2518 struct mptcb
*mp_tp
;
2521 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2522 MPTS_LOCK_ASSERT_HELD(mpts
);
2523 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2524 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2525 mp_tp
= mpte
->mpte_mptcb
;
2526 so
= mpts
->mpts_socket
;
2528 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2529 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2531 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2532 "%s: cid %d [linger %s]\n", __func__
,
2533 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2534 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2536 if (mpts
->mpts_soerror
== 0)
2537 mpts
->mpts_soerror
= EHOSTUNREACH
;
2540 * The subflow connection cannot use the outgoing interface.
2542 * Right now, we simply propagate EHOSTUNREACH to the MPTCP socket
2543 * client if the MPTCP connection has not been established. If it
2544 * has been established, let the upper layer call disconnectx.
2546 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2547 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_IFDENIED
;
2550 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2551 mp_so
->so_error
= EHOSTUNREACH
;
2556 * Keep the subflow socket around, unless the MPTCP socket has
2557 * been detached or the subflow has been disconnected explicitly,
2558 * in which case it should be deleted right away.
2560 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2564 * Handle SO_FILT_HINT_SUSPEND subflow socket event.
2567 mptcp_subflow_suspend_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2568 uint64_t *p_mpsofilt_hint
)
2570 #pragma unused(p_mpsofilt_hint)
2573 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2574 MPTS_LOCK_ASSERT_HELD(mpts
);
2576 so
= mpts
->mpts_socket
;
2578 /* the subflow connection is being flow controlled */
2579 mpts
->mpts_flags
|= MPTSF_SUSPENDED
;
2581 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2582 "%s: cid %d\n", __func__
,
2583 mpts
->mpts_connid
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2585 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2589 * Handle SO_FILT_HINT_RESUME subflow socket event.
2592 mptcp_subflow_resume_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2593 uint64_t *p_mpsofilt_hint
)
2595 #pragma unused(p_mpsofilt_hint)
2598 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2599 MPTS_LOCK_ASSERT_HELD(mpts
);
2601 so
= mpts
->mpts_socket
;
2603 /* the subflow connection is no longer flow controlled */
2604 mpts
->mpts_flags
&= ~MPTSF_SUSPENDED
;
2606 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2607 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2608 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2610 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2614 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
2617 mptcp_subflow_connected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2618 uint64_t *p_mpsofilt_hint
)
2620 char buf0
[MAX_IPv6_STR_LEN
], buf1
[MAX_IPv6_STR_LEN
];
2621 struct sockaddr_entry
*src_se
, *dst_se
;
2622 struct sockaddr_storage src
;
2623 struct socket
*mp_so
, *so
;
2624 struct mptcb
*mp_tp
;
2625 struct ifnet
*outifp
;
2627 boolean_t mpok
= FALSE
;
2628 boolean_t cell
= FALSE
;
2629 boolean_t wifi
= FALSE
;
2630 boolean_t wired
= FALSE
;
2632 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2633 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2634 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2635 mp_tp
= mpte
->mpte_mptcb
;
2637 MPTS_LOCK_ASSERT_HELD(mpts
);
2638 so
= mpts
->mpts_socket
;
2639 af
= mpts
->mpts_family
;
2641 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
2642 return (MPTS_EVRET_OK
);
2644 if ((mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
2645 (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
2647 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
2648 (so
->so_state
& SS_ISCONNECTED
)) {
2649 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2650 "%s: cid %d disconnect before tcp connect\n",
2651 __func__
, mpts
->mpts_connid
),
2652 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2653 (void) soshutdownlock(so
, SHUT_RD
);
2654 (void) soshutdownlock(so
, SHUT_WR
);
2655 (void) sodisconnectlocked(so
);
2657 socket_unlock(so
, 0);
2658 return (MPTS_EVRET_OK
);
2662 * The subflow connection has been connected. Find out whether it
2663 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
2665 * a. If MPTCP connection is not yet established, then this must be
2666 * the first subflow connection. If MPTCP failed to negotiate,
2667 * indicate to the MPTCP socket client via EPROTO, that the
2668 * underlying TCP connection may be peeled off via peeloff(2).
2669 * Otherwise, mark the MPTCP socket as connected.
2671 * b. If MPTCP connection has been established, then this must be
2672 * one of the subsequent subflow connections. If MPTCP failed
2673 * to negotiate, disconnect the connection since peeloff(2)
2674 * is no longer possible.
2676 * Right now, we simply unblock any waiters at the MPTCP socket layer
2677 * if the MPTCP connection has not been established.
2681 if (so
->so_state
& SS_ISDISCONNECTED
) {
2683 * With MPTCP joins, a connection is connected at the subflow
2684 * level, but the 4th ACK from the server elevates the MPTCP
2685 * subflow to connected state. So there is a small window
2686 * where the subflow could get disconnected before the
2687 * connected event is processed.
2689 socket_unlock(so
, 0);
2690 return (MPTS_EVRET_OK
);
2693 mpts
->mpts_soerror
= 0;
2694 mpts
->mpts_flags
&= ~MPTSF_CONNECTING
;
2695 mpts
->mpts_flags
|= MPTSF_CONNECTED
;
2696 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_TRUE
)
2697 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
2699 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
2700 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
2701 VERIFY(dst_se
!= NULL
&& dst_se
->se_addr
!= NULL
&&
2702 dst_se
->se_addr
->sa_family
== af
);
2704 VERIFY(mpts
->mpts_src_sl
!= NULL
);
2705 src_se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
2706 VERIFY(src_se
!= NULL
&& src_se
->se_addr
!= NULL
&&
2707 src_se
->se_addr
->sa_family
== af
);
2709 /* get/check source IP address */
2712 error
= in_getsockaddr_s(so
, &src
);
2714 struct sockaddr_in
*ms
= SIN(src_se
->se_addr
);
2715 struct sockaddr_in
*s
= SIN(&src
);
2717 VERIFY(s
->sin_len
== ms
->sin_len
);
2718 VERIFY(ms
->sin_family
== AF_INET
);
2720 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2721 bcmp(&ms
->sin_addr
, &s
->sin_addr
,
2722 sizeof (ms
->sin_addr
)) != 0) {
2723 mptcplog((LOG_ERR
, "MPTCP Events: "
2725 "address %s (expected %s)\n", __func__
,
2726 mpts
->mpts_connid
, inet_ntop(AF_INET
,
2727 (void *)&s
->sin_addr
.s_addr
, buf0
,
2728 sizeof (buf0
)), inet_ntop(AF_INET
,
2729 (void *)&ms
->sin_addr
.s_addr
, buf1
,
2731 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2733 bcopy(s
, ms
, sizeof (*s
));
2739 error
= in6_getsockaddr_s(so
, &src
);
2741 struct sockaddr_in6
*ms
= SIN6(src_se
->se_addr
);
2742 struct sockaddr_in6
*s
= SIN6(&src
);
2744 VERIFY(s
->sin6_len
== ms
->sin6_len
);
2745 VERIFY(ms
->sin6_family
== AF_INET6
);
2747 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2748 bcmp(&ms
->sin6_addr
, &s
->sin6_addr
,
2749 sizeof (ms
->sin6_addr
)) != 0) {
2750 mptcplog((LOG_ERR
, "MPTCP Events: "
2752 "address %s (expected %s)\n", __func__
,
2753 mpts
->mpts_connid
, inet_ntop(AF_INET6
,
2754 (void *)&s
->sin6_addr
, buf0
,
2755 sizeof (buf0
)), inet_ntop(AF_INET6
,
2756 (void *)&ms
->sin6_addr
, buf1
,
2758 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2760 bcopy(s
, ms
, sizeof (*s
));
2771 mptcplog((LOG_ERR
, "MPTCP Events "
2772 "%s: cid %d getsockaddr failed (%d)\n",
2773 __func__
, mpts
->mpts_connid
, error
),
2774 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2777 /* get/verify the outbound interface */
2778 outifp
= sotoinpcb(so
)->inp_last_outifp
; /* could be NULL */
2779 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
) {
2780 VERIFY(mpts
->mpts_outif
!= NULL
);
2781 if (mpts
->mpts_outif
!= outifp
) {
2782 mptcplog((LOG_ERR
, "MPTCP Events: %s: cid %d outif %s "
2783 "(expected %s)\n", __func__
, mpts
->mpts_connid
,
2784 ((outifp
!= NULL
) ? outifp
->if_xname
: "NULL"),
2785 mpts
->mpts_outif
->if_xname
),
2786 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2789 outifp
= mpts
->mpts_outif
;
2792 mpts
->mpts_outif
= outifp
;
2795 mpts
->mpts_srtt
= (intotcpcb(sotoinpcb(so
)))->t_srtt
;
2796 mpts
->mpts_rxtcur
= (intotcpcb(sotoinpcb(so
)))->t_rxtcur
;
2797 mpts
->mpts_maxseg
= (intotcpcb(sotoinpcb(so
)))->t_maxseg
;
2799 cell
= IFNET_IS_CELLULAR(mpts
->mpts_outif
);
2800 wifi
= (!cell
&& IFNET_IS_WIFI(mpts
->mpts_outif
));
2801 wired
= (!wifi
&& IFNET_IS_WIRED(mpts
->mpts_outif
));
2804 mpts
->mpts_linktype
|= MPTSL_CELL
;
2806 mpts
->mpts_linktype
|= MPTSL_WIFI
;
2808 mpts
->mpts_linktype
|= MPTSL_WIRED
;
2810 socket_unlock(so
, 0);
2812 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: cid %d "
2813 "establishment srtt %d \n", __func__
,
2814 mpts
->mpts_connid
, (mpts
->mpts_srtt
>> 5)),
2815 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
2818 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
2819 "%s: cid %d outif %s %s[%d] -> %s[%d] "
2820 "is %s\n", __func__
, mpts
->mpts_connid
, ((outifp
!= NULL
) ?
2821 outifp
->if_xname
: "NULL"), inet_ntop(af
, (af
== AF_INET
) ?
2822 (void *)&SIN(src_se
->se_addr
)->sin_addr
.s_addr
:
2823 (void *)&SIN6(src_se
->se_addr
)->sin6_addr
, buf0
, sizeof (buf0
)),
2824 ((af
== AF_INET
) ? ntohs(SIN(src_se
->se_addr
)->sin_port
) :
2825 ntohs(SIN6(src_se
->se_addr
)->sin6_port
)),
2826 inet_ntop(af
, ((af
== AF_INET
) ?
2827 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
2828 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
), buf1
, sizeof (buf1
)),
2829 ((af
== AF_INET
) ? ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
2830 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
2831 ((mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) ?
2832 "MPTCP capable" : "a regular TCP")),
2833 (MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
), MPTCP_LOGLVL_LOG
);
2835 mpok
= (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
);
2838 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
2841 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2842 /* case (a) above */
2844 mp_tp
->mpt_flags
|= MPTCPF_PEEL_OFF
;
2845 (void) mptcp_drop(mpte
, mp_tp
, EPROTO
);
2848 if (mptcp_init_authparms(mp_tp
) != 0) {
2849 mp_tp
->mpt_flags
|= MPTCPF_PEEL_OFF
;
2850 (void) mptcp_drop(mpte
, mp_tp
, EPROTO
);
2854 mptcplog((LOG_DEBUG
, "MPTCP State: "
2855 "MPTCPS_ESTABLISHED for mp_so 0x%llx \n",
2856 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2857 MPTCP_STATE_DBG
, MPTCP_LOGLVL_LOG
);
2858 mp_tp
->mpt_state
= MPTCPS_ESTABLISHED
;
2859 mpte
->mpte_associd
= mpts
->mpts_connid
;
2860 DTRACE_MPTCP2(state__change
,
2861 struct mptcb
*, mp_tp
,
2862 uint32_t, 0 /* event */);
2863 mptcp_init_statevars(mp_tp
);
2866 (void) mptcp_setconnorder(mpte
,
2867 mpts
->mpts_connid
, 1);
2868 soisconnected(mp_so
);
2873 /* Initialize the relative sequence number */
2874 mpts
->mpts_rel_seq
= 1;
2875 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2876 mpte
->mpte_nummpcapflows
++;
2877 MPT_LOCK_SPIN(mp_tp
);
2878 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2883 if (mptcp_rwnotify
&& (mpte
->mpte_nummpcapflows
== 0)) {
2884 /* Experimental code, disabled by default. */
2890 * In case of additional flows, the MPTCP socket is not
2891 * MPTSF_MP_CAPABLE until an ACK is received from server
2892 * for 3-way handshake. TCP would have guaranteed that this
2893 * is an MPTCP subflow.
2896 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2897 mpts
->mpts_flags
&= ~MPTSF_FASTJ_REQD
;
2898 mpte
->mpte_nummpcapflows
++;
2899 /* With Fastjoin, rel sequence will be nonzero */
2900 if (mpts
->mpts_rel_seq
== 0)
2901 mpts
->mpts_rel_seq
= 1;
2902 MPT_LOCK_SPIN(mp_tp
);
2903 /* With Fastjoin, sndnxt is updated before connected_ev */
2904 if (mpts
->mpts_sndnxt
== 0) {
2905 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2908 mptcp_output_needed(mpte
, mpts
);
2914 MPTS_LOCK_ASSERT_HELD(mpts
);
2916 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2920 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
2923 mptcp_subflow_disconnected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2924 uint64_t *p_mpsofilt_hint
)
2926 struct socket
*mp_so
, *so
;
2927 struct mptcb
*mp_tp
;
2930 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2931 MPTS_LOCK_ASSERT_HELD(mpts
);
2932 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2933 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2934 mp_tp
= mpte
->mpte_mptcb
;
2935 so
= mpts
->mpts_socket
;
2937 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2938 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2940 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2941 "%s: cid %d [linger %s]\n", __func__
,
2942 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2943 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2945 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
2946 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2949 * Clear flags that are used by getconninfo to return state.
2950 * Retain like MPTSF_DELETEOK for internal purposes.
2952 mpts
->mpts_flags
&= ~(MPTSF_CONNECTING
|MPTSF_CONNECT_PENDING
|
2953 MPTSF_CONNECTED
|MPTSF_DISCONNECTING
|MPTSF_PREFERRED
|
2954 MPTSF_MP_CAPABLE
|MPTSF_MP_READY
|MPTSF_MP_DEGRADED
|
2955 MPTSF_SUSPENDED
|MPTSF_ACTIVE
);
2956 mpts
->mpts_flags
|= MPTSF_DISCONNECTED
;
2959 * The subflow connection has been disconnected.
2961 * Right now, we simply unblock any waiters at the MPTCP socket layer
2962 * if the MPTCP connection has not been established.
2964 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
2966 if (mpts
->mpts_flags
& MPTSF_MPCAP_CTRSET
) {
2967 mpte
->mpte_nummpcapflows
--;
2968 if (mpte
->mpte_active_sub
== mpts
) {
2969 mpte
->mpte_active_sub
= NULL
;
2970 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2971 "%s: resetting active subflow \n",
2972 __func__
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2974 mpts
->mpts_flags
&= ~MPTSF_MPCAP_CTRSET
;
2978 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2981 soisdisconnected(mp_so
);
2988 * The underlying subflow socket has been disconnected;
2989 * it is no longer useful to us. Keep the subflow socket
2990 * around, unless the MPTCP socket has been detached or
2991 * the subflow has been disconnected explicitly, in which
2992 * case it should be deleted right away.
2994 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2998 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
3001 mptcp_subflow_mpstatus_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3002 uint64_t *p_mpsofilt_hint
)
3004 struct socket
*mp_so
, *so
;
3005 struct mptcb
*mp_tp
;
3006 ev_ret_t ret
= MPTS_EVRET_OK
;
3008 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3009 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3010 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3011 mp_tp
= mpte
->mpte_mptcb
;
3013 MPTS_LOCK_ASSERT_HELD(mpts
);
3014 so
= mpts
->mpts_socket
;
3019 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_TRUE
)
3020 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
3022 mpts
->mpts_flags
&= ~MPTSF_MP_CAPABLE
;
3024 if (sototcpcb(so
)->t_mpflags
& TMPF_TCP_FALLBACK
) {
3025 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
3027 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
3030 mpts
->mpts_flags
&= ~MPTSF_MP_DEGRADED
;
3032 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_READY
)
3033 mpts
->mpts_flags
|= MPTSF_MP_READY
;
3035 mpts
->mpts_flags
&= ~MPTSF_MP_READY
;
3037 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
3038 mp_tp
->mpt_flags
|= MPTCPF_FALLBACK_TO_TCP
;
3039 mp_tp
->mpt_flags
&= ~MPTCPF_JOIN_READY
;
3042 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
3043 VERIFY(!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
));
3044 ret
= MPTS_EVRET_DISCONNECT_FALLBACK
;
3045 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
|
3046 SO_FILT_HINT_CONNINFO_UPDATED
;
3047 } else if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
3048 mp_tp
->mpt_flags
|= MPTCPF_JOIN_READY
;
3049 ret
= MPTS_EVRET_CONNECT_PENDING
;
3051 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
|
3052 SO_FILT_HINT_CONNINFO_UPDATED
;
3055 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3056 "%s: mp_so 0x%llx mpt_flags=%b cid %d "
3057 "mptsf=%b\n", __func__
,
3058 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
3059 mp_tp
->mpt_flags
, MPTCPF_BITS
, mpts
->mpts_connid
,
3060 mpts
->mpts_flags
, MPTSF_BITS
),
3061 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3065 socket_unlock(so
, 0);
3070 * Handle SO_FILT_HINT_MUSTRST subflow socket event
3073 mptcp_subflow_mustrst_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3074 uint64_t *p_mpsofilt_hint
)
3076 struct socket
*mp_so
, *so
;
3077 struct mptcb
*mp_tp
;
3081 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3082 MPTS_LOCK_ASSERT_HELD(mpts
);
3083 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3084 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3085 mp_tp
= mpte
->mpte_mptcb
;
3086 so
= mpts
->mpts_socket
;
3088 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
3089 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
3091 if (mpts
->mpts_soerror
== 0)
3092 mpts
->mpts_soerror
= ECONNABORTED
;
3094 /* We got an invalid option or a fast close */
3096 struct tcptemp
*t_template
;
3097 struct inpcb
*inp
= sotoinpcb(so
);
3098 struct tcpcb
*tp
= NULL
;
3100 tp
= intotcpcb(inp
);
3101 so
->so_error
= ECONNABORTED
;
3103 t_template
= tcp_maketemplate(tp
);
3105 struct tcp_respond_args tra
;
3107 bzero(&tra
, sizeof(tra
));
3108 if (inp
->inp_flags
& INP_BOUND_IF
)
3109 tra
.ifscope
= inp
->inp_boundifp
->if_index
;
3111 tra
.ifscope
= IFSCOPE_NONE
;
3112 tra
.awdl_unrestricted
= 1;
3114 tcp_respond(tp
, t_template
->tt_ipgen
,
3115 &t_template
->tt_t
, (struct mbuf
*)NULL
,
3116 tp
->rcv_nxt
, tp
->snd_una
, TH_RST
, &tra
);
3117 (void) m_free(dtom(t_template
));
3118 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3119 "%s: mp_so 0x%llx cid %d \n",
3120 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3121 so
, mpts
->mpts_connid
),
3122 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3124 socket_unlock(so
, 0);
3125 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
3127 *p_mpsofilt_hint
|= (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
3129 if (!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
))
3130 *p_mpsofilt_hint
|= SO_FILT_HINT_CONNRESET
;
3133 if ((mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) ||
3134 (mp_tp
->mpt_state
== MPTCPS_FASTCLOSE_WAIT
)) {
3135 mp_so
->so_error
= ECONNABORTED
;
3138 * Ideally there should be a state transition for when a FASTCLOSE
3139 * is received. Right now we keep the connection in MPTCPS_ESTABLISHED
3140 * state and only go to terminal state when the user level code calls
3141 * close after processing the SO_FILT_HINT_CONNRESET event.
3143 if (mp_tp
->mpt_gc_ticks
== MPT_GC_TICKS
)
3144 mp_tp
->mpt_gc_ticks
= MPT_GC_TICKS_FAST
;
3148 * Keep the subflow socket around unless the subflow has been
3149 * disconnected explicitly.
3151 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
3155 mptcp_fastjoin_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3156 uint64_t *p_mpsofilt_hint
)
3158 #pragma unused(p_mpsofilt_hint)
3159 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3160 MPTS_LOCK_ASSERT_HELD(mpts
);
3161 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3163 if (mpte
->mpte_nummpcapflows
== 0) {
3164 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
3165 mptcplog((LOG_DEBUG
,"MPTCP Events: %s: %llx %llx \n",
3166 __func__
, mp_tp
->mpt_snduna
, mpts
->mpts_sndnxt
),
3167 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3169 mpte
->mpte_active_sub
= mpts
;
3170 mpts
->mpts_flags
|= (MPTSF_FASTJ_SEND
| MPTSF_ACTIVE
);
3173 * If mptcp_subflow_output is called before fastjoin_ev
3174 * then mpts->mpts_sndnxt is initialized to mp_tp->mpt_snduna
3175 * and further mpts->mpts_sndnxt is incremented by len copied.
3177 if (mpts
->mpts_sndnxt
== 0) {
3178 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
3179 mpts
->mpts_rel_seq
= 1;
3184 return (MPTS_EVRET_OK
);
3188 mptcp_deleteok_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3189 uint64_t *p_mpsofilt_hint
)
3191 #pragma unused(p_mpsofilt_hint)
3192 MPTE_LOCK_ASSERT_HELD(mpte
);
3193 MPTS_LOCK_ASSERT_HELD(mpts
);
3194 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3196 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3197 "%s cid %d\n", __func__
, mpts
->mpts_connid
),
3198 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3200 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3201 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
3202 return (MPTS_EVRET_DELETE
);
3204 return (MPTS_EVRET_OK
);
3208 mptcp_evret2str(ev_ret_t ret
)
3210 const char *c
= "UNKNOWN";
3213 case MPTS_EVRET_DELETE
:
3214 c
= "MPTS_EVRET_DELETE";
3216 case MPTS_EVRET_CONNECT_PENDING
:
3217 c
= "MPTS_EVRET_CONNECT_PENDING";
3219 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3220 c
= "MPTS_EVRET_DISCONNECT_FALLBACK";
3223 c
= "MPTS_EVRET_OK";
3232 * Add a reference to a subflow structure; used by MPTS_ADDREF().
3235 mptcp_subflow_addref(struct mptsub
*mpts
, int locked
)
3240 MPTS_LOCK_ASSERT_HELD(mpts
);
3242 if (++mpts
->mpts_refcnt
== 0) {
3243 panic("%s: mpts %p wraparound refcnt\n", __func__
, mpts
);
3251 * Remove a reference held on a subflow structure; used by MPTS_REMREF();
3254 mptcp_subflow_remref(struct mptsub
*mpts
)
3257 if (mpts
->mpts_refcnt
== 0) {
3258 panic("%s: mpts %p negative refcnt\n", __func__
, mpts
);
3261 if (--mpts
->mpts_refcnt
> 0) {
3265 /* callee will unlock and destroy lock */
3266 mptcp_subflow_free(mpts
);
3270 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
3271 * caller must ensure that the option can be issued on subflow sockets, via
3272 * MPOF_SUBFLOW_OK flag.
3275 mptcp_subflow_sosetopt(struct mptses
*mpte
, struct socket
*so
,
3278 struct socket
*mp_so
;
3279 struct sockopt sopt
;
3283 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3284 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
3286 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3287 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3289 bzero(&sopt
, sizeof (sopt
));
3290 sopt
.sopt_dir
= SOPT_SET
;
3291 sopt
.sopt_level
= mpo
->mpo_level
;
3292 sopt
.sopt_name
= mpo
->mpo_name
;
3293 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3294 sopt
.sopt_valsize
= sizeof (int);
3295 sopt
.sopt_p
= kernproc
;
3297 error
= sosetoptlock(so
, &sopt
, 0); /* already locked */
3299 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3300 "%s: mp_so 0x%llx sopt %s "
3301 "val %d set successful\n", __func__
,
3302 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3303 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3304 buf
, sizeof (buf
)), mpo
->mpo_intval
),
3305 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3307 mptcplog((LOG_ERR
, "MPTCP Socket: "
3308 "%s: mp_so 0x%llx sopt %s "
3309 "val %d set error %d\n", __func__
,
3310 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3311 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3312 buf
, sizeof (buf
)), mpo
->mpo_intval
, error
),
3313 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3319 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
3320 * caller must ensure that the option can be issued on subflow sockets, via
3321 * MPOF_SUBFLOW_OK flag.
3324 mptcp_subflow_sogetopt(struct mptses
*mpte
, struct socket
*so
,
3327 struct socket
*mp_so
;
3328 struct sockopt sopt
;
3332 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3333 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3334 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3336 bzero(&sopt
, sizeof (sopt
));
3337 sopt
.sopt_dir
= SOPT_GET
;
3338 sopt
.sopt_level
= mpo
->mpo_level
;
3339 sopt
.sopt_name
= mpo
->mpo_name
;
3340 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3341 sopt
.sopt_valsize
= sizeof (int);
3342 sopt
.sopt_p
= kernproc
;
3344 error
= sogetoptlock(so
, &sopt
, 0); /* already locked */
3346 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3347 "%s: mp_so 0x%llx sopt %s "
3348 "val %d get successful\n", __func__
,
3349 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3350 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3351 buf
, sizeof (buf
)), mpo
->mpo_intval
),
3352 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3354 mptcplog((LOG_ERR
, "MPTCP Socket: "
3355 "%s: mp_so 0x%llx sopt %s get error %d\n",
3356 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3357 mptcp_sopt2str(mpo
->mpo_level
,
3358 mpo
->mpo_name
, buf
, sizeof (buf
)), error
),
3359 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
3366 * MPTCP garbage collector.
3368 * This routine is called by the MP domain on-demand, periodic callout,
3369 * which is triggered when a MPTCP socket is closed. The callout will
3370 * repeat as long as this routine returns a non-zero value.
3373 mptcp_gc(struct mppcbinfo
*mppi
)
3375 struct mppcb
*mpp
, *tmpp
;
3376 uint32_t active
= 0;
3378 lck_mtx_assert(&mppi
->mppi_lock
, LCK_MTX_ASSERT_OWNED
);
3380 TAILQ_FOREACH_SAFE(mpp
, &mppi
->mppi_pcbs
, mpp_entry
, tmpp
) {
3381 struct socket
*mp_so
;
3382 struct mptses
*mpte
;
3383 struct mptcb
*mp_tp
;
3385 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
3386 mp_so
= mpp
->mpp_socket
;
3387 VERIFY(mp_so
!= NULL
);
3388 mpte
= mptompte(mpp
);
3389 VERIFY(mpte
!= NULL
);
3390 mp_tp
= mpte
->mpte_mptcb
;
3391 VERIFY(mp_tp
!= NULL
);
3393 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3394 "%s: mp_so 0x%llx found "
3395 "(u=%d,r=%d,s=%d)\n", __func__
,
3396 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mp_so
->so_usecount
,
3397 mp_so
->so_retaincnt
, mpp
->mpp_state
),
3398 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3400 if (!lck_mtx_try_lock(&mpp
->mpp_lock
)) {
3401 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3402 "%s: mp_so 0x%llx skipped "
3403 "(u=%d,r=%d)\n", __func__
,
3404 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3405 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3406 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3411 /* check again under the lock */
3412 if (mp_so
->so_usecount
> 1) {
3413 boolean_t wakeup
= FALSE
;
3414 struct mptsub
*mpts
, *tmpts
;
3416 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3417 "%s: mp_so 0x%llx skipped "
3418 "[u=%d,r=%d] %d %d\n", __func__
,
3419 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3420 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3421 mp_tp
->mpt_gc_ticks
,
3423 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3426 if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_1
) {
3427 if (mp_tp
->mpt_gc_ticks
> 0)
3428 mp_tp
->mpt_gc_ticks
--;
3429 if (mp_tp
->mpt_gc_ticks
== 0) {
3431 if (mp_tp
->mpt_localkey
!= NULL
) {
3433 mp_tp
->mpt_localkey
);
3434 mp_tp
->mpt_localkey
= NULL
;
3440 TAILQ_FOREACH_SAFE(mpts
,
3441 &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3443 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3444 if (mpts
->mpts_soerror
== 0)
3445 mpts
->mpts_soerror
= ETIMEDOUT
;
3446 mptcp_subflow_eupcall(mpts
->mpts_socket
,
3447 mpts
, SO_FILT_HINT_DISCONNECTED
);
3451 lck_mtx_unlock(&mpp
->mpp_lock
);
3456 if (mpp
->mpp_state
!= MPPCB_STATE_DEAD
) {
3457 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3458 "%s: mp_so 0x%llx skipped "
3459 "[u=%d,r=%d,s=%d]\n", __func__
,
3460 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3461 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3463 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3464 lck_mtx_unlock(&mpp
->mpp_lock
);
3470 * The PCB has been detached, and there is exactly 1 refnct
3471 * held by the MPTCP thread. Signal that thread to terminate,
3472 * after which the last refcnt will be released. That will
3473 * allow it to be destroyed below during the next round.
3475 if (mp_so
->so_usecount
== 1) {
3476 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3477 "%s: mp_so 0x%llx scheduled for "
3478 "termination [u=%d,r=%d]\n", __func__
,
3479 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3480 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3481 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3483 /* signal MPTCP thread to terminate */
3484 mptcp_thread_terminate_signal(mpte
);
3485 lck_mtx_unlock(&mpp
->mpp_lock
);
3490 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3491 "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
3492 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3493 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3494 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3496 DTRACE_MPTCP4(dispose
, struct socket
*, mp_so
,
3497 struct sockbuf
*, &mp_so
->so_rcv
,
3498 struct sockbuf
*, &mp_so
->so_snd
,
3499 struct mppcb
*, mpp
);
3508 * Drop a MPTCP connection, reporting the specified error.
3511 mptcp_drop(struct mptses
*mpte
, struct mptcb
*mp_tp
, int errno
)
3513 struct socket
*mp_so
;
3515 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3516 MPT_LOCK_ASSERT_HELD(mp_tp
);
3517 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3518 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3520 mp_tp
->mpt_state
= MPTCPS_TERMINATE
;
3521 DTRACE_MPTCP2(state__change
, struct mptcb
*, mp_tp
,
3522 uint32_t, 0 /* event */);
3524 if (errno
== ETIMEDOUT
&& mp_tp
->mpt_softerror
!= 0)
3525 errno
= mp_tp
->mpt_softerror
;
3526 mp_so
->so_error
= errno
;
3528 return (mptcp_close(mpte
, mp_tp
));
3532 * Close a MPTCP control block.
3535 mptcp_close(struct mptses
*mpte
, struct mptcb
*mp_tp
)
3537 struct socket
*mp_so
= NULL
;
3538 struct mptsub
*mpts
= NULL
, *tmpts
= NULL
;
3540 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3541 MPT_LOCK_ASSERT_HELD(mp_tp
);
3542 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3543 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3544 if (mp_tp
->mpt_localkey
!= NULL
) {
3545 mptcp_free_key(mp_tp
->mpt_localkey
);
3546 mp_tp
->mpt_localkey
= NULL
;
3550 soisdisconnected(mp_so
);
3553 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
3558 /* Clean up all subflows */
3559 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3561 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
3562 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
3564 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3572 mptcp_notify_close(struct socket
*so
)
3574 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_DISCONNECTED
));
3578 * Signal MPTCP thread to wake up.
3581 mptcp_thread_signal(struct mptses
*mpte
)
3583 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3584 mptcp_thread_signal_locked(mpte
);
3585 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3589 * Signal MPTCP thread to wake up (locked version)
3592 mptcp_thread_signal_locked(struct mptses
*mpte
)
3594 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3596 mpte
->mpte_thread_reqs
++;
3597 if (!mpte
->mpte_thread_active
&& mpte
->mpte_thread
!= THREAD_NULL
)
3598 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3602 * Signal MPTCP thread to terminate.
3605 mptcp_thread_terminate_signal(struct mptses
*mpte
)
3607 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3608 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3609 mpte
->mpte_thread
= THREAD_NULL
;
3610 mpte
->mpte_thread_reqs
++;
3611 if (!mpte
->mpte_thread_active
)
3612 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3614 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3618 * MPTCP thread workloop.
3621 mptcp_thread_dowork(struct mptses
*mpte
)
3623 struct socket
*mp_so
;
3624 struct mptsub
*mpts
, *tmpts
;
3625 boolean_t connect_pending
= FALSE
, disconnect_fallback
= FALSE
;
3626 uint64_t mpsofilt_hint_mask
= 0;
3628 MPTE_LOCK(mpte
); /* same as MP socket lock */
3629 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3630 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3631 VERIFY(mp_so
!= NULL
);
3633 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3637 MPTS_ADDREF_LOCKED(mpts
); /* for us */
3639 /* Update process ownership based on parent mptcp socket */
3640 mptcp_update_last_owner(mpts
, mp_so
);
3642 mptcp_subflow_input(mpte
, mpts
);
3644 mptcp_get_rtt_measurement(mpts
, mpte
);
3646 ret
= mptcp_subflow_events(mpte
, mpts
, &mpsofilt_hint_mask
);
3648 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3649 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3650 "%s: cid %d \n", __func__
,
3652 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3653 (void) mptcp_subflow_output(mpte
, mpts
);
3657 * If MPTCP socket is closed, disconnect all subflows.
3658 * This will generate a disconnect event which will
3659 * be handled during the next iteration, causing a
3660 * non-zero error to be returned above.
3662 if (mp_so
->so_flags
& SOF_PCBCLEARING
)
3663 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
3670 case MPTS_EVRET_DELETE
:
3671 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3673 case MPTS_EVRET_CONNECT_PENDING
:
3674 connect_pending
= TRUE
;
3676 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3677 disconnect_fallback
= TRUE
;
3680 mptcplog((LOG_DEBUG
,
3681 "MPTCP Socket: %s: mptcp_subflow_events "
3682 "returned invalid value: %d\n", __func__
,
3684 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3687 MPTS_REMREF(mpts
); /* ours */
3690 if (mpsofilt_hint_mask
) {
3691 soevent(mp_so
, mpsofilt_hint_mask
);
3694 if (!connect_pending
&& !disconnect_fallback
) {
3699 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3701 if (disconnect_fallback
) {
3702 struct socket
*so
= NULL
;
3703 struct inpcb
*inp
= NULL
;
3704 struct tcpcb
*tp
= NULL
;
3706 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
3711 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
3713 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|
3714 MPTSF_DISCONNECTED
|MPTSF_CONNECT_PENDING
)) {
3718 so
= mpts
->mpts_socket
;
3721 * The MPTCP connection has degraded to a fallback
3722 * mode, so there is no point in keeping this subflow
3723 * regardless of its MPTCP-readiness state, unless it
3724 * is the primary one which we use for fallback. This
3725 * assumes that the subflow used for fallback is the
3730 inp
= sotoinpcb(so
);
3731 tp
= intotcpcb(inp
);
3733 ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
3734 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
3735 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3736 socket_unlock(so
, 1);
3740 tp
->t_mpflags
|= TMPF_RESET
;
3741 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
3742 socket_unlock(so
, 1);
3744 } else if (connect_pending
) {
3746 * If delayed subflow start is set and cellular,
3747 * delay the connect till a retransmission timeout
3750 if ((mptcp_delayed_subf_start
) &&
3751 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
3757 * The MPTCP connection has progressed to a state
3758 * where it supports full multipath semantics; allow
3759 * additional joins to be attempted for all subflows
3760 * that are in the PENDING state.
3762 if (mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) {
3763 (void) mptcp_subflow_soconnectx(mpte
, mpts
);
3776 mptcp_thread_func(void *v
, wait_result_t w
)
3779 struct mptses
*mpte
= v
;
3780 struct timespec
*ts
= NULL
;
3782 VERIFY(mpte
!= NULL
);
3784 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3787 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3789 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3790 (void) msleep(&mpte
->mpte_thread
,
3791 &mpte
->mpte_thread_lock
, (PZERO
- 1) | PSPIN
,
3795 /* MPTCP socket is closed? */
3796 if (mpte
->mpte_thread
== THREAD_NULL
) {
3797 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3798 /* callee will destroy thread lock */
3799 mptcp_thread_destroy(mpte
);
3804 mpte
->mpte_thread_active
= 1;
3806 uint32_t reqs
= mpte
->mpte_thread_reqs
;
3808 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3809 mptcp_thread_dowork(mpte
);
3810 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3812 /* if there's no pending request, we're done */
3813 if (reqs
== mpte
->mpte_thread_reqs
||
3814 mpte
->mpte_thread
== THREAD_NULL
)
3817 mpte
->mpte_thread_reqs
= 0;
3818 mpte
->mpte_thread_active
= 0;
3823 * Destroy a MTCP thread, to be called in the MPTCP thread context
3824 * upon receiving an indication to self-terminate. This routine
3825 * will not return, as the current thread is terminated at the end.
3828 mptcp_thread_destroy(struct mptses
*mpte
)
3830 struct socket
*mp_so
;
3832 MPTE_LOCK(mpte
); /* same as MP socket lock */
3833 VERIFY(mpte
->mpte_thread
== THREAD_NULL
);
3834 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3836 mptcp_sesdestroy(mpte
);
3838 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3839 VERIFY(mp_so
!= NULL
);
3840 VERIFY(mp_so
->so_usecount
!= 0);
3841 mp_so
->so_usecount
--; /* for thread */
3842 mpte
->mpte_mppcb
->mpp_flags
|= MPP_DEFUNCT
;
3845 /* for the extra refcnt from kernel_thread_start() */
3846 thread_deallocate(current_thread());
3847 /* this is the end */
3848 thread_terminate(current_thread());
3853 * Protocol pr_lock callback.
3856 mptcp_lock(struct socket
*mp_so
, int refcount
, void *lr
)
3858 struct mppcb
*mpp
= sotomppcb(mp_so
);
3862 lr_saved
= __builtin_return_address(0);
3867 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
3868 mp_so
, lr_saved
, solockhistory_nr(mp_so
));
3871 lck_mtx_lock(&mpp
->mpp_lock
);
3873 if (mp_so
->so_usecount
< 0) {
3874 panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__
,
3875 mp_so
, mp_so
->so_pcb
, lr_saved
, mp_so
->so_usecount
,
3876 solockhistory_nr(mp_so
));
3880 mp_so
->so_usecount
++;
3881 mp_so
->lock_lr
[mp_so
->next_lock_lr
] = lr_saved
;
3882 mp_so
->next_lock_lr
= (mp_so
->next_lock_lr
+ 1) % SO_LCKDBG_MAX
;
3888 * Protocol pr_unlock callback.
3891 mptcp_unlock(struct socket
*mp_so
, int refcount
, void *lr
)
3893 struct mppcb
*mpp
= sotomppcb(mp_so
);
3897 lr_saved
= __builtin_return_address(0);
3902 panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__
,
3903 mp_so
, mp_so
->so_usecount
, lr_saved
,
3904 solockhistory_nr(mp_so
));
3907 lck_mtx_assert(&mpp
->mpp_lock
, LCK_MTX_ASSERT_OWNED
);
3910 mp_so
->so_usecount
--;
3912 if (mp_so
->so_usecount
< 0) {
3913 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
3914 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
3917 mp_so
->unlock_lr
[mp_so
->next_unlock_lr
] = lr_saved
;
3918 mp_so
->next_unlock_lr
= (mp_so
->next_unlock_lr
+ 1) % SO_LCKDBG_MAX
;
3919 lck_mtx_unlock(&mpp
->mpp_lock
);
3925 * Protocol pr_getlock callback.
3928 mptcp_getlock(struct socket
*mp_so
, int locktype
)
3930 #pragma unused(locktype)
3931 struct mppcb
*mpp
= sotomppcb(mp_so
);
3934 panic("%s: so=%p NULL so_pcb %s\n", __func__
, mp_so
,
3935 solockhistory_nr(mp_so
));
3938 if (mp_so
->so_usecount
< 0) {
3939 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
3940 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
3943 return (&mpp
->mpp_lock
);
3947 * Key generation functions
3950 mptcp_generate_unique_key(struct mptcp_key_entry
*key_entry
)
3952 struct mptcp_key_entry
*key_elm
;
3954 read_random(&key_entry
->mkey_value
, sizeof (key_entry
->mkey_value
));
3955 if (key_entry
->mkey_value
== 0)
3957 mptcp_do_sha1(&key_entry
->mkey_value
, key_entry
->mkey_digest
,
3958 sizeof (key_entry
->mkey_digest
));
3960 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
3961 if (key_elm
->mkey_value
== key_entry
->mkey_value
) {
3964 if (bcmp(key_elm
->mkey_digest
, key_entry
->mkey_digest
, 4) ==
3971 static mptcp_key_t
*
3972 mptcp_reserve_key(void)
3974 struct mptcp_key_entry
*key_elm
;
3975 struct mptcp_key_entry
*found_elm
= NULL
;
3977 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3978 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
3979 if (key_elm
->mkey_flags
== MKEYF_FREE
) {
3980 key_elm
->mkey_flags
= MKEYF_INUSE
;
3981 found_elm
= key_elm
;
3985 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
3988 return (&found_elm
->mkey_value
);
3991 key_elm
= (struct mptcp_key_entry
*)
3992 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
3993 key_elm
->mkey_flags
= MKEYF_INUSE
;
3995 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
3996 mptcp_generate_unique_key(key_elm
);
3997 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_elm
, mkey_next
);
3998 mptcp_keys_pool
.mkph_count
+= 1;
3999 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4000 return (&key_elm
->mkey_value
);
4004 mptcp_get_stored_digest(mptcp_key_t
*key
)
4006 struct mptcp_key_entry
*key_holder
;
4007 caddr_t digest
= NULL
;
4009 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4010 key_holder
= (struct mptcp_key_entry
*)(void *)((caddr_t
)key
-
4011 offsetof(struct mptcp_key_entry
, mkey_value
));
4012 if (key_holder
->mkey_flags
!= MKEYF_INUSE
)
4013 panic_plain("%s", __func__
);
4014 digest
= &key_holder
->mkey_digest
[0];
4015 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4020 mptcp_free_key(mptcp_key_t
*key
)
4022 struct mptcp_key_entry
*key_holder
;
4023 struct mptcp_key_entry
*key_elm
;
4024 int pt
= RandomULong();
4026 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4027 key_holder
= (struct mptcp_key_entry
*)(void*)((caddr_t
)key
-
4028 offsetof(struct mptcp_key_entry
, mkey_value
));
4029 key_holder
->mkey_flags
= MKEYF_FREE
;
4031 LIST_REMOVE(key_holder
, mkey_next
);
4032 mptcp_keys_pool
.mkph_count
-= 1;
4034 /* Free half the time */
4036 zfree(mptcp_keys_pool
.mkph_key_entry_zone
, key_holder
);
4038 /* Insert it at random point to avoid early reuse */
4040 if (mptcp_keys_pool
.mkph_count
> 1) {
4041 pt
= pt
% (mptcp_keys_pool
.mkph_count
- 1);
4042 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
4044 LIST_INSERT_AFTER(key_elm
, key_holder
,
4050 panic("missed insertion");
4052 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_holder
,
4055 mptcp_keys_pool
.mkph_count
+= 1;
4057 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4061 mptcp_key_pool_init(void)
4064 struct mptcp_key_entry
*key_entry
;
4066 LIST_INIT(&mptcp_keys_pool
);
4067 mptcp_keys_pool
.mkph_count
= 0;
4069 mptcp_keys_pool
.mkph_key_elm_sz
= (vm_size_t
)
4070 (sizeof (struct mptcp_key_entry
));
4071 mptcp_keys_pool
.mkph_key_entry_zone
= zinit(
4072 mptcp_keys_pool
.mkph_key_elm_sz
,
4073 MPTCP_MX_KEY_ALLOCS
* mptcp_keys_pool
.mkph_key_elm_sz
,
4074 MPTCP_MX_PREALLOC_ZONE_SZ
, "mptkeys");
4075 if (mptcp_keys_pool
.mkph_key_entry_zone
== NULL
) {
4076 panic("%s: unable to allocate MPTCP keys zone \n", __func__
);
4079 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_CALLERACCT
, FALSE
);
4080 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_EXPAND
, TRUE
);
4082 for (i
= 0; i
< MPTCP_KEY_PREALLOCS_MX
; i
++) {
4083 key_entry
= (struct mptcp_key_entry
*)
4084 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
4085 key_entry
->mkey_flags
= MKEYF_FREE
;
4086 mptcp_generate_unique_key(key_entry
);
4087 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_entry
, mkey_next
);
4088 mptcp_keys_pool
.mkph_count
+= 1;
4090 lck_mtx_init(&mptcp_keys_pool
.mkph_lock
, mtcbinfo
.mppi_lock_grp
,
4091 mtcbinfo
.mppi_lock_attr
);
4095 * MPTCP Join support
4099 mptcp_attach_to_subf(struct socket
*so
, struct mptcb
*mp_tp
,
4102 struct tcpcb
*tp
= sototcpcb(so
);
4103 struct mptcp_subf_auth_entry
*sauth_entry
;
4104 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4106 MPT_LOCK_SPIN(mp_tp
);
4107 tp
->t_mptcb
= mp_tp
;
4109 * The address ID of the first flow is implicitly 0.
4111 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
4112 tp
->t_local_aid
= 0;
4114 tp
->t_local_aid
= addr_id
;
4115 tp
->t_mpflags
|= (TMPF_PREESTABLISHED
| TMPF_JOINED_FLOW
);
4116 so
->so_flags
|= SOF_MP_SEC_SUBFLOW
;
4119 sauth_entry
= zalloc(mpt_subauth_zone
);
4120 sauth_entry
->msae_laddr_id
= tp
->t_local_aid
;
4121 sauth_entry
->msae_raddr_id
= 0;
4122 sauth_entry
->msae_raddr_rand
= 0;
4124 sauth_entry
->msae_laddr_rand
= RandomULong();
4125 if (sauth_entry
->msae_laddr_rand
== 0)
4127 MPT_LOCK_SPIN(mp_tp
);
4128 LIST_INSERT_HEAD(&mp_tp
->mpt_subauth_list
, sauth_entry
, msae_next
);
4133 mptcp_detach_mptcb_from_subf(struct mptcb
*mp_tp
, struct socket
*so
)
4135 struct mptcp_subf_auth_entry
*sauth_entry
;
4136 struct tcpcb
*tp
= NULL
;
4142 socket_unlock(so
, 0);
4147 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4148 if (sauth_entry
->msae_laddr_id
== tp
->t_local_aid
) {
4154 LIST_REMOVE(sauth_entry
, msae_next
);
4159 zfree(mpt_subauth_zone
, sauth_entry
);
4162 socket_unlock(so
, 0);
4166 mptcp_get_rands(mptcp_addr_id addr_id
, struct mptcb
*mp_tp
, u_int32_t
*lrand
,
4169 struct mptcp_subf_auth_entry
*sauth_entry
;
4170 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4173 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4174 if (sauth_entry
->msae_laddr_id
== addr_id
) {
4176 *lrand
= sauth_entry
->msae_laddr_rand
;
4178 *rrand
= sauth_entry
->msae_raddr_rand
;
4186 mptcp_set_raddr_rand(mptcp_addr_id laddr_id
, struct mptcb
*mp_tp
,
4187 mptcp_addr_id raddr_id
, u_int32_t raddr_rand
)
4189 struct mptcp_subf_auth_entry
*sauth_entry
;
4190 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4193 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4194 if (sauth_entry
->msae_laddr_id
== laddr_id
) {
4195 if ((sauth_entry
->msae_raddr_id
!= 0) &&
4196 (sauth_entry
->msae_raddr_id
!= raddr_id
)) {
4197 mptcplog((LOG_ERR
, "MPTCP Socket: %s mismatched"
4198 " address ids %d %d \n", __func__
, raddr_id
,
4199 sauth_entry
->msae_raddr_id
),
4200 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4204 sauth_entry
->msae_raddr_id
= raddr_id
;
4205 if ((sauth_entry
->msae_raddr_rand
!= 0) &&
4206 (sauth_entry
->msae_raddr_rand
!= raddr_rand
)) {
4207 mptcplog((LOG_ERR
, "MPTCP Socket: "
4208 "%s: dup SYN_ACK %d %d \n",
4209 __func__
, raddr_rand
,
4210 sauth_entry
->msae_raddr_rand
),
4211 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4215 sauth_entry
->msae_raddr_rand
= raddr_rand
;
4224 * SHA1 support for MPTCP
4227 mptcp_do_sha1(mptcp_key_t
*key
, char *sha_digest
, int digest_len
)
4230 const unsigned char *sha1_base
;
4233 if (digest_len
!= SHA1_RESULTLEN
) {
4237 sha1_base
= (const unsigned char *) key
;
4238 sha1_size
= sizeof (mptcp_key_t
);
4239 SHA1Init(&sha1ctxt
);
4240 SHA1Update(&sha1ctxt
, sha1_base
, sha1_size
);
4241 SHA1Final(sha_digest
, &sha1ctxt
);
4246 mptcp_hmac_sha1(mptcp_key_t key1
, mptcp_key_t key2
,
4247 u_int32_t rand1
, u_int32_t rand2
, u_char
*digest
, int digest_len
)
4250 mptcp_key_t key_ipad
[8] = {0}; /* key XOR'd with inner pad */
4251 mptcp_key_t key_opad
[8] = {0}; /* key XOR'd with outer pad */
4255 bzero(digest
, digest_len
);
4257 /* Set up the Key for HMAC */
4264 /* Set up the message for HMAC */
4268 /* Key is 512 block length, so no need to compute hash */
4270 /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
4272 for (i
= 0; i
< 8; i
++) {
4273 key_ipad
[i
] ^= 0x3636363636363636;
4274 key_opad
[i
] ^= 0x5c5c5c5c5c5c5c5c;
4277 /* Perform inner SHA1 */
4278 SHA1Init(&sha1ctxt
);
4279 SHA1Update(&sha1ctxt
, (unsigned char *)key_ipad
, sizeof (key_ipad
));
4280 SHA1Update(&sha1ctxt
, (unsigned char *)data
, sizeof (data
));
4281 SHA1Final(digest
, &sha1ctxt
);
4283 /* Perform outer SHA1 */
4284 SHA1Init(&sha1ctxt
);
4285 SHA1Update(&sha1ctxt
, (unsigned char *)key_opad
, sizeof (key_opad
));
4286 SHA1Update(&sha1ctxt
, (unsigned char *)digest
, SHA1_RESULTLEN
);
4287 SHA1Final(digest
, &sha1ctxt
);
4291 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
4292 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
4295 mptcp_get_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
, u_char
*digest
,
4298 uint32_t lrand
, rrand
;
4299 mptcp_key_t localkey
, remotekey
;
4300 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4302 if (digest_len
!= SHA1_RESULTLEN
)
4306 mptcp_get_rands(aid
, mp_tp
, &lrand
, &rrand
);
4307 MPT_LOCK_SPIN(mp_tp
);
4308 localkey
= *mp_tp
->mpt_localkey
;
4309 remotekey
= mp_tp
->mpt_remotekey
;
4311 mptcp_hmac_sha1(localkey
, remotekey
, lrand
, rrand
, digest
,
4316 mptcp_get_trunced_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
)
4318 u_char digest
[SHA1_RESULTLEN
];
4319 u_int64_t trunced_digest
;
4321 mptcp_get_hmac(aid
, mp_tp
, &digest
[0], sizeof (digest
));
4322 bcopy(digest
, &trunced_digest
, 8);
4323 return (trunced_digest
);
4327 * Authentication data generation
4330 mptcp_generate_token(char *sha_digest
, int sha_digest_len
, caddr_t token
,
4333 VERIFY(token_len
== sizeof (u_int32_t
));
4334 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4336 /* Most significant 32 bits of the SHA1 hash */
4337 bcopy(sha_digest
, token
, sizeof (u_int32_t
));
4342 mptcp_generate_idsn(char *sha_digest
, int sha_digest_len
, caddr_t idsn
,
4345 VERIFY(idsn_len
== sizeof (u_int64_t
));
4346 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4349 * Least significant 64 bits of the SHA1 hash
4352 idsn
[7] = sha_digest
[12];
4353 idsn
[6] = sha_digest
[13];
4354 idsn
[5] = sha_digest
[14];
4355 idsn
[4] = sha_digest
[15];
4356 idsn
[3] = sha_digest
[16];
4357 idsn
[2] = sha_digest
[17];
4358 idsn
[1] = sha_digest
[18];
4359 idsn
[0] = sha_digest
[19];
4364 mptcp_init_authparms(struct mptcb
*mp_tp
)
4366 caddr_t local_digest
= NULL
;
4367 char remote_digest
[MPTCP_SHA1_RESULTLEN
];
4368 MPT_LOCK_ASSERT_HELD(mp_tp
);
4370 /* Only Version 0 is supported for auth purposes */
4371 if (mp_tp
->mpt_version
!= MPTCP_STD_VERSION_0
)
4374 /* Setup local and remote tokens and Initial DSNs */
4375 local_digest
= mptcp_get_stored_digest(mp_tp
->mpt_localkey
);
4376 mptcp_generate_token(local_digest
, SHA1_RESULTLEN
,
4377 (caddr_t
)&mp_tp
->mpt_localtoken
, sizeof (mp_tp
->mpt_localtoken
));
4378 mptcp_generate_idsn(local_digest
, SHA1_RESULTLEN
,
4379 (caddr_t
)&mp_tp
->mpt_local_idsn
, sizeof (u_int64_t
));
4381 if (!mptcp_do_sha1(&mp_tp
->mpt_remotekey
, remote_digest
,
4383 mptcplog((LOG_ERR
, "MPTCP Socket: %s: unexpected failure",
4384 __func__
), MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4387 mptcp_generate_token(remote_digest
, SHA1_RESULTLEN
,
4388 (caddr_t
)&mp_tp
->mpt_remotetoken
, sizeof (mp_tp
->mpt_localtoken
));
4389 mptcp_generate_idsn(remote_digest
, SHA1_RESULTLEN
,
4390 (caddr_t
)&mp_tp
->mpt_remote_idsn
, sizeof (u_int64_t
));
4395 mptcp_init_statevars(struct mptcb
*mp_tp
)
4397 MPT_LOCK_ASSERT_HELD(mp_tp
);
4399 /* The subflow SYN is also first MPTCP byte */
4400 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndmax
= mp_tp
->mpt_local_idsn
+ 1;
4401 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
4403 mp_tp
->mpt_rcvatmark
= mp_tp
->mpt_rcvnxt
= mp_tp
->mpt_remote_idsn
+ 1;
4407 mptcp_conn_properties(struct mptcb
*mp_tp
)
4409 /* There is only Version 0 at this time */
4410 mp_tp
->mpt_version
= MPTCP_STD_VERSION_0
;
4412 /* Set DSS checksum flag */
4414 mp_tp
->mpt_flags
|= MPTCPF_CHECKSUM
;
4416 /* Set up receive window */
4417 mp_tp
->mpt_rcvwnd
= mptcp_sbspace(mp_tp
);
4419 /* Set up gc ticks */
4420 mp_tp
->mpt_gc_ticks
= MPT_GC_TICKS
;
4427 mptcp_get_localtoken(void* mptcb_arg
)
4429 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4430 return (mp_tp
->mpt_localtoken
);
4434 mptcp_get_remotetoken(void* mptcb_arg
)
4436 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4437 return (mp_tp
->mpt_remotetoken
);
4441 mptcp_get_localkey(void* mptcb_arg
)
4443 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4444 if (mp_tp
->mpt_localkey
!= NULL
)
4445 return (*mp_tp
->mpt_localkey
);
4451 mptcp_get_remotekey(void* mptcb_arg
)
4453 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4454 return (mp_tp
->mpt_remotekey
);
4458 mptcp_send_dfin(struct socket
*so
)
4460 struct tcpcb
*tp
= NULL
;
4461 struct inpcb
*inp
= NULL
;
4463 inp
= sotoinpcb(so
);
4467 tp
= intotcpcb(inp
);
4471 if (!(tp
->t_mpflags
& TMPF_RESET
))
4472 tp
->t_mpflags
|= TMPF_SEND_DFIN
;
4476 * Data Sequence Mapping routines
4479 mptcp_insert_dsn(struct mppcb
*mpp
, struct mbuf
*m
)
4481 struct mptcb
*mp_tp
;
4486 __IGNORE_WCASTALIGN(mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
);
4488 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
4490 panic("%s: data write before establishment.",
4496 VERIFY(m
->m_flags
& M_PKTHDR
);
4497 m
->m_pkthdr
.pkt_flags
|= (PKTF_MPTCP
| PKTF_MPSO
);
4498 m
->m_pkthdr
.mp_dsn
= mp_tp
->mpt_sndmax
;
4499 m
->m_pkthdr
.mp_rlen
= m_pktlen(m
);
4500 mp_tp
->mpt_sndmax
+= m_pktlen(m
);
4507 mptcp_preproc_sbdrop(struct mbuf
*m
, unsigned int len
)
4509 u_int32_t sub_len
= 0;
4512 VERIFY(m
->m_flags
& M_PKTHDR
);
4514 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4515 sub_len
= m
->m_pkthdr
.mp_rlen
;
4517 if (sub_len
< len
) {
4518 m
->m_pkthdr
.mp_dsn
+= sub_len
;
4519 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4520 m
->m_pkthdr
.mp_rseq
+= sub_len
;
4522 m
->m_pkthdr
.mp_rlen
= 0;
4525 /* sub_len >= len */
4526 m
->m_pkthdr
.mp_dsn
+= len
;
4527 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4528 m
->m_pkthdr
.mp_rseq
+= len
;
4530 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4531 "%s: dsn 0x%llu ssn %u len %d %d\n",
4533 m
->m_pkthdr
.mp_dsn
, m
->m_pkthdr
.mp_rseq
,
4534 m
->m_pkthdr
.mp_rlen
, len
),
4535 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4536 m
->m_pkthdr
.mp_rlen
-= len
;
4540 panic("%s: MPTCP tag not set", __func__
);
4547 /* Obtain the DSN mapping stored in the mbuf */
4549 mptcp_output_getm_dsnmap32(struct socket
*so
, int off
, uint32_t datalen
,
4550 u_int32_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
, u_int64_t
*dsn64p
)
4554 mptcp_output_getm_dsnmap64(so
, off
, datalen
, &dsn64
, relseq
, data_len
);
4555 *dsn
= (u_int32_t
)MPTCP_DATASEQ_LOW32(dsn64
);
4560 mptcp_output_getm_dsnmap64(struct socket
*so
, int off
, uint32_t datalen
,
4561 u_int64_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
)
4563 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4564 struct mbuf
*mnext
= NULL
;
4565 uint32_t runlen
= 0;
4567 uint32_t contig_len
= 0;
4575 * In the subflow socket, the DSN sequencing can be discontiguous,
4576 * but the subflow sequence mapping is contiguous. Use the subflow
4577 * sequence property to find the right mbuf and corresponding dsn
4582 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4583 VERIFY(m
->m_flags
& M_PKTHDR
);
4585 if ((unsigned int)off
>= m
->m_pkthdr
.mp_rlen
) {
4586 off
-= m
->m_pkthdr
.mp_rlen
;
4594 panic("%s: bad offset", __func__
);
4598 dsn64
= m
->m_pkthdr
.mp_dsn
+ off
;
4600 *relseq
= m
->m_pkthdr
.mp_rseq
+ off
;
4603 * Now find the last contiguous byte and its length from
4606 runlen
= m
->m_pkthdr
.mp_rlen
- off
;
4607 contig_len
= runlen
;
4609 /* If datalen does not span multiple mbufs, return */
4610 if (datalen
<= runlen
) {
4611 *data_len
= min(datalen
, UINT16_MAX
);
4616 while (datalen
> runlen
) {
4617 if (mnext
== NULL
) {
4618 panic("%s: bad datalen = %d, %d %d", __func__
, datalen
,
4622 VERIFY(mnext
->m_flags
& M_PKTHDR
);
4623 VERIFY(mnext
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4626 * case A. contiguous DSN stream
4627 * case B. discontiguous DSN stream
4629 if (mnext
->m_pkthdr
.mp_dsn
== (dsn64
+ runlen
)) {
4631 runlen
+= mnext
->m_pkthdr
.mp_rlen
;
4632 contig_len
+= mnext
->m_pkthdr
.mp_rlen
;
4633 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: contig \n",
4634 __func__
), MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4637 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4638 "%s: discontig datalen %d contig_len %d cc %d \n",
4639 __func__
, datalen
, contig_len
, so
->so_snd
.sb_cc
),
4640 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4643 mnext
= mnext
->m_next
;
4645 datalen
= min(datalen
, UINT16_MAX
);
4646 *data_len
= min(datalen
, contig_len
);
4647 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4648 "%s: %llu %u %d %d \n", __func__
,
4649 *dsn
, *relseq
, *data_len
, off
),
4650 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4654 * MPTCP's notion of the next insequence Data Sequence number is adjusted
4655 * here. It must be called from mptcp_adj_rmap() which is called only after
4656 * reassembly of out of order data. The rcvnxt variable must
4657 * be updated only when atleast some insequence new data is received.
4660 mptcp_adj_rcvnxt(struct tcpcb
*tp
, struct mbuf
*m
)
4662 struct mptcb
*mp_tp
= tptomptp(tp
);
4667 if ((MPTCP_SEQ_GEQ(mp_tp
->mpt_rcvnxt
, m
->m_pkthdr
.mp_dsn
)) &&
4668 (MPTCP_SEQ_LEQ(mp_tp
->mpt_rcvnxt
, (m
->m_pkthdr
.mp_dsn
+
4669 m
->m_pkthdr
.mp_rlen
)))) {
4670 mp_tp
->mpt_rcvnxt
= m
->m_pkthdr
.mp_dsn
+ m
->m_pkthdr
.mp_rlen
;
4676 * Note that this is called only from tcp_input() via mptcp_input_preproc()
4677 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
4678 * When it trims data tcp_input calls m_adj() which does not remove the
4679 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
4680 * The dsn map insertion cannot be delayed after trim, because data can be in
4681 * the reassembly queue for a while and the DSN option info in tp will be
4682 * overwritten for every new packet received.
4683 * The dsn map will be adjusted just prior to appending to subflow sockbuf
4684 * with mptcp_adj_rmap()
4687 mptcp_insert_rmap(struct tcpcb
*tp
, struct mbuf
*m
)
4689 VERIFY(!(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
4691 if (tp
->t_mpflags
& TMPF_EMBED_DSN
) {
4692 VERIFY(m
->m_flags
& M_PKTHDR
);
4693 m
->m_pkthdr
.mp_dsn
= tp
->t_rcv_map
.mpt_dsn
;
4694 m
->m_pkthdr
.mp_rseq
= tp
->t_rcv_map
.mpt_sseq
;
4695 m
->m_pkthdr
.mp_rlen
= tp
->t_rcv_map
.mpt_len
;
4696 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
4697 tp
->t_mpflags
&= ~TMPF_EMBED_DSN
;
4698 tp
->t_mpflags
|= TMPF_MPTCP_ACKNOW
;
4703 mptcp_adj_rmap(struct socket
*so
, struct mbuf
*m
)
4706 u_int32_t sseq
, datalen
;
4707 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
4708 u_int32_t old_rcvnxt
= 0;
4710 if (m_pktlen(m
) == 0)
4713 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4714 VERIFY(m
->m_flags
& M_PKTHDR
);
4716 dsn
= m
->m_pkthdr
.mp_dsn
;
4717 sseq
= m
->m_pkthdr
.mp_rseq
+ tp
->irs
;
4718 datalen
= m
->m_pkthdr
.mp_rlen
;
4720 /* data arrived without an DSS option mapping */
4722 /* initial subflow can fallback right after SYN handshake */
4723 mptcp_notify_mpfail(so
);
4727 /* In the common case, data is in window and in sequence */
4728 if (m
->m_pkthdr
.len
== (int)datalen
) {
4729 mptcp_adj_rcvnxt(tp
, m
);
4733 old_rcvnxt
= tp
->rcv_nxt
- m
->m_pkthdr
.len
;
4734 if (SEQ_GT(old_rcvnxt
, sseq
)) {
4735 /* data trimmed from the left */
4736 int off
= old_rcvnxt
- sseq
;
4737 m
->m_pkthdr
.mp_dsn
+= off
;
4738 m
->m_pkthdr
.mp_rseq
+= off
;
4739 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4740 } else if (old_rcvnxt
== sseq
) {
4742 * data was trimmed from the right
4744 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4746 mptcp_notify_mpfail(so
);
4749 mptcp_adj_rcvnxt(tp
, m
);
4754 * Following routines help with failure detection and failover of data
4755 * transfer from one subflow to another.
4758 mptcp_act_on_txfail(struct socket
*so
)
4760 struct tcpcb
*tp
= NULL
;
4761 struct inpcb
*inp
= sotoinpcb(so
);
4766 tp
= intotcpcb(inp
);
4770 if (so
->so_flags
& SOF_MP_TRYFAILOVER
) {
4774 so
->so_flags
|= SOF_MP_TRYFAILOVER
;
4775 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPFAILOVER
));
4779 * Support for MP_FAIL option
4782 mptcp_get_map_for_dsn(struct socket
*so
, u_int64_t dsn_fail
, u_int32_t
*tcp_seq
)
4784 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4793 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4794 VERIFY(m
->m_flags
& M_PKTHDR
);
4795 dsn
= m
->m_pkthdr
.mp_dsn
;
4796 datalen
= m
->m_pkthdr
.mp_rlen
;
4797 if (MPTCP_SEQ_LEQ(dsn
, dsn_fail
) &&
4798 (MPTCP_SEQ_GEQ(dsn
+ datalen
, dsn_fail
))) {
4799 off
= dsn_fail
- dsn
;
4800 *tcp_seq
= m
->m_pkthdr
.mp_rseq
+ off
;
4801 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: %llu %llu \n",
4802 __func__
, dsn
, dsn_fail
),
4803 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
4811 * If there was no mbuf data and a fallback to TCP occurred, there's
4812 * not much else to do.
4815 mptcplog((LOG_ERR
, "MPTCP Sender: "
4816 "%s: %llu not found \n", __func__
, dsn_fail
),
4817 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
4822 * Support for sending contiguous MPTCP bytes in subflow
4823 * Also for preventing sending data with ACK in 3-way handshake
4826 mptcp_adj_sendlen(struct socket
*so
, int32_t off
, int32_t len
)
4828 u_int64_t mdss_dsn
= 0;
4829 u_int32_t mdss_subflow_seq
= 0;
4830 u_int16_t mdss_data_len
= 0;
4835 mptcp_output_getm_dsnmap64(so
, off
, (u_int32_t
)len
,
4836 &mdss_dsn
, &mdss_subflow_seq
, &mdss_data_len
);
4839 * Special case handling for Fast Join. We want to send data right
4840 * after ACK of the 3-way handshake, but not piggyback the data
4841 * with the 3rd ACK of the 3WHS. TMPF_FASTJOINBY2_SEND and
4842 * mdss_data_len control this.
4844 struct tcpcb
*tp
= NULL
;
4845 tp
= intotcpcb(sotoinpcb(so
));
4846 if ((tp
->t_mpflags
& TMPF_JOINED_FLOW
) &&
4847 (tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4848 (!(tp
->t_mpflags
& TMPF_RECVD_JOIN
)) &&
4849 (tp
->t_mpflags
& TMPF_SENT_JOIN
) &&
4850 (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) &&
4851 (!(tp
->t_mpflags
& TMPF_FASTJOINBY2_SEND
))) {
4853 tp
->t_mpflags
|= TMPF_FASTJOINBY2_SEND
;
4855 return (mdss_data_len
);
4859 mptcp_sbspace(struct mptcb
*mpt
)
4865 MPT_LOCK_ASSERT_HELD(mpt
);
4866 MPTE_LOCK_ASSERT_HELD(mpt
->mpt_mpte
);
4868 sb
= &mpt
->mpt_mpte
->mpte_mppcb
->mpp_socket
->so_rcv
;
4869 rcvbuf
= sb
->sb_hiwat
;
4870 space
= ((int32_t)imin((rcvbuf
- sb
->sb_cc
),
4871 (sb
->sb_mbmax
- sb
->sb_mbcnt
)));
4874 /* XXX check if it's too small? */
4880 * Support Fallback to Regular TCP
4883 mptcp_notify_mpready(struct socket
*so
)
4885 struct tcpcb
*tp
= NULL
;
4890 tp
= intotcpcb(sotoinpcb(so
));
4895 DTRACE_MPTCP4(multipath__ready
, struct socket
*, so
,
4896 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
4897 struct tcpcb
*, tp
);
4899 if (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
))
4902 if (tp
->t_mpflags
& TMPF_MPTCP_READY
)
4905 tp
->t_mpflags
&= ~TMPF_TCP_FALLBACK
;
4906 tp
->t_mpflags
|= TMPF_MPTCP_READY
;
4908 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
4912 mptcp_notify_mpfail(struct socket
*so
)
4914 struct tcpcb
*tp
= NULL
;
4919 tp
= intotcpcb(sotoinpcb(so
));
4924 DTRACE_MPTCP4(multipath__failed
, struct socket
*, so
,
4925 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
4926 struct tcpcb
*, tp
);
4928 if (tp
->t_mpflags
& TMPF_TCP_FALLBACK
)
4931 tp
->t_mpflags
&= ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
4932 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
4934 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
4938 * Keepalive helper function
4941 mptcp_ok_to_keepalive(struct mptcb
*mp_tp
)
4944 VERIFY(mp_tp
!= NULL
);
4946 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
4954 * MPTCP t_maxseg adjustment function
4957 mptcp_adj_mss(struct tcpcb
*tp
, boolean_t mtudisc
)
4960 struct mptcb
*mp_tp
= tptomptp(tp
);
4962 #define MPTCP_COMPUTE_LEN { \
4963 mss_lower = sizeof (struct mptcp_dss_ack_opt); \
4965 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
4968 /* adjust to 32-bit boundary + EOL */ \
4970 MPT_UNLOCK(mp_tp); \
4976 * For the first subflow and subsequent subflows, adjust mss for
4977 * most common MPTCP option size, for case where tcp_mss is called
4978 * during option processing and MTU discovery.
4980 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4981 (!(tp
->t_mpflags
& TMPF_JOINED_FLOW
))) {
4985 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4986 (tp
->t_mpflags
& TMPF_SENT_JOIN
)) {
4990 if ((mtudisc
) && (tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) {
4998 * Update the pid, upid, uuid of the subflow so, based on parent so
5001 mptcp_update_last_owner(struct mptsub
*mpts
, struct socket
*parent_mpso
)
5003 struct socket
*subflow_so
= mpts
->mpts_socket
;
5005 MPTS_LOCK_ASSERT_HELD(mpts
);
5007 socket_lock(subflow_so
, 0);
5008 if ((subflow_so
->last_pid
!= parent_mpso
->last_pid
) ||
5009 (subflow_so
->last_upid
!= parent_mpso
->last_upid
)) {
5010 subflow_so
->last_upid
= parent_mpso
->last_upid
;
5011 subflow_so
->last_pid
= parent_mpso
->last_pid
;
5012 uuid_copy(subflow_so
->last_uuid
, parent_mpso
->last_uuid
);
5014 so_update_policy(subflow_so
);
5015 socket_unlock(subflow_so
, 0);
5019 fill_mptcp_subflow(struct socket
*so
, mptcp_flow_t
*flow
, struct mptsub
*mpts
)
5023 tcp_getconninfo(so
, &flow
->flow_ci
);
5024 inp
= sotoinpcb(so
);
5026 if ((inp
->inp_vflag
& INP_IPV6
) != 0) {
5027 flow
->flow_src
.ss_family
= AF_INET6
;
5028 flow
->flow_dst
.ss_family
= AF_INET6
;
5029 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in6
);
5030 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in6
);
5031 SIN6(&flow
->flow_src
)->sin6_port
= inp
->in6p_lport
;
5032 SIN6(&flow
->flow_dst
)->sin6_port
= inp
->in6p_fport
;
5033 SIN6(&flow
->flow_src
)->sin6_addr
= inp
->in6p_laddr
;
5034 SIN6(&flow
->flow_dst
)->sin6_addr
= inp
->in6p_faddr
;
5037 if ((inp
->inp_vflag
& INP_IPV4
) != 0) {
5038 flow
->flow_src
.ss_family
= AF_INET
;
5039 flow
->flow_dst
.ss_family
= AF_INET
;
5040 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in
);
5041 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in
);
5042 SIN(&flow
->flow_src
)->sin_port
= inp
->inp_lport
;
5043 SIN(&flow
->flow_dst
)->sin_port
= inp
->inp_fport
;
5044 SIN(&flow
->flow_src
)->sin_addr
= inp
->inp_laddr
;
5045 SIN(&flow
->flow_dst
)->sin_addr
= inp
->inp_faddr
;
5047 flow
->flow_len
= sizeof(*flow
);
5048 flow
->flow_tcpci_offset
= offsetof(mptcp_flow_t
, flow_ci
);
5049 flow
->flow_flags
= mpts
->mpts_flags
;
5050 flow
->flow_cid
= mpts
->mpts_connid
;
5051 flow
->flow_sndnxt
= mpts
->mpts_sndnxt
;
5052 flow
->flow_relseq
= mpts
->mpts_rel_seq
;
5053 flow
->flow_soerror
= mpts
->mpts_soerror
;
5054 flow
->flow_probecnt
= mpts
->mpts_probecnt
;
5055 flow
->flow_peerswitch
= mpts
->mpts_peerswitch
;
5059 mptcp_pcblist SYSCTL_HANDLER_ARGS
5061 #pragma unused(oidp, arg1, arg2)
5065 struct mptses
*mpte
;
5066 struct mptcb
*mp_tp
;
5067 struct mptsub
*mpts
;
5069 conninfo_mptcp_t mptcpci
;
5070 mptcp_flow_t
*flows
= NULL
;
5072 if (req
->newptr
!= USER_ADDR_NULL
)
5075 lck_mtx_lock(&mtcbinfo
.mppi_lock
);
5076 n
= mtcbinfo
.mppi_count
;
5077 if (req
->oldptr
== USER_ADDR_NULL
) {
5078 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
5079 req
->oldidx
= (n
+ n
/8) * sizeof(conninfo_mptcp_t
) +
5080 4 * (n
+ n
/8) * sizeof(mptcp_flow_t
);
5083 TAILQ_FOREACH(mpp
, &mtcbinfo
.mppi_pcbs
, mpp_entry
) {
5085 lck_mtx_lock(&mpp
->mpp_lock
);
5086 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
5087 if (mpp
->mpp_flags
& MPP_DEFUNCT
) {
5088 lck_mtx_unlock(&mpp
->mpp_lock
);
5091 mpte
= mptompte(mpp
);
5092 VERIFY(mpte
!= NULL
);
5093 mp_tp
= mpte
->mpte_mptcb
;
5094 VERIFY(mp_tp
!= NULL
);
5096 bzero(&mptcpci
, sizeof(mptcpci
));
5098 mptcpci
.mptcpci_state
= mp_tp
->mpt_state
;
5099 mptcpci
.mptcpci_flags
= mp_tp
->mpt_flags
;
5100 mptcpci
.mptcpci_ltoken
= mp_tp
->mpt_localtoken
;
5101 mptcpci
.mptcpci_rtoken
= mp_tp
->mpt_remotetoken
;
5102 mptcpci
.mptcpci_notsent_lowat
= mp_tp
->mpt_notsent_lowat
;
5103 mptcpci
.mptcpci_snduna
= mp_tp
->mpt_snduna
;
5104 mptcpci
.mptcpci_sndnxt
= mp_tp
->mpt_sndnxt
;
5105 mptcpci
.mptcpci_sndmax
= mp_tp
->mpt_sndmax
;
5106 mptcpci
.mptcpci_lidsn
= mp_tp
->mpt_local_idsn
;
5107 mptcpci
.mptcpci_sndwnd
= mp_tp
->mpt_sndwnd
;
5108 mptcpci
.mptcpci_rcvnxt
= mp_tp
->mpt_rcvnxt
;
5109 mptcpci
.mptcpci_rcvatmark
= mp_tp
->mpt_rcvatmark
;
5110 mptcpci
.mptcpci_ridsn
= mp_tp
->mpt_remote_idsn
;
5111 mptcpci
.mptcpci_rcvwnd
= mp_tp
->mpt_rcvwnd
;
5114 mptcpci
.mptcpci_nflows
= mpte
->mpte_numflows
;
5115 mptcpci
.mptcpci_mpte_flags
= mpte
->mpte_flags
;
5116 mptcpci
.mptcpci_mpte_addrid
= mpte
->mpte_addrid_last
;
5117 mptcpci
.mptcpci_flow_offset
=
5118 offsetof(conninfo_mptcp_t
, mptcpci_flows
);
5120 len
= sizeof(*flows
) * mpte
->mpte_numflows
;
5121 if (mpte
->mpte_numflows
!= 0) {
5122 flows
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
5123 if (flows
== NULL
) {
5124 lck_mtx_unlock(&mpp
->mpp_lock
);
5127 mptcpci
.mptcpci_len
= sizeof(mptcpci
) +
5128 sizeof(*flows
) * (mptcpci
.mptcpci_nflows
- 1);
5129 error
= SYSCTL_OUT(req
, &mptcpci
,
5130 sizeof(mptcpci
) - sizeof(mptcp_flow_t
));
5132 mptcpci
.mptcpci_len
= sizeof(mptcpci
);
5133 error
= SYSCTL_OUT(req
, &mptcpci
, sizeof(mptcpci
));
5136 lck_mtx_unlock(&mpp
->mpp_lock
);
5137 FREE(flows
, M_TEMP
);
5141 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
5143 so
= mpts
->mpts_socket
;
5145 fill_mptcp_subflow(so
, &flows
[f
], mpts
);
5146 socket_unlock(so
, 0);
5150 lck_mtx_unlock(&mpp
->mpp_lock
);
5152 error
= SYSCTL_OUT(req
, flows
, len
);
5153 FREE(flows
, M_TEMP
);
5158 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
/* Register net.inet.mptcp.pcblist (read-only) backed by mptcp_pcblist(). */
5163 SYSCTL_PROC(_net_inet_mptcp
, OID_AUTO
, pcblist
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
5164 0, 0, mptcp_pcblist
, "S,conninfo_mptcp_t",
5165 "List of active MPTCP connections");
5168 * Check the health of the other subflows and do an mptcp_output if
5169 * there is no other active or functional subflow at the time of
5170 * call of this function.
5173 mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
)
5175 struct mptsub
*from_mpts
= NULL
;
5177 MPTE_LOCK_ASSERT_HELD(mpte
);
5179 MPTS_UNLOCK(to_mpts
);
5181 from_mpts
= mpte
->mpte_active_sub
;
5183 if (from_mpts
== NULL
)
5186 MPTS_LOCK(from_mpts
);
5188 if ((from_mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
5189 (from_mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
5190 MPTS_UNLOCK(from_mpts
);
5194 MPTS_UNLOCK(from_mpts
);
5204 * Set notsent lowat mark on the MPTCB
5207 mptcp_set_notsent_lowat(struct mptses
*mpte
, int optval
)
5209 struct mptcb
*mp_tp
= NULL
;
5212 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5213 mp_tp
= mpte
->mpte_mptcb
;
5216 mp_tp
->mpt_notsent_lowat
= optval
;
5224 mptcp_get_notsent_lowat(struct mptses
*mpte
)
5226 struct mptcb
*mp_tp
= NULL
;
5228 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5229 mp_tp
= mpte
->mpte_mptcb
;
5232 return mp_tp
->mpt_notsent_lowat
;
5238 mptcp_notsent_lowat_check(struct socket
*so
) {
5239 struct mptses
*mpte
;
5241 struct mptcb
*mp_tp
;
5242 struct mptsub
*mpts
;
5246 mpp
= sotomppcb(so
);
5247 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
5251 mpte
= mptompte(mpp
);
5252 mp_tp
= mpte
->mpte_mptcb
;
5255 notsent
= so
->so_snd
.sb_cc
;
5257 if ((notsent
== 0) ||
5258 ((notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)) <=
5259 mp_tp
->mpt_notsent_lowat
)) {
5260 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
5261 "lowat %d notsent %d actual %d \n",
5262 mp_tp
->mpt_notsent_lowat
, notsent
,
5263 notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)),
5264 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
5270 /* When Nagle's algorithm is not disabled, it is better
5271 * to wakeup the client even before there is atleast one
5272 * maxseg of data to write.
5274 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
5277 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
5278 struct socket
*subf_so
= mpts
->mpts_socket
;
5279 socket_lock(subf_so
, 0);
5280 struct tcpcb
*tp
= intotcpcb(sotoinpcb(subf_so
));
5282 notsent
= so
->so_snd
.sb_cc
-
5283 (tp
->snd_nxt
- tp
->snd_una
);
5285 if ((tp
->t_flags
& TF_NODELAY
) == 0 &&
5286 notsent
> 0 && (notsent
<= (int)tp
->t_maxseg
)) {
5289 mptcplog((LOG_DEBUG
, "MPTCP Sender: lowat %d notsent %d"
5290 " nodelay false \n",
5291 mp_tp
->mpt_notsent_lowat
, notsent
),
5292 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
5293 socket_unlock(subf_so
, 0);
5303 mptcp_get_rtt_measurement(struct mptsub
*mpts
, struct mptses
*mpte
)
5305 MPTE_LOCK_ASSERT_HELD(mpte
);
5306 MPTS_LOCK_ASSERT_HELD(mpts
);
5308 struct socket
*subflow_so
= mpts
->mpts_socket
;
5309 socket_lock(subflow_so
, 0);
5310 mpts
->mpts_srtt
= (intotcpcb(sotoinpcb(subflow_so
)))->t_srtt
;
5311 mpts
->mpts_rxtcur
= (intotcpcb(sotoinpcb(subflow_so
)))->t_rxtcur
;
5312 socket_unlock(subflow_so
, 0);
5315 /* Using Symptoms Advisory to detect poor WiFi or poor Cell */
/* Handle returned by ctl_register() for the symptomsd control socket. */
5316 static kern_ctl_ref mptcp_kern_ctrl_ref
= NULL
;
/* 0/1 flag: whether the control socket is currently open (CAS-guarded). */
5317 static uint32_t mptcp_kern_skt_inuse
= 0;
/* Last advisory received from symptomsd; see the TBD locking note in the send handler. */
5318 symptoms_advisory_t mptcp_advisory
;
5321 mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
5324 #pragma unused(kctlref, sac, unitinfo)
5326 * We don't need to do anything here. But we can atleast ensure
5327 * only one user opens the MPTCP_KERN_CTL_NAME control socket.
5329 if (OSCompareAndSwap(0, 1, &mptcp_kern_skt_inuse
))
5336 mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
,
5339 #pragma unused(kctlref, kcunit, unitinfo)
5340 if (OSCompareAndSwap(1, 0, &mptcp_kern_skt_inuse
)) {
5341 /* TBD needs to be locked if the size grows more than an int */
5342 bzero(&mptcp_advisory
, sizeof(mptcp_advisory
));
5351 mptcp_symptoms_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
5352 mbuf_t m
, int flags
)
5354 #pragma unused(kctlref, kcunit, unitinfo, flags)
5355 symptoms_advisory_t
*sa
= NULL
;
5357 if (mbuf_pkthdr_len(m
) < sizeof(*sa
)) {
5362 if (mbuf_len(m
) >= sizeof(*sa
))
5367 if (mptcp_advisory
.sa_nwk_status_int
!= sa
->sa_nwk_status_int
) {
5369 * we could use this notification to notify all mptcp pcbs
5370 * of the change in network status. But its difficult to
5371 * define if sending REMOVE_ADDR or MP_PRIO is appropriate
5372 * given that these are only soft indicators of the network
5373 * state. Leaving this as TBD for now.
5377 if (sa
->sa_nwk_status
!= SYMPTOMS_ADVISORY_NOCOMMENT
) {
5378 mptcplog((LOG_DEBUG
, "MPTCP Events: %s wifi %d,%d cell %d,%d\n",
5379 __func__
, sa
->sa_wifi_status
, mptcp_advisory
.sa_wifi_status
,
5380 sa
->sa_cell_status
, mptcp_advisory
.sa_cell_status
),
5381 MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
,
5384 if ((sa
->sa_wifi_status
&
5385 (SYMPTOMS_ADVISORY_WIFI_BAD
| SYMPTOMS_ADVISORY_WIFI_OK
)) !=
5386 (SYMPTOMS_ADVISORY_WIFI_BAD
| SYMPTOMS_ADVISORY_WIFI_OK
)) {
5387 mptcp_advisory
.sa_wifi_status
= sa
->sa_wifi_status
;
5390 if ((sa
->sa_cell_status
&
5391 (SYMPTOMS_ADVISORY_CELL_BAD
| SYMPTOMS_ADVISORY_CELL_OK
)) !=
5392 (SYMPTOMS_ADVISORY_CELL_BAD
| SYMPTOMS_ADVISORY_CELL_OK
)) {
5393 mptcp_advisory
.sa_cell_status
= sa
->sa_cell_status
;
5396 mptcplog((LOG_DEBUG
, "MPTCP Events: %s NOCOMMENT "
5397 "wifi %d cell %d\n", __func__
,
5398 mptcp_advisory
.sa_wifi_status
,
5399 mptcp_advisory
.sa_cell_status
),
5400 MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
5406 mptcp_control_register(void)
5408 /* Set up the advisory control socket */
5409 struct kern_ctl_reg mptcp_kern_ctl
;
5411 bzero(&mptcp_kern_ctl
, sizeof(mptcp_kern_ctl
));
5412 strlcpy(mptcp_kern_ctl
.ctl_name
, MPTCP_KERN_CTL_NAME
,
5413 sizeof(mptcp_kern_ctl
.ctl_name
));
5414 mptcp_kern_ctl
.ctl_connect
= mptcp_symptoms_ctl_connect
;
5415 mptcp_kern_ctl
.ctl_disconnect
= mptcp_symptoms_ctl_disconnect
;
5416 mptcp_kern_ctl
.ctl_send
= mptcp_symptoms_ctl_send
;
5417 mptcp_kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
;
5419 (void)ctl_register(&mptcp_kern_ctl
, &mptcp_kern_ctrl_ref
);
5423 mptcp_is_wifi_unusable(void)
5425 /* a false return val indicates there is no info or wifi is ok */
5426 return (mptcp_advisory
.sa_wifi_status
& SYMPTOMS_ADVISORY_WIFI_BAD
);
5430 mptcp_is_cell_unusable(void)
5432 /* a false return val indicates there is no info or cell is ok */
5433 return (mptcp_advisory
.sa_cell_status
& SYMPTOMS_ADVISORY_CELL_BAD
);
5437 mptcp_use_symptoms_hints(struct mptsub
* best
, struct mptsub
*second_best
)
5439 struct mptsub
*cellsub
= NULL
;
5440 struct mptsub
*wifisub
= NULL
;
5441 struct mptsub
*wiredsub
= NULL
;
5443 VERIFY ((best
!= NULL
) && (second_best
!= NULL
));
5445 if (!mptcp_use_symptomsd
)
5448 if (!mptcp_kern_skt_inuse
)
5452 * There could be devices with more than one wifi interface or
5453 * more than one wired or cell interfaces.
5454 * TBD: SymptomsD is unavailable on such platforms as of now.
5455 * Try to prefer best when possible in general.
5456 * Also, SymptomsD sends notifications about wifi only when it
5459 if (best
->mpts_linktype
& MPTSL_WIFI
)
5461 else if (best
->mpts_linktype
& MPTSL_CELL
)
5463 else if (best
->mpts_linktype
& MPTSL_WIRED
)
5467 * On platforms with wired paths, don't use hints about wifi or cell.
5468 * Currently, SymptomsD is not available on platforms with wired paths.
5473 if ((wifisub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_WIFI
))
5474 wifisub
= second_best
;
5476 if ((cellsub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_CELL
))
5477 cellsub
= second_best
;
5479 if ((wiredsub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_WIRED
))
5480 wiredsub
= second_best
;
5482 if ((wifisub
== best
) && mptcp_is_wifi_unusable()) {
5483 tcpstat
.tcps_mp_sel_symtomsd
++;
5484 if (mptcp_is_cell_unusable()) {
5485 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5486 " suggests both Wifi and Cell are bad. Wired %s.",
5487 (wiredsub
== NULL
) ? "none" : "present"),
5488 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5491 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5492 " suggests Wifi bad, Cell good. Wired %s.",
5493 (wiredsub
== NULL
) ? "none" : "present"),
5494 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5495 return ((wiredsub
!= NULL
) ? wiredsub
: cellsub
);
5499 if ((cellsub
== best
) && (mptcp_is_cell_unusable())) {
5500 tcpstat
.tcps_mp_sel_symtomsd
++;
5501 if (mptcp_is_wifi_unusable()) {
5502 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5503 " suggests both Cell and Wifi are bad. Wired %s.",
5504 (wiredsub
== NULL
) ? "none" : "present"),
5505 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5508 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5509 " suggests Cell bad, Wifi good. Wired %s.",
5510 (wiredsub
== NULL
) ? "none" : "present"),
5511 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5512 return ((wiredsub
!= NULL
) ? wiredsub
: wifisub
);
5516 /* little is known about the state of the network or wifi is good */