2 * Copyright (c) 2012-2016 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/kernel.h>
34 #include <sys/mcache.h>
35 #include <sys/resourcevar.h>
36 #include <sys/socket.h>
37 #include <sys/socketvar.h>
38 #include <sys/syslog.h>
39 #include <sys/domain.h>
40 #include <sys/protosw.h>
41 #include <sys/sysctl.h>
43 #include <kern/zalloc.h>
44 #include <kern/locks.h>
46 #include <mach/thread_act.h>
50 #include <net/if_var.h>
51 #include <netinet/in.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/in_var.h>
54 #include <netinet/tcp.h>
55 #include <netinet/tcp_fsm.h>
56 #include <netinet/tcp_seq.h>
57 #include <netinet/tcp_var.h>
58 #include <netinet/mptcp_var.h>
59 #include <netinet/mptcp.h>
60 #include <netinet/mptcp_seq.h>
61 #include <netinet/mptcp_timer.h>
62 #include <libkern/crypto/sha1.h>
64 #include <netinet6/in6_pcb.h>
65 #include <netinet6/ip6protosw.h>
67 #include <dev/random/randomdev.h>
69 extern char *proc_best_name(proc_t
);
72 * Notes on MPTCP implementation.
74 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
75 * communication domain. The structure mtcbinfo describes the MPTCP instance
76 * of a Multipath protocol in that domain. It is used to keep track of all
77 * MPTCP PCB instances in the system, and is protected by the global lock
80 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
81 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
82 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
83 * allocated from the same memory block, and each structure has a pointer
84 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
85 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
86 * PCB (mppcb) as well as the MPTCP Session (mptses).
88 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
89 * in particular, it holds the list of subflows as well as the MPTCP thread.
91 * A functioning MPTCP Session consists of one or more subflow sockets. Each
92 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
93 * represented by the mptsub structure. Because each subflow requires access
94 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
95 * subflow. This gets decremented prior to the subflow's destruction. The
96 * subflow lock (mpts_lock) is used to protect accesses to the subflow.
98 * To handle events (read, write, control) from the subflows, an MPTCP thread
99 * is created; currently, there is one thread per MPTCP Session. In order to
100 * prevent the MPTCP socket from being destroyed while being accessed by the
101 * MPTCP thread, we bump up the MPTCP socket's so_usecount for the thread,
102 * which will be decremented prior to the thread's termination. The thread
103 * lock (mpte_thread_lock) is used to synchronize its signalling.
105 * Lock ordering is defined as follows:
107 * mtcbinfo (mppi_lock)
113 * It is not a requirement that all of the above locks be acquired
114 * in succession, but the correct lock ordering must be followed when
115 * more than one lock needs to be held. The MPTCP thread lock
116 * is not constrained by this arrangement, because none of the other locks
117 * is ever acquired while holding mpte_thread_lock; therefore it may be taken
118 * at any moment to signal the thread.
120 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
121 * work is done by the MPTCP garbage collector which is invoked on demand by
122 * the PF_MULTIPATH garbage collector. This process will take place once all
123 * of the subflows have been destroyed, and the MPTCP thread be instructed to
127 static void mptcp_sesdestroy(struct mptses
*);
128 static void mptcp_thread_signal_locked(struct mptses
*);
129 static void mptcp_thread_terminate_signal(struct mptses
*);
130 static void mptcp_thread_dowork(struct mptses
*);
131 static void mptcp_thread_func(void *, wait_result_t
);
132 static void mptcp_thread_destroy(struct mptses
*);
133 static void mptcp_key_pool_init(void);
134 static void mptcp_attach_to_subf(struct socket
*, struct mptcb
*, uint8_t);
135 static void mptcp_detach_mptcb_from_subf(struct mptcb
*, struct socket
*);
137 static uint32_t mptcp_gc(struct mppcbinfo
*);
138 static int mptcp_subflow_soclose(struct mptsub
*, struct socket
*);
139 static int mptcp_subflow_soconnectx(struct mptses
*, struct mptsub
*);
140 static int mptcp_subflow_soreceive(struct socket
*, struct sockaddr
**,
141 struct uio
*, struct mbuf
**, struct mbuf
**, int *);
142 static void mptcp_subflow_rupcall(struct socket
*, void *, int);
143 static void mptcp_subflow_input(struct mptses
*, struct mptsub
*);
144 static void mptcp_subflow_wupcall(struct socket
*, void *, int);
145 static void mptcp_subflow_eupcall(struct socket
*, void *, uint32_t);
146 static void mptcp_update_last_owner(struct mptsub
*, struct socket
*);
147 static void mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
);
148 static void mptcp_get_rtt_measurement(struct mptsub
*, struct mptses
*);
149 static void mptcp_drop_tfo_data(struct mptses
*, struct mptsub
*, int *);
/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE		= 1,	/* delete this subflow */
	MPTS_EVRET_OK			= 2,	/* OK */
	MPTS_EVRET_CONNECT_PENDING	= 3,	/* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK	= 4,	/* abort all but preferred */
} ev_ret_t;
165 static ev_ret_t
mptcp_subflow_events(struct mptses
*, struct mptsub
*, uint64_t *);
166 static ev_ret_t
mptcp_subflow_connreset_ev(struct mptses
*, struct mptsub
*, uint64_t *);
167 static ev_ret_t
mptcp_subflow_cantrcvmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
168 static ev_ret_t
mptcp_subflow_cantsendmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
169 static ev_ret_t
mptcp_subflow_timeout_ev(struct mptses
*, struct mptsub
*, uint64_t *);
170 static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses
*, struct mptsub
*, uint64_t *);
171 static ev_ret_t
mptcp_subflow_failover_ev(struct mptses
*, struct mptsub
*, uint64_t *);
172 static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses
*, struct mptsub
*, uint64_t *);
173 static ev_ret_t
mptcp_subflow_suspend_ev(struct mptses
*, struct mptsub
*, uint64_t *);
174 static ev_ret_t
mptcp_subflow_resume_ev(struct mptses
*, struct mptsub
*, uint64_t *);
175 static ev_ret_t
mptcp_subflow_connected_ev(struct mptses
*, struct mptsub
*, uint64_t *);
176 static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses
*, struct mptsub
*, uint64_t *);
177 static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses
*, struct mptsub
*, uint64_t *);
178 static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses
*, struct mptsub
*, uint64_t *);
179 static ev_ret_t
mptcp_fastjoin_ev(struct mptses
*, struct mptsub
*, uint64_t *);
180 static ev_ret_t
mptcp_deleteok_ev(struct mptses
*, struct mptsub
*, uint64_t *);
181 static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses
*, struct mptsub
*, uint64_t *);
183 static const char *mptcp_evret2str(ev_ret_t
);
185 static mptcp_key_t
*mptcp_reserve_key(void);
186 static int mptcp_do_sha1(mptcp_key_t
*, char *, int);
187 static void mptcp_init_local_parms(struct mptcb
*);
189 static unsigned int mptsub_zone_size
; /* size of mptsub */
190 static struct zone
*mptsub_zone
; /* zone for mptsub */
192 static unsigned int mptopt_zone_size
; /* size of mptopt */
193 static struct zone
*mptopt_zone
; /* zone for mptopt */
195 static unsigned int mpt_subauth_entry_size
; /* size of subf auth entry */
196 static struct zone
*mpt_subauth_zone
; /* zone of subf auth entry */
198 struct mppcbinfo mtcbinfo
;
200 static struct mptcp_keys_pool_head mptcp_keys_pool
;
202 #define MPTCP_SUBFLOW_WRITELEN (8 * 1024) /* bytes to write each time */
203 #define MPTCP_SUBFLOW_READLEN (8 * 1024) /* bytes to read each time */
205 SYSCTL_DECL(_net_inet
);
207 SYSCTL_NODE(_net_inet
, OID_AUTO
, mptcp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "MPTCP");
209 uint32_t mptcp_dbg_area
= 0; /* more noise if greater than 1 */
210 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, dbg_area
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
211 &mptcp_dbg_area
, 0, "MPTCP debug area");
213 uint32_t mptcp_dbg_level
= 0;
214 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, dbg_level
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
215 &mptcp_dbg_level
, 0, "MPTCP debug level");
218 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
219 &mtcbinfo
.mppi_count
, 0, "Number of active PCBs");
222 * Since there is one kernel thread per mptcp socket, imposing an artificial
223 * limit on number of allowed mptcp sockets.
225 uint32_t mptcp_socket_limit
= MPPCB_LIMIT
;
226 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, sk_lim
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
227 &mptcp_socket_limit
, 0, "MPTCP socket limit");
230 * SYSCTL to turn on delayed cellular subflow start.
232 uint32_t mptcp_delayed_subf_start
= 0;
233 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, delayed
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
234 &mptcp_delayed_subf_start
, 0, "MPTCP Delayed Subflow start");
237 * sysctl to use network status hints from symptomsd
239 uint32_t mptcp_use_symptomsd
= 1;
240 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, usesymptoms
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
241 &mptcp_use_symptomsd
, 0, "MPTCP Use SymptomsD");
243 static struct protosw mptcp_subflow_protosw
;
244 static struct pr_usrreqs mptcp_subflow_usrreqs
;
246 static struct ip6protosw mptcp_subflow_protosw6
;
247 static struct pr_usrreqs mptcp_subflow_usrreqs6
;
250 typedef struct mptcp_subflow_event_entry
{
251 uint64_t sofilt_hint_mask
;
252 ev_ret_t (*sofilt_hint_ev_hdlr
)(
255 uint64_t *p_mpsofilt_hint
);
259 * XXX The order of the event handlers below is really
261 * SO_FILT_HINT_DELETEOK event has to be handled first,
262 * else we may end up missing on this event.
263 * Please read radar://24043716 for more details.
265 static mptsub_ev_entry_t mpsub_ev_entry_tbl
[] = {
267 .sofilt_hint_mask
= SO_FILT_HINT_DELETEOK
,
268 .sofilt_hint_ev_hdlr
= mptcp_deleteok_ev
,
271 .sofilt_hint_mask
= SO_FILT_HINT_MPCANTRCVMORE
,
272 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpcantrcvmore_ev
,
275 .sofilt_hint_mask
= SO_FILT_HINT_MPFAILOVER
,
276 .sofilt_hint_ev_hdlr
= mptcp_subflow_failover_ev
,
279 .sofilt_hint_mask
= SO_FILT_HINT_CONNRESET
,
280 .sofilt_hint_ev_hdlr
= mptcp_subflow_connreset_ev
,
283 .sofilt_hint_mask
= SO_FILT_HINT_MUSTRST
,
284 .sofilt_hint_ev_hdlr
= mptcp_subflow_mustrst_ev
,
287 .sofilt_hint_mask
= SO_FILT_HINT_CANTRCVMORE
,
288 .sofilt_hint_ev_hdlr
= mptcp_subflow_cantrcvmore_ev
,
290 { .sofilt_hint_mask
= SO_FILT_HINT_CANTSENDMORE
,
291 .sofilt_hint_ev_hdlr
= mptcp_subflow_cantsendmore_ev
,
294 .sofilt_hint_mask
= SO_FILT_HINT_TIMEOUT
,
295 .sofilt_hint_ev_hdlr
= mptcp_subflow_timeout_ev
,
298 .sofilt_hint_mask
= SO_FILT_HINT_NOSRCADDR
,
299 .sofilt_hint_ev_hdlr
= mptcp_subflow_nosrcaddr_ev
,
302 .sofilt_hint_mask
= SO_FILT_HINT_IFDENIED
,
303 .sofilt_hint_ev_hdlr
= mptcp_subflow_ifdenied_ev
,
306 .sofilt_hint_mask
= SO_FILT_HINT_SUSPEND
,
307 .sofilt_hint_ev_hdlr
= mptcp_subflow_suspend_ev
,
310 .sofilt_hint_mask
= SO_FILT_HINT_RESUME
,
311 .sofilt_hint_ev_hdlr
= mptcp_subflow_resume_ev
,
314 .sofilt_hint_mask
= SO_FILT_HINT_CONNECTED
,
315 .sofilt_hint_ev_hdlr
= mptcp_subflow_connected_ev
,
318 .sofilt_hint_mask
= SO_FILT_HINT_MPSTATUS
,
319 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpstatus_ev
,
322 .sofilt_hint_mask
= SO_FILT_HINT_DISCONNECTED
,
323 .sofilt_hint_ev_hdlr
= mptcp_subflow_disconnected_ev
,
326 .sofilt_hint_mask
= SO_FILT_HINT_MPFASTJ
,
327 .sofilt_hint_ev_hdlr
= mptcp_fastjoin_ev
,
332 * Protocol pr_init callback.
335 mptcp_init(struct protosw
*pp
, struct domain
*dp
)
338 static int mptcp_initialized
= 0;
341 struct ip6protosw
*prp6
;
344 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
|PR_ATTACHED
)) == PR_ATTACHED
);
346 /* do this only once */
347 if (mptcp_initialized
)
349 mptcp_initialized
= 1;
352 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
353 * we must be able to find IPPROTO_TCP entries for both.
355 prp
= pffindproto_locked(PF_INET
, IPPROTO_TCP
, SOCK_STREAM
);
357 bcopy(prp
, &mptcp_subflow_protosw
, sizeof (*prp
));
358 bcopy(prp
->pr_usrreqs
, &mptcp_subflow_usrreqs
,
359 sizeof (mptcp_subflow_usrreqs
));
360 mptcp_subflow_protosw
.pr_entry
.tqe_next
= NULL
;
361 mptcp_subflow_protosw
.pr_entry
.tqe_prev
= NULL
;
362 mptcp_subflow_protosw
.pr_usrreqs
= &mptcp_subflow_usrreqs
;
363 mptcp_subflow_usrreqs
.pru_soreceive
= mptcp_subflow_soreceive
;
364 mptcp_subflow_usrreqs
.pru_rcvoob
= pru_rcvoob_notsupp
;
366 * Socket filters shouldn't attach/detach to/from this protosw
367 * since pr_protosw is to be used instead, which points to the
368 * real protocol; if they do, it is a bug and we should panic.
370 mptcp_subflow_protosw
.pr_filter_head
.tqh_first
=
371 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
372 mptcp_subflow_protosw
.pr_filter_head
.tqh_last
=
373 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
376 prp6
= (struct ip6protosw
*)pffindproto_locked(PF_INET6
,
377 IPPROTO_TCP
, SOCK_STREAM
);
378 VERIFY(prp6
!= NULL
);
379 bcopy(prp6
, &mptcp_subflow_protosw6
, sizeof (*prp6
));
380 bcopy(prp6
->pr_usrreqs
, &mptcp_subflow_usrreqs6
,
381 sizeof (mptcp_subflow_usrreqs6
));
382 mptcp_subflow_protosw6
.pr_entry
.tqe_next
= NULL
;
383 mptcp_subflow_protosw6
.pr_entry
.tqe_prev
= NULL
;
384 mptcp_subflow_protosw6
.pr_usrreqs
= &mptcp_subflow_usrreqs6
;
385 mptcp_subflow_usrreqs6
.pru_soreceive
= mptcp_subflow_soreceive
;
386 mptcp_subflow_usrreqs6
.pru_rcvoob
= pru_rcvoob_notsupp
;
388 * Socket filters shouldn't attach/detach to/from this protosw
389 * since pr_protosw is to be used instead, which points to the
390 * real protocol; if they do, it is a bug and we should panic.
392 mptcp_subflow_protosw6
.pr_filter_head
.tqh_first
=
393 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
394 mptcp_subflow_protosw6
.pr_filter_head
.tqh_last
=
395 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
398 bzero(&mtcbinfo
, sizeof (mtcbinfo
));
399 TAILQ_INIT(&mtcbinfo
.mppi_pcbs
);
400 mtcbinfo
.mppi_size
= sizeof (struct mpp_mtp
);
401 if ((mtcbinfo
.mppi_zone
= zinit(mtcbinfo
.mppi_size
,
402 1024 * mtcbinfo
.mppi_size
, 8192, "mptcb")) == NULL
) {
403 panic("%s: unable to allocate MPTCP PCB zone\n", __func__
);
406 zone_change(mtcbinfo
.mppi_zone
, Z_CALLERACCT
, FALSE
);
407 zone_change(mtcbinfo
.mppi_zone
, Z_EXPAND
, TRUE
);
409 mtcbinfo
.mppi_lock_grp_attr
= lck_grp_attr_alloc_init();
410 mtcbinfo
.mppi_lock_grp
= lck_grp_alloc_init("mppcb",
411 mtcbinfo
.mppi_lock_grp_attr
);
412 mtcbinfo
.mppi_lock_attr
= lck_attr_alloc_init();
413 lck_mtx_init(&mtcbinfo
.mppi_lock
, mtcbinfo
.mppi_lock_grp
,
414 mtcbinfo
.mppi_lock_attr
);
416 mtcbinfo
.mppi_gc
= mptcp_gc
;
417 mtcbinfo
.mppi_timer
= mptcp_timer
;
418 mtcbinfo
.mppi_pcbe_create
= mptcp_sescreate
;
420 /* attach to MP domain for garbage collection to take place */
421 mp_pcbinfo_attach(&mtcbinfo
);
423 mptsub_zone_size
= sizeof (struct mptsub
);
424 if ((mptsub_zone
= zinit(mptsub_zone_size
, 1024 * mptsub_zone_size
,
425 8192, "mptsub")) == NULL
) {
426 panic("%s: unable to allocate MPTCP subflow zone\n", __func__
);
429 zone_change(mptsub_zone
, Z_CALLERACCT
, FALSE
);
430 zone_change(mptsub_zone
, Z_EXPAND
, TRUE
);
432 mptopt_zone_size
= sizeof (struct mptopt
);
433 if ((mptopt_zone
= zinit(mptopt_zone_size
, 128 * mptopt_zone_size
,
434 1024, "mptopt")) == NULL
) {
435 panic("%s: unable to allocate MPTCP option zone\n", __func__
);
438 zone_change(mptopt_zone
, Z_CALLERACCT
, FALSE
);
439 zone_change(mptopt_zone
, Z_EXPAND
, TRUE
);
441 mpt_subauth_entry_size
= sizeof (struct mptcp_subf_auth_entry
);
442 if ((mpt_subauth_zone
= zinit(mpt_subauth_entry_size
,
443 1024 * mpt_subauth_entry_size
, 8192, "mptauth")) == NULL
) {
444 panic("%s: unable to allocate MPTCP address auth zone \n",
448 zone_change(mpt_subauth_zone
, Z_CALLERACCT
, FALSE
);
449 zone_change(mpt_subauth_zone
, Z_EXPAND
, TRUE
);
451 /* Set up a list of unique keys */
452 mptcp_key_pool_init();
/*
 * mptcp_sescreate(mp_so, mpp)
 *
 * Sets up the MPTCP session (mpte) and the MPTCP PCB (mp_tp) that sit in
 * the same struct mpp_mtp block as the Multipath PCB `mpp`:
 *  - zeroes the session, cross-links it with mpp and mp_tp, initializes
 *    the option and subflow lists, the association/connection ids and the
 *    thread lock;
 *  - starts the per-session kernel thread (mptcp_thread_func) and takes a
 *    so_usecount reference on mp_so on its behalf;
 *  - zeroes the MPTCP PCB, initializes its lock, back-pointer and state
 *    (MPTCPS_CLOSED).
 * Returns the session pointer, or NULL when `error` is non-zero.
 *
 * NOTE(review): this listing is missing lines (local declarations, braces,
 * the body of the kernel_thread_start() failure branch and whatever guards
 * the lck_mtx_destroy() below); confirm against the complete source before
 * relying on the error path.
 */
456 * Create an MPTCP session, called as a result of opening a MPTCP socket.
459 mptcp_sescreate(struct socket
*mp_so
, struct mppcb
*mpp
)
461 struct mppcbinfo
*mppi
;
467 mppi
= mpp
->mpp_pcbinfo
;
468 VERIFY(mppi
!= NULL
);
470 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
471 __IGNORE_WCASTALIGN(mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
);
473 /* MPTCP Multipath PCB Extension */
474 bzero(mpte
, sizeof (*mpte
));
475 VERIFY(mpp
->mpp_pcbe
== NULL
);
476 mpp
->mpp_pcbe
= mpte
;
477 mpte
->mpte_mppcb
= mpp
;
478 mpte
->mpte_mptcb
= mp_tp
;
480 TAILQ_INIT(&mpte
->mpte_sopts
);
481 TAILQ_INIT(&mpte
->mpte_subflows
);
482 mpte
->mpte_associd
= SAE_ASSOCID_ANY
;
483 mpte
->mpte_connid_last
= SAE_CONNID_ANY
;
485 lck_mtx_init(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
,
486 mppi
->mppi_lock_attr
);
491 * This can be rather expensive if we have lots of MPTCP sockets,
492 * but we need a kernel thread for this model to work. Perhaps we
493 * could amortize the costs by having one worker thread per a group
496 if (kernel_thread_start(mptcp_thread_func
, mpte
,
497 &mpte
->mpte_thread
) != KERN_SUCCESS
) {
501 mp_so
->so_usecount
++; /* for thread */
503 /* MPTCP Protocol Control Block */
504 bzero(mp_tp
, sizeof (*mp_tp
));
505 lck_mtx_init(&mp_tp
->mpt_lock
, mppi
->mppi_lock_grp
,
506 mppi
->mppi_lock_attr
);
507 mp_tp
->mpt_mpte
= mpte
;
508 mp_tp
->mpt_state
= MPTCPS_CLOSED
;
511 lck_mtx_destroy(&mpte
->mpte_thread_lock
, mppi
->mppi_lock_grp
);
512 DTRACE_MPTCP5(session__create
, struct socket
*, mp_so
,
513 struct sockbuf
*, &mp_so
->so_rcv
,
514 struct sockbuf
*, &mp_so
->so_snd
,
515 struct mppcb
*, mpp
, int, error
);
517 return ((error
!= 0) ? NULL
: mpte
);
521 * Destroy an MPTCP session.
524 mptcp_sesdestroy(struct mptses
*mpte
)
528 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
530 mp_tp
= mpte
->mpte_mptcb
;
531 VERIFY(mp_tp
!= NULL
);
534 * MPTCP Multipath PCB Extension section
536 mptcp_flush_sopts(mpte
);
537 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
) && mpte
->mpte_numflows
== 0);
539 lck_mtx_destroy(&mpte
->mpte_thread_lock
,
540 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
543 * MPTCP Protocol Control Block section
545 lck_mtx_destroy(&mp_tp
->mpt_lock
,
546 mpte
->mpte_mppcb
->mpp_pcbinfo
->mppi_lock_grp
);
548 DTRACE_MPTCP2(session__destroy
, struct mptses
*, mpte
,
549 struct mptcb
*, mp_tp
);
553 * Allocate an MPTCP socket option structure.
556 mptcp_sopt_alloc(int how
)
560 mpo
= (how
== M_WAITOK
) ? zalloc(mptopt_zone
) :
561 zalloc_noblock(mptopt_zone
);
563 bzero(mpo
, mptopt_zone_size
);
570 * Free an MPTCP socket option structure.
573 mptcp_sopt_free(struct mptopt
*mpo
)
575 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
577 zfree(mptopt_zone
, mpo
);
581 * Add a socket option to the MPTCP socket option list.
584 mptcp_sopt_insert(struct mptses
*mpte
, struct mptopt
*mpo
)
586 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
587 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
588 mpo
->mpo_flags
|= MPOF_ATTACHED
;
589 TAILQ_INSERT_TAIL(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
593 * Remove a socket option from the MPTCP socket option list.
596 mptcp_sopt_remove(struct mptses
*mpte
, struct mptopt
*mpo
)
598 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
599 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
600 mpo
->mpo_flags
&= ~MPOF_ATTACHED
;
601 TAILQ_REMOVE(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
605 * Search for an existing <sopt_level,sopt_name> socket option.
608 mptcp_sopt_find(struct mptses
*mpte
, struct sockopt
*sopt
)
612 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
614 TAILQ_FOREACH(mpo
, &mpte
->mpte_sopts
, mpo_entry
) {
615 if (mpo
->mpo_level
== sopt
->sopt_level
&&
616 mpo
->mpo_name
== sopt
->sopt_name
)
619 VERIFY(mpo
== NULL
|| sopt
->sopt_valsize
== sizeof (int));
625 * Flushes all recorded socket options from an MP socket.
628 mptcp_flush_sopts(struct mptses
*mpte
)
630 struct mptopt
*mpo
, *tmpo
;
632 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
634 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
635 mptcp_sopt_remove(mpte
, mpo
);
636 mptcp_sopt_free(mpo
);
638 VERIFY(TAILQ_EMPTY(&mpte
->mpte_sopts
));
642 * Allocate a MPTCP subflow structure.
645 mptcp_subflow_alloc(int how
)
649 mpts
= (how
== M_WAITOK
) ? zalloc(mptsub_zone
) :
650 zalloc_noblock(mptsub_zone
);
652 bzero(mpts
, mptsub_zone_size
);
653 lck_mtx_init(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
,
654 mtcbinfo
.mppi_lock_attr
);
661 * Deallocate a subflow structure, called when all of the references held
662 * on it have been released. This implies that the subflow has been deleted.
665 mptcp_subflow_free(struct mptsub
*mpts
)
667 MPTS_LOCK_ASSERT_HELD(mpts
);
669 VERIFY(mpts
->mpts_refcnt
== 0);
670 VERIFY(!(mpts
->mpts_flags
& MPTSF_ATTACHED
));
671 VERIFY(mpts
->mpts_mpte
== NULL
);
672 VERIFY(mpts
->mpts_socket
== NULL
);
674 if (mpts
->mpts_src_sl
!= NULL
) {
675 sockaddrlist_free(mpts
->mpts_src_sl
);
676 mpts
->mpts_src_sl
= NULL
;
678 if (mpts
->mpts_dst_sl
!= NULL
) {
679 sockaddrlist_free(mpts
->mpts_dst_sl
);
680 mpts
->mpts_dst_sl
= NULL
;
683 lck_mtx_destroy(&mpts
->mpts_lock
, mtcbinfo
.mppi_lock_grp
);
685 zfree(mptsub_zone
, mpts
);
/*
 * mptcp_subflow_socreate(mpte, mpts, dom, p, so)
 *
 * Creates the TCP socket backing a subflow and prepares it for use by the
 * MPTCP layer.  Called with the session (MP socket) lock held.  Visible
 * steps:
 *  - socreate_internal() in domain `dom` with SOCF_ASYNC|SOCF_MP_SUBFLOW;
 *    a failure is logged;
 *  - both socket buffers are marked SB_NOCOMPRESS, and the MP socket's
 *    SOF1_PRECONNECT_DATA / SOF1_DATA_IDEMPOTENT flags are inherited;
 *  - built-in options are applied through mptcp_subflow_sosetopt():
 *    SO_NOSIGPIPE, SO_NOADDRERR, SO_KEEPALIVE, SO_RCVBUF (MPTCP_RWIN_MAX)
 *    and TCP_KEEPALIVE (mptcp_subflow_keeptime); SB_AUTOSIZE is cleared on
 *    the send buffer;
 *  - recorded options flagged MPOF_SUBFLOW_OK are replayed; an
 *    MPOF_INTERIM record whose replay fails is logged, removed and freed;
 *  - the socket's original so_proto is saved in mpts_oprotosw and replaced
 *    by mptcp_subflow_protosw or mptcp_subflow_protosw6;
 *  - the subflow socket is unlocked before the DTrace probe fires.
 *
 * NOTE(review): this listing is missing lines (local declarations, the
 * statements following each failed sosetopt check, the option value
 * assignment, the log buffer, the switch on `dom`, the matching
 * socket_lock and the return); confirm against the complete source.
 */
689 * Create an MPTCP subflow socket.
692 mptcp_subflow_socreate(struct mptses
*mpte
, struct mptsub
*mpts
, int dom
,
693 struct proc
*p
, struct socket
**so
)
695 struct mptopt smpo
, *mpo
, *tmpo
;
696 struct socket
*mp_so
;
700 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
701 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
704 * Create the subflow socket (multipath subflow, non-blocking.)
706 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
707 * socket; it will be cleared when the socket is peeled off or closed.
708 * It also indicates to the underlying TCP to handle MPTCP options.
709 * A multipath subflow socket implies SS_NOFDREF state.
711 if ((error
= socreate_internal(dom
, so
, SOCK_STREAM
,
712 IPPROTO_TCP
, p
, SOCF_ASYNC
| SOCF_MP_SUBFLOW
, PROC_NULL
)) != 0) {
713 mptcplog((LOG_ERR
, "MPTCP Socket: subflow socreate mp_so 0x%llx"
714 " unable to create subflow socket error %d\n",
715 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), error
),
716 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
721 VERIFY((*so
)->so_flags
& SOF_MP_SUBFLOW
);
722 VERIFY(((*so
)->so_state
& (SS_NBIO
|SS_NOFDREF
)) ==
723 (SS_NBIO
|SS_NOFDREF
));
725 /* prevent the socket buffers from being compressed */
726 (*so
)->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
727 (*so
)->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
729 /* Inherit preconnect and TFO data flags */
730 if (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)
731 (*so
)->so_flags1
|= SOF1_PRECONNECT_DATA
;
733 if (mp_so
->so_flags1
& SOF1_DATA_IDEMPOTENT
)
734 (*so
)->so_flags1
|= SOF1_DATA_IDEMPOTENT
;
736 bzero(&smpo
, sizeof (smpo
));
737 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
738 smpo
.mpo_level
= SOL_SOCKET
;
741 /* disable SIGPIPE */
742 smpo
.mpo_name
= SO_NOSIGPIPE
;
743 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
746 /* find out if the subflow's source address goes away */
747 smpo
.mpo_name
= SO_NOADDRERR
;
748 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
751 /* enable keepalive */
752 smpo
.mpo_name
= SO_KEEPALIVE
;
753 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
757 * Limit the receive socket buffer size to 64k.
759 * We need to take into consideration the window scale option
760 * which could be negotiated in one subflow but disabled in
762 * XXX This can be improved in the future.
764 smpo
.mpo_name
= SO_RCVBUF
;
765 smpo
.mpo_intval
= MPTCP_RWIN_MAX
;
766 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
769 /* N.B.: set by sosetopt */
770 VERIFY(!((*so
)->so_rcv
.sb_flags
& SB_AUTOSIZE
));
771 /* Prevent automatic socket buffer sizing. */
772 (*so
)->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
774 smpo
.mpo_level
= IPPROTO_TCP
;
775 smpo
.mpo_intval
= mptcp_subflow_keeptime
;
776 smpo
.mpo_name
= TCP_KEEPALIVE
;
777 if ((error
= mptcp_subflow_sosetopt(mpte
, *so
, &smpo
)) != 0)
780 /* replay setsockopt(2) on the subflow sockets for eligible options */
781 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
784 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
))
788 * Skip those that are handled internally; these options
789 * should not have been recorded and marked with the
790 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
792 if (mpo
->mpo_level
== SOL_SOCKET
&&
793 (mpo
->mpo_name
== SO_NOSIGPIPE
||
794 mpo
->mpo_name
== SO_NOADDRERR
||
795 mpo
->mpo_name
== SO_KEEPALIVE
))
798 interim
= (mpo
->mpo_flags
& MPOF_INTERIM
);
799 if (mptcp_subflow_sosetopt(mpte
, *so
, mpo
) != 0 && interim
) {
801 mptcplog((LOG_ERR
, "MPTCP Socket: subflow socreate"
803 " sopt %s val %d interim record removed\n",
804 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
805 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
806 buf
, sizeof (buf
)), mpo
->mpo_intval
),
807 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
808 mptcp_sopt_remove(mpte
, mpo
);
809 mptcp_sopt_free(mpo
);
815 * We need to receive everything that the subflow socket has,
816 * so use a customized socket receive function. We will undo
817 * this when the socket is peeled off or closed.
819 mpts
->mpts_oprotosw
= (*so
)->so_proto
;
822 (*so
)->so_proto
= &mptcp_subflow_protosw
;
826 (*so
)->so_proto
= (struct protosw
*)&mptcp_subflow_protosw6
;
835 socket_unlock(*so
, 0);
837 DTRACE_MPTCP4(subflow__create
, struct mptses
*, mpte
,
838 struct mptsub
*, mpts
, int, dom
, int, error
);
844 * Close an MPTCP subflow socket.
846 * Note that this may be called on an embryonic subflow, and the only
847 * thing that is guaranteed valid is the protocol-user request.
850 mptcp_subflow_soclose(struct mptsub
*mpts
, struct socket
*so
)
852 MPTS_LOCK_ASSERT_HELD(mpts
);
855 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
856 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
858 /* restore protocol-user requests */
859 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
860 so
->so_proto
= mpts
->mpts_oprotosw
;
861 socket_unlock(so
, 0);
863 mpts
->mpts_socket
= NULL
; /* may already be NULL */
865 DTRACE_MPTCP5(subflow__close
, struct mptsub
*, mpts
,
867 struct sockbuf
*, &so
->so_rcv
,
868 struct sockbuf
*, &so
->so_snd
,
869 struct mptses
*, mpts
->mpts_mpte
);
871 return (soclose(so
));
/*
 * mptcp_subflow_soconnectx(mpte, mpts)
 *
 * Starts the connection of a subflow's socket.  Called with both the
 * session (MP socket) lock and the subflow lock held; the subflow must
 * already have a socket.  Visible steps:
 *  - for AF_INET/AF_INET6, the first destination address is logged;
 *  - MPTSF_CONNECT_PENDING is cleared;
 *  - the MPTCP PCB is attached to the subflow socket with the session's
 *    current mpte_addrid_last;
 *  - soconnectxlocked() is issued with the subflow's source/destination
 *    address lists and connect request (CONNREQF_MPTCP), after which the
 *    socket is unlocked;
 *  - mpte_addrid_last is advanced, skipping the value 0 on wrap-around.
 *
 * NOTE(review): this listing is missing lines (local declarations, the
 * right-hand side of the flags VERIFY, some log arguments, the matching
 * socket_lock and the return); confirm against the complete source.
 */
875 * Connect an MPTCP subflow socket.
877 * This may be called inline as part of adding a subflow, or asynchronously
878 * by the thread (upon progressing to MPTCPF_JOIN_READY). Note that in the
879 * pending connect case, the subflow socket may have been bound to an interface
880 * and/or a source IP address which may no longer be around by the time this
881 * routine is called; in that case the connect attempt will most likely fail.
884 mptcp_subflow_soconnectx(struct mptses
*mpte
, struct mptsub
*mpts
)
889 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
890 MPTS_LOCK_ASSERT_HELD(mpts
);
892 VERIFY((mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)) ==
894 VERIFY(mpts
->mpts_socket
!= NULL
);
895 so
= mpts
->mpts_socket
;
896 af
= mpts
->mpts_family
;
898 if (af
== AF_INET
|| af
== AF_INET6
) {
899 struct sockaddr_entry
*dst_se
;
900 char dbuf
[MAX_IPv6_STR_LEN
];
902 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
903 VERIFY(dst_se
!= NULL
);
905 mptcplog((LOG_DEBUG
, "MPTCP Socket: connectx mp_so 0x%llx "
906 "dst %s[%d] cid %d [pended %s]\n",
907 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
908 inet_ntop(af
, ((af
== AF_INET
) ?
909 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
910 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
911 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
912 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
913 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
915 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
917 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
920 mpts
->mpts_flags
&= ~MPTSF_CONNECT_PENDING
;
923 mptcp_attach_to_subf(so
, mpte
->mpte_mptcb
, mpte
->mpte_addrid_last
);
925 /* connect the subflow socket */
926 error
= soconnectxlocked(so
, &mpts
->mpts_src_sl
, &mpts
->mpts_dst_sl
,
927 mpts
->mpts_mpcr
.mpcr_proc
, mpts
->mpts_mpcr
.mpcr_ifscope
,
928 mpte
->mpte_associd
, NULL
, CONNREQF_MPTCP
,
929 &mpts
->mpts_mpcr
, sizeof (mpts
->mpts_mpcr
), NULL
, NULL
);
930 socket_unlock(so
, 0);
932 /* Allocate a unique address id per subflow */
933 mpte
->mpte_addrid_last
++;
934 if (mpte
->mpte_addrid_last
== 0)
935 mpte
->mpte_addrid_last
++;
937 DTRACE_MPTCP3(subflow__connect
, struct mptses
*, mpte
,
938 struct mptsub
*, mpts
, int, error
);
944 * MPTCP subflow socket receive routine, derived from soreceive().
/*
 * mptcp_subflow_soreceive: non-blocking receive on an MPTCP subflow socket.
 *
 *   so        subflow socket; its protocol must have PR_CONNREQUIRED set
 *   psa, uio  not referenced by the lines visible in this listing
 *   mp0       mbuf chain pointer supplied by the caller; must not be NULL
 *             (presumably where the received chain is handed back - confirm)
 *   controlp  must be NULL
 *   flagsp    caller flags; MSG_EOR is masked off, and MSG_PEEK, MSG_OOB,
 *             MSG_NEEDSA, MSG_WAITALL and MSG_WAITSTREAM are not accepted
 *
 * MSG_DONTWAIT|MSG_NBIO is always OR-ed into the working flags, and
 * EWOULDBLOCK is the error chosen when there is nothing to read.  The
 * receive buffer is held with sblock() while mbufs are unlinked from so_rcv
 * (sbfree() per mbuf); pru_rcvd is invoked when the protocol sets
 * PR_WANTRCVD and so_pcb is non-NULL; the final sbunlock() also releases
 * the socket lock.  Each early-exit path visible here calls
 * socket_unlock(so, 1) first.
 *
 * NOTE(review): return statements, labels and some braces are not visible
 * in this listing; confirm the exact error values against the full file.
 */
947 mptcp_subflow_soreceive(struct socket
*so
, struct sockaddr
**psa
,
948 struct uio
*uio
, struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
951 int flags
, error
= 0;
952 struct proc
*p
= current_proc();
953 struct mbuf
*m
, **mp
= mp0
;
954 struct mbuf
*nextrecord
;
/* this routine is only meant for protocols that require a connection */
957 VERIFY(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
);
959 #ifdef MORE_LOCKING_DEBUG
960 if (so
->so_usecount
== 1) {
961 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
966 * We return all that is there in the subflow's socket receive buffer
967 * to the MPTCP layer, so we require that the caller passes in the
968 * expected parameters.
970 if (mp
== NULL
|| controlp
!= NULL
) {
971 socket_unlock(so
, 1);
978 flags
= *flagsp
&~ MSG_EOR
;
982 if (flags
& (MSG_PEEK
|MSG_OOB
|MSG_NEEDSA
|MSG_WAITALL
|MSG_WAITSTREAM
)) {
983 socket_unlock(so
, 1);
986 flags
|= (MSG_DONTWAIT
|MSG_NBIO
);
989 * If a recv attempt is made on a previously-accepted socket
990 * that has been marked as inactive (disconnected), reject
993 if (so
->so_flags
& SOF_DEFUNCT
) {
994 struct sockbuf
*sb
= &so
->so_rcv
;
997 SODEFUNCTLOG("%s[%d, %s]: defunct so 0x%llx [%d,%d] (%d)\n",
998 __func__
, proc_pid(p
), proc_best_name(p
),
999 (uint64_t)VM_KERNEL_ADDRPERM(so
),
1000 SOCK_DOM(so
), SOCK_TYPE(so
), error
);
1002 * This socket should have been disconnected and flushed
1003 * prior to being returned from sodefunct(); there should
1004 * be no data on its receive list, so panic otherwise.
1006 if (so
->so_state
& SS_DEFUNCT
)
1007 sb_empty_assert(sb
, __func__
);
1008 socket_unlock(so
, 1);
1013 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
1014 * and if so just return to the caller. This could happen when
1015 * soreceive() is called by a socket upcall function during the
1016 * time the socket is freed. The socket buffer would have been
1017 * locked across the upcall, therefore we cannot put this thread
1018 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
1019 * we may livelock), because the lock on the socket buffer will
1020 * only be released when the upcall routine returns to its caller.
1021 * Because the socket has been officially closed, there can be
1022 * no further read on it.
1024 * A multipath subflow socket would have its SS_NOFDREF set by
1025 * default, so check for SOF_MP_SUBFLOW socket flag; when the
1026 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1028 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
1029 (SS_NOFDREF
| SS_CANTRCVMORE
) && !(so
->so_flags
& SOF_MP_SUBFLOW
)) {
1030 socket_unlock(so
, 1);
1035 * For consistency with soreceive() semantics, we need to obey
1036 * SB_LOCK in case some other code path has locked the buffer.
1038 error
= sblock(&so
->so_rcv
, 0);
1040 socket_unlock(so
, 1);
1044 m
= so
->so_rcv
.sb_mb
;
1047 * Panic if we notice inconsistencies in the socket's
1048 * receive list; both sb_mb and sb_cc should correctly
1049 * reflect the contents of the list, otherwise we may
1050 * end up with false positives during select() or poll()
1051 * which could put the application in a bad state.
1053 SB_MB_CHECK(&so
->so_rcv
);
1055 if (so
->so_error
!= 0) {
1056 error
= so
->so_error
;
1061 if (so
->so_state
& SS_CANTRCVMORE
) {
1065 if (!(so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
))) {
1071 * MSG_DONTWAIT is implicitly defined and this routine will
1072 * never block, so return EWOULDBLOCK when there is nothing.
1074 error
= EWOULDBLOCK
;
1078 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
1079 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1080 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1083 nextrecord
= m
->m_nextpkt
;
1084 sbfree(&so
->so_rcv
, m
);
1089 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
1094 m
->m_nextpkt
= nextrecord
;
1095 if (nextrecord
== NULL
)
1096 so
->so_rcv
.sb_lastrecord
= m
;
1098 m
= so
->so_rcv
.sb_mb
= nextrecord
;
1099 SB_EMPTY_FIXUP(&so
->so_rcv
);
1101 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1102 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1105 DTRACE_MPTCP3(subflow__receive
, struct socket
*, so
,
1106 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1107 /* notify protocol that we drained all the data */
1108 if ((so
->so_proto
->pr_flags
& PR_WANTRCVD
) && so
->so_pcb
!= NULL
)
1109 (*so
->so_proto
->pr_usrreqs
->pru_rcvd
)(so
, flags
);
1115 sbunlock(&so
->so_rcv
, FALSE
); /* will unlock socket */
1122 * Prepare an MPTCP subflow socket for peeloff(2); basically undo
1123 * the work done earlier when the subflow socket was created.
/*
 * mptcp_subflow_sopeeloff: turn a subflow socket back into a regular socket.
 *
 * Expects the MPTCP session lock and the subflow lock to be held (both are
 * asserted).  The socket must be flagged SOF_MP_SUBFLOW and carry
 * SS_NBIO|SS_NOFDREF.  The visible steps are:
 *   - clear SS_NBIO unless the MP socket itself has SS_NBIO set;
 *   - clear SOF_MP_SUBFLOW, SS_NOFDREF and SOF_MPTCP_TRUE;
 *   - clear SB_NOCOMPRESS on both socket buffers and set SB_AUTOSIZE on
 *     each buffer that does not have SB_USRSIZE;
 *   - restore so_proto from mpts_oprotosw (which must be non-NULL);
 *   - apply SO_NOSIGPIPE, SO_NOADDRERR and SO_KEEPALIVE through
 *     mptcp_subflow_sosetopt(), each with value 1 only when the MP socket
 *     has the bit and the subflow does not, otherwise 0;
 *   - set TCP_KEEPALIVE to 0 at IPPROTO_TCP level;
 *   - unlock the subflow socket and fire the subflow__peeloff probe.
 * The results of mptcp_subflow_sosetopt() are explicitly discarded.
 *
 * NOTE(review): the remaining parameters, the local declarations of so, p,
 * c and smpo, and the point where the socket lock is taken are not visible
 * in this listing.
 */
1126 mptcp_subflow_sopeeloff(struct mptses
*mpte
, struct mptsub
*mpts
,
1130 struct socket
*mp_so
;
1133 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1134 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1135 MPTS_LOCK_ASSERT_HELD(mpts
);
1138 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
1139 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
1141 /* inherit MPTCP socket states */
1142 if (!(mp_so
->so_state
& SS_NBIO
))
1143 so
->so_state
&= ~SS_NBIO
;
1146 * At this point, the socket is not yet closed, as there is at least
1147 * one outstanding usecount previously held by mpts_socket from
1148 * socreate(). Atomically clear SOF_MP_SUBFLOW and SS_NOFDREF here.
1150 so
->so_flags
&= ~SOF_MP_SUBFLOW
;
1151 so
->so_state
&= ~SS_NOFDREF
;
1152 so
->so_flags
&= ~SOF_MPTCP_TRUE
;
1154 /* allow socket buffers to be compressed */
1155 so
->so_rcv
.sb_flags
&= ~SB_NOCOMPRESS
;
1156 so
->so_snd
.sb_flags
&= ~SB_NOCOMPRESS
;
1159 * Allow socket buffer auto sizing.
1161 * This will increase the current 64k buffer size to whatever is best.
1163 if (!(so
->so_rcv
.sb_flags
& SB_USRSIZE
))
1164 so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
1165 if (!(so
->so_snd
.sb_flags
& SB_USRSIZE
))
1166 so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
1168 /* restore protocol-user requests */
1169 VERIFY(mpts
->mpts_oprotosw
!= NULL
);
1170 so
->so_proto
= mpts
->mpts_oprotosw
;
1172 bzero(&smpo
, sizeof (smpo
));
1173 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1174 smpo
.mpo_level
= SOL_SOCKET
;
1176 /* inherit SOF_NOSIGPIPE from parent MP socket */
1177 p
= (mp_so
->so_flags
& SOF_NOSIGPIPE
);
1178 c
= (so
->so_flags
& SOF_NOSIGPIPE
);
1179 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1180 smpo
.mpo_name
= SO_NOSIGPIPE
;
1182 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1184 /* inherit SOF_NOADDRAVAIL from parent MP socket */
1185 p
= (mp_so
->so_flags
& SOF_NOADDRAVAIL
);
1186 c
= (so
->so_flags
& SOF_NOADDRAVAIL
);
1187 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1188 smpo
.mpo_name
= SO_NOADDRERR
;
1190 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1192 /* inherit SO_KEEPALIVE from parent MP socket */
1193 p
= (mp_so
->so_options
& SO_KEEPALIVE
);
1194 c
= (so
->so_options
& SO_KEEPALIVE
);
1195 smpo
.mpo_intval
= ((p
- c
) > 0) ? 1 : 0;
1196 smpo
.mpo_name
= SO_KEEPALIVE
;
1198 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1200 /* unset TCP level default keepalive option */
/*
 * NOTE(review): p and c are loaded from t_keepidle below, but no visible
 * line reads them before the option is sent with the constant value 0.
 */
1201 p
= (intotcpcb(sotoinpcb(mp_so
)))->t_keepidle
;
1202 c
= (intotcpcb(sotoinpcb(so
)))->t_keepidle
;
1203 smpo
.mpo_level
= IPPROTO_TCP
;
1204 smpo
.mpo_intval
= 0;
1205 smpo
.mpo_name
= TCP_KEEPALIVE
;
1207 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1208 socket_unlock(so
, 0);
1210 DTRACE_MPTCP5(subflow__peeloff
, struct mptses
*, mpte
,
1211 struct mptsub
*, mpts
, struct socket
*, so
,
1212 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1216 * Establish an initial MPTCP connection (if first subflow and not yet
1217 * connected), or add a subflow to an existing MPTCP connection.
/*
 * mptcp_subflow_add: create a subflow socket for mpts and attach it to mpte.
 *
 *   mpte     MPTCP session; its lock must be held (asserted)
 *   mpts     subflow that is not yet attached: no CONNECTING/CONNECTED
 *            flag, no mpts_mpte, no mpts_socket, a destination list, and
 *            connid SAE_CONNID_ANY (all VERIFY-ed)
 *   p        process handed to mptcp_subflow_socreate()
 *   ifscope  interface scope; when it is not IFSCOPE_NONE the socket is
 *            bound to that interface with inp_bindif()
 *
 * Visible sequence:
 *   1. Test for mpt_state >= MPTCPS_CLOSE_WAIT (peer sent Data FIN).
 *   2. Pick source/destination entries with in_selectaddrs(); the address
 *      family must be AF_INET or AF_INET6.  When no source list exists,
 *      the destination list is duplicated and its first entry is zeroed
 *      apart from sa_len and sa_family.
 *   3. Create the subflow socket, then assign a connection id (skipping
 *      SAE_CONNID_ANY and SAE_CONNID_ALL) and bump the address id
 *      (skipping 0).
 *   4. Optionally bind to the interface and to the source address/port;
 *      a failure on either path closes the subflow socket again.
 *   5. Insert the subflow in mpte_subflows, take two subflow references
 *      and one MP socket use count, and register the read/write upcalls
 *      and the event upcall.
 *   6. Fill in the connection request (MP_ENABLE for the only flow of a
 *      session below ESTABLISHED, MP_ADD otherwise), note TFO/fast-join
 *      requirements, and connect immediately unless
 *      MPTSF_CONNECT_PENDING was set.
 *   7. Post SO_FILT_HINT_CONNINFO_UPDATED on the MP socket.
 *
 * NOTE(review): return statements, error labels and some declarations
 * (error, af, lport) are not visible in this listing.
 */
1220 mptcp_subflow_add(struct mptses
*mpte
, struct mptsub
*mpts
,
1221 struct proc
*p
, uint32_t ifscope
)
1223 struct sockaddr_entry
*se
, *src_se
= NULL
, *dst_se
= NULL
;
1224 struct socket
*mp_so
, *so
= NULL
;
1225 struct mptsub_connreq mpcr
;
1226 struct mptcb
*mp_tp
;
1229 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1230 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1231 mp_tp
= mpte
->mpte_mptcb
;
1234 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
1235 /* If the remote end sends Data FIN, refuse subflow adds */
1243 VERIFY(!(mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)));
1244 VERIFY(mpts
->mpts_mpte
== NULL
);
1245 VERIFY(mpts
->mpts_socket
== NULL
);
1246 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
1247 VERIFY(mpts
->mpts_connid
== SAE_CONNID_ANY
);
1249 /* select source (if specified) and destination addresses */
1250 if ((error
= in_selectaddrs(AF_UNSPEC
, &mpts
->mpts_src_sl
, &src_se
,
1251 &mpts
->mpts_dst_sl
, &dst_se
)) != 0)
1254 VERIFY(mpts
->mpts_dst_sl
!= NULL
&& dst_se
!= NULL
);
1255 VERIFY(src_se
== NULL
|| mpts
->mpts_src_sl
!= NULL
);
1256 af
= mpts
->mpts_family
= dst_se
->se_addr
->sa_family
;
1257 VERIFY(src_se
== NULL
|| src_se
->se_addr
->sa_family
== af
);
1258 VERIFY(af
== AF_INET
|| af
== AF_INET6
);
1261 * If the source address is not specified, allocate a storage for
1262 * it, so that later on we can fill it in with the actual source
1263 * IP address chosen by the underlying layer for the subflow after
1266 if (mpts
->mpts_src_sl
== NULL
) {
1268 sockaddrlist_dup(mpts
->mpts_dst_sl
, M_WAITOK
);
1269 if (mpts
->mpts_src_sl
== NULL
) {
1273 se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
1274 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
&&
1275 se
->se_addr
->sa_len
== dst_se
->se_addr
->sa_len
);
1276 bzero(se
->se_addr
, se
->se_addr
->sa_len
);
1277 se
->se_addr
->sa_len
= dst_se
->se_addr
->sa_len
;
1278 se
->se_addr
->sa_family
= dst_se
->se_addr
->sa_family
;
1281 /* create the subflow socket */
1282 if ((error
= mptcp_subflow_socreate(mpte
, mpts
, af
, p
, &so
)) != 0)
1286 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
1287 * -1 (SAE_CONNID_ALL).
1289 mpte
->mpte_connid_last
++;
1290 if (mpte
->mpte_connid_last
== SAE_CONNID_ALL
||
1291 mpte
->mpte_connid_last
== SAE_CONNID_ANY
)
1292 mpte
->mpte_connid_last
++;
1294 mpts
->mpts_connid
= mpte
->mpte_connid_last
;
1295 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1296 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1298 mpts
->mpts_rel_seq
= 1;
1300 /* Allocate a unique address id per subflow */
1301 mpte
->mpte_addrid_last
++;
1302 if (mpte
->mpte_addrid_last
== 0)
1303 mpte
->mpte_addrid_last
++;
1305 /* bind subflow socket to the specified interface */
1306 if (ifscope
!= IFSCOPE_NONE
) {
1308 error
= inp_bindif(sotoinpcb(so
), ifscope
, &mpts
->mpts_outif
);
1310 socket_unlock(so
, 0);
1311 (void) mptcp_subflow_soclose(mpts
, so
);
1314 VERIFY(mpts
->mpts_outif
!= NULL
);
1315 mpts
->mpts_flags
|= MPTSF_BOUND_IF
;
1317 if (IFNET_IS_EXPENSIVE(mpts
->mpts_outif
)) {
1318 sototcpcb(so
)->t_mpflags
|= TMPF_BACKUP_PATH
;
1320 mpts
->mpts_flags
|= MPTSF_PREFERRED
;
1323 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_add mp_so 0x%llx "
1324 "bindif %s[%d] cid %d expensive %d\n",
1325 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1326 mpts
->mpts_outif
->if_xname
,
1327 ifscope
, mpts
->mpts_connid
,
1328 IFNET_IS_EXPENSIVE(mpts
->mpts_outif
)),
1329 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
1330 socket_unlock(so
, 0);
1333 /* if source address and/or port is specified, bind to it */
1334 if (src_se
!= NULL
) {
1335 struct sockaddr
*sa
= src_se
->se_addr
;
1336 uint32_t mpts_flags
= 0;
1341 if (SIN(sa
)->sin_addr
.s_addr
!= INADDR_ANY
)
1342 mpts_flags
|= MPTSF_BOUND_IP
;
1343 if ((lport
= SIN(sa
)->sin_port
) != 0)
1344 mpts_flags
|= MPTSF_BOUND_PORT
;
1348 VERIFY(af
== AF_INET6
);
1349 if (!IN6_IS_ADDR_UNSPECIFIED(&SIN6(sa
)->sin6_addr
))
1350 mpts_flags
|= MPTSF_BOUND_IP
;
1351 if ((lport
= SIN6(sa
)->sin6_port
) != 0)
1352 mpts_flags
|= MPTSF_BOUND_PORT
;
1357 error
= sobindlock(so
, sa
, 1); /* will lock/unlock socket */
1359 (void) mptcp_subflow_soclose(mpts
, so
);
1362 mpts
->mpts_flags
|= mpts_flags
;
1364 if (af
== AF_INET
|| af
== AF_INET6
) {
1365 char sbuf
[MAX_IPv6_STR_LEN
];
1367 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_add "
1368 "mp_so 0x%llx bindip %s[%d] cid %d\n",
1369 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1370 inet_ntop(af
, ((af
== AF_INET
) ?
1371 (void *)&SIN(sa
)->sin_addr
.s_addr
:
1372 (void *)&SIN6(sa
)->sin6_addr
), sbuf
, sizeof (sbuf
)),
1373 ntohs(lport
), mpts
->mpts_connid
),
1374 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1379 * Insert the subflow into the list, and associate the MPTCP PCB
1380 * as well as the the subflow socket. From this point on, removing
1381 * the subflow needs to be done via mptcp_subflow_del().
1383 TAILQ_INSERT_TAIL(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1384 mpte
->mpte_numflows
++;
1386 atomic_bitset_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1387 mpts
->mpts_mpte
= mpte
;
1388 mpts
->mpts_socket
= so
;
1389 MPTS_ADDREF_LOCKED(mpts
); /* for being in MPTCP subflow list */
1390 MPTS_ADDREF_LOCKED(mpts
); /* for subflow socket */
1391 mp_so
->so_usecount
++; /* for subflow socket */
1393 /* register for subflow socket read/write events */
1394 (void) sock_setupcalls(so
, mptcp_subflow_rupcall
, mpts
,
1395 mptcp_subflow_wupcall
, mpts
);
1398 * Register for subflow socket control events; ignore
1399 * SO_FILT_HINT_CONNINFO_UPDATED from below since we
1400 * will generate it here.
1402 (void) sock_catchevents(so
, mptcp_subflow_eupcall
, mpts
,
1403 SO_FILT_HINT_CONNRESET
| SO_FILT_HINT_CANTRCVMORE
|
1404 SO_FILT_HINT_CANTSENDMORE
| SO_FILT_HINT_TIMEOUT
|
1405 SO_FILT_HINT_NOSRCADDR
| SO_FILT_HINT_IFDENIED
|
1406 SO_FILT_HINT_SUSPEND
| SO_FILT_HINT_RESUME
|
1407 SO_FILT_HINT_CONNECTED
| SO_FILT_HINT_DISCONNECTED
|
1408 SO_FILT_HINT_MPFAILOVER
| SO_FILT_HINT_MPSTATUS
|
1409 SO_FILT_HINT_MUSTRST
| SO_FILT_HINT_MPFASTJ
|
1410 SO_FILT_HINT_DELETEOK
| SO_FILT_HINT_MPCANTRCVMORE
);
1413 VERIFY(!(mpts
->mpts_flags
&
1414 (MPTSF_CONNECTING
|MPTSF_CONNECTED
|MPTSF_CONNECT_PENDING
)));
1416 bzero(&mpcr
, sizeof (mpcr
));
1418 mpcr
.mpcr_ifscope
= ifscope
;
1420 * Indicate to the TCP subflow whether or not it should establish
1421 * the initial MPTCP connection, or join an existing one. Fill
1422 * in the connection request structure with additional info needed
1423 * by the underlying TCP (to be used in the TCP options, etc.)
/*
 * The only flow of a session that is still below ESTABLISHED asks for
 * MPTSUB_CONNREQ_MP_ENABLE; every other case uses MPTSUB_CONNREQ_MP_ADD.
 */
1426 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
&& mpte
->mpte_numflows
== 1) {
1427 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
1428 mptcp_init_local_parms(mp_tp
);
1431 soisconnecting(mp_so
);
1432 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ENABLE
;
1434 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
))
1435 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1437 /* avoid starting up cellular subflow unless required */
1438 if ((mptcp_delayed_subf_start
) &&
1439 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
1440 mpts
->mpts_flags
|= MPTSF_CONNECT_PENDING
;
1443 mpcr
.mpcr_type
= MPTSUB_CONNREQ_MP_ADD
;
1446 /* If fastjoin or fastopen is requested, set state in mpts */
1447 if (mpte
->mpte_nummpcapflows
== 0) {
1448 if (so
->so_flags1
& SOF1_PRECONNECT_DATA
) {
1450 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
1451 mpts
->mpts_flags
|= MPTSF_TFO_REQD
;
1452 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1457 if (so
->so_flags
& SOF_MPTCP_FASTJOIN
) {
1459 if (mp_tp
->mpt_state
== MPTCPS_ESTABLISHED
) {
1460 mpts
->mpts_flags
|= MPTSF_FASTJ_REQD
;
1461 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1467 mpts
->mpts_mpcr
= mpcr
;
1468 mpts
->mpts_flags
|= MPTSF_CONNECTING
;
1470 if (af
== AF_INET
|| af
== AF_INET6
) {
1471 char dbuf
[MAX_IPv6_STR_LEN
];
1473 mptcplog((LOG_DEBUG
, "MPTCP Socket: %s "
1474 "mp_so 0x%llx dst %s[%d] cid %d "
1475 "[pending %s]\n", __func__
,
1476 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1477 inet_ntop(af
, ((af
== AF_INET
) ?
1478 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
1479 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
),
1480 dbuf
, sizeof (dbuf
)), ((af
== AF_INET
) ?
1481 ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
1482 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
1484 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ?
1486 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1489 /* connect right away if first attempt, or if join can be done now */
1490 if (!(mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
))
1491 error
= mptcp_subflow_soconnectx(mpte
, mpts
);
1496 soevent(mp_so
, SO_FILT_HINT_LOCKED
|
1497 SO_FILT_HINT_CONNINFO_UPDATED
);
1503 * Delete/remove a subflow from an MPTCP. The underlying subflow socket
1504 * will no longer be accessible after a subflow is deleted, thus this
1505 * should occur only after the subflow socket has been disconnected.
1506 * If peeloff(2) is called, leave the socket open.
/*
 * mptcp_subflow_del: detach subflow mpts from session mpte.
 *
 *   mpte   MPTCP session; its lock must be held (asserted)
 *   mpts   subflow attached to mpte (VERIFY-ed: mpts_mpte == mpte,
 *          MPTSF_ATTACHED set, connid neither ANY nor ALL)
 *   close  when set, the "subflow_del returning" message is logged unless
 *          both MPTSF_DELETEOK and MPTSF_USER_DISCONNECT are set on the
 *          subflow
 *
 * Visible steps: clear MPTSF_ATTACHED, unlink from mpte_subflows, decrement
 * mpte_numflows, reset mpte_active_sub if it pointed at this subflow,
 * unregister the socket upcalls and the event handler, detach the MPTCP
 * control block from the subflow, close the subflow socket through
 * mptcp_subflow_soclose(), drop the MP socket use count that was taken for
 * the subflow, clear mpts_mpte and mpts_socket, release the two subflow
 * references, and post SO_FILT_HINT_CONNINFO_UPDATED on the MP socket.
 *
 * NOTE(review): the comment preceding this function says the socket stays
 * open for peeloff(2); the condition guarding mptcp_subflow_soclose() and
 * the early return after the first log message are not visible in this
 * listing - confirm against the full file.
 */
1509 mptcp_subflow_del(struct mptses
*mpte
, struct mptsub
*mpts
, boolean_t close
)
1511 struct socket
*mp_so
, *so
;
1513 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1514 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1517 so
= mpts
->mpts_socket
;
/* a closing delete needs both DELETEOK and USER_DISCONNECT on the subflow */
1520 if (close
&& !((mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
1521 (mpts
->mpts_flags
& MPTSF_USER_DISCONNECT
))) {
1523 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_del returning"
1524 " mp_so 0x%llx flags %x\n",
1525 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_flags
),
1526 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1530 mptcplog((LOG_DEBUG
, "MPTCP Socket: subflow_del mp_so 0x%llx "
1531 "[u=%d,r=%d] cid %d [close %s] %d %x error %d\n",
1532 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1534 mp_so
->so_retaincnt
, mpts
->mpts_connid
,
1535 (close
? "YES" : "NO"), mpts
->mpts_soerror
,
1538 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1540 VERIFY(mpts
->mpts_mpte
== mpte
);
1541 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1542 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1544 VERIFY(mpts
->mpts_flags
& MPTSF_ATTACHED
);
1545 atomic_bitclear_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1546 TAILQ_REMOVE(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1547 VERIFY(mpte
->mpte_numflows
!= 0);
1548 mpte
->mpte_numflows
--;
1549 if (mpte
->mpte_active_sub
== mpts
)
1550 mpte
->mpte_active_sub
= NULL
;
1553 * Drop references held by this subflow socket; there
1554 * will be no further upcalls made from this point.
1556 (void) sock_setupcalls(so
, NULL
, NULL
, NULL
, NULL
);
1557 (void) sock_catchevents(so
, NULL
, NULL
, 0);
1559 mptcp_detach_mptcb_from_subf(mpte
->mpte_mptcb
, so
);
1562 (void) mptcp_subflow_soclose(mpts
, so
);
1564 VERIFY(mp_so
->so_usecount
> 0);
1565 mp_so
->so_usecount
--; /* for subflow socket */
1566 mpts
->mpts_mpte
= NULL
;
1567 mpts
->mpts_socket
= NULL
;
1570 MPTS_REMREF(mpts
); /* for MPTCP subflow list */
1571 MPTS_REMREF(mpts
); /* for subflow socket */
1573 soevent(mp_so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
1577 * Disconnect a subflow socket.
/*
 * mptcp_subflow_disconnect: begin tearing down one subflow socket.
 *
 * The session lock and the subflow lock must both be held (asserted).  The
 * subflow must belong to mpte, have a socket, and carry a connid other than
 * SAE_CONNID_ANY / SAE_CONNID_ALL (VERIFY-ed).
 *
 * Visible steps: test for MPTSF_DISCONNECTING|MPTSF_DISCONNECTED, mark the
 * subflow MPTSF_DISCONNECTING, set MPTSF_DELETEOK, and - when the socket is
 * connected and not already disconnecting or disconnected - log, call
 * mptcp_send_dfin(), shut down both directions and disconnect the socket.
 * The socket is then unlocked and a SO_FILT_HINT_DISCONNECTED event is
 * injected through mptcp_subflow_eupcall() so that deletion can proceed
 * even if the lower layer does not raise one.
 *
 * NOTE(review): the trailing parameters, the declarations of so, send_dfin
 * and deleteok, the early return, and the conditions guarding
 * MPTSF_DELETEOK and mptcp_send_dfin() are not visible in this listing.
 */
1580 mptcp_subflow_disconnect(struct mptses
*mpte
, struct mptsub
*mpts
,
1584 struct mptcb
*mp_tp
;
1587 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1588 MPTS_LOCK_ASSERT_HELD(mpts
);
1590 VERIFY(mpts
->mpts_mpte
== mpte
);
1591 VERIFY(mpts
->mpts_socket
!= NULL
);
1592 VERIFY(mpts
->mpts_connid
!= SAE_CONNID_ANY
&&
1593 mpts
->mpts_connid
!= SAE_CONNID_ALL
);
1595 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|MPTSF_DISCONNECTED
))
1598 mpts
->mpts_flags
|= MPTSF_DISCONNECTING
;
1601 * If this is coming from disconnectx(2) or issued as part of
1602 * closing the MPTCP socket, the subflow shouldn't stick around.
1603 * Otherwise let it linger around in case the upper layers need
1604 * to retrieve its conninfo.
1607 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
1609 so
= mpts
->mpts_socket
;
1610 mp_tp
= mpte
->mpte_mptcb
;
1612 if (mp_tp
->mpt_state
> MPTCPS_ESTABLISHED
)
1617 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
1618 (so
->so_state
& SS_ISCONNECTED
)) {
1619 mptcplog((LOG_DEBUG
, "MPTCP Socket %s: cid %d fin %d "
1620 "[linger %s]\n", __func__
, mpts
->mpts_connid
, send_dfin
,
1621 (deleteok
? "NO" : "YES")),
1622 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1625 mptcp_send_dfin(so
);
1626 (void) soshutdownlock(so
, SHUT_RD
);
1627 (void) soshutdownlock(so
, SHUT_WR
);
1628 (void) sodisconnectlocked(so
);
1630 socket_unlock(so
, 0);
1632 * Generate a disconnect event for this subflow socket, in case
1633 * the lower layer doesn't do it; this is needed because the
1634 * subflow socket deletion relies on it. This will also end up
1635 * generating SO_FILT_HINT_CONNINFO_UPDATED on the MPTCP socket;
1636 * we cannot do that here because subflow lock is currently held.
1638 mptcp_subflow_eupcall(so
, mpts
, SO_FILT_HINT_DISCONNECTED
);
1642 * Subflow socket read upcall.
1644 * Called when the associated subflow socket posted a read event. The subflow
1645 * socket lock has been released prior to invoking the callback. Note that the
1646 * upcall may occur synchronously as a result of MPTCP performing an action on
1647 * it, or asynchronously as a result of an event happening at the subflow layer.
1648 * Therefore, to maintain lock ordering, the only lock that can be acquired
1649 * here is the thread lock, for signalling purposes.
/*
 * mptcp_subflow_rupcall: read upcall; mptcp_subflow_add() registers it on
 * the subflow socket through sock_setupcalls().
 *
 *   so, waitf  unused
 *   arg        the struct mptsub that was registered with the upcall
 *
 * Signals the MPTCP thread of the owning session while holding
 * mpte_thread_lock.
 *
 * NOTE(review): the handling of a NULL mpte mentioned in the comment below
 * is not visible in this listing.
 */
1652 mptcp_subflow_rupcall(struct socket
*so
, void *arg
, int waitf
)
1654 #pragma unused(so, waitf)
1655 struct mptsub
*mpts
= arg
;
1656 struct mptses
*mpte
= mpts
->mpts_mpte
;
1659 * mpte should never be NULL, except in a race with
1665 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1666 mptcp_thread_signal_locked(mpte
);
1667 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1671 * Subflow socket input.
1673 * Called in the context of the MPTCP thread, for reading data from the
1674 * underlying subflow socket and delivering it to MPTCP.
/*
 * mptcp_subflow_input: pull data off a subflow socket and hand it to MPTCP.
 *
 * The session lock and the subflow lock must both be held (asserted).
 *
 * Visible behaviour: the subflow is tested for MPTSF_CONNECTED, then
 * sock_receive_internal() is called on its socket.  An error other than
 * EWOULDBLOCK is logged and an alternate subflow is looked up with
 * mptcp_get_subflow(); when none exists and mptcp_delayed_subf_start is
 * set, a pending subflow is looked up instead.  ENODATA is copied into the
 * MP socket so_error.  mpts_peerswitch is incremented when the subflow is
 * not MPTSF_ACTIVE and reset to 0 in the other visible assignment.  The
 * received chain is finally passed to mptcp_input().
 *
 * NOTE(review): the declarations of so and error, the return and goto
 * statements, and the lock release mentioned in the last comment of this
 * block are not visible in this listing.
 */
1677 mptcp_subflow_input(struct mptses
*mpte
, struct mptsub
*mpts
)
1679 struct mbuf
*m
= NULL
;
1682 struct mptsub
*mpts_alt
= NULL
;
1684 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1685 MPTS_LOCK_ASSERT_HELD(mpts
);
1687 DTRACE_MPTCP2(subflow__input
, struct mptses
*, mpte
,
1688 struct mptsub
*, mpts
);
/* only a connected subflow is read from */
1690 if (!(mpts
->mpts_flags
& MPTSF_CONNECTED
))
1693 so
= mpts
->mpts_socket
;
1695 error
= sock_receive_internal(so
, NULL
, &m
, 0, NULL
);
1696 if (error
!= 0 && error
!= EWOULDBLOCK
) {
1697 mptcplog((LOG_ERR
, "MPTCP Receiver: %s cid %d error %d\n",
1698 __func__
, mpts
->mpts_connid
, error
),
1699 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1701 mpts_alt
= mptcp_get_subflow(mpte
, mpts
, NULL
);
1702 if (mpts_alt
== NULL
) {
1703 if (mptcp_delayed_subf_start
) {
1704 mpts_alt
= mptcp_get_pending_subflow(mpte
,
1707 mptcplog((LOG_DEBUG
,"MPTCP Receiver:"
1708 " %s: pending %d\n",
1709 __func__
, mpts_alt
->mpts_connid
),
1710 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1712 mptcplog((LOG_ERR
, "MPTCP Receiver:"
1713 " %s: no pending flow for cid %d",
1714 __func__
, mpts
->mpts_connid
),
1715 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1718 mptcplog((LOG_ERR
, "MPTCP Receiver: %s: no alt"
1719 " path for cid %d\n", __func__
,
1721 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_ERR
);
1723 if (error
== ENODATA
) {
1725 * Don't ignore ENODATA so as to discover
1726 * nasty middleboxes.
1728 struct socket
*mp_so
=
1729 mpte
->mpte_mppcb
->mpp_socket
;
1730 mp_so
->so_error
= ENODATA
;
1735 } else if (error
== 0) {
1736 mptcplog((LOG_DEBUG
, "MPTCP Receiver: %s: cid %d \n",
1737 __func__
, mpts
->mpts_connid
),
1738 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_VERBOSE
);
1741 /* In fallback, make sure to accept data on all but one subflow */
1742 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1743 (!(mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
1750 /* Did we receive data on the backup subflow? */
1751 if (!(mpts
->mpts_flags
& MPTSF_ACTIVE
))
1752 mpts
->mpts_peerswitch
++;
1754 mpts
->mpts_peerswitch
= 0;
1757 * Release subflow lock since this may trigger MPTCP to send,
1758 * possibly on a different subflow. An extra reference has
1759 * been held on the subflow by the MPTCP thread before coming
1760 * here, so we can be sure that it won't go away, in the event
1761 * the MP socket lock gets released.
1764 mptcp_input(mpte
, m
);
1770 * Subflow socket write upcall.
1772 * Called when the associated subflow socket posted a write event. The subflow
1773 * socket lock has been released prior to invoking the callback. Note that the
1774 * upcall may occur synchronously as a result of MPTCP performing an action on
1775 * it, or asynchronously as a result of an event happening at the subflow layer.
1776 * Therefore, to maintain lock ordering, the only lock that can be acquired
1777 * here is the thread lock, for signalling purposes.
/*
 * mptcp_subflow_wupcall: write upcall; mptcp_subflow_add() registers it on
 * the subflow socket through sock_setupcalls().
 *
 *   so, waitf  unused
 *   arg        the struct mptsub that was registered with the upcall
 *
 * Signals the MPTCP thread of the owning session while holding
 * mpte_thread_lock.
 *
 * NOTE(review): the handling of a NULL mpte described in the comment below
 * is not visible in this listing.
 */
1780 mptcp_subflow_wupcall(struct socket
*so
, void *arg
, int waitf
)
1782 #pragma unused(so, waitf)
1783 struct mptsub
*mpts
= arg
;
1784 struct mptses
*mpte
= mpts
->mpts_mpte
;
1787 * mpte should never be NULL except in a race with
1788 * mptcp_subflow_del which doesn't hold socket lock across critical
1789 * section. This upcall is made after releasing the socket lock.
1790 * Interleaving of socket operations becomes possible therefore.
1795 lck_mtx_lock(&mpte
->mpte_thread_lock
);
1796 mptcp_thread_signal_locked(mpte
);
1797 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
1801 * Subflow socket output.
1803 * Called for sending data from MPTCP to the underlying subflow socket.
/*
 * mptcp_subflow_output: push data from the MPTCP send buffer to one subflow.
 *
 * The session lock and the subflow lock must both be held (asserted).
 *
 * Visible sequence:
 *   1. Log and stop short when the subflow is MPTSF_SUSPENDED, or when
 *      none of MP_CAPABLE, MP_DEGRADED, FASTJ_SEND and TFO_REQD is set.
 *   2. When MPTE_SND_REM_ADDR is pending, copy mpte_lost_aid into the
 *      subflow TCP control block, set TMPF_SND_REM_ADDR if
 *      mptcp_remaddr_enable is set, and clear the session flag.
 *   3. For a TFO subflow, call mptcp_drop_tfo_data().
 *   4. Drop already acknowledged bytes from the MP socket so_snd with
 *      sbdrop(); in degraded mode after the fallback sync, also drop
 *      everything below mpt_sndnxt and advance mpt_snduna.
 *   5. Pull mpts_sndnxt and mpt_sndnxt up to mpt_snduna when they lag,
 *      then work out the offset into so_snd at which to start.
 *   6. While tot_sent < sb_cc, copy mbufs with m_copym_mode(), tag each
 *      copy with PKTF_MPTCP, the data sequence number, the subflow
 *      relative sequence number and the mapping length, and advance
 *      mpts_rel_seq.
 *   7. Send the chain with sock_sendmbuf(); for a zero-length TFO write
 *      call the protocol pru_send directly instead.
 *   8. On success or EWOULDBLOCK advance mpts_sndnxt by tot_sent, update
 *      the probe counter, raise mpt_sndnxt if this subflow moved past it
 *      (setting MPTCPF_SND_64BITDSN when the upper 32 bits grew), cancel
 *      the MPTT_REXMT timer, and clear SOF1_PRECONNECT_DATA,
 *      SOF_MPTCP_FASTJOIN and MPTSF_FASTJ_SEND; otherwise log the error.
 *
 * NOTE(review): several declarations (sb_mb, off, len, mlen, m), the
 * labels including zero_len_write, the goto/return statements, and the
 * code that links each copy onto head/tail and updates tot_sent are not
 * visible in this listing; confirm details against the full file.
 */
1806 mptcp_subflow_output(struct mptses
*mpte
, struct mptsub
*mpts
)
1808 struct socket
*mp_so
, *so
;
1809 size_t sb_cc
= 0, tot_sent
= 0;
1811 int error
= 0, wakeup
= 0;
1812 u_int64_t mpt_dsn
= 0;
1813 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1814 struct mbuf
*mpt_mbuf
= NULL
;
1816 struct mbuf
*head
, *tail
;
1817 int tcp_zero_len_write
= 0;
1819 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1820 MPTS_LOCK_ASSERT_HELD(mpts
);
1821 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1822 so
= mpts
->mpts_socket
;
1824 DTRACE_MPTCP2(subflow__output
, struct mptses
*, mpte
,
1825 struct mptsub
*, mpts
);
1827 /* subflow socket is suspended? */
1828 if (mpts
->mpts_flags
& MPTSF_SUSPENDED
) {
1829 mptcplog((LOG_ERR
, "MPTCP Sender: %s mp_so 0x%llx cid %d is "
1830 "flow controlled\n", __func__
,
1831 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
),
1832 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
1836 /* subflow socket is not MPTCP capable? */
1837 if (!(mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) &&
1838 !(mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1839 !(mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) &&
1840 !(mpts
->mpts_flags
& MPTSF_TFO_REQD
)) {
1841 mptcplog((LOG_ERR
, "MPTCP Sender: %s mp_so 0x%llx cid %d not "
1842 "MPTCP capable\n", __func__
,
1843 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
),
1844 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
1848 /* Remove Addr Option is not sent reliably as per I-D */
1849 if (mpte
->mpte_flags
& MPTE_SND_REM_ADDR
) {
1850 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
1851 tp
->t_rem_aid
= mpte
->mpte_lost_aid
;
1852 if (mptcp_remaddr_enable
)
1853 tp
->t_mpflags
|= TMPF_SND_REM_ADDR
;
1854 mpte
->mpte_flags
&= ~MPTE_SND_REM_ADDR
;
1857 if (mpts
->mpts_flags
& MPTSF_TFO_REQD
) {
1858 mptcp_drop_tfo_data(mpte
, mpts
, &wakeup
);
1862 * The mbuf chains containing the metadata (as well as pointing to
1863 * the user data sitting at the MPTCP output queue) would then be
1864 * sent down to the subflow socket.
1866 * Some notes on data sequencing:
1868 * a. Each mbuf must be a M_PKTHDR.
1869 * b. MPTCP metadata is stored in the mptcp_pktinfo structure
1870 * in the mbuf pkthdr structure.
1871 * c. Each mbuf containing the MPTCP metadata must have its
1872 * pkt_flags marked with the PKTF_MPTCP flag.
1875 /* First, drop acknowledged data */
1876 sb_mb
= mp_so
->so_snd
.sb_mb
;
1877 if (sb_mb
== NULL
) {
1881 VERIFY(sb_mb
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
/* walk past leading mbufs whose mapping length (mp_rlen) is zero */
1884 while (mpt_mbuf
&& mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) {
1885 if (((so
->so_state
& SS_ISCONNECTED
) == 0) &&
1886 (mpt_mbuf
->m_next
== NULL
) &&
1887 (so
->so_flags1
& SOF1_PRECONNECT_DATA
)) {
1889 * If TFO, allow connection establishment with zero
1892 tcp_zero_len_write
= 1;
1893 goto zero_len_write
;
1895 mpt_mbuf
= mpt_mbuf
->m_next
;
1897 if (mpt_mbuf
&& (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
)) {
1898 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
1904 if (MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_snduna
)) {
1906 len
= mp_tp
->mpt_snduna
- mpt_dsn
;
1908 sbdrop(&mp_so
->so_snd
, (int)len
);
1914 * In degraded mode, we don't receive data acks, so force free
1915 * mbufs less than snd_nxt
1917 if (mp_so
->so_snd
.sb_mb
== NULL
) {
1922 mpt_dsn
= mp_so
->so_snd
.sb_mb
->m_pkthdr
.mp_dsn
;
1923 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1924 (mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
) &&
1925 MPTCP_SEQ_LT(mpt_dsn
, mp_tp
->mpt_sndnxt
)) {
1927 len
= mp_tp
->mpt_sndnxt
- mpt_dsn
;
1928 sbdrop(&mp_so
->so_snd
, (int)len
);
1930 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndnxt
;
1933 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) &&
1934 !(mp_tp
->mpt_flags
& MPTCPF_POST_FALLBACK_SYNC
)) {
1935 mp_tp
->mpt_flags
|= MPTCPF_POST_FALLBACK_SYNC
;
1936 so
->so_flags1
|= SOF1_POST_FALLBACK_SYNC
;
1937 if (mp_tp
->mpt_flags
& MPTCPF_RECVD_MPFAIL
)
1938 mpts
->mpts_sndnxt
= mp_tp
->mpt_dsn_at_csum_fail
;
1942 * Adjust the subflow's notion of next byte to send based on
1943 * the last unacknowledged byte
1945 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_snduna
)) {
1946 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
1950 * Adjust the top level notion of next byte used for retransmissions
1953 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mp_tp
->mpt_snduna
)) {
1954 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
1958 /* Now determine the offset from which to start transmitting data */
1959 sb_mb
= mp_so
->so_snd
.sb_mb
;
1960 sb_cc
= mp_so
->so_snd
.sb_cc
;
1961 if (sb_mb
== NULL
) {
1965 if (MPTCP_SEQ_LT(mpts
->mpts_sndnxt
, mp_tp
->mpt_sndmax
)) {
1966 off
= mpts
->mpts_sndnxt
- mp_tp
->mpt_snduna
;
1967 sb_cc
-= (size_t)off
;
1976 while (mpt_mbuf
&& ((mpt_mbuf
->m_pkthdr
.mp_rlen
== 0) ||
1977 (mpt_mbuf
->m_pkthdr
.mp_rlen
<= (u_int32_t
)off
))) {
1978 off
-= mpt_mbuf
->m_pkthdr
.mp_rlen
;
1979 mpt_mbuf
= mpt_mbuf
->m_next
;
1981 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
1982 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s cid = %d "
1983 "snduna = %llu sndnxt = %llu probe %d\n",
1984 __func__
, mpts
->mpts_connid
,
1985 mp_tp
->mpt_snduna
, mpts
->mpts_sndnxt
,
1986 mpts
->mpts_probecnt
),
1987 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
1989 VERIFY((mpt_mbuf
== NULL
) || (mpt_mbuf
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
1993 while (tot_sent
< sb_cc
) {
1997 mlen
= mpt_mbuf
->m_pkthdr
.mp_rlen
;
2003 panic("%s: unexpected %lu %lu \n", __func__
,
2007 m
= m_copym_mode(mpt_mbuf
, (int)off
, mlen
, M_DONTWAIT
,
2008 M_COPYM_MUST_COPY_HDR
);
2014 /* Create a DSN mapping for the data (m_copym does it) */
2015 mpt_dsn
= mpt_mbuf
->m_pkthdr
.mp_dsn
;
2016 VERIFY(m
->m_flags
& M_PKTHDR
);
2017 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
2018 m
->m_pkthdr
.pkt_flags
&= ~PKTF_MPSO
;
2019 m
->m_pkthdr
.mp_dsn
= mpt_dsn
+ off
;
2020 m
->m_pkthdr
.mp_rseq
= mpts
->mpts_rel_seq
;
2021 m
->m_pkthdr
.mp_rlen
= mlen
;
2022 mpts
->mpts_rel_seq
+= mlen
;
2023 m
->m_pkthdr
.len
= mlen
;
2034 mpt_mbuf
= mpt_mbuf
->m_next
;
2038 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
2040 if ((mpts
->mpts_flags
& MPTSF_TFO_REQD
) &&
2041 (tp
->t_tfo_stats
== 0)) {
2042 tp
->t_mpflags
|= TMPF_TFO_REQUEST
;
2043 } else if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
2044 tp
->t_mpflags
|= TMPF_FASTJOIN_SEND
;
2047 error
= sock_sendmbuf(so
, NULL
, head
, 0, NULL
);
2049 DTRACE_MPTCP7(send
, struct mbuf
*, head
, struct socket
*, so
,
2050 struct sockbuf
*, &so
->so_rcv
,
2051 struct sockbuf
*, &so
->so_snd
,
2052 struct mptses
*, mpte
, struct mptsub
*, mpts
,
2054 } else if (tcp_zero_len_write
== 1) {
2057 /* Opting to call pru_send as no mbuf at subflow level */
2058 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)
2059 (so
, 0, NULL
, NULL
, NULL
, current_proc());
2060 socket_unlock(so
, 1);
2063 if ((error
== 0) || (error
== EWOULDBLOCK
)) {
2064 mpts
->mpts_sndnxt
+= tot_sent
;
2066 if (mpts
->mpts_probesoon
&& mpts
->mpts_maxseg
&& tot_sent
) {
2067 tcpstat
.tcps_mp_num_probes
++;
2068 if (tot_sent
< mpts
->mpts_maxseg
)
2069 mpts
->mpts_probecnt
+= 1;
2071 mpts
->mpts_probecnt
+=
2072 tot_sent
/mpts
->mpts_maxseg
;
2077 if (MPTCP_SEQ_LT(mp_tp
->mpt_sndnxt
, mpts
->mpts_sndnxt
)) {
2078 if (MPTCP_DATASEQ_HIGH32(mpts
->mpts_sndnxt
) >
2079 MPTCP_DATASEQ_HIGH32(mp_tp
->mpt_sndnxt
))
2080 mp_tp
->mpt_flags
|= MPTCPF_SND_64BITDSN
;
2081 mp_tp
->mpt_sndnxt
= mpts
->mpts_sndnxt
;
2083 mptcp_cancel_timer(mp_tp
, MPTT_REXMT
);
2086 if (so
->so_flags1
& SOF1_PRECONNECT_DATA
)
2087 so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;
2089 /* Send once in SYN_SENT state to avoid sending SYN spam */
2090 if (mpts
->mpts_flags
& MPTSF_FASTJ_SEND
) {
2091 so
->so_flags
&= ~SOF_MPTCP_FASTJOIN
;
2092 mpts
->mpts_flags
&= ~MPTSF_FASTJ_SEND
;
2095 if ((mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) ||
2096 (mpts
->mpts_probesoon
!= 0))
2097 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s cid %d "
2098 "wrote %d %d probe %d probedelta %d\n",
2099 __func__
, mpts
->mpts_connid
, (int)tot_sent
,
2100 (int) sb_cc
, mpts
->mpts_probecnt
,
2101 (tcp_now
- mpts
->mpts_probesoon
)),
2102 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
2104 mptcplog((LOG_ERR
, "MPTCP Sender: %s cid %d error %d len %zd\n",
2105 __func__
, mpts
->mpts_connid
, error
, tot_sent
),
2106 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_ERR
);
2116 * Subflow socket control event upcall.
2118 * Called when the associated subflow socket posted one or more control events.
2119 * The subflow socket lock has been released prior to invoking the callback.
2120 * Note that the upcall may occur synchronously as a result of MPTCP performing
2121 * an action on it, or asynchronously as a result of an event happening at the
2122 * subflow layer. Therefore, to maintain lock ordering, the only lock that can
2123 * be acquired here is the thread lock, for signalling purposes.
2126 mptcp_subflow_eupcall(struct socket
*so
, void *arg
, uint32_t events
)
2129 struct mptsub
*mpts
= arg
;
2130 struct mptses
*mpte
= mpts
->mpts_mpte
;
2132 VERIFY(mpte
!= NULL
);
2134 lck_mtx_lock(&mpte
->mpte_thread_lock
);
2135 atomic_bitset_32(&mpts
->mpts_evctl
, events
);
2136 mptcp_thread_signal_locked(mpte
);
2137 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
2141 * Subflow socket control events.
2143 * Called for handling events related to the underlying subflow socket.
2146 mptcp_subflow_events(struct mptses
*mpte
, struct mptsub
*mpts
,
2147 uint64_t *p_mpsofilt_hint
)
2149 uint32_t events
, save_events
;
2150 ev_ret_t ret
= MPTS_EVRET_OK
;
2152 int mpsub_ev_entry_count
= sizeof(mpsub_ev_entry_tbl
)/
2153 sizeof(mpsub_ev_entry_tbl
[0]);
2154 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2155 MPTS_LOCK_ASSERT_HELD(mpts
);
2157 /* bail if there's nothing to process */
2158 if ((events
= mpts
->mpts_evctl
) == 0)
2161 if (events
& (SO_FILT_HINT_CONNRESET
|SO_FILT_HINT_MUSTRST
|
2162 SO_FILT_HINT_CANTRCVMORE
|SO_FILT_HINT_CANTSENDMORE
|
2163 SO_FILT_HINT_TIMEOUT
|SO_FILT_HINT_NOSRCADDR
|
2164 SO_FILT_HINT_IFDENIED
|SO_FILT_HINT_SUSPEND
|
2165 SO_FILT_HINT_DISCONNECTED
)) {
2166 events
|= SO_FILT_HINT_MPFAILOVER
;
2169 save_events
= events
;
2171 DTRACE_MPTCP3(subflow__events
, struct mptses
*, mpte
,
2172 struct mptsub
*, mpts
, uint32_t, events
);
2174 mptcplog((LOG_DEBUG
, "MPTCP Events: %s cid %d events=%b\n", __func__
,
2175 mpts
->mpts_connid
, events
, SO_FILT_HINT_BITS
),
2176 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_VERBOSE
);
2179 * Process all the socket filter hints and reset the hint
2180 * once it is handled
2182 for (i
= 0; (i
< mpsub_ev_entry_count
) && events
; i
++) {
2184 * Always execute the DISCONNECTED event, because it will wakeup
2187 if ((events
& mpsub_ev_entry_tbl
[i
].sofilt_hint_mask
) &&
2188 (ret
>= MPTS_EVRET_OK
||
2189 mpsub_ev_entry_tbl
[i
].sofilt_hint_mask
== SO_FILT_HINT_DISCONNECTED
)) {
2191 mpsub_ev_entry_tbl
[i
].sofilt_hint_ev_hdlr(mpte
, mpts
, p_mpsofilt_hint
);
2192 events
&= ~mpsub_ev_entry_tbl
[i
].sofilt_hint_mask
;
2193 ret
= ((error
>= MPTS_EVRET_OK
) ? MAX(error
, ret
) : error
);
2198 * We should be getting only events specified via sock_catchevents(),
2199 * so loudly complain if we have any unprocessed one(s).
2201 if (events
!= 0 || ret
< MPTS_EVRET_OK
) {
2202 mptcplog((LOG_ERR
, "MPTCP Events %s%s: cid %d evret %s (%d)"
2203 " unhandled events=%b\n",
2204 (events
!= 0) && (ret
== MPTS_EVRET_OK
) ? "MPTCP_ERROR " : "",
2205 __func__
, mpts
->mpts_connid
,
2206 mptcp_evret2str(ret
), ret
, events
, SO_FILT_HINT_BITS
),
2207 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2210 /* clear the ones we've processed */
2211 atomic_bitclear_32(&mpts
->mpts_evctl
, save_events
);
2216 * Handle SO_FILT_HINT_CONNRESET subflow socket event.
/*
 * mptcp_subflow_connreset_ev: handler for the SO_FILT_HINT_CONNRESET
 * subflow event.
 *
 * Disconnects the subflow through mptcp_subflow_disconnect() and records
 * an error on both the subflow (mpts_soerror) and the MPTCP socket
 * (so_error):
 *   - ECONNREFUSED when the MPTCP state is below MPTCPS_ESTABLISHED;
 *   - ECONNRESET, plus SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNRESET in
 *     *p_mpsofilt_hint, when mpte_nummpcapflows is below 1 or when the
 *     connection has MPTCPF_FALLBACK_TO_TCP set and this subflow is
 *     MPTSF_ACTIVE.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
2219 mptcp_subflow_connreset_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2220 uint64_t *p_mpsofilt_hint
)
2222 struct socket
*mp_so
, *so
;
2223 struct mptcb
*mp_tp
;
2226 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2227 MPTS_LOCK_ASSERT_HELD(mpts
);
2228 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2229 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2230 mp_tp
= mpte
->mpte_mptcb
;
/* NOTE(review): so is assigned here but not read in the visible lines */
2231 so
= mpts
->mpts_socket
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
2233 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2234 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2236 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2237 "%s: cid %d [linger %s]\n", __func__
,
2238 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2239 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2242 * We got a TCP RST for this subflow connection.
2244 * Right now, we simply propagate ECONNREFUSED to the MPTCP socket
2245 * client if the MPTCP connection has not been established or
2246 * if the connection has only one subflow and is a connection being
2247 * resumed. Otherwise we close the socket.
/* the third argument is the negation of linger */
2249 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
/* choose the error reported to the MPTCP socket client */
2252 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2253 mpts
->mpts_soerror
= mp_so
->so_error
= ECONNREFUSED
;
2254 } else if (mpte
->mpte_nummpcapflows
< 1 ||
2255 ((mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) &&
2256 (mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
2257 mpts
->mpts_soerror
= mp_so
->so_error
= ECONNRESET
;
2258 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNRESET
;
2263 * Keep the subflow socket around, unless the MPTCP socket has
2264 * been detached or the subflow has been disconnected explicitly,
2265 * in which case it should be deleted right away.
2267 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2271 * Handle SO_FILT_HINT_CANTRCVMORE subflow socket event.
/*
 * mptcp_subflow_cantrcvmore_ev: handler for the SO_FILT_HINT_CANTRCVMORE
 * subflow event.
 *
 * Acts only when the connection has MPTCPF_FALLBACK_TO_TCP set and this
 * subflow is MPTSF_ACTIVE: MPCE_RECV_DATA_FIN is fed to mptcp_close_fsm()
 * and, if the MPTCP state is then MPTCPS_CLOSE_WAIT,
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE is added to
 * *p_mpsofilt_hint.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Always returns MPTS_EVRET_OK.
 */
2274 mptcp_subflow_cantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2275 uint64_t *p_mpsofilt_hint
)
2277 struct mptcb
*mp_tp
;
2280 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2281 MPTS_LOCK_ASSERT_HELD(mpts
);
2283 mp_tp
= mpte
->mpte_mptcb
;
/* NOTE(review): so is assigned here but not read in the visible lines */
2284 so
= mpts
->mpts_socket
;
2286 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2287 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2288 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2291 * A FIN on a fallen back MPTCP-connection should be treated like a
/* only the active subflow of a fallen-back connection drives the close FSM */
2295 if ((mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) &&
2296 (mpts
->mpts_flags
& MPTSF_ACTIVE
)) {
2297 mptcp_close_fsm(mp_tp
, MPCE_RECV_DATA_FIN
);
2298 if (mp_tp
->mpt_state
== MPTCPS_CLOSE_WAIT
) {
2299 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTRCVMORE
;
2304 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2308 * Handle SO_FILT_HINT_CANTSENDMORE subflow socket event.
2311 mptcp_subflow_cantsendmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2312 uint64_t *p_mpsofilt_hint
)
2314 #pragma unused(p_mpsofilt_hint)
2317 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2318 MPTS_LOCK_ASSERT_HELD(mpts
);
2320 so
= mpts
->mpts_socket
;
2322 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2323 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2324 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2326 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2330 * Handle SO_FILT_HINT_TIMEOUT subflow socket event.
/*
 * mptcp_subflow_timeout_ev: handler for the SO_FILT_HINT_TIMEOUT subflow
 * event.
 *
 * Sets mpts_soerror to ETIMEDOUT if no error was recorded yet,
 * disconnects the subflow through mptcp_subflow_disconnect(), and sets
 * ETIMEDOUT on the MPTCP socket when the MPTCP state is below
 * MPTCPS_ESTABLISHED.  p_mpsofilt_hint is not used.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
2333 mptcp_subflow_timeout_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2334 uint64_t *p_mpsofilt_hint
)
2336 #pragma unused(p_mpsofilt_hint)
2337 struct socket
*mp_so
, *so
;
2338 struct mptcb
*mp_tp
;
2341 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2342 MPTS_LOCK_ASSERT_HELD(mpts
);
2343 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2344 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2345 mp_tp
= mpte
->mpte_mptcb
;
/* NOTE(review): so is assigned here but not read in the visible lines */
2346 so
= mpts
->mpts_socket
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
2348 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2349 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2351 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2352 "%s: cid %d [linger %s]\n", __func__
,
2353 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2354 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
/* an error recorded earlier on the subflow is not overwritten */
2356 if (mpts
->mpts_soerror
== 0)
2357 mpts
->mpts_soerror
= ETIMEDOUT
;
2360 * The subflow connection has timed out.
2362 * Right now, we simply propagate ETIMEDOUT to the MPTCP socket
2363 * client if the MPTCP connection has not been established. Otherwise
2366 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2369 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2370 mp_so
->so_error
= ETIMEDOUT
;
2375 * Keep the subflow socket around, unless the MPTCP socket has
2376 * been detached or the subflow has been disconnected explicitly,
2377 * in which case it should be deleted right away.
2379 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2383 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
/*
 * mptcp_subflow_nosrcaddr_ev: handler for the SO_FILT_HINT_NOSRCADDR
 * subflow event.
 *
 * Records the local address ID of the subflow (tp->t_local_aid) in
 * mpte_lost_aid and sets MPTE_SND_REM_ADDR on the session, sets
 * mpts_soerror to EADDRNOTAVAIL if no error was recorded yet, and
 * disconnects the subflow.  EADDRNOTAVAIL is set on the MPTCP socket only
 * when the MPTCP state is below MPTCPS_ESTABLISHED and the MPTCP socket
 * has SOF_NOADDRAVAIL set.  p_mpsofilt_hint is not used.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
2386 mptcp_subflow_nosrcaddr_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2387 uint64_t *p_mpsofilt_hint
)
2389 #pragma unused(p_mpsofilt_hint)
2390 struct socket
*mp_so
, *so
;
2391 struct mptcb
*mp_tp
;
2393 struct tcpcb
*tp
= NULL
;
2395 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2396 MPTS_LOCK_ASSERT_HELD(mpts
);
2398 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2399 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2400 mp_tp
= mpte
->mpte_mptcb
;
2401 so
= mpts
->mpts_socket
;
2403 /* Not grabbing socket lock as t_local_aid is write once only */
/* NOTE(review): tp is dereferenced below without a NULL check */
2404 tp
= intotcpcb(sotoinpcb(so
));
2406 * This overwrites any previous mpte_lost_aid to avoid storing
2407 * too much state when the typical case has only two subflows.
2409 mpte
->mpte_flags
|= MPTE_SND_REM_ADDR
;
2410 mpte
->mpte_lost_aid
= tp
->t_local_aid
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
2412 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2413 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2415 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2416 "%s cid %d [linger %s]\n", __func__
,
2417 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2418 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
/* an error recorded earlier on the subflow is not overwritten */
2420 if (mpts
->mpts_soerror
== 0)
2421 mpts
->mpts_soerror
= EADDRNOTAVAIL
;
2424 * The subflow connection has lost its source address.
2426 * Right now, we simply propagate EADDRNOTAVAIL to the MPTCP socket
2427 * client if the MPTCP connection has not been established. If it
2428 * has been established with one subflow , we keep the MPTCP
2429 * connection valid without any subflows till closed by application.
2430 * This lets tcp connection manager decide whether to close this or
2431 * not as it reacts to reachability changes too.
2433 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
2436 if ((mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) &&
2437 (mp_so
->so_flags
& SOF_NOADDRAVAIL
)) {
2438 mp_so
->so_error
= EADDRNOTAVAIL
;
2443 * Keep the subflow socket around, unless the MPTCP socket has
2444 * been detached or the subflow has been disconnected explicitly,
2445 * in which case it should be deleted right away.
2447 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2451 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
2452 * indicates that the remote side sent a Data FIN
/*
 * mptcp_subflow_mpcantrcvmore_ev: handler for the
 * SO_FILT_HINT_MPCANTRCVMORE subflow event.
 *
 * When the MPTCP state is MPTCPS_CLOSE_WAIT,
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_CANTRCVMORE is added to
 * *p_mpsofilt_hint.  No other state is modified in the visible lines.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Always returns MPTS_EVRET_OK.
 */
2455 mptcp_subflow_mpcantrcvmore_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2456 uint64_t *p_mpsofilt_hint
)
2458 struct socket
*so
, *mp_so
;
2459 struct mptcb
*mp_tp
;
2461 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2462 MPTS_LOCK_ASSERT_HELD(mpts
);
/* NOTE(review): mp_so and so are assigned but not read in the visible lines */
2463 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2464 so
= mpts
->mpts_socket
;
2465 mp_tp
= mpte
->mpte_mptcb
;
2467 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2468 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2469 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2472 * We got a Data FIN for the MPTCP connection.
2473 * The FIN may arrive with data. The data is handed up to the
2474 * mptcp socket and the user is notified so that it may close
2475 * the socket if needed.
2478 if (mp_tp
->mpt_state
== MPTCPS_CLOSE_WAIT
)
2479 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CANTRCVMORE
;
2482 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2486 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
/*
 * mptcp_subflow_failover_ev: handler for the SO_FILT_HINT_MPFAILOVER
 * subflow event.
 *
 * Asks mptcp_get_subflow() for an alternate subflow.  When none is
 * returned and mptcp_delayed_subf_start is set, a pending subflow obtained
 * from mptcp_get_pending_subflow() is connected through
 * mptcp_subflow_soconnectx().  When an alternate path is usable it is
 * marked MPTSF_ACTIVE, its mpts_sndnxt is set to the connection-level
 * mpt_snduna, it becomes mpte_active_sub, and
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED is added to
 * *p_mpsofilt_hint; the failing subflow is then marked MPTSF_FAILINGOVER
 * and loses MPTSF_ACTIVE.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing, including where altpath_exists is assigned, the socket_lock()
 * calls paired with the socket_unlock() calls below, and the else branches.
 * Confirm the exact control flow against the complete file.
 */
2489 mptcp_subflow_failover_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2490 uint64_t *p_mpsofilt_hint
)
2492 struct mptsub
*mpts_alt
= NULL
;
2493 struct socket
*so
= NULL
;
2494 struct socket
*mp_so
;
2495 int altpath_exists
= 0;
2497 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2498 MPTS_LOCK_ASSERT_HELD(mpts
);
2499 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2500 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2501 "%s: mp_so 0x%llx\n", __func__
,
2502 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2503 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
/* look for another subflow to carry the traffic */
2506 mpts_alt
= mptcp_get_subflow(mpte
, mpts
, NULL
);
2509 * If there is no alternate eligible subflow, ignore the
2512 if (mpts_alt
== NULL
) {
2513 mptcplog((LOG_WARNING
, "MPTCP Events: "
2514 "%s: no alternate path\n", __func__
),
2515 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
/* with delayed subflow start enabled, kick off a pending subflow */
2517 if (mptcp_delayed_subf_start
) {
2518 mpts_alt
= mptcp_get_pending_subflow(mpte
, mpts
);
2519 if (mpts_alt
!= NULL
) {
2520 MPTS_LOCK(mpts_alt
);
2521 (void) mptcp_subflow_soconnectx(mpte
,
2523 MPTS_UNLOCK(mpts_alt
);
2529 MPTS_LOCK(mpts_alt
);
2531 so
= mpts_alt
->mpts_socket
;
/* an alternate that is itself failing over needs a closer look */
2532 if (mpts_alt
->mpts_flags
& MPTSF_FAILINGOVER
) {
2534 /* All data acknowledged and no RTT spike */
2535 if ((so
->so_snd
.sb_cc
== 0) &&
2536 (mptcp_no_rto_spike(so
))) {
2537 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2538 mpts_alt
->mpts_flags
&= ~MPTSF_FAILINGOVER
;
2540 /* no alternate path available */
2543 socket_unlock(so
, 1);
/* promote the alternate subflow to active */
2545 if (altpath_exists
) {
2546 mptcplog((LOG_INFO
, "MPTCP Events: "
2548 __func__
, mpts_alt
->mpts_connid
),
2549 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2550 mpts_alt
->mpts_flags
|= MPTSF_ACTIVE
;
2551 mpts_alt
->mpts_peerswitch
= 0;
2552 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
2553 /* Bring the subflow's notion of snd_nxt into the send window */
2555 mpts_alt
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2557 mpte
->mpte_active_sub
= mpts_alt
;
2560 socket_unlock(so
, 1);
2562 MPTS_UNLOCK(mpts_alt
);
/* report the switch to the caller and count it */
2564 if (altpath_exists
) {
2565 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
2566 mptcplog((LOG_NOTICE
, "MPTCP Events: "
2567 "%s: mp_so 0x%llx switched from "
2568 "%d to %d\n", __func__
,
2569 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
2570 mpts
->mpts_connid
, mpts_alt
->mpts_connid
),
2571 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2572 tcpstat
.tcps_mp_switches
++;
/* the subflow that raised the event stops being the active one */
2576 if (altpath_exists
) {
2577 mpts
->mpts_flags
|= MPTSF_FAILINGOVER
;
2578 mpts
->mpts_flags
&= ~MPTSF_ACTIVE
;
2580 mptcplog((LOG_DEBUG
, "MPTCP Events %s: no alt cid = %d\n",
2581 __func__
, mpts
->mpts_connid
),
2582 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2584 so
= mpts
->mpts_socket
;
2586 so
->so_flags
&= ~SOF_MP_TRYFAILOVER
;
2587 socket_unlock(so
, 1);
2589 MPTS_LOCK_ASSERT_HELD(mpts
);
2590 return (MPTS_EVRET_OK
);
2594 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
/*
 * mptcp_subflow_ifdenied_ev: handler for the SO_FILT_HINT_IFDENIED
 * subflow event.
 *
 * Sets mpts_soerror to EHOSTUNREACH if no error was recorded yet,
 * disconnects the subflow, adds
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_IFDENIED to *p_mpsofilt_hint, and
 * sets EHOSTUNREACH on the MPTCP socket when the MPTCP state is below
 * MPTCPS_ESTABLISHED.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
2597 mptcp_subflow_ifdenied_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2598 uint64_t *p_mpsofilt_hint
)
2600 struct socket
*mp_so
, *so
;
2601 struct mptcb
*mp_tp
;
2604 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2605 MPTS_LOCK_ASSERT_HELD(mpts
);
2606 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2607 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2608 mp_tp
= mpte
->mpte_mptcb
;
/* NOTE(review): so is assigned here but not read in the visible lines */
2609 so
= mpts
->mpts_socket
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
2611 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
2612 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
2614 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2615 "%s: cid %d [linger %s]\n", __func__
,
2616 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
2617 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
/* an error recorded earlier on the subflow is not overwritten */
2619 if (mpts
->mpts_soerror
== 0)
2620 mpts
->mpts_soerror
= EHOSTUNREACH
;
2623 * The subflow connection cannot use the outgoing interface.
2625 * Right now, we simply propagate EHOSTUNREACH to the MPTCP socket
2626 * client if the MPTCP connection has not been established. If it
2627 * has been established, let the upper layer call disconnectx.
2629 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
/* the IFDENIED hint is forwarded regardless of the MPTCP state */
2630 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_IFDENIED
;
2633 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2634 mp_so
->so_error
= EHOSTUNREACH
;
2639 * Keep the subflow socket around, unless the MPTCP socket has
2640 * been detached or the subflow has been disconnected explicitly,
2641 * in which case it should be deleted right away.
2643 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
2647 * Handle SO_FILT_HINT_SUSPEND subflow socket event.
2650 mptcp_subflow_suspend_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2651 uint64_t *p_mpsofilt_hint
)
2653 #pragma unused(p_mpsofilt_hint)
2656 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2657 MPTS_LOCK_ASSERT_HELD(mpts
);
2659 so
= mpts
->mpts_socket
;
2661 /* the subflow connection is being flow controlled */
2662 mpts
->mpts_flags
|= MPTSF_SUSPENDED
;
2664 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2665 "%s: cid %d\n", __func__
,
2666 mpts
->mpts_connid
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2668 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2672 * Handle SO_FILT_HINT_RESUME subflow socket event.
2675 mptcp_subflow_resume_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2676 uint64_t *p_mpsofilt_hint
)
2678 #pragma unused(p_mpsofilt_hint)
2681 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2682 MPTS_LOCK_ASSERT_HELD(mpts
);
2684 so
= mpts
->mpts_socket
;
2686 /* the subflow connection is no longer flow controlled */
2687 mpts
->mpts_flags
&= ~MPTSF_SUSPENDED
;
2689 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2690 "%s: cid %d\n", __func__
, mpts
->mpts_connid
),
2691 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2693 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
2697 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
/*
 * mptcp_subflow_connected_ev: handler for the SO_FILT_HINT_CONNECTED
 * subflow event.
 *
 * Visible behaviour, in order:
 *   - returns at once if the subflow is already MPTSF_CONNECTED;
 *   - if the subflow is flagged MPTSF_DISCONNECTED or MPTSF_DISCONNECTING,
 *     a socket that is connected and not yet disconnecting is shut down in
 *     both directions and disconnected, then the handler returns;
 *   - returns if the socket state already has SS_ISDISCONNECTED;
 *   - otherwise marks the subflow connected, derives MPTSF_MP_CAPABLE from
 *     TMPF_MPTCP_TRUE, refreshes the stored source address from the
 *     socket, checks the outbound interface, snapshots srtt, rxtcur and
 *     maxseg from the TCP control block and classifies the link type;
 *   - adds SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED to
 *     *p_mpsofilt_hint and updates the MPTCP connection state.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK on every visible path.
 *
 * NOTE(review): many lines of this function are not visible in this
 * listing: the declarations of af and error, the socket_lock() calls
 * paired with the socket_unlock() calls, the address-family dispatch, the
 * conditions guarding the mpok branches, and two format-string fragments.
 * Confirm the exact control flow against the complete file.
 */
2700 mptcp_subflow_connected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
2701 uint64_t *p_mpsofilt_hint
)
2703 char buf0
[MAX_IPv6_STR_LEN
], buf1
[MAX_IPv6_STR_LEN
];
2704 struct sockaddr_entry
*src_se
, *dst_se
;
2705 struct sockaddr_storage src
;
2706 struct socket
*mp_so
, *so
;
2707 struct mptcb
*mp_tp
;
2708 struct ifnet
*outifp
;
2710 boolean_t mpok
= FALSE
;
2711 boolean_t cell
= FALSE
;
2712 boolean_t wifi
= FALSE
;
2713 boolean_t wired
= FALSE
;
2715 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2716 VERIFY(mpte
->mpte_mppcb
!= NULL
);
2717 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
2718 mp_tp
= mpte
->mpte_mptcb
;
2720 MPTS_LOCK_ASSERT_HELD(mpts
);
2721 so
= mpts
->mpts_socket
;
2722 af
= mpts
->mpts_family
;
/* already marked connected: nothing more to do */
2724 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
2725 return (MPTS_EVRET_OK
);
/* the subflow was asked to disconnect before the connect completed */
2727 if ((mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
2728 (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
2730 if (!(so
->so_state
& (SS_ISDISCONNECTING
| SS_ISDISCONNECTED
)) &&
2731 (so
->so_state
& SS_ISCONNECTED
)) {
2732 mptcplog((LOG_DEBUG
, "MPTCP Events: "
2733 "%s: cid %d disconnect before tcp connect\n",
2734 __func__
, mpts
->mpts_connid
),
2735 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
2736 (void) soshutdownlock(so
, SHUT_RD
);
2737 (void) soshutdownlock(so
, SHUT_WR
);
2738 (void) sodisconnectlocked(so
);
2740 socket_unlock(so
, 0);
2741 return (MPTS_EVRET_OK
);
2745 * The subflow connection has been connected. Find out whether it
2746 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
2748 * a. If MPTCP connection is not yet established, then this must be
2749 * the first subflow connection. If MPTCP failed to negotiate,
2750 * indicate to the MPTCP socket client via EPROTO, that the
2751 * underlying TCP connection may be peeled off via peeloff(2).
2752 * Otherwise, mark the MPTCP socket as connected.
2754 * b. If MPTCP connection has been established, then this must be
2755 * one of the subsequent subflow connections. If MPTCP failed
2756 * to negotiate, disconnect the connection since peeloff(2)
2757 * is no longer possible.
2759 * Right now, we simply unblock any waiters at the MPTCP socket layer
2760 * if the MPTCP connection has not been established.
2764 if (so
->so_state
& SS_ISDISCONNECTED
) {
2766 * With MPTCP joins, a connection is connected at the subflow
2767 * level, but the 4th ACK from the server elevates the MPTCP
2768 * subflow to connected state. So there is a small window
2769 * where the subflow could get disconnected before the
2770 * connected event is processed.
2772 socket_unlock(so
, 0);
2773 return (MPTS_EVRET_OK
);
/* record the connected state on the subflow */
2776 mpts
->mpts_soerror
= 0;
2777 mpts
->mpts_flags
&= ~MPTSF_CONNECTING
;
2778 mpts
->mpts_flags
|= MPTSF_CONNECTED
;
/* the TFO requirement is kept only for sockets with SOF1_DATA_IDEMPOTENT */
2780 if (!(so
->so_flags1
& SOF1_DATA_IDEMPOTENT
))
2781 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
/* mirror the MPTCP negotiation result held in the TCP control block */
2783 struct tcpcb
*tp
= sototcpcb(so
);
2784 if (tp
->t_mpflags
& TMPF_MPTCP_TRUE
)
2785 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
2787 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
/* the first entry of each address list must match the subflow family */
2789 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
2790 dst_se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
2791 VERIFY(dst_se
!= NULL
&& dst_se
->se_addr
!= NULL
&&
2792 dst_se
->se_addr
->sa_family
== af
);
2794 VERIFY(mpts
->mpts_src_sl
!= NULL
);
2795 src_se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
2796 VERIFY(src_se
!= NULL
&& src_se
->se_addr
!= NULL
&&
2797 src_se
->se_addr
->sa_family
== af
);
2799 /* get/check source IP address */
/*
 * IPv4: read the local address of the socket, log a mismatch when the
 * subflow was bound to an IP, then copy it over the stored source.
 */
2802 error
= in_getsockaddr_s(so
, &src
);
2804 struct sockaddr_in
*ms
= SIN(src_se
->se_addr
);
2805 struct sockaddr_in
*s
= SIN(&src
);
2807 VERIFY(s
->sin_len
== ms
->sin_len
);
2808 VERIFY(ms
->sin_family
== AF_INET
);
2810 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2811 bcmp(&ms
->sin_addr
, &s
->sin_addr
,
2812 sizeof (ms
->sin_addr
)) != 0) {
2813 mptcplog((LOG_ERR
, "MPTCP Events: "
2815 "address %s (expected %s)\n", __func__
,
2816 mpts
->mpts_connid
, inet_ntop(AF_INET
,
2817 (void *)&s
->sin_addr
.s_addr
, buf0
,
2818 sizeof (buf0
)), inet_ntop(AF_INET
,
2819 (void *)&ms
->sin_addr
.s_addr
, buf1
,
2821 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2823 bcopy(s
, ms
, sizeof (*s
));
/* IPv6 counterpart of the block above */
2829 error
= in6_getsockaddr_s(so
, &src
);
2831 struct sockaddr_in6
*ms
= SIN6(src_se
->se_addr
);
2832 struct sockaddr_in6
*s
= SIN6(&src
);
2834 VERIFY(s
->sin6_len
== ms
->sin6_len
);
2835 VERIFY(ms
->sin6_family
== AF_INET6
);
2837 if ((mpts
->mpts_flags
& MPTSF_BOUND_IP
) &&
2838 bcmp(&ms
->sin6_addr
, &s
->sin6_addr
,
2839 sizeof (ms
->sin6_addr
)) != 0) {
2840 mptcplog((LOG_ERR
, "MPTCP Events: "
2842 "address %s (expected %s)\n", __func__
,
2843 mpts
->mpts_connid
, inet_ntop(AF_INET6
,
2844 (void *)&s
->sin6_addr
, buf0
,
2845 sizeof (buf0
)), inet_ntop(AF_INET6
,
2846 (void *)&ms
->sin6_addr
, buf1
,
2848 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2850 bcopy(s
, ms
, sizeof (*s
));
2861 mptcplog((LOG_ERR
, "MPTCP Events "
2862 "%s: cid %d getsockaddr failed (%d)\n",
2863 __func__
, mpts
->mpts_connid
, error
),
2864 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2867 /* get/verify the outbound interface */
2868 outifp
= sotoinpcb(so
)->inp_last_outifp
; /* could be NULL */
2869 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
) {
2870 VERIFY(mpts
->mpts_outif
!= NULL
);
2871 if (mpts
->mpts_outif
!= outifp
) {
2872 mptcplog((LOG_ERR
, "MPTCP Events: %s: cid %d outif %s "
2873 "(expected %s)\n", __func__
, mpts
->mpts_connid
,
2874 ((outifp
!= NULL
) ? outifp
->if_xname
: "NULL"),
2875 mpts
->mpts_outif
->if_xname
),
2876 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_ERR
);
2879 outifp
= mpts
->mpts_outif
;
2882 mpts
->mpts_outif
= outifp
;
/* snapshot the TCP estimates of the subflow */
2885 mpts
->mpts_srtt
= (intotcpcb(sotoinpcb(so
)))->t_srtt
;
2886 mpts
->mpts_rxtcur
= (intotcpcb(sotoinpcb(so
)))->t_rxtcur
;
2887 mpts
->mpts_maxseg
= (intotcpcb(sotoinpcb(so
)))->t_maxseg
;
/*
 * Classify the outbound interface: wifi is tested only when the link is
 * not cellular, wired only when it is not wifi.
 */
2889 cell
= IFNET_IS_CELLULAR(mpts
->mpts_outif
);
2890 wifi
= (!cell
&& IFNET_IS_WIFI(mpts
->mpts_outif
));
2891 wired
= (!wifi
&& IFNET_IS_WIRED(mpts
->mpts_outif
));
2894 mpts
->mpts_linktype
|= MPTSL_CELL
;
2896 mpts
->mpts_linktype
|= MPTSL_WIFI
;
2898 mpts
->mpts_linktype
|= MPTSL_WIRED
;
2900 socket_unlock(so
, 0);
/* the logged srtt is the stored value shifted right by 5 */
2902 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: cid %d "
2903 "establishment srtt %d \n", __func__
,
2904 mpts
->mpts_connid
, (mpts
->mpts_srtt
>> 5)),
2905 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
2908 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
2909 "%s: cid %d outif %s %s[%d] -> %s[%d] "
2910 "is %s\n", __func__
, mpts
->mpts_connid
, ((outifp
!= NULL
) ?
2911 outifp
->if_xname
: "NULL"), inet_ntop(af
, (af
== AF_INET
) ?
2912 (void *)&SIN(src_se
->se_addr
)->sin_addr
.s_addr
:
2913 (void *)&SIN6(src_se
->se_addr
)->sin6_addr
, buf0
, sizeof (buf0
)),
2914 ((af
== AF_INET
) ? ntohs(SIN(src_se
->se_addr
)->sin_port
) :
2915 ntohs(SIN6(src_se
->se_addr
)->sin6_port
)),
2916 inet_ntop(af
, ((af
== AF_INET
) ?
2917 (void *)&SIN(dst_se
->se_addr
)->sin_addr
.s_addr
:
2918 (void *)&SIN6(dst_se
->se_addr
)->sin6_addr
), buf1
, sizeof (buf1
)),
2919 ((af
== AF_INET
) ? ntohs(SIN(dst_se
->se_addr
)->sin_port
) :
2920 ntohs(SIN6(dst_se
->se_addr
)->sin6_port
)),
2921 ((mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) ?
2922 "MPTCP capable" : "a regular TCP")),
2923 (MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
), MPTCP_LOGLVL_LOG
);
/* mpok holds the masked MPTSF_MP_CAPABLE bit, non-zero when set */
2925 mpok
= (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
);
2928 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
2931 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
2932 /* case (a) above */
/*
 * NOTE(review): the condition selecting between the EPROTO drop below and
 * the transition to MPTCPS_ESTABLISHED is not visible in this listing.
 */
2934 mp_tp
->mpt_flags
|= MPTCPF_PEEL_OFF
;
2935 (void) mptcp_drop(mpte
, mp_tp
, EPROTO
);
2939 mptcplog((LOG_DEBUG
, "MPTCP State: "
2940 "MPTCPS_ESTABLISHED for mp_so 0x%llx \n",
2941 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2942 MPTCP_STATE_DBG
, MPTCP_LOGLVL_LOG
);
2943 mp_tp
->mpt_state
= MPTCPS_ESTABLISHED
;
2944 mpte
->mpte_associd
= mpts
->mpts_connid
;
2945 DTRACE_MPTCP2(state__change
,
2946 struct mptcb
*, mp_tp
,
2947 uint32_t, 0 /* event */);
/* an expensive outbound interface makes this subflow a backup path */
2949 if (mpts
->mpts_outif
&&
2950 IFNET_IS_EXPENSIVE(mpts
->mpts_outif
)) {
2951 sototcpcb(so
)->t_mpflags
|= (TMPF_BACKUP_PATH
| TMPF_SND_MPPRIO
);
2953 mpts
->mpts_flags
|= MPTSF_PREFERRED
;
2955 soisconnected(mp_so
);
/* MPTSF_MPCAP_CTRSET marks that this subflow is counted in mpte_nummpcapflows */
2959 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2960 mpte
->mpte_nummpcapflows
++;
2961 MPT_LOCK_SPIN(mp_tp
);
2962 /* With TFO, sndnxt may be initialized earlier */
2963 if (mpts
->mpts_sndnxt
== 0)
2964 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2969 if (mptcp_rwnotify
&& (mpte
->mpte_nummpcapflows
== 0)) {
2970 /* Experimental code, disabled by default. */
2976 * In case of additional flows, the MPTCP socket is not
2977 * MPTSF_MP_CAPABLE until an ACK is received from server
2978 * for 3-way handshake. TCP would have guaranteed that this
2979 * is an MPTCP subflow.
2982 mpts
->mpts_flags
|= MPTSF_MPCAP_CTRSET
;
2983 mpts
->mpts_flags
&= ~MPTSF_FASTJ_REQD
;
2984 mpte
->mpte_nummpcapflows
++;
2985 MPT_LOCK_SPIN(mp_tp
);
2986 /* With Fastjoin, sndnxt is updated before connected_ev */
2987 if (mpts
->mpts_sndnxt
== 0) {
2988 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
2989 mpts
->mpts_rel_seq
= 1;
2992 mptcp_output_needed(mpte
, mpts
);
2998 MPTS_LOCK_ASSERT_HELD(mpts
);
3000 return (MPTS_EVRET_OK
); /* keep the subflow socket around */
3004 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
/*
 * mptcp_subflow_disconnected_ev: handler for the SO_FILT_HINT_DISCONNECTED
 * subflow event.
 *
 * If the subflow is already MPTSF_DISCONNECTED the handler returns at
 * once.  Otherwise it clears the connection-state flags on the subflow,
 * sets MPTSF_DISCONNECTED, adds
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED to *p_mpsofilt_hint,
 * undoes the mpte_nummpcapflows accounting when MPTSF_MPCAP_CTRSET is set
 * (resetting mpte_active_sub if it pointed at this subflow), and calls
 * soisdisconnected() on the MPTCP socket when the MPTCP state is below
 * MPTCPS_ESTABLISHED.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 */
3007 mptcp_subflow_disconnected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3008 uint64_t *p_mpsofilt_hint
)
3010 struct socket
*mp_so
, *so
;
3011 struct mptcb
*mp_tp
;
3014 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3015 MPTS_LOCK_ASSERT_HELD(mpts
);
3016 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3017 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3018 mp_tp
= mpte
->mpte_mptcb
;
/* NOTE(review): so is assigned here but not read in the visible lines */
3019 so
= mpts
->mpts_socket
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
3021 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
3022 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
3024 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3025 "%s: cid %d [linger %s]\n", __func__
,
3026 mpts
->mpts_connid
, (linger
? "YES" : "NO")),
3027 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
/* the event was already handled for this subflow */
3029 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
3030 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
3033 * Clear flags that are used by getconninfo to return state.
3034 * Retain like MPTSF_DELETEOK for internal purposes.
3036 mpts
->mpts_flags
&= ~(MPTSF_CONNECTING
|MPTSF_CONNECT_PENDING
|
3037 MPTSF_CONNECTED
|MPTSF_DISCONNECTING
|MPTSF_PREFERRED
|
3038 MPTSF_MP_CAPABLE
|MPTSF_MP_READY
|MPTSF_MP_DEGRADED
|
3039 MPTSF_SUSPENDED
|MPTSF_ACTIVE
);
3040 mpts
->mpts_flags
|= MPTSF_DISCONNECTED
;
3043 * The subflow connection has been disconnected.
3045 * Right now, we simply unblock any waiters at the MPTCP socket layer
3046 * if the MPTCP connection has not been established.
3048 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
;
/* undo the per-session accounting done when the subflow was counted */
3050 if (mpts
->mpts_flags
& MPTSF_MPCAP_CTRSET
) {
3051 mpte
->mpte_nummpcapflows
--;
3052 if (mpte
->mpte_active_sub
== mpts
) {
3053 mpte
->mpte_active_sub
= NULL
;
3054 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3055 "%s: resetting active subflow \n",
3056 __func__
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3058 mpts
->mpts_flags
&= ~MPTSF_MPCAP_CTRSET
;
/*
 * NOTE(review): lines around the soisdisconnected() call are not visible
 * in this listing; confirm any lock handling there against the full file.
 */
3062 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
3065 soisdisconnected(mp_so
);
3072 * The underlying subflow socket has been disconnected;
3073 * it is no longer useful to us. Keep the subflow socket
3074 * around, unless the MPTCP socket has been detached or
3075 * the subflow has been disconnected explicitly, in which
3076 * case it should be deleted right away.
3078 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
3082 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
/*
 * mptcp_subflow_mpstatus_ev: handler for the SO_FILT_HINT_MPSTATUS subflow
 * event.
 *
 * Mirrors the MPTCP status bits of the subflow TCP control block
 * (TMPF_MPTCP_TRUE, TMPF_TCP_FALLBACK, TMPF_MPTCP_READY) into the subflow
 * flags (MPTSF_MP_CAPABLE, MPTSF_MP_DEGRADED, MPTSF_MP_READY).  A degraded
 * subflow sets MPTCPF_FALLBACK_TO_TCP and clears MPTCPF_JOIN_READY on the
 * connection.  The result placed in ret is
 * MPTS_EVRET_DISCONNECT_FALLBACK when the connection has fallen back to
 * TCP, MPTS_EVRET_CONNECT_PENDING when the subflow is MP-ready, and the
 * initial MPTS_EVRET_OK otherwise.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 *
 * NOTE(review): the else lines, the statement following the
 * already-degraded check, the socket_lock() call paired with the final
 * socket_unlock(), and the return statement are not visible in this
 * listing.  Confirm against the complete file.
 */
3085 mptcp_subflow_mpstatus_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3086 uint64_t *p_mpsofilt_hint
)
3088 struct socket
*mp_so
, *so
;
3089 struct mptcb
*mp_tp
;
3090 ev_ret_t ret
= MPTS_EVRET_OK
;
3092 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3093 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3094 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3095 mp_tp
= mpte
->mpte_mptcb
;
3097 MPTS_LOCK_ASSERT_HELD(mpts
);
3098 so
= mpts
->mpts_socket
;
/* MP-capable status follows TMPF_MPTCP_TRUE */
3103 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_TRUE
)
3104 mpts
->mpts_flags
|= MPTSF_MP_CAPABLE
;
3106 mpts
->mpts_flags
&= ~MPTSF_MP_CAPABLE
;
/* degraded status follows TMPF_TCP_FALLBACK */
3108 if (sototcpcb(so
)->t_mpflags
& TMPF_TCP_FALLBACK
) {
3109 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
3111 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
3114 mpts
->mpts_flags
&= ~MPTSF_MP_DEGRADED
;
/* ready status follows TMPF_MPTCP_READY */
3116 if (sototcpcb(so
)->t_mpflags
& TMPF_MPTCP_READY
)
3117 mpts
->mpts_flags
|= MPTSF_MP_READY
;
3119 mpts
->mpts_flags
&= ~MPTSF_MP_READY
;
/* a degraded subflow puts the whole connection into TCP fallback */
3121 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
3122 mp_tp
->mpt_flags
|= MPTCPF_FALLBACK_TO_TCP
;
3123 mp_tp
->mpt_flags
&= ~MPTCPF_JOIN_READY
;
/* pick the result reported to the event dispatcher */
3126 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
3127 VERIFY(!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
));
3128 ret
= MPTS_EVRET_DISCONNECT_FALLBACK
;
3129 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
|
3130 SO_FILT_HINT_CONNINFO_UPDATED
;
3131 } else if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
3132 mp_tp
->mpt_flags
|= MPTCPF_JOIN_READY
;
3133 ret
= MPTS_EVRET_CONNECT_PENDING
;
3135 *p_mpsofilt_hint
|= SO_FILT_HINT_LOCKED
|
3136 SO_FILT_HINT_CONNINFO_UPDATED
;
3139 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3140 "%s: mp_so 0x%llx mpt_flags=%b cid %d "
3141 "mptsf=%b\n", __func__
,
3142 (u_int64_t
)VM_KERNEL_ADDRPERM(mpte
->mpte_mppcb
->mpp_socket
),
3143 mp_tp
->mpt_flags
, MPTCPF_BITS
, mpts
->mpts_connid
,
3144 mpts
->mpts_flags
, MPTSF_BITS
),
3145 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3149 socket_unlock(so
, 0);
3154 * Handle SO_FILT_HINT_MUSTRST subflow socket event
/*
 * mptcp_subflow_mustrst_ev: handler for the SO_FILT_HINT_MUSTRST subflow
 * event.
 *
 * Sets mpts_soerror to ECONNABORTED if no error was recorded yet and
 * ECONNABORTED on the subflow socket, remembers whether the TCP control
 * block has TMPF_FASTCLOSERCV set, sends a TCP RST built from a template
 * of the subflow connection, and disconnects the subflow.
 * SO_FILT_HINT_LOCKED | SO_FILT_HINT_CONNINFO_UPDATED is added to
 * *p_mpsofilt_hint.  For a fast close on a connection that has not fallen
 * back to TCP, SO_FILT_HINT_CONNRESET is added as well and the MPTCP
 * socket error is set to either ECONNABORTED or ECONNRESET.
 *
 * Both the session lock and the subflow lock must be held by the caller.
 * Returns MPTS_EVRET_OK when the subflow lingers, MPTS_EVRET_DELETE
 * otherwise.
 *
 * NOTE(review): the socket_lock() call paired with the socket_unlock()
 * below, the check on the tcp_maketemplate() result and the else lines
 * are not visible in this listing.  Confirm against the complete file.
 */
3157 mptcp_subflow_mustrst_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3158 uint64_t *p_mpsofilt_hint
)
3160 struct socket
*mp_so
, *so
;
3161 struct mptcb
*mp_tp
;
3162 boolean_t linger
, is_fastclose
;
3165 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3166 MPTS_LOCK_ASSERT_HELD(mpts
);
3167 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3168 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3169 mp_tp
= mpte
->mpte_mptcb
;
3170 so
= mpts
->mpts_socket
;
/*
 * linger: keep the subflow unless it carries MPTSF_DELETEOK or the MPTCP
 * socket has SOF_PCBCLEARING set.
 */
3172 linger
= (!(mpts
->mpts_flags
& MPTSF_DELETEOK
) &&
3173 !(mp_so
->so_flags
& SOF_PCBCLEARING
));
/* an error recorded earlier on the subflow is not overwritten */
3175 if (mpts
->mpts_soerror
== 0)
3176 mpts
->mpts_soerror
= ECONNABORTED
;
3178 /* We got an invalid option or a fast close */
3180 struct tcptemp
*t_template
;
3181 struct inpcb
*inp
= sotoinpcb(so
);
3182 struct tcpcb
*tp
= NULL
;
3184 tp
= intotcpcb(inp
);
3185 so
->so_error
= ECONNABORTED
;
/* double negation normalises the masked flag to 0 or 1 */
3187 is_fastclose
= !!(tp
->t_mpflags
& TMPF_FASTCLOSERCV
);
3189 t_template
= tcp_maketemplate(tp
);
3191 struct tcp_respond_args tra
;
/* scope the RST to the bound interface when the PCB has one */
3193 bzero(&tra
, sizeof(tra
));
3194 if (inp
->inp_flags
& INP_BOUND_IF
)
3195 tra
.ifscope
= inp
->inp_boundifp
->if_index
;
3197 tra
.ifscope
= IFSCOPE_NONE
;
3198 tra
.awdl_unrestricted
= 1;
3200 tcp_respond(tp
, t_template
->tt_ipgen
,
3201 &t_template
->tt_t
, (struct mbuf
*)NULL
,
3202 tp
->rcv_nxt
, tp
->snd_una
, TH_RST
, &tra
);
/* release the mbuf backing the template */
3203 (void) m_free(dtom(t_template
));
/*
 * NOTE(review): the format string below has three conversions (%s, %llx,
 * %d) but four arguments follow it, so the so pointer is consumed by %d
 * and mpts_connid is never printed.  Dropping the so argument looks like
 * the intended fix - confirm.
 */
3204 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3205 "%s: mp_so 0x%llx cid %d \n",
3206 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3207 so
, mpts
->mpts_connid
),
3208 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3210 socket_unlock(so
, 0);
3211 mptcp_subflow_disconnect(mpte
, mpts
, !linger
);
3213 *p_mpsofilt_hint
|= (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_CONNINFO_UPDATED
);
/* a fast close on a connection that has not fallen back resets the MPTCP socket */
3217 if (!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) && is_fastclose
) {
3218 *p_mpsofilt_hint
|= SO_FILT_HINT_CONNRESET
;
3220 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
)
3221 mp_so
->so_error
= ECONNABORTED
;
3223 mp_so
->so_error
= ECONNRESET
;
3226 * mptcp_drop is being called after processing the events, to fully
3227 * close the MPTCP connection
/* switch from MPT_GC_TICKS to MPT_GC_TICKS_FAST */
3231 if (mp_tp
->mpt_gc_ticks
== MPT_GC_TICKS
)
3232 mp_tp
->mpt_gc_ticks
= MPT_GC_TICKS_FAST
;
3236 * Keep the subflow socket around unless the subflow has been
3237 * disconnected explicitly.
3239 return (linger
? MPTS_EVRET_OK
: MPTS_EVRET_DELETE
);
3243 mptcp_fastjoin_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3244 uint64_t *p_mpsofilt_hint
)
3246 #pragma unused(p_mpsofilt_hint)
3247 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3248 MPTS_LOCK_ASSERT_HELD(mpts
);
3249 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3251 if (mpte
->mpte_nummpcapflows
== 0) {
3252 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
3253 mptcplog((LOG_DEBUG
,"MPTCP Events: %s: %llx %llx \n",
3254 __func__
, mp_tp
->mpt_snduna
, mpts
->mpts_sndnxt
),
3255 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3257 mpte
->mpte_active_sub
= mpts
;
3258 mpts
->mpts_flags
|= (MPTSF_FASTJ_SEND
| MPTSF_ACTIVE
);
3261 * If mptcp_subflow_output is called before fastjoin_ev
3262 * then mpts->mpts_sndnxt is initialized to mp_tp->mpt_snduna
3263 * and further mpts->mpts_sndnxt is incremented by len copied.
3265 if (mpts
->mpts_sndnxt
== 0) {
3266 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
3271 return (MPTS_EVRET_OK
);
3275 mptcp_deleteok_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
3276 uint64_t *p_mpsofilt_hint
)
3278 #pragma unused(p_mpsofilt_hint)
3279 MPTE_LOCK_ASSERT_HELD(mpte
);
3280 MPTS_LOCK_ASSERT_HELD(mpts
);
3281 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3283 mptcplog((LOG_DEBUG
, "MPTCP Events: "
3284 "%s cid %d\n", __func__
, mpts
->mpts_connid
),
3285 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
3287 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3288 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
3289 return (MPTS_EVRET_DELETE
);
3291 return (MPTS_EVRET_OK
);
3295 mptcp_evret2str(ev_ret_t ret
)
3297 const char *c
= "UNKNOWN";
3300 case MPTS_EVRET_DELETE
:
3301 c
= "MPTS_EVRET_DELETE";
3303 case MPTS_EVRET_CONNECT_PENDING
:
3304 c
= "MPTS_EVRET_CONNECT_PENDING";
3306 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3307 c
= "MPTS_EVRET_DISCONNECT_FALLBACK";
3310 c
= "MPTS_EVRET_OK";
3319 * Add a reference to a subflow structure; used by MPTS_ADDREF().
3322 mptcp_subflow_addref(struct mptsub
*mpts
, int locked
)
3327 MPTS_LOCK_ASSERT_HELD(mpts
);
3329 if (++mpts
->mpts_refcnt
== 0) {
3330 panic("%s: mpts %p wraparound refcnt\n", __func__
, mpts
);
3338 * Remove a reference held on a subflow structure; used by MPTS_REMREF();
3341 mptcp_subflow_remref(struct mptsub
*mpts
)
3344 if (mpts
->mpts_refcnt
== 0) {
3345 panic("%s: mpts %p negative refcnt\n", __func__
, mpts
);
3348 if (--mpts
->mpts_refcnt
> 0) {
3352 /* callee will unlock and destroy lock */
3353 mptcp_subflow_free(mpts
);
3357 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
3358 * caller must ensure that the option can be issued on subflow sockets, via
3359 * MPOF_SUBFLOW_OK flag.
3362 mptcp_subflow_sosetopt(struct mptses
*mpte
, struct socket
*so
,
3365 struct socket
*mp_so
;
3366 struct sockopt sopt
;
3370 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3371 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
3373 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3374 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3376 bzero(&sopt
, sizeof (sopt
));
3377 sopt
.sopt_dir
= SOPT_SET
;
3378 sopt
.sopt_level
= mpo
->mpo_level
;
3379 sopt
.sopt_name
= mpo
->mpo_name
;
3380 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3381 sopt
.sopt_valsize
= sizeof (int);
3382 sopt
.sopt_p
= kernproc
;
3384 error
= sosetoptlock(so
, &sopt
, 0); /* already locked */
3386 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3387 "%s: mp_so 0x%llx sopt %s "
3388 "val %d set successful\n", __func__
,
3389 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3390 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3391 buf
, sizeof (buf
)), mpo
->mpo_intval
),
3392 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3394 mptcplog((LOG_ERR
, "MPTCP Socket: "
3395 "%s: mp_so 0x%llx sopt %s "
3396 "val %d set error %d\n", __func__
,
3397 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3398 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3399 buf
, sizeof (buf
)), mpo
->mpo_intval
, error
),
3400 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3406 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
3407 * caller must ensure that the option can be issued on subflow sockets, via
3408 * MPOF_SUBFLOW_OK flag.
3411 mptcp_subflow_sogetopt(struct mptses
*mpte
, struct socket
*so
,
3414 struct socket
*mp_so
;
3415 struct sockopt sopt
;
3419 VERIFY(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
);
3420 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3421 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3423 bzero(&sopt
, sizeof (sopt
));
3424 sopt
.sopt_dir
= SOPT_GET
;
3425 sopt
.sopt_level
= mpo
->mpo_level
;
3426 sopt
.sopt_name
= mpo
->mpo_name
;
3427 sopt
.sopt_val
= CAST_USER_ADDR_T(&mpo
->mpo_intval
);
3428 sopt
.sopt_valsize
= sizeof (int);
3429 sopt
.sopt_p
= kernproc
;
3431 error
= sogetoptlock(so
, &sopt
, 0); /* already locked */
3433 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3434 "%s: mp_so 0x%llx sopt %s "
3435 "val %d get successful\n", __func__
,
3436 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3437 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
,
3438 buf
, sizeof (buf
)), mpo
->mpo_intval
),
3439 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3441 mptcplog((LOG_ERR
, "MPTCP Socket: "
3442 "%s: mp_so 0x%llx sopt %s get error %d\n",
3443 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3444 mptcp_sopt2str(mpo
->mpo_level
,
3445 mpo
->mpo_name
, buf
, sizeof (buf
)), error
),
3446 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
3453 * MPTCP garbage collector.
3455 * This routine is called by the MP domain on-demand, periodic callout,
3456 * which is triggered when a MPTCP socket is closed. The callout will
3457 * repeat as long as this routine returns a non-zero value.
3460 mptcp_gc(struct mppcbinfo
*mppi
)
3462 struct mppcb
*mpp
, *tmpp
;
3463 uint32_t active
= 0;
3465 lck_mtx_assert(&mppi
->mppi_lock
, LCK_MTX_ASSERT_OWNED
);
3467 TAILQ_FOREACH_SAFE(mpp
, &mppi
->mppi_pcbs
, mpp_entry
, tmpp
) {
3468 struct socket
*mp_so
;
3469 struct mptses
*mpte
;
3470 struct mptcb
*mp_tp
;
3472 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
3473 mp_so
= mpp
->mpp_socket
;
3474 VERIFY(mp_so
!= NULL
);
3475 mpte
= mptompte(mpp
);
3476 VERIFY(mpte
!= NULL
);
3477 mp_tp
= mpte
->mpte_mptcb
;
3478 VERIFY(mp_tp
!= NULL
);
3480 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3481 "%s: mp_so 0x%llx found "
3482 "(u=%d,r=%d,s=%d)\n", __func__
,
3483 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mp_so
->so_usecount
,
3484 mp_so
->so_retaincnt
, mpp
->mpp_state
),
3485 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3487 if (!lck_mtx_try_lock(&mpp
->mpp_lock
)) {
3488 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3489 "%s: mp_so 0x%llx skipped "
3490 "(u=%d,r=%d)\n", __func__
,
3491 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3492 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3493 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3498 /* check again under the lock */
3499 if (mp_so
->so_usecount
> 1) {
3500 boolean_t wakeup
= FALSE
;
3501 struct mptsub
*mpts
, *tmpts
;
3503 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3504 "%s: mp_so 0x%llx skipped "
3505 "[u=%d,r=%d] %d %d\n", __func__
,
3506 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3507 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3508 mp_tp
->mpt_gc_ticks
,
3510 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3513 if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_1
) {
3514 if (mp_tp
->mpt_gc_ticks
> 0)
3515 mp_tp
->mpt_gc_ticks
--;
3516 if (mp_tp
->mpt_gc_ticks
== 0) {
3518 if (mp_tp
->mpt_localkey
!= NULL
) {
3520 mp_tp
->mpt_localkey
);
3521 mp_tp
->mpt_localkey
= NULL
;
3527 TAILQ_FOREACH_SAFE(mpts
,
3528 &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3530 mpts
->mpts_flags
|= MPTSF_DELETEOK
;
3531 if (mpts
->mpts_soerror
== 0)
3532 mpts
->mpts_soerror
= ETIMEDOUT
;
3533 mptcp_subflow_eupcall(mpts
->mpts_socket
,
3534 mpts
, SO_FILT_HINT_DISCONNECTED
);
3538 lck_mtx_unlock(&mpp
->mpp_lock
);
3543 if (mpp
->mpp_state
!= MPPCB_STATE_DEAD
) {
3544 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3545 "%s: mp_so 0x%llx skipped "
3546 "[u=%d,r=%d,s=%d]\n", __func__
,
3547 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3548 mp_so
->so_usecount
, mp_so
->so_retaincnt
,
3550 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3551 lck_mtx_unlock(&mpp
->mpp_lock
);
3557 * The PCB has been detached, and there is exactly 1 refnct
3558 * held by the MPTCP thread. Signal that thread to terminate,
3559 * after which the last refcnt will be released. That will
3560 * allow it to be destroyed below during the next round.
3562 if (mp_so
->so_usecount
== 1) {
3563 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3564 "%s: mp_so 0x%llx scheduled for "
3565 "termination [u=%d,r=%d]\n", __func__
,
3566 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3567 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3568 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3570 /* signal MPTCP thread to terminate */
3571 mptcp_thread_terminate_signal(mpte
);
3572 lck_mtx_unlock(&mpp
->mpp_lock
);
3577 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3578 "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
3579 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
3580 mp_so
->so_usecount
, mp_so
->so_retaincnt
),
3581 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3583 DTRACE_MPTCP4(dispose
, struct socket
*, mp_so
,
3584 struct sockbuf
*, &mp_so
->so_rcv
,
3585 struct sockbuf
*, &mp_so
->so_snd
,
3586 struct mppcb
*, mpp
);
3596 * Drop a MPTCP connection, reporting the specified error.
3599 mptcp_drop(struct mptses
*mpte
, struct mptcb
*mp_tp
, int errno
)
3601 struct socket
*mp_so
;
3603 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3604 MPT_LOCK_ASSERT_HELD(mp_tp
);
3605 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3606 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3608 mp_tp
->mpt_state
= MPTCPS_TERMINATE
;
3609 DTRACE_MPTCP2(state__change
, struct mptcb
*, mp_tp
,
3610 uint32_t, 0 /* event */);
3612 if (errno
== ETIMEDOUT
&& mp_tp
->mpt_softerror
!= 0)
3613 errno
= mp_tp
->mpt_softerror
;
3614 mp_so
->so_error
= errno
;
3616 return (mptcp_close(mpte
, mp_tp
));
3620 * Close a MPTCP control block.
3623 mptcp_close(struct mptses
*mpte
, struct mptcb
*mp_tp
)
3625 struct socket
*mp_so
= NULL
;
3626 struct mptsub
*mpts
= NULL
, *tmpts
= NULL
;
3628 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
3629 MPT_LOCK_ASSERT_HELD(mp_tp
);
3630 VERIFY(mpte
->mpte_mptcb
== mp_tp
);
3631 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3632 if (mp_tp
->mpt_localkey
!= NULL
) {
3633 mptcp_free_key(mp_tp
->mpt_localkey
);
3634 mp_tp
->mpt_localkey
= NULL
;
3638 soisdisconnected(mp_so
);
3641 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
3646 /* Clean up all subflows */
3647 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3649 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
3650 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
3652 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3660 mptcp_notify_close(struct socket
*so
)
3662 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_DISCONNECTED
));
3666 * Signal MPTCP thread to wake up.
3669 mptcp_thread_signal(struct mptses
*mpte
)
3671 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3672 mptcp_thread_signal_locked(mpte
);
3673 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3677 * Signal MPTCP thread to wake up (locked version)
3680 mptcp_thread_signal_locked(struct mptses
*mpte
)
3682 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3684 mpte
->mpte_thread_reqs
++;
3685 if (!mpte
->mpte_thread_active
&& mpte
->mpte_thread
!= THREAD_NULL
)
3686 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3690 * Signal MPTCP thread to terminate.
3693 mptcp_thread_terminate_signal(struct mptses
*mpte
)
3695 lck_mtx_lock(&mpte
->mpte_thread_lock
);
3696 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3697 mpte
->mpte_thread
= THREAD_NULL
;
3698 mpte
->mpte_thread_reqs
++;
3699 if (!mpte
->mpte_thread_active
)
3700 wakeup_one((caddr_t
)&mpte
->mpte_thread
);
3702 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3706 * MPTCP thread workloop.
3709 mptcp_thread_dowork(struct mptses
*mpte
)
3711 struct socket
*mp_so
;
3712 struct mptsub
*mpts
, *tmpts
;
3713 boolean_t connect_pending
= FALSE
, disconnect_fallback
= FALSE
;
3714 uint64_t mpsofilt_hint_mask
= 0;
3716 MPTE_LOCK(mpte
); /* same as MP socket lock */
3717 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3718 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3719 VERIFY(mp_so
!= NULL
);
3721 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3725 MPTS_ADDREF_LOCKED(mpts
); /* for us */
3727 /* Update process ownership based on parent mptcp socket */
3728 mptcp_update_last_owner(mpts
, mp_so
);
3730 mptcp_subflow_input(mpte
, mpts
);
3732 mptcp_get_rtt_measurement(mpts
, mpte
);
3734 ret
= mptcp_subflow_events(mpte
, mpts
, &mpsofilt_hint_mask
);
3736 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3737 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
3738 "%s: cid %d \n", __func__
,
3740 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3741 (void) mptcp_subflow_output(mpte
, mpts
);
3745 * If MPTCP socket is closed, disconnect all subflows.
3746 * This will generate a disconnect event which will
3747 * be handled during the next iteration, causing a
3748 * non-zero error to be returned above.
3750 if (mp_so
->so_flags
& SOF_PCBCLEARING
)
3751 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
3758 case MPTS_EVRET_DELETE
:
3759 mptcp_subflow_del(mpte
, mpts
, TRUE
);
3761 case MPTS_EVRET_CONNECT_PENDING
:
3762 connect_pending
= TRUE
;
3764 case MPTS_EVRET_DISCONNECT_FALLBACK
:
3765 disconnect_fallback
= TRUE
;
3768 mptcplog((LOG_DEBUG
,
3769 "MPTCP Socket: %s: mptcp_subflow_events "
3770 "returned invalid value: %d\n", __func__
,
3772 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
3775 MPTS_REMREF(mpts
); /* ours */
3778 if (mpsofilt_hint_mask
) {
3779 if (mpsofilt_hint_mask
& SO_FILT_HINT_CANTRCVMORE
) {
3780 socantrcvmore(mp_so
);
3781 mpsofilt_hint_mask
&= ~SO_FILT_HINT_CANTRCVMORE
;
3784 if (mpsofilt_hint_mask
& SO_FILT_HINT_CONNRESET
) {
3785 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
3788 mptcp_drop(mpte
, mp_tp
, ECONNRESET
);
3792 soevent(mp_so
, mpsofilt_hint_mask
);
3795 if (!connect_pending
&& !disconnect_fallback
) {
3800 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
3802 if (disconnect_fallback
) {
3803 struct socket
*so
= NULL
;
3804 struct inpcb
*inp
= NULL
;
3805 struct tcpcb
*tp
= NULL
;
3807 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
3812 mpts
->mpts_flags
|= MPTSF_MP_DEGRADED
;
3814 if (mpts
->mpts_flags
& (MPTSF_DISCONNECTING
|
3815 MPTSF_DISCONNECTED
|MPTSF_CONNECT_PENDING
)) {
3820 if (mpts
->mpts_flags
& MPTSF_TFO_REQD
)
3821 mptcp_drop_tfo_data(mpte
, mpts
, NULL
);
3823 so
= mpts
->mpts_socket
;
3826 * The MPTCP connection has degraded to a fallback
3827 * mode, so there is no point in keeping this subflow
3828 * regardless of its MPTCP-readiness state, unless it
3829 * is the primary one which we use for fallback. This
3830 * assumes that the subflow used for fallback is the
3835 inp
= sotoinpcb(so
);
3836 tp
= intotcpcb(inp
);
3838 ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
3839 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
3841 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
3842 socket_unlock(so
, 1);
3846 tp
->t_mpflags
|= TMPF_RESET
;
3847 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
3848 socket_unlock(so
, 1);
3850 } else if (connect_pending
) {
3852 * If delayed subflow start is set and cellular,
3853 * delay the connect till a retransmission timeout
3856 if ((mptcp_delayed_subf_start
) &&
3857 (IFNET_IS_CELLULAR(mpts
->mpts_outif
))) {
3863 * The MPTCP connection has progressed to a state
3864 * where it supports full multipath semantics; allow
3865 * additional joins to be attempted for all subflows
3866 * that are in the PENDING state.
3868 if (mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) {
3869 (void) mptcp_subflow_soconnectx(mpte
, mpts
);
3882 mptcp_thread_func(void *v
, wait_result_t w
)
3885 struct mptses
*mpte
= v
;
3886 struct timespec
*ts
= NULL
;
3888 VERIFY(mpte
!= NULL
);
3890 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3893 lck_mtx_assert(&mpte
->mpte_thread_lock
, LCK_MTX_ASSERT_OWNED
);
3895 if (mpte
->mpte_thread
!= THREAD_NULL
) {
3896 (void) msleep(&mpte
->mpte_thread
,
3897 &mpte
->mpte_thread_lock
, (PZERO
- 1) | PSPIN
,
3901 /* MPTCP socket is closed? */
3902 if (mpte
->mpte_thread
== THREAD_NULL
) {
3903 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3904 /* callee will destroy thread lock */
3905 mptcp_thread_destroy(mpte
);
3910 mpte
->mpte_thread_active
= 1;
3912 uint32_t reqs
= mpte
->mpte_thread_reqs
;
3914 lck_mtx_unlock(&mpte
->mpte_thread_lock
);
3915 mptcp_thread_dowork(mpte
);
3916 lck_mtx_lock_spin(&mpte
->mpte_thread_lock
);
3918 /* if there's no pending request, we're done */
3919 if (reqs
== mpte
->mpte_thread_reqs
||
3920 mpte
->mpte_thread
== THREAD_NULL
)
3923 mpte
->mpte_thread_reqs
= 0;
3924 mpte
->mpte_thread_active
= 0;
3929 * Destroy a MTCP thread, to be called in the MPTCP thread context
3930 * upon receiving an indication to self-terminate. This routine
3931 * will not return, as the current thread is terminated at the end.
3934 mptcp_thread_destroy(struct mptses
*mpte
)
3936 struct socket
*mp_so
;
3938 MPTE_LOCK(mpte
); /* same as MP socket lock */
3939 VERIFY(mpte
->mpte_thread
== THREAD_NULL
);
3940 VERIFY(mpte
->mpte_mppcb
!= NULL
);
3942 mptcp_sesdestroy(mpte
);
3944 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
3945 VERIFY(mp_so
!= NULL
);
3946 VERIFY(mp_so
->so_usecount
> 0);
3947 mp_so
->so_usecount
--; /* for thread */
3948 mpte
->mpte_mppcb
->mpp_flags
|= MPP_DEFUNCT
;
3951 /* for the extra refcnt from kernel_thread_start() */
3952 thread_deallocate(current_thread());
3953 /* this is the end */
3954 thread_terminate(current_thread());
3959 * Protocol pr_lock callback.
3962 mptcp_lock(struct socket
*mp_so
, int refcount
, void *lr
)
3964 struct mppcb
*mpp
= sotomppcb(mp_so
);
3968 lr_saved
= __builtin_return_address(0);
3973 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__
,
3974 mp_so
, lr_saved
, solockhistory_nr(mp_so
));
3977 lck_mtx_lock(&mpp
->mpp_lock
);
3979 if (mp_so
->so_usecount
< 0) {
3980 panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__
,
3981 mp_so
, mp_so
->so_pcb
, lr_saved
, mp_so
->so_usecount
,
3982 solockhistory_nr(mp_so
));
3986 mp_so
->so_usecount
++;
3987 mp_so
->lock_lr
[mp_so
->next_lock_lr
] = lr_saved
;
3988 mp_so
->next_lock_lr
= (mp_so
->next_lock_lr
+ 1) % SO_LCKDBG_MAX
;
3994 * Protocol pr_unlock callback.
3997 mptcp_unlock(struct socket
*mp_so
, int refcount
, void *lr
)
3999 struct mppcb
*mpp
= sotomppcb(mp_so
);
4003 lr_saved
= __builtin_return_address(0);
4008 panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__
,
4009 mp_so
, mp_so
->so_usecount
, lr_saved
,
4010 solockhistory_nr(mp_so
));
4013 lck_mtx_assert(&mpp
->mpp_lock
, LCK_MTX_ASSERT_OWNED
);
4016 mp_so
->so_usecount
--;
4018 if (mp_so
->so_usecount
< 0) {
4019 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
4020 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
4023 mp_so
->unlock_lr
[mp_so
->next_unlock_lr
] = lr_saved
;
4024 mp_so
->next_unlock_lr
= (mp_so
->next_unlock_lr
+ 1) % SO_LCKDBG_MAX
;
4025 lck_mtx_unlock(&mpp
->mpp_lock
);
4031 * Protocol pr_getlock callback.
4034 mptcp_getlock(struct socket
*mp_so
, int locktype
)
4036 #pragma unused(locktype)
4037 struct mppcb
*mpp
= sotomppcb(mp_so
);
4040 panic("%s: so=%p NULL so_pcb %s\n", __func__
, mp_so
,
4041 solockhistory_nr(mp_so
));
4044 if (mp_so
->so_usecount
< 0) {
4045 panic("%s: so=%p usecount=%x lrh= %s\n", __func__
,
4046 mp_so
, mp_so
->so_usecount
, solockhistory_nr(mp_so
));
4049 return (&mpp
->mpp_lock
);
4053 * Key generation functions
4056 mptcp_generate_unique_key(struct mptcp_key_entry
*key_entry
)
4058 struct mptcp_key_entry
*key_elm
;
4060 read_random(&key_entry
->mkey_value
, sizeof (key_entry
->mkey_value
));
4061 if (key_entry
->mkey_value
== 0)
4063 mptcp_do_sha1(&key_entry
->mkey_value
, key_entry
->mkey_digest
,
4064 sizeof (key_entry
->mkey_digest
));
4066 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
4067 if (key_elm
->mkey_value
== key_entry
->mkey_value
) {
4070 if (bcmp(key_elm
->mkey_digest
, key_entry
->mkey_digest
, 4) ==
4077 static mptcp_key_t
*
4078 mptcp_reserve_key(void)
4080 struct mptcp_key_entry
*key_elm
;
4081 struct mptcp_key_entry
*found_elm
= NULL
;
4083 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4084 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
4085 if (key_elm
->mkey_flags
== MKEYF_FREE
) {
4086 key_elm
->mkey_flags
= MKEYF_INUSE
;
4087 found_elm
= key_elm
;
4091 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4094 return (&found_elm
->mkey_value
);
4097 key_elm
= (struct mptcp_key_entry
*)
4098 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
4099 key_elm
->mkey_flags
= MKEYF_INUSE
;
4101 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4102 mptcp_generate_unique_key(key_elm
);
4103 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_elm
, mkey_next
);
4104 mptcp_keys_pool
.mkph_count
+= 1;
4105 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4106 return (&key_elm
->mkey_value
);
4110 mptcp_get_stored_digest(mptcp_key_t
*key
)
4112 struct mptcp_key_entry
*key_holder
;
4113 caddr_t digest
= NULL
;
4115 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4116 key_holder
= (struct mptcp_key_entry
*)(void *)((caddr_t
)key
-
4117 offsetof(struct mptcp_key_entry
, mkey_value
));
4118 if (key_holder
->mkey_flags
!= MKEYF_INUSE
)
4119 panic_plain("%s", __func__
);
4120 digest
= &key_holder
->mkey_digest
[0];
4121 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4126 mptcp_free_key(mptcp_key_t
*key
)
4128 struct mptcp_key_entry
*key_holder
;
4129 struct mptcp_key_entry
*key_elm
;
4130 int pt
= RandomULong();
4132 lck_mtx_lock(&mptcp_keys_pool
.mkph_lock
);
4133 key_holder
= (struct mptcp_key_entry
*)(void*)((caddr_t
)key
-
4134 offsetof(struct mptcp_key_entry
, mkey_value
));
4135 key_holder
->mkey_flags
= MKEYF_FREE
;
4137 LIST_REMOVE(key_holder
, mkey_next
);
4138 mptcp_keys_pool
.mkph_count
-= 1;
4140 /* Free half the time */
4142 zfree(mptcp_keys_pool
.mkph_key_entry_zone
, key_holder
);
4144 /* Insert it at random point to avoid early reuse */
4146 if (mptcp_keys_pool
.mkph_count
> 1) {
4147 pt
= pt
% (mptcp_keys_pool
.mkph_count
- 1);
4148 LIST_FOREACH(key_elm
, &mptcp_keys_pool
, mkey_next
) {
4150 LIST_INSERT_AFTER(key_elm
, key_holder
,
4156 panic("missed insertion");
4158 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_holder
,
4161 mptcp_keys_pool
.mkph_count
+= 1;
4163 lck_mtx_unlock(&mptcp_keys_pool
.mkph_lock
);
4167 mptcp_key_pool_init(void)
4170 struct mptcp_key_entry
*key_entry
;
4172 LIST_INIT(&mptcp_keys_pool
);
4173 mptcp_keys_pool
.mkph_count
= 0;
4175 mptcp_keys_pool
.mkph_key_elm_sz
= (vm_size_t
)
4176 (sizeof (struct mptcp_key_entry
));
4177 mptcp_keys_pool
.mkph_key_entry_zone
= zinit(
4178 mptcp_keys_pool
.mkph_key_elm_sz
,
4179 MPTCP_MX_KEY_ALLOCS
* mptcp_keys_pool
.mkph_key_elm_sz
,
4180 MPTCP_MX_PREALLOC_ZONE_SZ
, "mptkeys");
4181 if (mptcp_keys_pool
.mkph_key_entry_zone
== NULL
) {
4182 panic("%s: unable to allocate MPTCP keys zone \n", __func__
);
4185 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_CALLERACCT
, FALSE
);
4186 zone_change(mptcp_keys_pool
.mkph_key_entry_zone
, Z_EXPAND
, TRUE
);
4188 for (i
= 0; i
< MPTCP_KEY_PREALLOCS_MX
; i
++) {
4189 key_entry
= (struct mptcp_key_entry
*)
4190 zalloc(mptcp_keys_pool
.mkph_key_entry_zone
);
4191 key_entry
->mkey_flags
= MKEYF_FREE
;
4192 mptcp_generate_unique_key(key_entry
);
4193 LIST_INSERT_HEAD(&mptcp_keys_pool
, key_entry
, mkey_next
);
4194 mptcp_keys_pool
.mkph_count
+= 1;
4196 lck_mtx_init(&mptcp_keys_pool
.mkph_lock
, mtcbinfo
.mppi_lock_grp
,
4197 mtcbinfo
.mppi_lock_attr
);
4201 * MPTCP Join support
4205 mptcp_attach_to_subf(struct socket
*so
, struct mptcb
*mp_tp
,
4208 struct tcpcb
*tp
= sototcpcb(so
);
4209 struct mptcp_subf_auth_entry
*sauth_entry
;
4210 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4212 MPT_LOCK_SPIN(mp_tp
);
4213 tp
->t_mptcb
= mp_tp
;
4215 * The address ID of the first flow is implicitly 0.
4217 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
4218 tp
->t_local_aid
= 0;
4220 tp
->t_local_aid
= addr_id
;
4221 tp
->t_mpflags
|= (TMPF_PREESTABLISHED
| TMPF_JOINED_FLOW
);
4222 so
->so_flags
|= SOF_MP_SEC_SUBFLOW
;
4225 sauth_entry
= zalloc(mpt_subauth_zone
);
4226 sauth_entry
->msae_laddr_id
= tp
->t_local_aid
;
4227 sauth_entry
->msae_raddr_id
= 0;
4228 sauth_entry
->msae_raddr_rand
= 0;
4230 sauth_entry
->msae_laddr_rand
= RandomULong();
4231 if (sauth_entry
->msae_laddr_rand
== 0)
4233 MPT_LOCK_SPIN(mp_tp
);
4234 LIST_INSERT_HEAD(&mp_tp
->mpt_subauth_list
, sauth_entry
, msae_next
);
4239 mptcp_detach_mptcb_from_subf(struct mptcb
*mp_tp
, struct socket
*so
)
4241 struct mptcp_subf_auth_entry
*sauth_entry
;
4242 struct tcpcb
*tp
= NULL
;
4248 socket_unlock(so
, 0);
4253 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4254 if (sauth_entry
->msae_laddr_id
== tp
->t_local_aid
) {
4260 LIST_REMOVE(sauth_entry
, msae_next
);
4265 zfree(mpt_subauth_zone
, sauth_entry
);
4268 socket_unlock(so
, 0);
4272 mptcp_get_rands(mptcp_addr_id addr_id
, struct mptcb
*mp_tp
, u_int32_t
*lrand
,
4275 struct mptcp_subf_auth_entry
*sauth_entry
;
4276 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4279 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4280 if (sauth_entry
->msae_laddr_id
== addr_id
) {
4282 *lrand
= sauth_entry
->msae_laddr_rand
;
4284 *rrand
= sauth_entry
->msae_raddr_rand
;
4292 mptcp_set_raddr_rand(mptcp_addr_id laddr_id
, struct mptcb
*mp_tp
,
4293 mptcp_addr_id raddr_id
, u_int32_t raddr_rand
)
4295 struct mptcp_subf_auth_entry
*sauth_entry
;
4296 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4299 LIST_FOREACH(sauth_entry
, &mp_tp
->mpt_subauth_list
, msae_next
) {
4300 if (sauth_entry
->msae_laddr_id
== laddr_id
) {
4301 if ((sauth_entry
->msae_raddr_id
!= 0) &&
4302 (sauth_entry
->msae_raddr_id
!= raddr_id
)) {
4303 mptcplog((LOG_ERR
, "MPTCP Socket: %s mismatched"
4304 " address ids %d %d \n", __func__
, raddr_id
,
4305 sauth_entry
->msae_raddr_id
),
4306 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4310 sauth_entry
->msae_raddr_id
= raddr_id
;
4311 if ((sauth_entry
->msae_raddr_rand
!= 0) &&
4312 (sauth_entry
->msae_raddr_rand
!= raddr_rand
)) {
4313 mptcplog((LOG_ERR
, "MPTCP Socket: "
4314 "%s: dup SYN_ACK %d %d \n",
4315 __func__
, raddr_rand
,
4316 sauth_entry
->msae_raddr_rand
),
4317 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4321 sauth_entry
->msae_raddr_rand
= raddr_rand
;
4330 * SHA1 support for MPTCP
4333 mptcp_do_sha1(mptcp_key_t
*key
, char *sha_digest
, int digest_len
)
4336 const unsigned char *sha1_base
;
4339 if (digest_len
!= SHA1_RESULTLEN
) {
4343 sha1_base
= (const unsigned char *) key
;
4344 sha1_size
= sizeof (mptcp_key_t
);
4345 SHA1Init(&sha1ctxt
);
4346 SHA1Update(&sha1ctxt
, sha1_base
, sha1_size
);
4347 SHA1Final(sha_digest
, &sha1ctxt
);
4352 mptcp_hmac_sha1(mptcp_key_t key1
, mptcp_key_t key2
,
4353 u_int32_t rand1
, u_int32_t rand2
, u_char
*digest
, int digest_len
)
4356 mptcp_key_t key_ipad
[8] = {0}; /* key XOR'd with inner pad */
4357 mptcp_key_t key_opad
[8] = {0}; /* key XOR'd with outer pad */
4361 bzero(digest
, digest_len
);
4363 /* Set up the Key for HMAC */
4370 /* Set up the message for HMAC */
4374 /* Key is 512 block length, so no need to compute hash */
4376 /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
4378 for (i
= 0; i
< 8; i
++) {
4379 key_ipad
[i
] ^= 0x3636363636363636;
4380 key_opad
[i
] ^= 0x5c5c5c5c5c5c5c5c;
4383 /* Perform inner SHA1 */
4384 SHA1Init(&sha1ctxt
);
4385 SHA1Update(&sha1ctxt
, (unsigned char *)key_ipad
, sizeof (key_ipad
));
4386 SHA1Update(&sha1ctxt
, (unsigned char *)data
, sizeof (data
));
4387 SHA1Final(digest
, &sha1ctxt
);
4389 /* Perform outer SHA1 */
4390 SHA1Init(&sha1ctxt
);
4391 SHA1Update(&sha1ctxt
, (unsigned char *)key_opad
, sizeof (key_opad
));
4392 SHA1Update(&sha1ctxt
, (unsigned char *)digest
, SHA1_RESULTLEN
);
4393 SHA1Final(digest
, &sha1ctxt
);
4397 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
4398 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
4401 mptcp_get_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
, u_char
*digest
,
4404 uint32_t lrand
, rrand
;
4405 mptcp_key_t localkey
, remotekey
;
4406 MPT_LOCK_ASSERT_NOTHELD(mp_tp
);
4408 if (digest_len
!= SHA1_RESULTLEN
)
4412 mptcp_get_rands(aid
, mp_tp
, &lrand
, &rrand
);
4413 MPT_LOCK_SPIN(mp_tp
);
4414 localkey
= *mp_tp
->mpt_localkey
;
4415 remotekey
= mp_tp
->mpt_remotekey
;
4417 mptcp_hmac_sha1(localkey
, remotekey
, lrand
, rrand
, digest
,
4422 mptcp_get_trunced_hmac(mptcp_addr_id aid
, struct mptcb
*mp_tp
)
4424 u_char digest
[SHA1_RESULTLEN
];
4425 u_int64_t trunced_digest
;
4427 mptcp_get_hmac(aid
, mp_tp
, &digest
[0], sizeof (digest
));
4428 bcopy(digest
, &trunced_digest
, 8);
4429 return (trunced_digest
);
4433 * Authentication data generation
4436 mptcp_generate_token(char *sha_digest
, int sha_digest_len
, caddr_t token
,
4439 VERIFY(token_len
== sizeof (u_int32_t
));
4440 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4442 /* Most significant 32 bits of the SHA1 hash */
4443 bcopy(sha_digest
, token
, sizeof (u_int32_t
));
4448 mptcp_generate_idsn(char *sha_digest
, int sha_digest_len
, caddr_t idsn
,
4451 VERIFY(idsn_len
== sizeof (u_int64_t
));
4452 VERIFY(sha_digest_len
== SHA1_RESULTLEN
);
4455 * Least significant 64 bits of the SHA1 hash
4458 idsn
[7] = sha_digest
[12];
4459 idsn
[6] = sha_digest
[13];
4460 idsn
[5] = sha_digest
[14];
4461 idsn
[4] = sha_digest
[15];
4462 idsn
[3] = sha_digest
[16];
4463 idsn
[2] = sha_digest
[17];
4464 idsn
[1] = sha_digest
[18];
4465 idsn
[0] = sha_digest
[19];
4470 mptcp_conn_properties(struct mptcb
*mp_tp
)
4472 /* There is only Version 0 at this time */
4473 mp_tp
->mpt_version
= MPTCP_STD_VERSION_0
;
4475 /* Set DSS checksum flag */
4477 mp_tp
->mpt_flags
|= MPTCPF_CHECKSUM
;
4479 /* Set up receive window */
4480 mp_tp
->mpt_rcvwnd
= mptcp_sbspace(mp_tp
);
4482 /* Set up gc ticks */
4483 mp_tp
->mpt_gc_ticks
= MPT_GC_TICKS
;
4487 mptcp_init_local_parms(struct mptcb
*mp_tp
)
4489 caddr_t local_digest
= NULL
;
4491 mp_tp
->mpt_localkey
= mptcp_reserve_key();
4492 local_digest
= mptcp_get_stored_digest(mp_tp
->mpt_localkey
);
4493 mptcp_generate_token(local_digest
, SHA1_RESULTLEN
,
4494 (caddr_t
)&mp_tp
->mpt_localtoken
, sizeof (mp_tp
->mpt_localtoken
));
4495 mptcp_generate_idsn(local_digest
, SHA1_RESULTLEN
,
4496 (caddr_t
)&mp_tp
->mpt_local_idsn
, sizeof (u_int64_t
));
4498 /* The subflow SYN is also first MPTCP byte */
4499 mp_tp
->mpt_snduna
= mp_tp
->mpt_sndmax
= mp_tp
->mpt_local_idsn
+ 1;
4500 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
4502 mptcp_conn_properties(mp_tp
);
4506 mptcp_init_remote_parms(struct mptcb
*mp_tp
)
4508 char remote_digest
[MPTCP_SHA1_RESULTLEN
];
4509 MPT_LOCK_ASSERT_HELD(mp_tp
);
4511 /* Only Version 0 is supported for auth purposes */
4512 if (mp_tp
->mpt_version
!= MPTCP_STD_VERSION_0
)
4515 /* Setup local and remote tokens and Initial DSNs */
4517 if (!mptcp_do_sha1(&mp_tp
->mpt_remotekey
, remote_digest
,
4519 mptcplog((LOG_ERR
, "MPTCP Socket: %s: unexpected failure",
4520 __func__
), MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
4523 mptcp_generate_token(remote_digest
, SHA1_RESULTLEN
,
4524 (caddr_t
)&mp_tp
->mpt_remotetoken
, sizeof (mp_tp
->mpt_remotetoken
));
4525 mptcp_generate_idsn(remote_digest
, SHA1_RESULTLEN
,
4526 (caddr_t
)&mp_tp
->mpt_remote_idsn
, sizeof (u_int64_t
));
4527 mp_tp
->mpt_rcvatmark
= mp_tp
->mpt_rcvnxt
= mp_tp
->mpt_remote_idsn
+ 1;
4536 mptcp_get_localtoken(void* mptcb_arg
)
4538 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4539 return (mp_tp
->mpt_localtoken
);
4543 mptcp_get_remotetoken(void* mptcb_arg
)
4545 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4546 return (mp_tp
->mpt_remotetoken
);
4550 mptcp_get_localkey(void* mptcb_arg
)
4552 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4553 if (mp_tp
->mpt_localkey
!= NULL
)
4554 return (*mp_tp
->mpt_localkey
);
4560 mptcp_get_remotekey(void* mptcb_arg
)
4562 struct mptcb
*mp_tp
= (struct mptcb
*)mptcb_arg
;
4563 return (mp_tp
->mpt_remotekey
);
4567 mptcp_send_dfin(struct socket
*so
)
4569 struct tcpcb
*tp
= NULL
;
4570 struct inpcb
*inp
= NULL
;
4572 inp
= sotoinpcb(so
);
4576 tp
= intotcpcb(inp
);
4580 if (!(tp
->t_mpflags
& TMPF_RESET
))
4581 tp
->t_mpflags
|= TMPF_SEND_DFIN
;
4585 * Data Sequence Mapping routines
4588 mptcp_insert_dsn(struct mppcb
*mpp
, struct mbuf
*m
)
4590 struct mptcb
*mp_tp
;
4595 __IGNORE_WCASTALIGN(mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
);
4598 VERIFY(m
->m_flags
& M_PKTHDR
);
4599 m
->m_pkthdr
.pkt_flags
|= (PKTF_MPTCP
| PKTF_MPSO
);
4600 m
->m_pkthdr
.mp_dsn
= mp_tp
->mpt_sndmax
;
4601 m
->m_pkthdr
.mp_rlen
= m_pktlen(m
);
4602 mp_tp
->mpt_sndmax
+= m_pktlen(m
);
4609 mptcp_preproc_sbdrop(struct socket
*so
, struct mbuf
*m
, unsigned int len
)
4611 u_int32_t sub_len
= 0;
4614 if (so
->so_flags1
& SOF1_DATA_IDEMPOTENT
) {
4615 /* TFO makes things complicated. */
4616 if (so
->so_flags1
& SOF1_TFO_REWIND
) {
4618 so
->so_flags1
&= ~SOF1_TFO_REWIND
;
4623 VERIFY(m
->m_flags
& M_PKTHDR
);
4625 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4626 sub_len
= m
->m_pkthdr
.mp_rlen
;
4628 if (sub_len
< len
) {
4629 m
->m_pkthdr
.mp_dsn
+= sub_len
;
4630 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4631 m
->m_pkthdr
.mp_rseq
+= sub_len
;
4633 m
->m_pkthdr
.mp_rlen
= 0;
4636 /* sub_len >= len */
4638 m
->m_pkthdr
.mp_dsn
+= len
;
4639 if (!(m
->m_pkthdr
.pkt_flags
& PKTF_MPSO
)) {
4641 m
->m_pkthdr
.mp_rseq
+= len
;
4643 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4644 "%s: dsn 0x%llx ssn %u len %d %d\n",
4646 m
->m_pkthdr
.mp_dsn
, m
->m_pkthdr
.mp_rseq
,
4647 m
->m_pkthdr
.mp_rlen
, len
),
4648 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4649 m
->m_pkthdr
.mp_rlen
-= len
;
4653 panic("%s: MPTCP tag not set", __func__
);
4659 if (so
->so_flags
& SOF_MP_SUBFLOW
&&
4660 !(sototcpcb(so
)->t_mpflags
& TMPF_TFO_REQUEST
) &&
4661 !(sototcpcb(so
)->t_mpflags
& TMPF_RCVD_DACK
)) {
4663 * Received an ack without receiving a DATA_ACK.
4664 * Need to fallback to regular TCP (or destroy this subflow).
4666 mptcp_notify_mpfail(so
);
4670 /* Obtain the DSN mapping stored in the mbuf */
4672 mptcp_output_getm_dsnmap32(struct socket
*so
, int off
, uint32_t datalen
,
4673 u_int32_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
, u_int64_t
*dsn64p
)
4677 mptcp_output_getm_dsnmap64(so
, off
, datalen
, &dsn64
, relseq
, data_len
);
4678 *dsn
= (u_int32_t
)MPTCP_DATASEQ_LOW32(dsn64
);
4683 mptcp_output_getm_dsnmap64(struct socket
*so
, int off
, uint32_t datalen
,
4684 u_int64_t
*dsn
, u_int32_t
*relseq
, u_int16_t
*data_len
)
4686 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4687 struct mbuf
*mnext
= NULL
;
4688 uint32_t runlen
= 0;
4690 uint32_t contig_len
= 0;
4698 * In the subflow socket, the DSN sequencing can be discontiguous,
4699 * but the subflow sequence mapping is contiguous. Use the subflow
4700 * sequence property to find the right mbuf and corresponding dsn
4705 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4706 VERIFY(m
->m_flags
& M_PKTHDR
);
4708 if ((unsigned int)off
>= m
->m_pkthdr
.mp_rlen
) {
4709 off
-= m
->m_pkthdr
.mp_rlen
;
4717 panic("%s: bad offset", __func__
);
4721 dsn64
= m
->m_pkthdr
.mp_dsn
+ off
;
4723 *relseq
= m
->m_pkthdr
.mp_rseq
+ off
;
4726 * Now find the last contiguous byte and its length from
4729 runlen
= m
->m_pkthdr
.mp_rlen
- off
;
4730 contig_len
= runlen
;
4732 /* If datalen does not span multiple mbufs, return */
4733 if (datalen
<= runlen
) {
4734 *data_len
= min(datalen
, UINT16_MAX
);
4739 while (datalen
> runlen
) {
4740 if (mnext
== NULL
) {
4741 panic("%s: bad datalen = %d, %d %d", __func__
, datalen
,
4745 VERIFY(mnext
->m_flags
& M_PKTHDR
);
4746 VERIFY(mnext
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4749 * case A. contiguous DSN stream
4750 * case B. discontiguous DSN stream
4752 if (mnext
->m_pkthdr
.mp_dsn
== (dsn64
+ runlen
)) {
4754 runlen
+= mnext
->m_pkthdr
.mp_rlen
;
4755 contig_len
+= mnext
->m_pkthdr
.mp_rlen
;
4756 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: contig \n",
4757 __func__
), MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4760 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4761 "%s: discontig datalen %d contig_len %d cc %d \n",
4762 __func__
, datalen
, contig_len
, so
->so_snd
.sb_cc
),
4763 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4766 mnext
= mnext
->m_next
;
4768 datalen
= min(datalen
, UINT16_MAX
);
4769 *data_len
= min(datalen
, contig_len
);
4770 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
4771 "%s: %llu %u %d %d \n", __func__
,
4772 *dsn
, *relseq
, *data_len
, off
),
4773 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
4777 * MPTCP's notion of the next insequence Data Sequence number is adjusted
4778 * here. It must be called from mptcp_adj_rmap() which is called only after
4779 * reassembly of out of order data. The rcvnxt variable must
4780 * be updated only when atleast some insequence new data is received.
4783 mptcp_adj_rcvnxt(struct tcpcb
*tp
, struct mbuf
*m
)
4785 struct mptcb
*mp_tp
= tptomptp(tp
);
4790 if ((MPTCP_SEQ_GEQ(mp_tp
->mpt_rcvnxt
, m
->m_pkthdr
.mp_dsn
)) &&
4791 (MPTCP_SEQ_LEQ(mp_tp
->mpt_rcvnxt
, (m
->m_pkthdr
.mp_dsn
+
4792 m
->m_pkthdr
.mp_rlen
)))) {
4793 mp_tp
->mpt_rcvnxt
= m
->m_pkthdr
.mp_dsn
+ m
->m_pkthdr
.mp_rlen
;
4799 * Note that this is called only from tcp_input() via mptcp_input_preproc()
4800 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
4801 * When it trims data tcp_input calls m_adj() which does not remove the
4802 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
4803 * The dsn map insertion cannot be delayed after trim, because data can be in
4804 * the reassembly queue for a while and the DSN option info in tp will be
4805 * overwritten for every new packet received.
4806 * The dsn map will be adjusted just prior to appending to subflow sockbuf
4807 * with mptcp_adj_rmap()
4810 mptcp_insert_rmap(struct tcpcb
*tp
, struct mbuf
*m
)
4812 VERIFY(!(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
));
4814 if (tp
->t_mpflags
& TMPF_EMBED_DSN
) {
4815 VERIFY(m
->m_flags
& M_PKTHDR
);
4816 m
->m_pkthdr
.mp_dsn
= tp
->t_rcv_map
.mpt_dsn
;
4817 m
->m_pkthdr
.mp_rseq
= tp
->t_rcv_map
.mpt_sseq
;
4818 m
->m_pkthdr
.mp_rlen
= tp
->t_rcv_map
.mpt_len
;
4819 m
->m_pkthdr
.pkt_flags
|= PKTF_MPTCP
;
4820 tp
->t_mpflags
&= ~TMPF_EMBED_DSN
;
4821 tp
->t_mpflags
|= TMPF_MPTCP_ACKNOW
;
4826 mptcp_adj_rmap(struct socket
*so
, struct mbuf
*m
)
4829 u_int32_t sseq
, datalen
;
4830 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
4831 u_int32_t old_rcvnxt
= 0;
4833 if (m_pktlen(m
) == 0)
4836 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
) {
4837 VERIFY(m
->m_flags
& M_PKTHDR
);
4839 dsn
= m
->m_pkthdr
.mp_dsn
;
4840 sseq
= m
->m_pkthdr
.mp_rseq
+ tp
->irs
;
4841 datalen
= m
->m_pkthdr
.mp_rlen
;
4843 /* data arrived without an DSS option mapping */
4845 /* initial subflow can fallback right after SYN handshake */
4846 mptcp_notify_mpfail(so
);
4850 /* In the common case, data is in window and in sequence */
4851 if (m
->m_pkthdr
.len
== (int)datalen
) {
4852 mptcp_adj_rcvnxt(tp
, m
);
4856 old_rcvnxt
= tp
->rcv_nxt
- m
->m_pkthdr
.len
;
4857 if (SEQ_GT(old_rcvnxt
, sseq
)) {
4858 /* data trimmed from the left */
4859 int off
= old_rcvnxt
- sseq
;
4860 m
->m_pkthdr
.mp_dsn
+= off
;
4861 m
->m_pkthdr
.mp_rseq
+= off
;
4862 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4863 } else if (old_rcvnxt
== sseq
) {
4865 * data was trimmed from the right
4867 m
->m_pkthdr
.mp_rlen
= m
->m_pkthdr
.len
;
4869 mptcp_notify_mpfail(so
);
4872 mptcp_adj_rcvnxt(tp
, m
);
4877 * Following routines help with failure detection and failover of data
4878 * transfer from one subflow to another.
4881 mptcp_act_on_txfail(struct socket
*so
)
4883 struct tcpcb
*tp
= NULL
;
4884 struct inpcb
*inp
= sotoinpcb(so
);
4889 tp
= intotcpcb(inp
);
4893 if (so
->so_flags
& SOF_MP_TRYFAILOVER
) {
4897 so
->so_flags
|= SOF_MP_TRYFAILOVER
;
4898 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPFAILOVER
));
4902 * Support for MP_FAIL option
4905 mptcp_get_map_for_dsn(struct socket
*so
, u_int64_t dsn_fail
, u_int32_t
*tcp_seq
)
4907 struct mbuf
*m
= so
->so_snd
.sb_mb
;
4916 VERIFY(m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
4917 VERIFY(m
->m_flags
& M_PKTHDR
);
4918 dsn
= m
->m_pkthdr
.mp_dsn
;
4919 datalen
= m
->m_pkthdr
.mp_rlen
;
4920 if (MPTCP_SEQ_LEQ(dsn
, dsn_fail
) &&
4921 (MPTCP_SEQ_GEQ(dsn
+ datalen
, dsn_fail
))) {
4922 off
= dsn_fail
- dsn
;
4923 *tcp_seq
= m
->m_pkthdr
.mp_rseq
+ off
;
4924 mptcplog((LOG_DEBUG
, "MPTCP Sender: %s: %llu %llu \n",
4925 __func__
, dsn
, dsn_fail
),
4926 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
4934 * If there was no mbuf data and a fallback to TCP occurred, there's
4935 * not much else to do.
4938 mptcplog((LOG_ERR
, "MPTCP Sender: "
4939 "%s: %llu not found \n", __func__
, dsn_fail
),
4940 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
4945 * Support for sending contiguous MPTCP bytes in subflow
4946 * Also for preventing sending data with ACK in 3-way handshake
4949 mptcp_adj_sendlen(struct socket
*so
, int32_t off
, int32_t len
)
4951 u_int64_t mdss_dsn
= 0;
4952 u_int32_t mdss_subflow_seq
= 0;
4953 u_int16_t mdss_data_len
= 0;
4958 mptcp_output_getm_dsnmap64(so
, off
, (u_int32_t
)len
,
4959 &mdss_dsn
, &mdss_subflow_seq
, &mdss_data_len
);
4962 * Special case handling for Fast Join. We want to send data right
4963 * after ACK of the 3-way handshake, but not piggyback the data
4964 * with the 3rd ACK of the 3WHS. TMPF_FASTJOINBY2_SEND and
4965 * mdss_data_len control this.
4967 struct tcpcb
*tp
= NULL
;
4968 tp
= intotcpcb(sotoinpcb(so
));
4969 if ((tp
->t_mpflags
& TMPF_JOINED_FLOW
) &&
4970 (tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
4971 (!(tp
->t_mpflags
& TMPF_RECVD_JOIN
)) &&
4972 (tp
->t_mpflags
& TMPF_SENT_JOIN
) &&
4973 (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) &&
4974 (!(tp
->t_mpflags
& TMPF_FASTJOINBY2_SEND
))) {
4976 tp
->t_mpflags
|= TMPF_FASTJOINBY2_SEND
;
4979 if ((tp
->t_state
> TCPS_SYN_SENT
) &&
4980 (tp
->t_mpflags
& TMPF_TFO_REQUEST
)) {
4982 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
4984 return (mdss_data_len
);
4988 mptcp_sbspace(struct mptcb
*mpt
)
4994 MPT_LOCK_ASSERT_HELD(mpt
);
4995 MPTE_LOCK_ASSERT_HELD(mpt
->mpt_mpte
);
4997 sb
= &mpt
->mpt_mpte
->mpte_mppcb
->mpp_socket
->so_rcv
;
4998 rcvbuf
= sb
->sb_hiwat
;
4999 space
= ((int32_t)imin((rcvbuf
- sb
->sb_cc
),
5000 (sb
->sb_mbmax
- sb
->sb_mbcnt
)));
5003 /* XXX check if it's too small? */
5009 * Support Fallback to Regular TCP
5012 mptcp_notify_mpready(struct socket
*so
)
5014 struct tcpcb
*tp
= NULL
;
5019 tp
= intotcpcb(sotoinpcb(so
));
5024 DTRACE_MPTCP4(multipath__ready
, struct socket
*, so
,
5025 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
5026 struct tcpcb
*, tp
);
5028 if (!(tp
->t_mpflags
& TMPF_MPTCP_TRUE
))
5031 if (tp
->t_mpflags
& TMPF_MPTCP_READY
)
5034 tp
->t_mpflags
&= ~TMPF_TCP_FALLBACK
;
5035 tp
->t_mpflags
|= TMPF_MPTCP_READY
;
5037 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
5041 mptcp_notify_mpfail(struct socket
*so
)
5043 struct tcpcb
*tp
= NULL
;
5048 tp
= intotcpcb(sotoinpcb(so
));
5053 DTRACE_MPTCP4(multipath__failed
, struct socket
*, so
,
5054 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
,
5055 struct tcpcb
*, tp
);
5057 if (tp
->t_mpflags
& TMPF_TCP_FALLBACK
)
5060 tp
->t_mpflags
&= ~(TMPF_MPTCP_READY
|TMPF_MPTCP_TRUE
);
5061 tp
->t_mpflags
|= TMPF_TCP_FALLBACK
;
5063 soevent(so
, (SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MPSTATUS
));
5067 * Keepalive helper function
5070 mptcp_ok_to_keepalive(struct mptcb
*mp_tp
)
5073 VERIFY(mp_tp
!= NULL
);
5075 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
5083 * MPTCP t_maxseg adjustment function
5086 mptcp_adj_mss(struct tcpcb
*tp
, boolean_t mtudisc
)
5089 struct mptcb
*mp_tp
= tptomptp(tp
);
5091 #define MPTCP_COMPUTE_LEN { \
5092 mss_lower = sizeof (struct mptcp_dss_ack_opt); \
5094 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
5097 /* adjust to 32-bit boundary + EOL */ \
5099 MPT_UNLOCK(mp_tp); \
5105 * For the first subflow and subsequent subflows, adjust mss for
5106 * most common MPTCP option size, for case where tcp_mss is called
5107 * during option processing and MTU discovery.
5109 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
5110 (!(tp
->t_mpflags
& TMPF_JOINED_FLOW
))) {
5114 if ((tp
->t_mpflags
& TMPF_PREESTABLISHED
) &&
5115 (tp
->t_mpflags
& TMPF_SENT_JOIN
)) {
5119 if ((mtudisc
) && (tp
->t_mpflags
& TMPF_MPTCP_TRUE
)) {
5127 * Update the pid, upid, uuid of the subflow so, based on parent so
5130 mptcp_update_last_owner(struct mptsub
*mpts
, struct socket
*parent_mpso
)
5132 struct socket
*subflow_so
= mpts
->mpts_socket
;
5134 MPTS_LOCK_ASSERT_HELD(mpts
);
5136 socket_lock(subflow_so
, 0);
5137 if ((subflow_so
->last_pid
!= parent_mpso
->last_pid
) ||
5138 (subflow_so
->last_upid
!= parent_mpso
->last_upid
)) {
5139 subflow_so
->last_upid
= parent_mpso
->last_upid
;
5140 subflow_so
->last_pid
= parent_mpso
->last_pid
;
5141 uuid_copy(subflow_so
->last_uuid
, parent_mpso
->last_uuid
);
5143 so_update_policy(subflow_so
);
5144 socket_unlock(subflow_so
, 0);
5148 fill_mptcp_subflow(struct socket
*so
, mptcp_flow_t
*flow
, struct mptsub
*mpts
)
5152 tcp_getconninfo(so
, &flow
->flow_ci
);
5153 inp
= sotoinpcb(so
);
5155 if ((inp
->inp_vflag
& INP_IPV6
) != 0) {
5156 flow
->flow_src
.ss_family
= AF_INET6
;
5157 flow
->flow_dst
.ss_family
= AF_INET6
;
5158 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in6
);
5159 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in6
);
5160 SIN6(&flow
->flow_src
)->sin6_port
= inp
->in6p_lport
;
5161 SIN6(&flow
->flow_dst
)->sin6_port
= inp
->in6p_fport
;
5162 SIN6(&flow
->flow_src
)->sin6_addr
= inp
->in6p_laddr
;
5163 SIN6(&flow
->flow_dst
)->sin6_addr
= inp
->in6p_faddr
;
5166 if ((inp
->inp_vflag
& INP_IPV4
) != 0) {
5167 flow
->flow_src
.ss_family
= AF_INET
;
5168 flow
->flow_dst
.ss_family
= AF_INET
;
5169 flow
->flow_src
.ss_len
= sizeof(struct sockaddr_in
);
5170 flow
->flow_dst
.ss_len
= sizeof(struct sockaddr_in
);
5171 SIN(&flow
->flow_src
)->sin_port
= inp
->inp_lport
;
5172 SIN(&flow
->flow_dst
)->sin_port
= inp
->inp_fport
;
5173 SIN(&flow
->flow_src
)->sin_addr
= inp
->inp_laddr
;
5174 SIN(&flow
->flow_dst
)->sin_addr
= inp
->inp_faddr
;
5176 flow
->flow_len
= sizeof(*flow
);
5177 flow
->flow_tcpci_offset
= offsetof(mptcp_flow_t
, flow_ci
);
5178 flow
->flow_flags
= mpts
->mpts_flags
;
5179 flow
->flow_cid
= mpts
->mpts_connid
;
5180 flow
->flow_sndnxt
= mpts
->mpts_sndnxt
;
5181 flow
->flow_relseq
= mpts
->mpts_rel_seq
;
5182 flow
->flow_soerror
= mpts
->mpts_soerror
;
5183 flow
->flow_probecnt
= mpts
->mpts_probecnt
;
5184 flow
->flow_peerswitch
= mpts
->mpts_peerswitch
;
5188 mptcp_pcblist SYSCTL_HANDLER_ARGS
5190 #pragma unused(oidp, arg1, arg2)
5194 struct mptses
*mpte
;
5195 struct mptcb
*mp_tp
;
5196 struct mptsub
*mpts
;
5198 conninfo_mptcp_t mptcpci
;
5199 mptcp_flow_t
*flows
= NULL
;
5201 if (req
->newptr
!= USER_ADDR_NULL
)
5204 lck_mtx_lock(&mtcbinfo
.mppi_lock
);
5205 n
= mtcbinfo
.mppi_count
;
5206 if (req
->oldptr
== USER_ADDR_NULL
) {
5207 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
5208 req
->oldidx
= (n
+ n
/8) * sizeof(conninfo_mptcp_t
) +
5209 4 * (n
+ n
/8) * sizeof(mptcp_flow_t
);
5212 TAILQ_FOREACH(mpp
, &mtcbinfo
.mppi_pcbs
, mpp_entry
) {
5214 lck_mtx_lock(&mpp
->mpp_lock
);
5215 VERIFY(mpp
->mpp_flags
& MPP_ATTACHED
);
5216 if (mpp
->mpp_flags
& MPP_DEFUNCT
) {
5217 lck_mtx_unlock(&mpp
->mpp_lock
);
5220 mpte
= mptompte(mpp
);
5221 VERIFY(mpte
!= NULL
);
5222 mp_tp
= mpte
->mpte_mptcb
;
5223 VERIFY(mp_tp
!= NULL
);
5225 bzero(&mptcpci
, sizeof(mptcpci
));
5227 mptcpci
.mptcpci_state
= mp_tp
->mpt_state
;
5228 mptcpci
.mptcpci_flags
= mp_tp
->mpt_flags
;
5229 mptcpci
.mptcpci_ltoken
= mp_tp
->mpt_localtoken
;
5230 mptcpci
.mptcpci_rtoken
= mp_tp
->mpt_remotetoken
;
5231 mptcpci
.mptcpci_notsent_lowat
= mp_tp
->mpt_notsent_lowat
;
5232 mptcpci
.mptcpci_snduna
= mp_tp
->mpt_snduna
;
5233 mptcpci
.mptcpci_sndnxt
= mp_tp
->mpt_sndnxt
;
5234 mptcpci
.mptcpci_sndmax
= mp_tp
->mpt_sndmax
;
5235 mptcpci
.mptcpci_lidsn
= mp_tp
->mpt_local_idsn
;
5236 mptcpci
.mptcpci_sndwnd
= mp_tp
->mpt_sndwnd
;
5237 mptcpci
.mptcpci_rcvnxt
= mp_tp
->mpt_rcvnxt
;
5238 mptcpci
.mptcpci_rcvatmark
= mp_tp
->mpt_rcvatmark
;
5239 mptcpci
.mptcpci_ridsn
= mp_tp
->mpt_remote_idsn
;
5240 mptcpci
.mptcpci_rcvwnd
= mp_tp
->mpt_rcvwnd
;
5243 mptcpci
.mptcpci_nflows
= mpte
->mpte_numflows
;
5244 mptcpci
.mptcpci_mpte_flags
= mpte
->mpte_flags
;
5245 mptcpci
.mptcpci_mpte_addrid
= mpte
->mpte_addrid_last
;
5246 mptcpci
.mptcpci_flow_offset
=
5247 offsetof(conninfo_mptcp_t
, mptcpci_flows
);
5249 len
= sizeof(*flows
) * mpte
->mpte_numflows
;
5250 if (mpte
->mpte_numflows
!= 0) {
5251 flows
= _MALLOC(len
, M_TEMP
, M_WAITOK
| M_ZERO
);
5252 if (flows
== NULL
) {
5253 lck_mtx_unlock(&mpp
->mpp_lock
);
5256 mptcpci
.mptcpci_len
= sizeof(mptcpci
) +
5257 sizeof(*flows
) * (mptcpci
.mptcpci_nflows
- 1);
5258 error
= SYSCTL_OUT(req
, &mptcpci
,
5259 sizeof(mptcpci
) - sizeof(mptcp_flow_t
));
5261 mptcpci
.mptcpci_len
= sizeof(mptcpci
);
5262 error
= SYSCTL_OUT(req
, &mptcpci
, sizeof(mptcpci
));
5265 lck_mtx_unlock(&mpp
->mpp_lock
);
5266 FREE(flows
, M_TEMP
);
5270 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
5272 so
= mpts
->mpts_socket
;
5274 fill_mptcp_subflow(so
, &flows
[f
], mpts
);
5275 socket_unlock(so
, 0);
5279 lck_mtx_unlock(&mpp
->mpp_lock
);
5281 error
= SYSCTL_OUT(req
, flows
, len
);
5282 FREE(flows
, M_TEMP
);
5287 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
5292 SYSCTL_PROC(_net_inet_mptcp
, OID_AUTO
, pcblist
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
5293 0, 0, mptcp_pcblist
, "S,conninfo_mptcp_t",
5294 "List of active MPTCP connections");
5297 * Check the health of the other subflows and do an mptcp_output if
5298 * there is no other active or functional subflow at the time of
5299 * call of this function.
5302 mptcp_output_needed(struct mptses
*mpte
, struct mptsub
*to_mpts
)
5304 struct mptsub
*from_mpts
= NULL
;
5306 MPTE_LOCK_ASSERT_HELD(mpte
);
5308 MPTS_UNLOCK(to_mpts
);
5310 from_mpts
= mpte
->mpte_active_sub
;
5312 if (from_mpts
== NULL
)
5315 MPTS_LOCK(from_mpts
);
5317 if ((from_mpts
->mpts_flags
& MPTSF_DISCONNECTED
) ||
5318 (from_mpts
->mpts_flags
& MPTSF_DISCONNECTING
)) {
5319 MPTS_UNLOCK(from_mpts
);
5323 MPTS_UNLOCK(from_mpts
);
5333 * Set notsent lowat mark on the MPTCB
5336 mptcp_set_notsent_lowat(struct mptses
*mpte
, int optval
)
5338 struct mptcb
*mp_tp
= NULL
;
5341 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5342 mp_tp
= mpte
->mpte_mptcb
;
5345 mp_tp
->mpt_notsent_lowat
= optval
;
5353 mptcp_get_notsent_lowat(struct mptses
*mpte
)
5355 struct mptcb
*mp_tp
= NULL
;
5357 if (mpte
->mpte_mppcb
->mpp_flags
& MPP_ATTACHED
)
5358 mp_tp
= mpte
->mpte_mptcb
;
5361 return mp_tp
->mpt_notsent_lowat
;
5367 mptcp_notsent_lowat_check(struct socket
*so
) {
5368 struct mptses
*mpte
;
5370 struct mptcb
*mp_tp
;
5371 struct mptsub
*mpts
;
5375 mpp
= sotomppcb(so
);
5376 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
5380 mpte
= mptompte(mpp
);
5381 mp_tp
= mpte
->mpte_mptcb
;
5384 notsent
= so
->so_snd
.sb_cc
;
5386 if ((notsent
== 0) ||
5387 ((notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)) <=
5388 mp_tp
->mpt_notsent_lowat
)) {
5389 mptcplog((LOG_DEBUG
, "MPTCP Sender: "
5390 "lowat %d notsent %d actual %d \n",
5391 mp_tp
->mpt_notsent_lowat
, notsent
,
5392 notsent
- (mp_tp
->mpt_sndnxt
- mp_tp
->mpt_snduna
)),
5393 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
5399 /* When Nagle's algorithm is not disabled, it is better
5400 * to wakeup the client even before there is atleast one
5401 * maxseg of data to write.
5403 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
5406 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
5407 struct socket
*subf_so
= mpts
->mpts_socket
;
5408 socket_lock(subf_so
, 0);
5409 struct tcpcb
*tp
= intotcpcb(sotoinpcb(subf_so
));
5411 notsent
= so
->so_snd
.sb_cc
-
5412 (tp
->snd_nxt
- tp
->snd_una
);
5414 if ((tp
->t_flags
& TF_NODELAY
) == 0 &&
5415 notsent
> 0 && (notsent
<= (int)tp
->t_maxseg
)) {
5418 mptcplog((LOG_DEBUG
, "MPTCP Sender: lowat %d notsent %d"
5419 " nodelay false \n",
5420 mp_tp
->mpt_notsent_lowat
, notsent
),
5421 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_VERBOSE
);
5422 socket_unlock(subf_so
, 0);
5432 mptcp_get_rtt_measurement(struct mptsub
*mpts
, struct mptses
*mpte
)
5434 MPTE_LOCK_ASSERT_HELD(mpte
);
5435 MPTS_LOCK_ASSERT_HELD(mpts
);
5437 struct socket
*subflow_so
= mpts
->mpts_socket
;
5438 socket_lock(subflow_so
, 0);
5439 mpts
->mpts_srtt
= (intotcpcb(sotoinpcb(subflow_so
)))->t_srtt
;
5440 mpts
->mpts_rxtcur
= (intotcpcb(sotoinpcb(subflow_so
)))->t_rxtcur
;
5441 socket_unlock(subflow_so
, 0);
5444 /* Using Symptoms Advisory to detect poor WiFi or poor Cell */
5445 static kern_ctl_ref mptcp_kern_ctrl_ref
= NULL
;
5446 static uint32_t mptcp_kern_skt_inuse
= 0;
5447 symptoms_advisory_t mptcp_advisory
;
5450 mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref
, struct sockaddr_ctl
*sac
,
5453 #pragma unused(kctlref, sac, unitinfo)
5455 * We don't need to do anything here. But we can atleast ensure
5456 * only one user opens the MPTCP_KERN_CTL_NAME control socket.
5458 if (OSCompareAndSwap(0, 1, &mptcp_kern_skt_inuse
))
5465 mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref
, u_int32_t kcunit
,
5468 #pragma unused(kctlref, kcunit, unitinfo)
5469 if (OSCompareAndSwap(1, 0, &mptcp_kern_skt_inuse
)) {
5470 /* TBD needs to be locked if the size grows more than an int */
5471 bzero(&mptcp_advisory
, sizeof(mptcp_advisory
));
5480 mptcp_symptoms_ctl_send(kern_ctl_ref kctlref
, u_int32_t kcunit
, void *unitinfo
,
5481 mbuf_t m
, int flags
)
5483 #pragma unused(kctlref, kcunit, unitinfo, flags)
5484 symptoms_advisory_t
*sa
= NULL
;
5486 if (mbuf_pkthdr_len(m
) < sizeof(*sa
)) {
5491 if (mbuf_len(m
) >= sizeof(*sa
))
5496 if (mptcp_advisory
.sa_nwk_status_int
!= sa
->sa_nwk_status_int
) {
5498 * we could use this notification to notify all mptcp pcbs
5499 * of the change in network status. But its difficult to
5500 * define if sending REMOVE_ADDR or MP_PRIO is appropriate
5501 * given that these are only soft indicators of the network
5502 * state. Leaving this as TBD for now.
5506 if (sa
->sa_nwk_status
!= SYMPTOMS_ADVISORY_NOCOMMENT
) {
5507 mptcplog((LOG_DEBUG
, "MPTCP Events: %s wifi %d,%d cell %d,%d\n",
5508 __func__
, sa
->sa_wifi_status
, mptcp_advisory
.sa_wifi_status
,
5509 sa
->sa_cell_status
, mptcp_advisory
.sa_cell_status
),
5510 MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
,
5513 if ((sa
->sa_wifi_status
&
5514 (SYMPTOMS_ADVISORY_WIFI_BAD
| SYMPTOMS_ADVISORY_WIFI_OK
)) !=
5515 (SYMPTOMS_ADVISORY_WIFI_BAD
| SYMPTOMS_ADVISORY_WIFI_OK
)) {
5516 mptcp_advisory
.sa_wifi_status
= sa
->sa_wifi_status
;
5519 if ((sa
->sa_cell_status
&
5520 (SYMPTOMS_ADVISORY_CELL_BAD
| SYMPTOMS_ADVISORY_CELL_OK
)) !=
5521 (SYMPTOMS_ADVISORY_CELL_BAD
| SYMPTOMS_ADVISORY_CELL_OK
)) {
5522 mptcp_advisory
.sa_cell_status
= sa
->sa_cell_status
;
5525 mptcplog((LOG_DEBUG
, "MPTCP Events: %s NOCOMMENT "
5526 "wifi %d cell %d\n", __func__
,
5527 mptcp_advisory
.sa_wifi_status
,
5528 mptcp_advisory
.sa_cell_status
),
5529 MPTCP_SOCKET_DBG
| MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
5535 mptcp_control_register(void)
5537 /* Set up the advisory control socket */
5538 struct kern_ctl_reg mptcp_kern_ctl
;
5540 bzero(&mptcp_kern_ctl
, sizeof(mptcp_kern_ctl
));
5541 strlcpy(mptcp_kern_ctl
.ctl_name
, MPTCP_KERN_CTL_NAME
,
5542 sizeof(mptcp_kern_ctl
.ctl_name
));
5543 mptcp_kern_ctl
.ctl_connect
= mptcp_symptoms_ctl_connect
;
5544 mptcp_kern_ctl
.ctl_disconnect
= mptcp_symptoms_ctl_disconnect
;
5545 mptcp_kern_ctl
.ctl_send
= mptcp_symptoms_ctl_send
;
5546 mptcp_kern_ctl
.ctl_flags
= CTL_FLAG_PRIVILEGED
;
5548 (void)ctl_register(&mptcp_kern_ctl
, &mptcp_kern_ctrl_ref
);
5552 mptcp_is_wifi_unusable(void)
5554 /* a false return val indicates there is no info or wifi is ok */
5555 return (mptcp_advisory
.sa_wifi_status
& SYMPTOMS_ADVISORY_WIFI_BAD
);
5559 mptcp_is_cell_unusable(void)
5561 /* a false return val indicates there is no info or cell is ok */
5562 return (mptcp_advisory
.sa_cell_status
& SYMPTOMS_ADVISORY_CELL_BAD
);
5566 mptcp_use_symptoms_hints(struct mptsub
* best
, struct mptsub
*second_best
)
5568 struct mptsub
*cellsub
= NULL
;
5569 struct mptsub
*wifisub
= NULL
;
5570 struct mptsub
*wiredsub
= NULL
;
5572 VERIFY ((best
!= NULL
) && (second_best
!= NULL
));
5574 if (!mptcp_use_symptomsd
)
5577 if (!mptcp_kern_skt_inuse
)
5581 * There could be devices with more than one wifi interface or
5582 * more than one wired or cell interfaces.
5583 * TBD: SymptomsD is unavailable on such platforms as of now.
5584 * Try to prefer best when possible in general.
5585 * Also, SymptomsD sends notifications about wifi only when it
5588 if (best
->mpts_linktype
& MPTSL_WIFI
)
5590 else if (best
->mpts_linktype
& MPTSL_CELL
)
5592 else if (best
->mpts_linktype
& MPTSL_WIRED
)
5596 * On platforms with wired paths, don't use hints about wifi or cell.
5597 * Currently, SymptomsD is not available on platforms with wired paths.
5602 if ((wifisub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_WIFI
))
5603 wifisub
= second_best
;
5605 if ((cellsub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_CELL
))
5606 cellsub
= second_best
;
5608 if ((wiredsub
== NULL
) && (second_best
->mpts_linktype
& MPTSL_WIRED
))
5609 wiredsub
= second_best
;
5611 if ((wifisub
== best
) && mptcp_is_wifi_unusable()) {
5612 tcpstat
.tcps_mp_sel_symtomsd
++;
5613 if (mptcp_is_cell_unusable()) {
5614 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5615 " suggests both Wifi and Cell are bad. Wired %s.",
5616 (wiredsub
== NULL
) ? "none" : "present"),
5617 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5620 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5621 " suggests Wifi bad, Cell good. Wired %s.",
5622 (wiredsub
== NULL
) ? "none" : "present"),
5623 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5624 return ((wiredsub
!= NULL
) ? wiredsub
: cellsub
);
5628 if ((cellsub
== best
) && (mptcp_is_cell_unusable())) {
5629 tcpstat
.tcps_mp_sel_symtomsd
++;
5630 if (mptcp_is_wifi_unusable()) {
5631 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5632 " suggests both Cell and Wifi are bad. Wired %s.",
5633 (wiredsub
== NULL
) ? "none" : "present"),
5634 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5637 mptcplog((LOG_DEBUG
, "MPTCP Sender: SymptomsD hint"
5638 " suggests Cell bad, Wifi good. Wired %s.",
5639 (wiredsub
== NULL
) ? "none" : "present"),
5640 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);
5641 return ((wiredsub
!= NULL
) ? wiredsub
: wifisub
);
5645 /* little is known about the state of the network or wifi is good */
5649 /* If TFO data is succesfully acked, it must be dropped from the mptcp so */
5651 mptcp_drop_tfo_data(struct mptses
*mpte
, struct mptsub
*mpts
, int *wakeup
)
5653 struct socket
*mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
5654 struct socket
*so
= mpts
->mpts_socket
;
5655 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
5656 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
5658 /* If data was sent with SYN, rewind state */
5659 if (tp
->t_tfo_stats
& TFO_S_SYN_DATA_ACKED
) {
5660 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
5661 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
5663 u_int64_t mp_droplen
= mpts
->mpts_sndnxt
- mp_tp
->mpt_snduna
;
5664 unsigned int tcp_droplen
= tp
->snd_una
- tp
->iss
- 1;
5665 VERIFY(mp_droplen
<= (UINT_MAX
));
5666 VERIFY(mp_droplen
>= tcp_droplen
);
5668 if (mp_droplen
> tcp_droplen
) {
5669 /* handle partial TCP ack */
5670 mp_so
->so_flags1
|= SOF1_TFO_REWIND
;
5671 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
+ (mp_droplen
- tcp_droplen
);
5672 mpts
->mpts_sndnxt
= mp_tp
->mpt_sndnxt
;
5673 mp_droplen
= tcp_droplen
;
5675 /* all data on SYN was acked */
5676 mpts
->mpts_rel_seq
= 1;
5677 mp_tp
->mpt_sndnxt
= mp_tp
->mpt_snduna
;
5678 mpts
->mpts_sndnxt
= mp_tp
->mpt_snduna
;
5680 mp_tp
->mpt_sndmax
-= tcp_droplen
;
5683 if (mp_droplen
!= 0) {
5684 VERIFY(mp_so
->so_snd
.sb_mb
!= NULL
);
5685 sbdrop(&mp_so
->so_snd
, (int)mp_droplen
);
5689 mptcplog((LOG_ERR
, "MPTCP Sender: %s mp_so 0x%llx cid %d "
5690 "TFO tcp len %d mptcp len %d\n", __func__
,
5691 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mpts
->mpts_connid
,
5692 tcp_droplen
, mp_droplen
),
5693 MPTCP_SENDER_DBG
, MPTCP_LOGLVL_LOG
);