/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>

#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>

#include <dev/random/randomdev.h>
/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain.  The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain.  It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP).  Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB.  All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones.  The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets.  Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure.  Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow.  This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock.  Incoming data on a subflow also ends up taking this single lock.  To
 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
 * of the MPTCP-socket.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector.  This process will take place once all
 * of the subflows have been destroyed.
 */
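/*
 * For orientation, a minimal user-space sketch of the lifecycle described
 * above (illustrative only, not compiled as part of this file; it assumes
 * the Darwin connectx(2) interface and sa_endpoints_t from <sys/socket.h>):
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	sa_endpoints_t eps = {
 *		.sae_dstaddr = (struct sockaddr *)&dst,
 *		.sae_dstaddrlen = sizeof(dst),
 *	};
 *	sae_connid_t cid;
 *	connectx(fd, &eps, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, &cid);
 *
 * The connectx() call reaches the MPTCP layer, which creates and connects
 * subflow sockets via mptcp_subflow_add() below.
 */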
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_rupcall(struct socket *, void *, int);
static void mptcp_subflow_input(struct mptses *, struct mptsub *);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE		= 1,	/* delete this subflow */
	MPTS_EVRET_OK			= 2,	/* OK */
	MPTS_EVRET_CONNECT_PENDING	= 3,	/* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK	= 4,	/* abort all but preferred */
} ev_ret_t;
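/*
 * Illustration of the contract above, using a hypothetical handler
 * (example_ev and subflow_is_dead are placeholders, not part of this file):
 *
 *	static ev_ret_t
 *	example_ev(struct mptses *mpte, struct mptsub *mpts,
 *	    uint64_t *p_mpsofilt_hint, uint64_t event)
 *	{
 *		if (subflow_is_dead)
 *			return MPTS_EVRET_DELETE; // < MPTS_EVRET_OK: stop this round
 *		return MPTS_EVRET_OK;
 *	}
 */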
static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *, uint64_t *);
static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);

static const char *mptcp_evret2str(ev_ret_t);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);
static unsigned int mptsub_zone_size;		/* size of mptsub */
static struct zone *mptsub_zone;		/* zone for mptsub */

static unsigned int mptopt_zone_size;		/* size of mptopt */
static struct zone *mptopt_zone;		/* zone for mptopt */

static unsigned int mpt_subauth_entry_size;	/* size of subf auth entry */
static struct zone *mpt_subauth_zone;		/* zone of subf auth entry */

struct mppcbinfo mtcbinfo;
#define MPTCP_SUBFLOW_WRITELEN	(8 * 1024)	/* bytes to write each time */
#define MPTCP_SUBFLOW_READLEN	(8 * 1024)	/* bytes to read each time */

SYSCTL_DECL(_net_inet);
SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");
uint32_t mptcp_dbg_area = 31;		/* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");

static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;

static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;

static uint8_t mptcp_create_subflows_scheduled;
typedef struct mptcp_subflow_event_entry {
	uint64_t	sofilt_hint_mask;
	ev_ret_t	(*sofilt_hint_ev_hdlr)(
	    struct mptses *mpte,
	    struct mptsub *mpts,
	    uint64_t *p_mpsofilt_hint,
	    uint64_t event);
} mptsub_ev_entry_t;

static uint8_t mptcp_cellicon_is_set;
static uint32_t mptcp_last_cellicon_set;
#define MPTCP_CELLICON_TOGGLE_RATE	(5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};
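/*
 * The event dispatcher walks this table in order, so entries near the top
 * (e.g. SO_FILT_HINT_MPCANTRCVMORE) win when several events are pending.
 * Roughly, as a sketch of mptcp_subflow_events() (declared above; its body
 * is not part of this excerpt):
 *
 *	for (i = 0; i < sizeof(mpsub_ev_entry_tbl) / sizeof(mpsub_ev_entry_tbl[0]); i++) {
 *		if (*events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) {
 *			ret = mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts,
 *			    p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
 *		}
 *	}
 */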
os_log_t mptcp_log_handle;
/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
	struct ip6protosw *prp6;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;
	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
	if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptsub_zone_size = sizeof(struct mptsub);
	if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub")) == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	mptopt_zone_size = sizeof(struct mptopt);
	if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt")) == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	mpt_subauth_entry_size = sizeof(struct mptcp_subf_auth_entry);
	if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
		/* NOTREACHED */
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_last_cellicon_set = tcp_now;

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}
int
mptcp_get_statsindex(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int i, index = -1;

	if (ifp == NULL) {
		mptcplog((LOG_ERR, "%s: no ifp on subflow\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return -1;
	}

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0) {
				index = i;
			}
			continue;
		}

		if (stats[i].ifindex == ifp->if_index) {
			index = i;
			return index;
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifp->if_index;
		if (stats[index].is_expensive == 0) {
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
		}
	}

	return index;
}
void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcp_get_statsindex(mpte->mpte_itfstats, mpts);

	if (index != -1) {
		mpte->mpte_itfstats[index].switches++;
	}
}
/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
int
mptcp_sescreate(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof(*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	if (mptcp_alternate_port) {
		mpte->mpte_alternate_port = htons(mptcp_alternate_port);
	}

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof(*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return 0;
}
static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		mptcplog((LOG_ERR, "%s cellbytes is %lld\n", __func__, mycellbytes),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}
static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_aggregate_success++;
			}
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
		tcpstat.tcps_mptcp_back_to_wifi++;
	}

	if (mpte->mpte_triggered_cell) {
		tcpstat.tcps_mptcp_triggered_cell++;
	}
}
/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	mp_tp = mpte->mpte_mptcb;
	VERIFY(mp_tp != NULL);

	mptcpstats_session_wrapup(mpte);

	mptcp_unset_cellicon();

	/*
	 * MPTCP Multipath PCB Extension section
	 */
	mptcp_flush_sopts(mpte);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}

	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	/*
	 * MPTCP Protocol Control Block section
	 */
	DTRACE_MPTCP2(session__destroy, struct mptses *, mpte,
	    struct mptcb *, mp_tp);
}
static boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
	       mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
	       !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}
static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len, struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
					 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
					 0x00, 0x00, 0x00, 0x00},
	};
	char buf[MAX_IPv6_STR_LEN];
	char *ptrv4 = (char *)addrv4;
	char *ptr = (char *)addr;

	if (IN_ZERONET(ntohl(addrv4->s_addr)) || // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(ntohl(addrv4->s_addr)) || // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(ntohl(addrv4->s_addr)) || // 169.254.0.0/16 Link Local
	    IN_DS_LITE(ntohl(addrv4->s_addr)) || // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(ntohl(addrv4->s_addr)) || // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) { // 255.255.255.255/32 Limited Broadcast
		return -1;
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(ntohl(addrv4->s_addr)) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
			return -1;
		}
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	os_log_info(mptcp_log_handle, "%s: nat64prefix-len %u synthesized %s\n",
	    __func__, len,
	    inet_ntop(AF_INET6, (void *)addr, buf, sizeof(buf)));

	return 0;
}
static void
mptcp_trigger_cell_bringup(struct mptses *mpte)
{
	struct socket *mp_so = mptetoso(mpte);

	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		uuid_string_t uuidstr;
		int err;

		err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
		    TRUE);

		if (err == 0) {
			mpte->mpte_triggered_cell = 1;
		}

		uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
		os_log_info(mptcp_log_handle, "%s asked irat to bringup cell for uuid %s, err %d\n",
		    __func__, uuidstr, err);
	} else {
		os_log_info(mptcp_log_handle, "%s UUID is already null\n", __func__);
	}
}
static void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	boolean_t cellular_viable = FALSE;
	boolean_t want_cellular = TRUE;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		return;
	}

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		struct mpt_itf_info *info;
		struct mptsub *mpts;
		struct ifnet *ifp;
		uint32_t ifindex;
		int found = 0;

		info = &mpte->mpte_itfinfo[i];

		if (info->no_mptcp_support) {
			continue;
		}

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			cellular_viable = TRUE;
		}

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

			if (subifp == NULL) {
				continue;
			}

			/*
			 * In Handover mode, only create cell subflow if
			 * 1. Wi-Fi Assist is active
			 * 2. Symptoms marked WiFi as weak
			 * 3. We are experiencing RTOs or we are not sending data.
			 *
			 * This covers the scenario, where:
			 * 1. We send and get retransmission timeouts (thus,
			 *    we confirmed that WiFi is indeed bad).
			 * 2. We are not sending and the server tries to send.
			 *    Establishing a cell-subflow gives the server a
			 *    chance to send us some data over cell if WiFi
			 *    is dead. We establish the subflow with the
			 *    backup-bit set, so the server is not allowed to
			 *    send on this subflow as long as WiFi is providing
			 *    good performance.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(subifp) &&
			    !(mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) &&
			    (mptcp_is_wifi_unusable(mpte) == 0 ||
			    (sototcpcb(mpts->mpts_socket)->t_rxtshift < mptcp_fail_thresh * 2 &&
			    ((mpte->mpte_flags & MPTE_FIRSTPARTY) || mptetoso(mpte)->so_snd.sb_cc)))) {
				os_log_debug(mptcp_log_handle, "%s handover, wifi state %d rxt %u first-party %u sb_cc %u ifindex %u this %u\n",
				    __func__, mptcp_is_wifi_unusable(mpte),
				    sototcpcb(mpts->mpts_socket)->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
				    mptetoso(mpte)->so_snd.sb_cc,
				    ifindex, subifp->if_index);

				/* We found a proper subflow on WiFi - no need for cell */
				want_cellular = FALSE;
				break;
			}

			os_log_debug(mptcp_log_handle, "%s svc %u cell %u flags %#x unusable %d rtx %u first %u sbcc %u\n",
			    __func__, mpte->mpte_svctype, IFNET_IS_CELLULAR(subifp), mpts->mpts_flags,
			    mptcp_is_wifi_unusable(mpte), sototcpcb(mpts->mpts_socket)->t_rxtshift,
			    !!(mpte->mpte_flags & MPTE_FIRSTPARTY), mptetoso(mpte)->so_snd.sb_cc);

			if (subifp->if_index == ifindex &&
			    !(mpts->mpts_socket->so_state & SS_ISDISCONNECTED) &&
			    sototcpcb(mpts->mpts_socket)->t_state != TCPS_CLOSED) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = 1;
				break;
			}
		}

		if (!found && !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			return;
		}

		if (!found) {
			struct sockaddr *dst = &mpte->mpte_dst;
			struct sockaddr_in6 nat64pre;

			if (mpte->mpte_dst.sa_family == AF_INET &&
			    !info->has_v4_conn && info->has_nat64_conn) {
				struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
				int error, j;

				bzero(&nat64pre, sizeof(struct sockaddr_in6));

				error = ifnet_get_nat64prefix(ifp, nat64prefixes);
				if (error) {
					os_log_error(mptcp_log_handle, "%s: no NAT64-prefix on itf %s, error %d\n",
					    __func__, ifp->if_name, error);
					continue;
				}

				for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
					if (nat64prefixes[j].prefix_len != 0) {
						break;
					}
				}

				VERIFY(j < NAT64_MAX_NUM_PREFIXES);

				error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
				    nat64prefixes[j].prefix_len,
				    &mpte->__mpte_dst_v4.sin_addr);
				if (error != 0) {
					os_log_info(mptcp_log_handle, "%s: cannot synthesize this addr\n",
					    __func__);
					continue;
				}

				memcpy(&nat64pre.sin6_addr,
				    &nat64prefixes[j].ipv6_prefix,
				    sizeof(nat64pre.sin6_addr));
				nat64pre.sin6_len = sizeof(struct sockaddr_in6);
				nat64pre.sin6_family = AF_INET6;
				nat64pre.sin6_port = mpte->__mpte_dst_v6.sin6_port;
				nat64pre.sin6_flowinfo = 0;
				nat64pre.sin6_scope_id = 0;

				dst = (struct sockaddr *)&nat64pre;
			}

			/* Initial subflow started on a NAT64'd address? */
			if (mpte->mpte_dst.sa_family == AF_INET6 &&
			    mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
				dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
			}

			if (dst->sa_family == AF_INET && !info->has_v4_conn) {
				continue;
			}
			if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
				continue;
			}

			mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
		}
	}

	if (!cellular_viable && want_cellular) {
		/* Trigger Cell Bringup */
		mptcp_trigger_cell_bringup(mpte);
	}
}
/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
static void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;
	int found_working_subflow = 0, removed_some = 0;
	int wifi_unusable = mptcp_is_wifi_unusable(mpte);

	if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER) {
		return;
	}

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED) {
			continue;
		}

		/* Is this subflow in good condition? */
		if (tp->t_rxtshift == 0) {
			found_working_subflow = 1;
		}

		/* Or WiFi is fine */
		if (!wifi_unusable) {
			found_working_subflow = 1;
		}
	}

	/*
	 * Couldn't find a working subflow, let's not remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		/* Only remove cellular subflows */
		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
		removed_some = 1;
	}

	if (removed_some) {
		mptcp_unset_cellicon();
	}
}
static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}
void *arg
)
1083 * Start with clearing, because we might be processing connections
1084 * while a new event comes in.
1086 if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled
)) {
1087 mptcplog((LOG_ERR
, "%s: bit was already cleared!\n", __func__
),
1088 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1091 /* Iterate over all MPTCP connections */
1093 lck_mtx_lock(&mtcbinfo
.mppi_lock
);
1095 TAILQ_FOREACH(mpp
, &mtcbinfo
.mppi_pcbs
, mpp_entry
) {
1096 struct mptses
*mpte
;
1097 struct socket
*mp_so
;
1099 if (!(mpp
->mpp_flags
& MPP_CREATE_SUBFLOWS
)) {
1105 mpp
->mpp_flags
&= ~MPP_CREATE_SUBFLOWS
;
1107 mpte
= mpp
->mpp_pcbe
;
1108 mp_so
= mpp
->mpp_socket
;
1110 VERIFY(mp_so
->so_usecount
> 0);
1112 mptcp_check_subflows_and_add(mpte
);
1113 mptcp_remove_subflows(mpte
);
1115 mp_so
->so_usecount
--; /* See mptcp_sched_create_subflows */
1119 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
/*
 * We need this because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
 * So, we would deadlock there as we already hold the NECP-lock.
 *
 * So, let's schedule this separately. It also gives NECP the chance to make
 * progress, without having to wait for MPTCP to finish its subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		mptcplog((LOG_DEBUG, "%s: not a good time for subflows, state %u flags %#x",
		    __func__, mp_tp->mpt_state, mp_tp->mpt_flags),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		return;
	}

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz / 10);
}
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
	    zalloc_noblock(mptopt_zone);
	if (mpo != NULL) {
		bzero(mpo, mptopt_zone_size);
	}

	return mpo;
}
/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}
/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name) {
			break;
		}
	}
	return mpo;
}
/*
 * Allocate an MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts = zalloc(mptsub_zone);

	if (mpts == NULL) {
		return NULL;
	}

	bzero(mpts, mptsub_zone_size);
	return mpts;
}
/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released.  This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0) {
		return;
	}

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}
static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP-connection.
	 * Locking,... happens now at the MPTCP-layer
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket.  From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;

	mptcp_subflow_addref(mpts);	/* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts);	/* for subflow socket */
}
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused uint32_t interface_index,
    uint32_t necp_flags, bool *viable)
{
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
		return;
	}

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (so->so_usecount == 0) {
		return;
	}

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (so->so_usecount == 0) {
		goto out;
	}

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	os_log_debug(mptcp_log_handle, "%s Subflow on itf %u became non-viable, power %u",
	    __func__, mpts->mpts_ifscope, low_power);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER && viable != NULL) {
		*viable = 1;
	}

out:
	socket_unlock(so, 1);
}
/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct socket **so)
{
	lck_mtx_t *subflow_mtx;
	struct mptopt smpo, *mpo, *tmpo;
	struct proc *p;
	struct socket *mp_so;
	int error;

	*so = NULL;
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	mp_so = mptetoso(mpte);

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		return ESRCH;
	}

	/*
	 * Create the subflow socket (multipath subflow, non-blocking.)
	 *
	 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
	 * socket; it will be cleared when the socket is peeled off or closed.
	 * It also indicates to the underlying TCP to handle MPTCP options.
	 * A multipath subflow socket implies SS_NOFDREF state.
	 */

	/*
	 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
	 * the ipi-lock. We cannot hold the socket-lock at that point.
	 */
	mpte_unlock(mpte);
	error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
	    SOCF_ASYNC, PROC_NULL);
	mpte_lock(mpte);
	if (error) {
		mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx unable to create subflow socket error %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		proc_rele(p);

		mptcp_subflow_free(mpts);
		return error;
	}

	/*
	 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
	 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
	 * Which is why we also need to get the lock with pr_getlock, as after
	 * setting the flag, socket_unlock will work on the MPTCP-level lock.
	 */
	subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
	lck_mtx_lock(subflow_mtx);

	/*
	 * Must be the first thing we do, to make sure all pointers for this
	 * subflow are set.
	 */
	mptcp_subflow_attach(mpte, mpts, *so);

	/*
	 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file descriptor associated by
	 * default.
	 */
	(*so)->so_state |= SS_NOFDREF;

	lck_mtx_unlock(subflow_mtx);

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	/* Inherit preconnect and TFO data flags */
	if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
		(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
	}
	if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
	}

	/* Inherit uuid and create the related flow. */
	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;

		/*
		 * A note on the unlock: With MPTCP, we do multiple times a
		 * necp_client_register_socket_flow. This is problematic,
		 * because now the lock-ordering guarantee (first necp-locks,
		 * then socket-locks) is no more respected. So, we need to
		 * unlock here.
		 */
		mpte_unlock(mpte);
		error = necp_client_register_socket_flow(mp_so->last_pid,
		    mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
		mpte_lock(mpte);

		if (error) {
			goto out_err;
		}

		/* Possible state-change during the unlock above */
		if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
		    (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
			error = EINVAL;
			goto out_err;
		}

		uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
	} else {
		mptcplog((LOG_NOTICE, "%s: uuid is not set!\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
	}

	/* inherit the other socket options */
	bzero(&smpo, sizeof(smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* disable SIGPIPE */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	/* find out if the subflow's source address goes away */
	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	/* enable keepalive */
	smpo.mpo_name = SO_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	smpo.mpo_level = IPPROTO_TCP;
	smpo.mpo_intval = mptcp_subflow_keeptime;
	smpo.mpo_name = TCP_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
		/*
		 * On secondary subflows we might need to set the cell-fallback
		 * flag (see conditions in mptcp_subflow_sosetopt).
		 */
		smpo.mpo_level = SOL_SOCKET;
		smpo.mpo_name = SO_MARK_CELLFALLBACK;
		smpo.mpo_intval = 1;
		if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
			goto out_err;
		}
	}

	/* replay setsockopt(2) on the subflow sockets for eligible options */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
			continue;
		}

		/*
		 * Skip those that are handled internally; these options
		 * should not have been recorded and marked with the
		 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
		 */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE)) {
			continue;
		}

		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
			mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx"
			    " sopt %s val %d interim record removed\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
			    mpo->mpo_intval),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
		}
	}

	/*
	 * We need to receive everything that the subflow socket has,
	 * so use a customized socket receive function.  We will undo
	 * this when the socket is peeled off or closed.
	 */
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
	    int, dom, int, error);

	return 0;

out_err:
	mptcp_subflow_abort(mpts, error);

	proc_rele(p);

	mptcplog((LOG_ERR, "%s: subflow socreate failed with error %d\n",
	    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	return error;
}
/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED) {
		return;
	}

	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt == 0) {
		soclose_locked(so);
	} else {
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}
}
/*
 * Connect an MPTCP subflow socket.
 *
 * Note that in the pending connect case, the subflow socket may have been
 * bound to an interface and/or a source IP address which may no longer be
 * around by the time this routine is called; in that case the connect attempt
 * will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
	char dbuf[MAX_IPv6_STR_LEN];
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	struct sockaddr *dst;
	struct proc *p;
	int af, error, dport;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	af = mpts->mpts_dst.sa_family;
	dst = &mpts->mpts_dst;

	VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
	VERIFY(mpts->mpts_socket != NULL);
	VERIFY(af == AF_INET || af == AF_INET6);

	if (af == AF_INET) {
		inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN(dst)->sin_port);
	} else {
		inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN6(dst)->sin6_port);
	}

	os_log_info(mptcp_log_handle,
	    "%s: ifindex %u dst %s:%d pended %u\n", __func__, mpts->mpts_ifscope,
	    dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		return ESRCH;
	}

	mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

	mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);

	/* connect the subflow socket */
	error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
	    p, mpts->mpts_ifscope,
	    mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);

	mpts->mpts_iss = sototcpcb(so)->iss;

	/* See tcp_connect_complete */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
	    (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
	}

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
	    struct mptsub *, mpts, int, error);
	if (error) {
		mptcplog((LOG_ERR, "%s: connectx failed with error %d ifscope %u\n",
		    __func__, error, mpts->mpts_ifscope),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}

	return error;
}
/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	int flags, error = 0;
	struct proc *p = current_proc();
	struct mbuf *m, **mp = mp0;
	boolean_t proc_held = FALSE;

	mpte_lock_assert_held(tptomptp(sototcpcb(so))->mpt_mpte);
	VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);
		/* NOTREACHED */
	}
#endif
	/*
	 * We return all that is there in the subflow's socket receive buffer
	 * to the MPTCP layer, so we require that the caller passes in the
	 * expected parameters.
	 */
	if (mp == NULL || controlp != NULL) {
		return EINVAL;
	}

	*mp = NULL;
	if (psa != NULL) {
		*psa = NULL;
	}
	if (flagsp != NULL) {
		flags = *flagsp & ~MSG_EOR;
	} else {
		flags = 0;
	}

	if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
		return EOPNOTSUPP;
	}

	flags |= (MSG_DONTWAIT | MSG_NBIO);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);
		}
		return error;
	}

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		return 0;
	}

	/*
	 * For consistency with soreceive() semantics, we need to obey
	 * SB_LOCK in case some other code path has locked the buffer.
	 */
	error = sblock(&so->so_rcv, 0);
	if (error != 0) {
		return error;
	}

	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error != 0) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}

		if (so->so_state & SS_CANTRCVMORE) {
			goto release;
		}

		if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
			error = ENOTCONN;
			goto release;
		}

		/*
		 * MSG_DONTWAIT is implicitly defined and this routine will
		 * never block, so return EWOULDBLOCK when there is nothing.
		 */
		error = EWOULDBLOCK;
		goto release;
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

	while (m != NULL) {
		int dlen = 0, dfin = 0, error_out = 0;
		struct mbuf *start = m;
		uint64_t dsn;
		uint32_t sseq;
		uint16_t orig_dlen;
		uint16_t csum;

		VERIFY(m->m_nextpkt == NULL);

		if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			orig_dlen = dlen = m->m_pkthdr.mp_rlen;
			dsn = m->m_pkthdr.mp_dsn;
			sseq = m->m_pkthdr.mp_rseq;
			csum = m->m_pkthdr.mp_csum;
		} else {
			/* We did fallback */
			mptcp_adj_rmap(so, m, 0, 0, 0, 0);

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (m != NULL) {
				so->so_rcv.sb_lastrecord = m;
			} else {
				SB_EMPTY_FIXUP(&so->so_rcv);
			}

			continue;
		}

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
			dfin = 1;
		}

		/*
		 * Check if the full mapping is now present
		 */
		if ((int)so->so_rcv.sb_cc < dlen - dfin) {
			mptcplog((LOG_INFO, "%s not enough data (%u) need %u for dsn %u\n",
			    __func__, so->so_rcv.sb_cc, dlen, (uint32_t)dsn),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);

			if (*mp0 == NULL) {
				error = EWOULDBLOCK;
			}
			goto release;
		}

		/* Now, get the full mapping */
		while (dlen > 0) {
			if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
				error_out = 1;
				error = ECONNABORTED;
				dlen = 0;
				*mp0 = NULL;
				mptcp_subflow_abort(sototcpcb(so)->t_mpsub, ECONNABORTED);
				break;
			}

			dlen -= m->m_len;
			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (dlen - dfin == 0) {
				dlen = 0;
			}

			VERIFY(dlen <= 0 || m);
		}

		VERIFY(dlen == 0);

		if (m != NULL) {
			so->so_rcv.sb_lastrecord = m;
		} else {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}

		if (error_out) {
			goto release;
		}

		if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
			error = EIO;
			*mp0 = NULL;
			goto release;
		}

		SB_MB_CHECK(&so->so_rcv);
	}

	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");

	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);

	if (flagsp != NULL) {
		*flagsp |= flags;
	}

release:
	sbunlock(&so->so_rcv, TRUE);

	if (proc_held) {
		proc_rele(p);
	}

	return error;
}
/*
 * MPTCP subflow socket send routine, derived from sosend().
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	struct proc *p = current_proc();
	boolean_t en_tracing = FALSE, proc_held = FALSE;
	int en_tracing_val;
	int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
	int error;

	VERIFY(control == NULL);
	VERIFY(addr == NULL);
	VERIFY(uio == NULL);
	VERIFY(flags == 0);
	VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);

	VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
	VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);

	/*
	 * trace if tracing & network (vs. unix) sockets & non-loopback
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			en_tracing_val = top->m_pkthdr.len;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)en_tracing_val);
		}
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
	if (error) {
		goto out;
	}

	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
	top = NULL;

out:
	if (top != NULL) {
		m_freem(top);
	}

	if (proc_held) {
		proc_rele(p);
	}

	soclearfastopen(so);

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)en_tracing_val);
	}

	return error;
}
/*
 * Establish an initial MPTCP connection (if first subflow and not yet
 * connected), or add a subflow to an existing MPTCP connection.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
    struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
{
    struct socket *mp_so, *so = NULL;
    struct mptcb *mp_tp;
    struct mptsub *mpts = NULL;
    int af, error = 0;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;

    if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
        /* If the remote end sends Data FIN, refuse subflow adds */
        mptcplog((LOG_ERR, "%s state %u\n", __func__, mp_tp->mpt_state),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
        error = ENOTCONN;
        goto out_err;
    }

    mpts = mptcp_subflow_alloc();
    if (mpts == NULL) {
        mptcplog((LOG_ERR, "%s malloc subflow failed\n", __func__),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
        error = ENOMEM;
        goto out_err;
    }

    if (src) {
        if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
            error = EAFNOSUPPORT;
            goto out_err;
        }

        if (src->sa_family == AF_INET &&
            src->sa_len != sizeof(struct sockaddr_in)) {
            error = EINVAL;
            goto out_err;
        }

        if (src->sa_family == AF_INET6 &&
            src->sa_len != sizeof(struct sockaddr_in6)) {
            error = EINVAL;
            goto out_err;
        }

        MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
            M_WAITOK | M_ZERO);
        if (mpts->mpts_src == NULL) {
            error = ENOMEM;
            goto out_err;
        }
        bcopy(src, mpts->mpts_src, src->sa_len);
    }

    if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
        error = EAFNOSUPPORT;
        goto out_err;
    }

    if (dst->sa_family == AF_INET &&
        dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
        error = EINVAL;
        goto out_err;
    }

    if (dst->sa_family == AF_INET6 &&
        dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
        error = EINVAL;
        goto out_err;
    }

    memcpy(&mpts->mpts_dst, dst, dst->sa_len);

    af = mpts->mpts_dst.sa_family;

    ifnet_head_lock_shared();
    if ((ifscope > (unsigned)if_index)) {
        ifnet_head_done();
        error = ENXIO;
        goto out_err;
    }
    ifnet_head_done();

    mpts->mpts_ifscope = ifscope;

    /* create the subflow socket */
    if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
        /*
         * Returning (error) and not cleaning up, because up to here
         * all we did is creating mpts.
         *
         * And the contract is that the call to mptcp_subflow_socreate,
         * moves ownership of mpts to mptcp_subflow_socreate.
         */
        return error;
    }

    /*
     * We may be called from within the kernel. Still need to account this
     * one to the real app.
     */
    mptcp_update_last_owner(mpts->mpts_socket, mp_so);

    /*
     * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
     * -1 (SAE_CONNID_ALL).
     */
    mpte->mpte_connid_last++;
    if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
        mpte->mpte_connid_last == SAE_CONNID_ANY)
        mpte->mpte_connid_last++;

    mpts->mpts_connid = mpte->mpte_connid_last;

    mpts->mpts_rel_seq = 1;

    /* Allocate a unique address id per subflow */
    mpte->mpte_addrid_last++;
    if (mpte->mpte_addrid_last == 0)
        mpte->mpte_addrid_last++;

    /* register for subflow socket read/write events */
    sock_setupcalls_locked(so, mptcp_subflow_rupcall, mpts, mptcp_subflow_wupcall, mpts, 1);

    /* Register for subflow socket control events */
    sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
        SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
        SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
        SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
        SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
        SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
        SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
        SO_FILT_HINT_ADAPTIVE_WTIMO);

    /* sanity check */
    VERIFY(!(mpts->mpts_flags &
        (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));

    /*
     * Indicate to the TCP subflow whether or not it should establish
     * the initial MPTCP connection, or join an existing one.  Fill
     * in the connection request structure with additional info needed
     * by the underlying TCP (to be used in the TCP options, etc.)
     */
    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
        mpts->mpts_flags |= MPTSF_INITIAL_SUB;

        if (mp_tp->mpt_state == MPTCPS_CLOSED) {
            mptcp_init_local_parms(mpte);
        }
        soisconnecting(mp_so);

        /* If fastopen is requested, set state in mpts */
        if (so->so_flags1 & SOF1_PRECONNECT_DATA)
            mpts->mpts_flags |= MPTSF_TFO_REQD;
    } else {
        if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
            mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
    }

    mpts->mpts_flags |= MPTSF_CONNECTING;

    if (af == AF_INET || af == AF_INET6) {
        char dbuf[MAX_IPv6_STR_LEN];

        mptcplog((LOG_DEBUG, "MPTCP Socket: %s "
            "mp_so 0x%llx dst %s[%d] cid %d "
            "[pending %s]\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            inet_ntop(af, ((af == AF_INET) ?
            (void *)&SIN(&mpts->mpts_dst)->sin_addr.s_addr :
            (void *)&SIN6(&mpts->mpts_dst)->sin6_addr),
            dbuf, sizeof(dbuf)), ((af == AF_INET) ?
            ntohs(SIN(&mpts->mpts_dst)->sin_port) :
            ntohs(SIN6(&mpts->mpts_dst)->sin6_port)),
            mpts->mpts_connid,
            ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
            "YES" : "NO")),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
    }

    /* connect right away if first attempt, or if join can be done now */
    if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
        error = mptcp_subflow_soconnectx(mpte, mpts);

    if (error)
        goto out_err_close;

    if (pcid)
        *pcid = mpts->mpts_connid;

    return 0;

out_err_close:
    mptcp_subflow_abort(mpts, error);

    return error;

out_err:
    if (mpts)
        mptcp_subflow_free(mpts);

    return error;
}
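
/*
 * Illustrative only, not part of this file's build: a minimal userspace
 * sketch of how a subflow add is driven from outside the kernel, namely
 * via connectx(2) on a PF_MULTIPATH socket.  The sae_connid_t written by
 * the kernel corresponds to the mpts_connid allocated above.  The
 * destination address and port below are placeholders, not values from
 * this file.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>

static int
example_add_subflow(void)
{
    int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in dst;
    sa_endpoints_t eps;
    sae_connid_t cid;

    memset(&dst, 0, sizeof(dst));
    dst.sin_family = AF_INET;
    dst.sin_len = sizeof(dst);
    dst.sin_port = htons(443);                  /* placeholder port */
    dst.sin_addr.s_addr = htonl(0xc0000221);    /* 192.0.2.33, example addr */

    memset(&eps, 0, sizeof(eps));
    eps.sae_dstaddr = (struct sockaddr *)&dst;
    eps.sae_dstaddrlen = sizeof(dst);

    /* Kicks off the first subflow; mptcp_subflow_add() runs in-kernel */
    return connectx(fd, &eps, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, &cid);
}
#endif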
static void
mptcpstats_update(struct mptcp_itf_stats *stats, struct mptsub *mpts)
{
    int index = mptcp_get_statsindex(stats, mpts);

    if (index != -1) {
        struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

        stats[index].mpis_txbytes += inp->inp_stat->txbytes;
        stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;
    }
}
/*
 * Delete/remove a subflow from an MPTCP.  The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *mp_so = mptetoso(mpte);
    struct socket *so = mpts->mpts_socket;
    struct tcpcb *tp = sototcpcb(so);

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpts->mpts_mpte == mpte);
    VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
    VERIFY(mpte->mpte_numflows != 0);
    VERIFY(mp_so->so_usecount > 0);

    mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d %x error %d\n",
        __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
        mp_so->so_usecount, mp_so->so_retaincnt, mpts->mpts_connid,
        mpts->mpts_flags, mp_so->so_error),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    mptcpstats_update(mpte->mpte_itfstats, mpts);
    mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
    mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

    atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
    TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
    mpte->mpte_numflows--;
    if (mpte->mpte_active_sub == mpts)
        mpte->mpte_active_sub = NULL;

    /*
     * Drop references held by this subflow socket; there
     * will be no further upcalls made from this point.
     */
    sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
    sock_catchevents_locked(so, NULL, NULL, 0);

    mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

    mp_so->so_usecount--;       /* for subflow socket */
    mpts->mpts_mpte = NULL;
    mpts->mpts_socket = NULL;

    mptcp_subflow_remref(mpts); /* for MPTCP subflow list */
    mptcp_subflow_remref(mpts); /* for subflow socket */

    so->so_flags &= ~SOF_MP_SUBFLOW;
    tp->t_mptcb = NULL;
    tp->t_mpsub = NULL;
}
static void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *so = mpts->mpts_socket;
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    int send_dfin = 0;

    if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
        send_dfin = 1;

    if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
        (so->so_state & SS_ISCONNECTED)) {
        mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
            __func__, mpts->mpts_connid, send_dfin),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (send_dfin)
            mptcp_send_dfin(so);
        soshutdownlock(so, SHUT_WR);
    }
}
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
    struct socket *so = mpts->mpts_socket;
    struct tcpcb *tp = sototcpcb(so);

    if (mpts->mpts_flags & MPTSF_DISCONNECTED)
        return;

    mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    if (tp->t_state != TCPS_CLOSED)
        tcp_drop(tp, error);

    mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *so;
    struct mptcb *mp_tp;
    int send_dfin = 0;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    VERIFY(mpts->mpts_mpte == mpte);
    VERIFY(mpts->mpts_socket != NULL);

    if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED))
        return;

    mpts->mpts_flags |= MPTSF_DISCONNECTING;

    so = mpts->mpts_socket;
    mp_tp = mpte->mpte_mptcb;
    if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
        send_dfin = 1;

    if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
        (so->so_state & SS_ISCONNECTED)) {
        mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
            __func__, mpts->mpts_connid, send_dfin),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (send_dfin)
            mptcp_send_dfin(so);
        (void) soshutdownlock(so, SHUT_RD);
        (void) soshutdownlock(so, SHUT_WR);
        (void) sodisconnectlocked(so);
    }
    /*
     * Generate a disconnect event for this subflow socket, in case
     * the lower layer doesn't do it; this is needed because the
     * subflow socket deletion relies on it.
     */
    mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Subflow socket read upcall.
 *
 * Called when the associated subflow socket posted a read event.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
    struct mptsub *mpts = arg, *tmpts;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        if (!(mpte->mpte_mppcb->mpp_flags & MPP_RUPCALL))
            mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
        return;
    }

    mpte->mpte_mppcb->mpp_flags |= MPP_RUPCALL;
    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        if (mpts->mpts_socket->so_usecount == 0) {
            /* Will be removed soon by tcp_garbage_collect */
            continue;
        }

        mptcp_subflow_addref(mpts);
        mpts->mpts_socket->so_usecount++;

        mptcp_subflow_input(mpte, mpts);

        mptcp_subflow_remref(mpts);     /* ours */

        VERIFY(mpts->mpts_socket->so_usecount != 0);
        mpts->mpts_socket->so_usecount--;
    }

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_RUPCALL);
}
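
/*
 * The guard at the top of mptcp_subflow_rupcall() is the deferral pattern
 * shared by all subflow upcalls in this file: if the MPTCP PCB is busy
 * (mptcp_should_defer_upcall()), only an MPP_SHOULD_RWAKEUP bit is
 * recorded and the actual work is replayed later by
 * mptcp_handle_deferred_upcalls().  For example, if a read event fires
 * while another thread is inside an output run on the same PCB, the
 * wakeup is not lost; it is coalesced into a single deferred pass once
 * the PCB becomes available again.
 */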
/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *mp_so = mptetoso(mpte);
    struct mbuf *m = NULL;
    struct socket *so;
    int error, wakeup = 0;

    VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
    mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

    DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
        struct mptsub *, mpts);

    if (!(mpts->mpts_flags & MPTSF_CONNECTED))
        goto out;

    so = mpts->mpts_socket;

    error = sock_receive_internal(so, NULL, &m, 0, NULL);
    if (error != 0 && error != EWOULDBLOCK) {
        mptcplog((LOG_ERR, "%s: cid %d error %d\n",
            __func__, mpts->mpts_connid, error),
            MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
        if (error == ENODATA) {
            /*
             * Don't ignore ENODATA so as to discover
             * nasty middleboxes.
             */
            mp_so->so_error = ENODATA;

            wakeup = 1;
            goto out;
        }
    } else if (error == 0) {
        mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
            MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
    }

    /* In fallback, make sure to accept data on all but one subflow */
    if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        !(mpts->mpts_flags & MPTSF_ACTIVE)) {
        mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
            __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
        m_freem(m);
        goto out;
    }

    if (m != NULL) {
        if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
            mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

            mpte->mpte_used_cell = 1;
        } else {
            mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

            mpte->mpte_used_wifi = 1;
        }

        mptcp_input(mpte, m);
    }

    /* notify protocol that we drained all the data */
    if (error == 0 && m != NULL &&
        (so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
        (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);

out:
    if (wakeup)
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
    struct mptsub *mpts = arg;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL))
            mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
        return;
    }

    mptcp_output(mpte);
}
static boolean_t
mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
{
    struct mbuf *so_m = so->so_snd.sb_mb;
    uint64_t dsn = m->m_pkthdr.mp_dsn;

    while (so_m) {
        VERIFY(so_m->m_flags & M_PKTHDR);
        VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);

        /* Part of the segment is covered, don't reinject here */
        if (so_m->m_pkthdr.mp_dsn <= dsn &&
            so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn)
            return TRUE;

        so_m = so_m->m_next;
    }

    return FALSE;
}
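
/*
 * Worked example for the coverage test above: a send-buffer segment with
 * mp_dsn = 900 and mp_rlen = 200 spans the half-open data-sequence
 * interval [900, 1100).  A candidate reinject mbuf with mp_dsn = 1000
 * satisfies 900 <= 1000 && 900 + 200 > 1000, so part of that mapping is
 * still in flight on this subflow and must not be reinjected here.
 */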
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
    struct socket *mp_so, *so;
    struct tcpcb *tp;
    uint64_t mpt_dsn = 0, off = 0;
    int sb_cc = 0, error = 0, wakeup = 0;
    uint32_t dss_csum;
    uint16_t tot_sent = 0;
    boolean_t reinjected = FALSE;

    mpte_lock_assert_held(mpte);

    mp_so = mptetoso(mpte);
    so = mpts->mpts_socket;
    tp = sototcpcb(so);

    VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
    mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

    VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
    VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
        (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
        (mpts->mpts_flags & MPTSF_TFO_REQD));
    VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

    mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
        __func__, mpts->mpts_flags, mpte->mpte_flags,
        mptcp_subflow_cwnd_space(so)),
        MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
    DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
        struct mptsub *, mpts);

    /* Remove Addr Option is not sent reliably as per I-D */
    if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
        tp->t_rem_aid = mpte->mpte_lost_aid;
        tp->t_mpflags |= TMPF_SND_REM_ADDR;
        mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
    }

    /*
     * The mbuf chains containing the metadata (as well as pointing to
     * the user data sitting at the MPTCP output queue) would then be
     * sent down to the subflow socket.
     *
     * Some notes on data sequencing:
     *
     *   a. Each mbuf must be a M_PKTHDR.
     *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
     *      in the mbuf pkthdr structure.
     *   c. Each mbuf containing the MPTCP metadata must have its
     *      pkt_flags marked with the PKTF_MPTCP flag.
     *
     * (A worked example of the resulting mapping follows this function.)
     */

    if (mpte->mpte_reinjectq)
        sb_mb = mpte->mpte_reinjectq;
    else
        sb_mb = mp_so->so_snd.sb_mb;

    if (sb_mb == NULL) {
        mptcplog((LOG_ERR, "%s: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
            __func__, (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
            (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);

        /* Fix it to prevent looping */
        if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna))
            mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
        goto out;
    }

    VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

    if (sb_mb->m_pkthdr.mp_rlen == 0 &&
        !(so->so_state & SS_ISCONNECTED) &&
        (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
        tp->t_mpflags |= TMPF_TFO_REQUEST;
        goto zero_len_write;
    }

    mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

    /* First, drop acknowledged data */
    if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
        mptcplog((LOG_ERR, "%s: dropping data, should have been done earlier "
            "dsn %u suna %u reinject? %u\n",
            __func__, (uint32_t)mpt_dsn,
            (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        if (mpte->mpte_reinjectq) {
            mptcp_clean_reinjectq(mpte);
        } else {
            uint64_t len = 0;
            len = mp_tp->mpt_snduna - mpt_dsn;
            sbdrop(&mp_so->so_snd, (int)len);
            wakeup = 1;
        }
    }

    /* Check again because of above sbdrop */
    if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
        mptcplog((LOG_ERR, "%s send-buffer is empty\n", __func__),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        goto out;
    }

    /*
     * In degraded mode, we don't receive data acks, so force free
     * mbufs less than snd_nxt
     */
    if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
        mp_so->so_snd.sb_mb) {
        mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
        if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
            uint64_t len = 0;
            len = mp_tp->mpt_snduna - mpt_dsn;
            sbdrop(&mp_so->so_snd, (int)len);
            wakeup = 1;

            mptcplog((LOG_ERR, "%s: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
                __func__, (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        }
    }

    if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
        mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
        so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
    }

    /*
     * Adjust the top level notion of next byte used for retransmissions
     * and sending FIN.
     */
    if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna))
        mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

    /* Now determine the offset from which to start transmitting data */
    if (mpte->mpte_reinjectq)
        sb_mb = mpte->mpte_reinjectq;
    else
        sb_mb = mp_so->so_snd.sb_mb;
    if (sb_mb == NULL) {
        mptcplog((LOG_ERR, "%s send-buffer is still empty\n", __func__),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        goto out;
    }

    if (sb_mb == mpte->mpte_reinjectq) {
        sb_cc = sb_mb->m_pkthdr.mp_rlen;
        off = 0;

        if (mptcp_search_seq_in_sub(sb_mb, so)) {
            if (mptcp_can_send_more(mp_tp, TRUE)) {
                goto dont_reinject;
            }

            error = ECANCELED;
            goto out;
        }

        reinjected = TRUE;
    } else if (flags & MPTCP_SUBOUT_PROBING) {
        sb_cc = sb_mb->m_pkthdr.mp_rlen;
        off = 0;
    } else {
        sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

        /*
         * With TFO, there might be no data at all, thus still go into this
         * code-path here.
         */
        if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
            MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
            off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
            sb_cc -= off;
        } else {
            mptcplog((LOG_ERR, "%s this should not happen: sndnxt %u sndmax %u\n",
                __func__, (uint32_t)mp_tp->mpt_sndnxt,
                (uint32_t)mp_tp->mpt_sndmax),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);

            goto out;
        }
    }

    sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
    if (sb_cc <= 0) {
        mptcplog((LOG_ERR, "%s sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
            __func__, sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
            (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
            mptcp_subflow_cwnd_space(so)),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
    }

    sb_cc = min(sb_cc, UINT16_MAX);

    /*
     * Create a DSN mapping for the data we are about to send. It all
     * has the same mapping.
     */
    if (reinjected)
        mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
    else
        mpt_dsn = mp_tp->mpt_snduna + off;

    mpt_mbuf = sb_mb;
    while (mpt_mbuf && reinjected == FALSE &&
        (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
        mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
        off -= mpt_mbuf->m_pkthdr.mp_rlen;
        mpt_mbuf = mpt_mbuf->m_next;
    }
    if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
        mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
            __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
            mpts->mpts_probecnt),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
    }

    VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

    head = tail = NULL;

    while (tot_sent < sb_cc) {
        ssize_t mlen;

        mlen = mpt_mbuf->m_len;
        mlen -= off;
        mlen = min(mlen, sb_cc - tot_sent);

        if (mlen < 0) {
            mptcplog((LOG_ERR, "%s mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
                __func__, (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
                (uint32_t)off, sb_cc, tot_sent),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
            error = EINVAL;
            break;
        }

        if (mlen == 0)
            goto next;

        m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
            M_COPYM_MUST_COPY_HDR);
        if (m == NULL) {
            mptcplog((LOG_ERR, "%s m_copym_mode failed\n", __func__),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
            error = ENOBUFS;
            break;
        }

        /* Create a DSN mapping for the data (m_copym does it) */
        VERIFY(m->m_flags & M_PKTHDR);
        VERIFY(m->m_next == NULL);

        m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
        m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
        m->m_pkthdr.mp_dsn = mpt_dsn;
        m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
        m->m_pkthdr.len = mlen;

        if (head == NULL) {
            head = tail = m;
        } else {
            tail->m_next = m;
            tail = m;
        }

        tot_sent += mlen;
        off = 0;
next:
        mpt_mbuf = mpt_mbuf->m_next;
    }

    if (reinjected) {
        if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
            struct mbuf *n = sb_mb;

            while (n) {
                n->m_pkthdr.mp_dsn += sb_cc;
                n->m_pkthdr.mp_rlen -= sb_cc;
                n = n->m_next;
            }
            m_adj(sb_mb, sb_cc);
        } else {
            mpte->mpte_reinjectq = sb_mb->m_nextpkt;
            m_freem(sb_mb);
        }
    }

    mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
        __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
        tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

    if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
        dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
            tot_sent);
    }

dont_reinject:
    /* Now, let's update rel-seq and the data-level length */
    mpts->mpts_rel_seq += tot_sent;
    m = head;
    while (m) {
        if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
            m->m_pkthdr.mp_csum = dss_csum;
        m->m_pkthdr.mp_rlen = tot_sent;
        m = m->m_next;
    }

    if (head != NULL) {
        if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
            (tp->t_tfo_stats == 0))
            tp->t_mpflags |= TMPF_TFO_REQUEST;

        error = sock_sendmbuf(so, NULL, head, 0, NULL);

        DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
            struct sockbuf *, &so->so_rcv,
            struct sockbuf *, &so->so_snd,
            struct mptses *, mpte, struct mptsub *, mpts,
            size_t, tot_sent);
    }

done_sending:
    if (error == 0 ||
        (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
        uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

        if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
            tcpstat.tcps_mp_num_probes++;
            if ((uint32_t)tot_sent < mpts->mpts_maxseg)
                mpts->mpts_probecnt += 1;
            else
                mpts->mpts_probecnt +=
                    tot_sent / mpts->mpts_maxseg;
        }

        if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
            if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
                MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
                mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
            mp_tp->mpt_sndnxt = new_sndnxt;
        }

        mptcp_cancel_timer(mp_tp, MPTT_REXMT);

        /* Must be here as mptcp_can_send_more() checks for this */
        soclearfastopen(mp_so);

        if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
            (mpts->mpts_probesoon != 0))
            mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
                __func__, mpts->mpts_connid,
                !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
                tot_sent, (int) sb_cc, mpts->mpts_probecnt,
                (tcp_now - mpts->mpts_probesoon)),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

        if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
            mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

            mpte->mpte_used_cell = 1;
        } else {
            mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

            mpte->mpte_used_wifi = 1;
        }

        /*
         * Don't propagate EWOULDBLOCK - it's already taken care of
         * in mptcp_usr_send for TFO.
         */
        error = 0;
    } else {
        mptcplog((LOG_ERR, "%s: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
            __func__, mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
    }

out:
    if (wakeup)
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
    return error;

zero_len_write:
    /* Opting to call pru_send as no mbuf at subflow level */
    error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
        NULL, current_proc());

    goto done_sending;
}
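
/*
 * Worked example of the DSN mapping built above (referenced from the
 * sequencing notes inside the function): with mpt_snduna = 1000 and
 * off = 500, the chain queued on the subflow gets mp_dsn = 1500.  Every
 * mbuf of that chain is an M_PKTHDR marked PKTF_MPTCP and, once tot_sent
 * is known, carries the same mapping, e.g. for tot_sent = 1400:
 *
 *      mp_dsn  = 1500          (data-level sequence number)
 *      mp_rseq = mpts_rel_seq  (subflow-relative sequence number)
 *      mp_rlen = 1400          (data-level length of the mapping)
 *
 * matching notes (a)-(c) in the block comment at the top of the function.
 */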
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
    struct mbuf *n, *prev = NULL;

    mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
        __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
        m->m_pkthdr.mp_rseq),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    n = mpte->mpte_reinjectq;

    /* First, look for an mbuf n, whose data-sequence-number is bigger or
     * equal than m's sequence number.
     */
    while (n) {
        if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn))
            break;

        prev = n;

        n = n->m_nextpkt;
    }

    if (n) {
        /* m is already fully covered by the next mbuf in the queue */
        if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
            n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
            mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
                __func__, n->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            goto dont_queue;
        }

        /* m is covering the next mbuf entirely, thus we remove this guy */
        if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
            struct mbuf *tmp = n->m_nextpkt;

            mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
                __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
                n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

            m->m_nextpkt = NULL;
            if (prev == NULL)
                mpte->mpte_reinjectq = tmp;
            else
                prev->m_nextpkt = tmp;

            m_freem(n);
            n = tmp;
        }
    }

    if (prev) {
        /* m is already fully covered by the previous mbuf in the queue */
        if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
            mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
                __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            goto dont_queue;
        }
    }

    if (prev == NULL)
        mpte->mpte_reinjectq = m;
    else
        prev->m_nextpkt = m;

    m->m_nextpkt = n;

    return;

dont_queue:
    m_freem(m);
    return;
}
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
    struct socket *mp_so = mptetoso(mpte);
    struct mbuf *m;

    m = mp_so->so_snd.sb_mb;

    while (m) {
        /* If this segment covers what we are looking for, return it. */
        if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
            MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn))
            break;

        /* Segment is no more in the queue */
        if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn))
            return NULL;

        m = m->m_next;
    }

    return m;
}
static struct mbuf *
mptcp_copy_mbuf_list(struct mbuf *m, int len)
{
    struct mbuf *top = NULL, *tail = NULL;
    uint64_t dsn;
    uint32_t dlen, rseq;

    dsn = m->m_pkthdr.mp_dsn;
    dlen = m->m_pkthdr.mp_rlen;
    rseq = m->m_pkthdr.mp_rseq;

    while (len > 0) {
        struct mbuf *n;

        VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

        n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
        if (n == NULL) {
            mptcplog((LOG_ERR, "%s m_copym_mode returned NULL\n", __func__),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
            goto err;
        }

        VERIFY(n->m_flags & M_PKTHDR);
        VERIFY(n->m_next == NULL);
        VERIFY(n->m_pkthdr.mp_dsn == dsn);
        VERIFY(n->m_pkthdr.mp_rlen == dlen);
        VERIFY(n->m_pkthdr.mp_rseq == rseq);
        VERIFY(n->m_len == m->m_len);

        n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

        if (top == NULL)
            top = n;

        if (tail != NULL)
            tail->m_next = n;

        tail = n;

        len -= m->m_len;
        m = m->m_next;
    }

    return top;

err:
    if (top)
        m_freem(top);

    return NULL;
}
void
mptcp_reinject_mbufs(struct socket *so)
{
    struct tcpcb *tp = sototcpcb(so);
    struct mptsub *mpts = tp->t_mpsub;
    struct mptcb *mp_tp = tptomptp(tp);
    struct mptses *mpte = mp_tp->mpt_mpte;
    struct sockbuf *sb = &so->so_snd;
    struct mbuf *m;

    m = sb->sb_mb;
    while (m) {
        struct mbuf *n = m->m_next, *orig = m;

        mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
            __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
            m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

        VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

        if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ)
            goto next;

        /* Has it all already been acknowledged at the data-level? */
        if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen))
            goto next;

        /* Part of this has already been acknowledged - lookup in the
         * MPTCP-socket for the segment.
         */
        if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
            m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
            if (m == NULL)
                goto next;
        }

        /* Copy the mbuf with headers (aka, DSN-numbers) */
        m = mptcp_copy_mbuf_list(m, m->m_pkthdr.mp_rlen);
        if (m == NULL)
            break;

        VERIFY(m->m_nextpkt == NULL);

        /* Now, add to the reinject-queue, eliminating overlapping
         * segments
         */
        mptcp_add_reinjectq(mpte, m);

        orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
        /* mp_rlen can cover multiple mbufs, so advance to the end of it. */
        while (n) {
            VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

            if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn)
                break;

            n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
            n = n->m_next;
        }

        m = n;
    }
}
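
/*
 * Note on the PKTF_MPTCP_REINJ marking above: a single DSN mapping
 * (mp_rlen) can span several mbufs in the send-buffer, all stamped with
 * the same mp_dsn.  Marking every mbuf of the mapping ensures the outer
 * loop revisits each mapping at most once, even after m was substituted
 * by mptcp_lookup_dsn() or copied via mptcp_copy_mbuf_list().
 */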
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
    struct mptcb *mp_tp = mpte->mpte_mptcb;

    mpte_lock_assert_held(mpte);

    while (mpte->mpte_reinjectq) {
        struct mbuf *m = mpte->mpte_reinjectq;

        if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
            MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna))
            break;

        mpte->mpte_reinjectq = m->m_nextpkt;
        m->m_nextpkt = NULL;
        m_freem(m);
    }
}
/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
    struct mptsub *mpts = arg;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);
    mpte_lock_assert_held(mpte);

    if ((mpts->mpts_evctl & events) == events)
        return;

    mpts->mpts_evctl |= events;

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
        return;
    }

    mptcp_subflow_workloop(mpte);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint)
{
    ev_ret_t ret = MPTS_EVRET_OK;
    int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
        sizeof(mpsub_ev_entry_tbl[0]);

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    /* bail if there's nothing to process */
    if (!mpts->mpts_evctl)
        return ret;

    if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
        SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
        SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
        SO_FILT_HINT_DISCONNECTED)) {
        mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
    }

    DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
        struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

    mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
        mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

    /*
     * Process all the socket filter hints and reset the hint
     * once it is handled
     */
    for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
        /*
         * Always execute the DISCONNECTED event, because it will wakeup
         * the app.
         */
        if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
            (ret >= MPTS_EVRET_OK ||
            mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
            mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
            ev_ret_t error =
                mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
            ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
    }

    /*
     * We should be getting only events specified via sock_catchevents(),
     * so loudly complain if we have any unprocessed one(s).
     */
    if (mpts->mpts_evctl || ret < MPTS_EVRET_OK)
        mptcplog((LOG_WARNING, "%s%s: cid %d evret %s (%d) unhandled events=%b\n", __func__,
            (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
            mpts->mpts_connid,
            mptcp_evret2str(ret), ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
    else
        mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
            mpts->mpts_evctl, SO_FILT_HINT_BITS),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

    return ret;
}
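
/*
 * Illustrative only, not compiled: the dispatch above walks a table of
 * (hint mask, handler) pairs.  Its shape, inferred from the usage here,
 * is roughly the following; the actual mpsub_ev_entry_tbl is defined
 * earlier in this file with one row per SO_FILT_HINT_* event caught via
 * sock_catchevents().
 */
#if 0
typedef struct mptsub_ev_entry {
    uint64_t    sofilt_hint_mask;
    ev_ret_t    (*sofilt_hint_ev_hdlr)(struct mptses *mpte,
                    struct mptsub *mpts, uint64_t *p_mpsofilt_hint,
                    uint64_t event);
} mptsub_ev_entry_t;

static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
    { SO_FILT_HINT_MPCANTRCVMORE, mptcp_subflow_mpcantrcvmore_ev },
    { SO_FILT_HINT_MPFAILOVER,    mptcp_subflow_failover_ev },
    { SO_FILT_HINT_CONNRESET,     mptcp_subflow_propagate_ev },
    { SO_FILT_HINT_MUSTRST,       mptcp_subflow_mustrst_ev },
    /* ... one entry per caught event ... */
};
#endif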
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
        mpts->mpts_connid, event),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * We got an event for this subflow that might need to be propagated,
     * based on the state of the MPTCP connection.
     */
    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
        ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
        mp_so->so_error = so->so_error;
        *p_mpsofilt_hint |= event;
    }

    return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
    struct socket *mp_so;
    struct tcpcb *tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

    /*
     * This overwrites any previous mpte_lost_aid to avoid storing
     * too much state when the typical case has only two subflows.
     */
    mpte->mpte_flags |= MPTE_SND_REM_ADDR;
    mpte->mpte_lost_aid = tp->t_local_aid;

    mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * The subflow connection has lost its source address.
     */
    mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

    if (mp_so->so_flags & SOF_NOADDRAVAIL)
        mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

    return MPTS_EVRET_DELETE;
}
/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    mp_tp = mpte->mpte_mptcb;

    mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * We got a Data FIN for the MPTCP connection.
     * The FIN may arrive with data. The data is handed up to the
     * mptcp socket and the user is notified so that it may close
     * the socket if needed.
     */
    if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT)
        *p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;

    return MPTS_EVRET_OK; /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct mptsub *mpts_alt = NULL;
    struct socket *alt_so = NULL;
    struct socket *mp_so;
    int altpath_exists = 0;

    mpte_lock_assert_held(mpte);
    mp_so = mptetoso(mpte);
    mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
        (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    mptcp_reinject_mbufs(mpts->mpts_socket);

    mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
    /*
     * If there is no alternate eligible subflow, ignore the
     * failover hint.
     */
    if (mpts_alt == NULL) {
        mptcplog((LOG_WARNING, "%s: no alternate path\n", __func__),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

        goto done;
    }

    altpath_exists = 1;
    alt_so = mpts_alt->mpts_socket;
    if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
        /* All data acknowledged and no RTT spike */
        if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
            mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
        } else {
            /* no alternate path available */
            altpath_exists = 0;
        }
    }

    if (altpath_exists) {
        mpts_alt->mpts_flags |= MPTSF_ACTIVE;

        mpte->mpte_active_sub = mpts_alt;
        mpts->mpts_flags |= MPTSF_FAILINGOVER;
        mpts->mpts_flags &= ~MPTSF_ACTIVE;

        mptcplog((LOG_NOTICE, "%s: switched from %d to %d\n",
            __func__, mpts->mpts_connid, mpts_alt->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

        mptcpstats_inc_switch(mpte, mpts);

        sowwakeup(alt_so);
    } else {
        mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
            mpts->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
        mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
    }

    return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);

    mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
        mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * The subflow connection cannot use the outgoing interface, let's
     * close this subflow.
     */
    mptcp_subflow_abort(mpts, EPERM);

    mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

    return MPTS_EVRET_DELETE;
}
/*
 * https://tools.ietf.org/html/rfc6052#section-2
 * https://tools.ietf.org/html/rfc6147#section-5.2
 */
static boolean_t
mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
    const struct ipv6_prefix *prefix,
    struct in_addr *addrv4)
{
    char buf[MAX_IPv4_STR_LEN];
    char *ptrv4 = (char *)addrv4;
    const char *ptr = (const char *)addr;

    if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0)
        return false;

    switch (prefix->prefix_len) {
    case NAT64_PREFIX_LEN_96:
        memcpy(ptrv4, ptr + 12, 4);
        break;
    case NAT64_PREFIX_LEN_64:
        memcpy(ptrv4, ptr + 9, 4);
        break;
    case NAT64_PREFIX_LEN_56:
        memcpy(ptrv4, ptr + 7, 1);
        memcpy(ptrv4 + 1, ptr + 9, 3);
        break;
    case NAT64_PREFIX_LEN_48:
        memcpy(ptrv4, ptr + 6, 2);
        memcpy(ptrv4 + 2, ptr + 9, 2);
        break;
    case NAT64_PREFIX_LEN_40:
        memcpy(ptrv4, ptr + 5, 3);
        memcpy(ptrv4 + 3, ptr + 9, 1);
        break;
    case NAT64_PREFIX_LEN_32:
        memcpy(ptrv4, ptr + 4, 4);
        break;
    default:
        panic("NAT64-prefix len is wrong: %u\n",
            prefix->prefix_len);
    }

    os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
        inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));

    return true;
}
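
/*
 * Worked example for the 96-bit case above, using the RFC 6052
 * well-known prefix 64:ff9b::/96: the synthesized address
 * 64:ff9b::c000:221 carries the IPv4 address in its last four bytes
 * (offset 12), so the memcpy recovers 0xc0000221 == 192.0.2.33.
 * The split cases (/64 down to /32) skip byte 8, which RFC 6052
 * reserves as the always-zero "u" octet.
 */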
static void
mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
{
    struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
    struct socket *so = mpts->mpts_socket;
    struct ifnet *ifp;
    int j;

    ifp = sotoinpcb(so)->inp_last_outifp;

    if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
        mptcp_ask_for_nat64(ifp);
        return;
    }

    for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
        int success;

        if (nat64prefixes[j].prefix_len == 0)
            continue;

        success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
            &nat64prefixes[j],
            &mpte->mpte_dst_v4_nat64.sin_addr);
        if (success) {
            mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
            mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
            mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
            break;
        }
    }
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct inpcb *inp;
    struct tcpcb *tp;
    struct mptcb *mp_tp;
    int af;
    boolean_t mpok = FALSE;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);

    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;
    tp = sototcpcb(so);
    af = mpts->mpts_dst.sa_family;

    if (mpts->mpts_flags & MPTSF_CONNECTED)
        return MPTS_EVRET_OK;

    if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
        (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
        if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
            (so->so_state & SS_ISCONNECTED)) {
            mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
                __func__, mpts->mpts_connid),
                MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
            (void) soshutdownlock(so, SHUT_RD);
            (void) soshutdownlock(so, SHUT_WR);
            (void) sodisconnectlocked(so);
        }
        return MPTS_EVRET_OK;
    }

    /*
     * The subflow connection has been connected.  Find out whether it
     * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
     *
     *   a. If MPTCP connection is not yet established, then this must be
     *      the first subflow connection.  If MPTCP failed to negotiate,
     *      fallback to regular TCP by degrading this subflow.
     *
     *   b. If MPTCP connection has been established, then this must be
     *      one of the subsequent subflow connections.  If MPTCP failed
     *      to negotiate, disconnect the connection.
     *
     * Right now, we simply unblock any waiters at the MPTCP socket layer
     * if the MPTCP connection has not been established.
     */

    if (so->so_state & SS_ISDISCONNECTED) {
        /*
         * With MPTCP joins, a connection is connected at the subflow
         * level, but the 4th ACK from the server elevates the MPTCP
         * subflow to connected state. So there is a small window
         * where the subflow could get disconnected before the
         * connected event is processed.
         */
        return MPTS_EVRET_OK;
    }

    if (mpts->mpts_flags & MPTSF_TFO_REQD)
        mptcp_drop_tfo_data(mpte, mpts);

    mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
    mpts->mpts_flags |= MPTSF_CONNECTED;

    if (tp->t_mpflags & TMPF_MPTCP_TRUE)
        mpts->mpts_flags |= MPTSF_MP_CAPABLE;

    tp->t_mpflags &= ~TMPF_TFO_REQUEST;

    /* get/verify the outbound interface */
    inp = sotoinpcb(so);

    mpts->mpts_maxseg = tp->t_maxseg;

    mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
        ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
        ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
        (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

    mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
        mp_tp->mpt_state = MPTCPS_ESTABLISHED;
        mpte->mpte_associd = mpts->mpts_connid;
        DTRACE_MPTCP2(state__change,
            struct mptcb *, mp_tp,
            uint32_t, 0 /* event */);

        if (SOCK_DOM(so) == AF_INET) {
            in_getsockaddr_s(so, &mpte->__mpte_src_v4);
        } else {
            in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
        }

        mpts->mpts_flags |= MPTSF_ACTIVE;

        /* case (a) above */
        if (!mpok) {
            tcpstat.tcps_mpcap_fallback++;

            tp->t_mpflags |= TMPF_INFIN_SENT;
            mptcp_notify_mpfail(so);
        } else {
            if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
                mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
                tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
            } else {
                mpts->mpts_flags |= MPTSF_PREFERRED;
            }
            mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
            mpte->mpte_nummpcapflows++;

            if (SOCK_DOM(so) == AF_INET6)
                mptcp_handle_ipv6_connection(mpte, mpts);

            mptcp_check_subflows_and_add(mpte);

            if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
                mpte->mpte_initial_cell = 1;

            mpte->mpte_handshake_success = 1;
        }

        mp_tp->mpt_sndwnd = tp->snd_wnd;
        mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
        mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
        soisconnected(mp_so);

        mptcplog((LOG_DEBUG, "%s: MPTCPS_ESTABLISHED for mp_so 0x%llx mpok %u\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpok),
            MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
    } else if (mpok) {
        /*
         * In case of additional flows, the MPTCP socket is not
         * MPTSF_MP_CAPABLE until an ACK is received from server
         * for 3-way handshake.  TCP would have guaranteed that this
         * is an MPTCP subflow.
         */
        if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
            !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
            mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
            tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
            mpts->mpts_flags &= ~MPTSF_PREFERRED;
        } else {
            mpts->mpts_flags |= MPTSF_PREFERRED;
        }

        mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
        mpte->mpte_nummpcapflows++;

        mpts->mpts_rel_seq = 1;

        mptcp_check_subflows_and_remove(mpte);
    } else {
        unsigned int i;

        /* Should we try the alternate port? */
        if (mpte->mpte_alternate_port &&
            inp->inp_fport != mpte->mpte_alternate_port) {
            union sockaddr_in_4_6 dst;
            struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;

            memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);

            dst_in->sin_port = mpte->mpte_alternate_port;

            mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
                mpts->mpts_ifscope, NULL);
        } else { /* Else, we tried all we could, mark this interface as non-MPTCP */
            for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
                struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

                if (inp->inp_last_outifp->if_index == info->ifindex) {
                    info->no_mptcp_support = 1;
                    break;
                }
            }
        }

        tcpstat.tcps_join_fallback++;
        if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
            tcpstat.tcps_mptcp_cell_proxy++;
        else
            tcpstat.tcps_mptcp_wifi_proxy++;

        soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

        return MPTS_EVRET_OK;
    }

    /* This call, just to "book" an entry in the stats-table for this ifindex */
    mptcp_get_statsindex(mpte->mpte_itfstats, mpts);

    mptcp_output(mpte);

    return MPTS_EVRET_OK; /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
        __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
        !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
        !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    if (mpts->mpts_flags & MPTSF_DISCONNECTED)
        return MPTS_EVRET_DELETE;

    mpts->mpts_flags |= MPTSF_DISCONNECTED;

    /* The subflow connection has been disconnected. */

    if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
        mpte->mpte_nummpcapflows--;
        if (mpte->mpte_active_sub == mpts) {
            mpte->mpte_active_sub = NULL;
            mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
                __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
        }
        mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
    }

    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
        ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
        mptcp_drop(mpte, mp_tp, so->so_error);
    }

    if (sototcpcb(so)->t_mpflags & TMPF_FASTCLOSERCV) {
        mptcp_drop(mpte, mp_tp, mp_so->so_error);
    }

    /*
     * Clear flags that are used by getconninfo to return state.
     * Retain like MPTSF_DELETEOK for internal purposes.
     */
    mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_CONNECT_PENDING |
        MPTSF_CONNECTED | MPTSF_DISCONNECTING | MPTSF_PREFERRED |
        MPTSF_MP_CAPABLE | MPTSF_MP_READY | MPTSF_MP_DEGRADED | MPTSF_ACTIVE);

    return MPTS_EVRET_DELETE;
}
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;
    ev_ret_t ret = MPTS_EVRET_OK;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
        mpts->mpts_flags |= MPTSF_MP_CAPABLE;
    else
        mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;

    if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
        if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
            goto done;
        mpts->mpts_flags |= MPTSF_MP_DEGRADED;
    } else {
        mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
    }

    if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY)
        mpts->mpts_flags |= MPTSF_MP_READY;
    else
        mpts->mpts_flags &= ~MPTSF_MP_READY;

    if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
        mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
        mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
    }

    if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
        VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
        ret = MPTS_EVRET_DISCONNECT_FALLBACK;

        m_freem_list(mpte->mpte_reinjectq);
        mpte->mpte_reinjectq = NULL;
    } else if (mpts->mpts_flags & MPTSF_MP_READY) {
        mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
        ret = MPTS_EVRET_CONNECT_PENDING;
    }

done:
    mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d mptsf=%b\n",
        __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
        mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
        mpts->mpts_flags, MPTSF_BITS),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    return ret;
}
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;
    boolean_t is_fastclose;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    /* We got an invalid option or a fast close */
    struct tcptemp *t_template;
    struct inpcb *inp = sotoinpcb(so);
    struct tcpcb *tp = NULL;

    tp = intotcpcb(inp);
    so->so_error = ECONNABORTED;

    is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

    t_template = tcp_maketemplate(tp);
    if (t_template) {
        struct tcp_respond_args tra;

        bzero(&tra, sizeof(tra));
        if (inp->inp_flags & INP_BOUND_IF)
            tra.ifscope = inp->inp_boundifp->if_index;
        else
            tra.ifscope = IFSCOPE_NONE;
        tra.awdl_unrestricted = 1;

        tcp_respond(tp, t_template->tt_ipgen,
            &t_template->tt_t, (struct mbuf *)NULL,
            tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
        (void) m_free(dtom(t_template));
        mptcplog((LOG_DEBUG, "MPTCP Events: "
            "%s: mp_so 0x%llx cid %d \n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mpts->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
    }
    mptcp_subflow_abort(mpts, ECONNABORTED);

    if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
        *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
            mp_so->so_error = ECONNABORTED;
        else
            mp_so->so_error = ECONNRESET;

        /*
         * mptcp_drop is being called after processing the events, to fully
         * close the MPTCP connection
         */
    }

    if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS)
        mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;

    return MPTS_EVRET_DELETE;
}
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_READ_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            TCPS_HAVERCVDFIN2(tp->t_state))
            continue;

        if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active)
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;

    return MPTS_EVRET_OK;
}
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_WRITE_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            tp->t_state > TCPS_CLOSE_WAIT)
            continue;

        if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active)
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;

    return MPTS_EVRET_OK;
}
static const char *
mptcp_evret2str(ev_ret_t ret)
{
    const char *c = "UNKNOWN";

    switch (ret) {
    case MPTS_EVRET_DELETE:
        c = "MPTS_EVRET_DELETE";
        break;
    case MPTS_EVRET_CONNECT_PENDING:
        c = "MPTS_EVRET_CONNECT_PENDING";
        break;
    case MPTS_EVRET_DISCONNECT_FALLBACK:
        c = "MPTS_EVRET_DISCONNECT_FALLBACK";
        break;
    case MPTS_EVRET_OK:
        c = "MPTS_EVRET_OK";
        break;
    default:
        break;
    }
    return c;
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
    struct socket *mp_so, *so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
    mpte_lock_assert_held(mpte);

    mp_so = mptetoso(mpte);
    so = mpts->mpts_socket;

    if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
        mpo->mpo_level == SOL_SOCKET &&
        mpo->mpo_name == SO_MARK_CELLFALLBACK) {
        struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];

        mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
            __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable(mpte),
            sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
            mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        /*
         * When we open a new subflow, mark it as cell fallback, if
         * this subflow goes over cell.
         *
         * (except for first-party apps)
         */

        if (mpte->mpte_flags & MPTE_FIRSTPARTY)
            return 0;

        if (sotoinpcb(so)->inp_last_outifp &&
            !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp))
            return 0;

        /*
         * This here is an OR, because if the app is not binding to the
         * interface, then it definitely is not a cell-fallback
         * connection.
         */
        if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
            !IFNET_IS_CELLULAR(ifp))
            return 0;
    }

    mpo->mpo_flags &= ~MPOF_INTERIM;

    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof(int);
    sopt.sopt_p = kernproc;

    error = sosetoptlock(so, &sopt, 0);
    if (error == 0) {
        mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s "
            "val %d set successful\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
    } else {
        mptcplog((LOG_ERR, "%s:mp_so 0x%llx sopt %s "
            "val %d set error %d\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval, error),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
    }
    return error;
}
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
    struct socket *mp_so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    mp_so = mptetoso(mpte);

    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof(int);
    sopt.sopt_p = kernproc;

    error = sogetoptlock(so, &sopt, 0); /* already locked */
    if (error == 0) {
        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx sopt %s "
            "val %d get successful\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
    } else {
        mptcplog((LOG_ERR, "MPTCP Socket: "
            "%s: mp_so 0x%llx sopt %s get error %d\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
    }
    return error;
}
/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed.  The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
    struct mppcb *mpp, *tmpp;
    uint32_t active = 0;

    LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

    TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
        struct socket *mp_so;
        struct mptses *mpte;
        struct mptcb *mp_tp;

        VERIFY(mpp->mpp_flags & MPP_ATTACHED);
        mp_so = mpp->mpp_socket;
        VERIFY(mp_so != NULL);
        mpte = mptompte(mpp);
        VERIFY(mpte != NULL);
        mp_tp = mpte->mpte_mptcb;
        VERIFY(mp_tp != NULL);

        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx found "
            "(u=%d,r=%d,s=%d)\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
            mp_so->so_retaincnt, mpp->mpp_state),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (!mpte_try_lock(mpte)) {
            mptcplog((LOG_DEBUG, "MPTCP Socket: "
                "%s: mp_so 0x%llx skipped lock "
                "(u=%d,r=%d)\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            active++;
            continue;
        }

        /* check again under the lock */
        if (mp_so->so_usecount > 0) {
            boolean_t wakeup = FALSE;
            struct mptsub *mpts, *tmpts;

            mptcplog((LOG_DEBUG, "MPTCP Socket: "
                "%s: mp_so 0x%llx skipped usecount "
                "[u=%d,r=%d] %d %d\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt,
                mp_tp->mpt_gc_ticks,
                mp_tp->mpt_state),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

            if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
                if (mp_tp->mpt_gc_ticks > 0)
                    mp_tp->mpt_gc_ticks--;
                if (mp_tp->mpt_gc_ticks == 0) {
                    wakeup = TRUE;
                }
            }
            if (wakeup) {
                TAILQ_FOREACH_SAFE(mpts,
                    &mpte->mpte_subflows, mpts_entry, tmpts) {
                    mptcp_subflow_eupcall1(mpts->mpts_socket,
                        mpts, SO_FILT_HINT_DISCONNECTED);
                }
            }
            mpte_unlock(mpte);
            active++;
            continue;
        }

        if (mpp->mpp_state != MPPCB_STATE_DEAD) {
            panic("MPTCP Socket: %s: mp_so 0x%llx skipped state "
                "[u=%d,r=%d,s=%d]\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt,
                mpp->mpp_state);
        }

        if (mp_tp->mpt_state == MPTCPS_TIME_WAIT)
            mptcp_close(mpte, mp_tp);

        mptcp_session_destroy(mpte);

        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mp_so->so_usecount, mp_so->so_retaincnt),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        DTRACE_MPTCP4(dispose, struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mppcb *, mpp);

        mp_pcbdispose(mpp);
        sodealloc(mp_so);
    }

    return active;
}
/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 /* event */);

	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
		errno = mp_tp->mpt_softerror;
	}
	mp_so->so_error = errno;

	return mptcp_close(mpte, mp_tp);
}
/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct socket *mp_so = NULL;
	struct mptsub *mpts = NULL, *tmpts = NULL;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	mp_tp->mpt_state = MPTCPS_TERMINATE;

	mptcp_freeq(mp_tp);

	soisdisconnected(mp_so);

	/* Clean up all subflows */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		mptcp_subflow_disconnect(mpte, mpts);
	}

	return NULL;
}
void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * MPTCP workloop.
 */
void
mptcp_subflow_workloop(struct mptses *mpte)
{
	boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
	uint64_t mpsofilt_hint_mask;
	struct mptsub *mpts, *tmpts;
	struct socket *mp_so;

	mpte_lock_assert_held(mpte);

	if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
		mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
		return;
	}
	mpte->mpte_flags |= MPTE_IN_WORKLOOP;

	mp_so = mptetoso(mpte);

relaunch:
	mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
	mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		ev_ret_t ret;

		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

		/*
		 * If MPTCP socket is closed, disconnect all subflows.
		 * This will generate a disconnect event which will
		 * be handled during the next iteration, causing a
		 * non-zero error to be returned above.
		 */
		if (mp_so->so_flags & SOF_PCBCLEARING) {
			mptcp_subflow_disconnect(mpte, mpts);
		}

		switch (ret) {
		case MPTS_EVRET_OK:
			/* nothing to do */
			break;
		case MPTS_EVRET_DELETE:
			mptcp_subflow_soclose(mpts);
			break;
		case MPTS_EVRET_CONNECT_PENDING:
			connect_pending = TRUE;
			break;
		case MPTS_EVRET_DISCONNECT_FALLBACK:
			disconnect_fallback = TRUE;
			break;
		default:
			mptcplog((LOG_DEBUG,
			    "MPTCP Socket: %s: mptcp_subflow_events "
			    "returned invalid value: %d\n", __func__,
			    ret),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			break;
		}
		mptcp_subflow_remref(mpts);	/* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
		VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

		soevent(mp_so, mpsofilt_hint_mask);
	}

	if (!connect_pending && !disconnect_fallback) {
		goto exit;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (disconnect_fallback) {
			struct socket *so = NULL;
			struct inpcb *inp = NULL;
			struct tcpcb *tp = NULL;

			if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
				continue;
			}

			mpts->mpts_flags |= MPTSF_MP_DEGRADED;

			if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
			    MPTSF_DISCONNECTED | MPTSF_CONNECT_PENDING)) {
				continue;
			}

			so = mpts->mpts_socket;

			/*
			 * The MPTCP connection has degraded to a fallback
			 * mode, so there is no point in keeping this subflow
			 * regardless of its MPTCP-readiness state, unless it
			 * is the primary one which we use for fallback.  This
			 * assumes that the subflow used for fallback is the
			 * ACTIVE one.
			 */

			inp = sotoinpcb(so);
			tp = intotcpcb(inp);
			tp->t_mpflags &=
			    ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
			tp->t_mpflags |= TMPF_TCP_FALLBACK;

			if (mpts->mpts_flags & MPTSF_ACTIVE) {
				continue;
			}
			tp->t_mpflags |= TMPF_RESET;
			soevent(so, SO_FILT_HINT_MUSTRST);
		} else if (connect_pending) {
			/*
			 * The MPTCP connection has progressed to a state
			 * where it supports full multipath semantics; allow
			 * additional joins to be attempted for all subflows
			 * that are in the PENDING state.
			 */
			if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
				int error = mptcp_subflow_soconnectx(mpte, mpts);

				if (error) {
					mptcp_subflow_abort(mpts, error);
				}
			}
		}
	}

exit:
	if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
		goto relaunch;
	}

	mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
}
/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0) {
		mp_so->so_usecount++;
	}
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return 0;
}
/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock_assert_held(mpp);

	if (refcount != 0) {
		mp_so->so_usecount--;
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	mpp_unlock(mpp);

	return 0;
}
/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return mpp_getlock(mpp, flags);
}
/*
 * MPTCP Join support
 */

static void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0) {
		goto try_again;
	}
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}
static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = NULL;
	int found = 0;

	tp = sototcpcb(so);
	if (tp == NULL) {
		return;
	}

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
}
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand) {
				*lrand = sauth_entry->msae_laddr_rand;
			}
			if (rrand) {
				*rrand = sauth_entry->msae_raddr_rand;
			}
			break;
		}
	}
}
void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched"
				    " address ids %d %d \n", __func__, raddr_id,
				    sauth_entry->msae_raddr_id),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				mptcplog((LOG_ERR, "MPTCP Socket: "
				    "%s: dup SYN_ACK %d %d \n",
				    __func__, raddr_rand,
				    sauth_entry->msae_raddr_rand),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			return;
		}
	}
}
/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof(mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};	/* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};	/* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is 512 block length, so no need to compute hash */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
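
/*
 * Editor's sketch (hedged, excluded from the build): the construction above
 * is plain HMAC-SHA1 with a 16-byte key (key1|key2) zero-padded to the
 * 64-byte SHA-1 block and an 8-byte message (rand1|rand2), so a user-space
 * cross-check on Darwin can lean on CommonCrypto.  The in-memory byte order
 * of the keys and rands is assumed to match what the kernel passes in.
 */
#if 0
#include <CommonCrypto/CommonDigest.h>
#include <CommonCrypto/CommonHMAC.h>
#include <stdint.h>
#include <string.h>

static void
mptcp_hmac_sha1_check(uint64_t key1, uint64_t key2, uint32_t rand1,
    uint32_t rand2, unsigned char digest[CC_SHA1_DIGEST_LENGTH])
{
	unsigned char key[16], msg[8];

	/* Key = Key-A + Key-B and Msg = R-A + R-B, as in the RFC 6824 MAC */
	memcpy(key, &key1, sizeof(key1));
	memcpy(key + 8, &key2, sizeof(key2));
	memcpy(msg, &rand1, sizeof(rand1));
	memcpy(msg + 4, &rand2, sizeof(rand2));

	/* CCHmac() zero-pads a short key to the block size internally */
	CCHmac(kCCHmacAlgSHA1, key, sizeof(key), msg, sizeof(msg), digest);
}
#endif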
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t lrand, rrand;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
	    digest);
}
/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof(u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof(u_int32_t));
	return;
}
static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof(u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */

	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
	return;
}
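
/*
 * Editor's sketch (hedged, excluded from the build): per RFC 6824 the token
 * is the most-significant 32 bits and the IDSN the least-significant 64 bits
 * of SHA1(key).  A hypothetical user-space rendering of the two helpers
 * above, with CommonCrypto supplying the SHA-1:
 */
#if 0
#include <CommonCrypto/CommonDigest.h>
#include <stdint.h>
#include <string.h>

static void
mptcp_derive_token_idsn(uint64_t key, uint32_t *token, uint64_t *idsn)
{
	unsigned char digest[CC_SHA1_DIGEST_LENGTH];
	int i;

	CC_SHA1(&key, sizeof(key), digest);

	/* Most significant 32 bits of the SHA1 hash */
	memcpy(token, digest, sizeof(*token));

	/* Least significant 64 bits, bytes reversed as in mptcp_generate_idsn() */
	for (i = 0; i < 8; i++) {
		((unsigned char *)idsn)[i] = digest[19 - i];
	}
}
#endif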
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
void
mptcp_init_local_parms(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[SHA1_RESULTLEN];

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

	mptcp_generate_token(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}
int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char remote_digest[SHA1_RESULTLEN];
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
		return -1;
	}

	/* Setup local and remote tokens and Initial DSNs */
	mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;

	return 0;
}
static void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp) {
		return;
	}

	tp = intotcpcb(inp);
	if (!tp) {
		return;
	}

	if (!(tp->t_mpflags & TMPF_RESET)) {
		tp->t_mpflags |= TMPF_SEND_DFIN;
	}
}
/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL) {
		return;
	}

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	if (!m || len == 0) {
		return;
	}

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_len;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means, we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	}

	if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
}
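
/*
 * Editor's model (hedged, excluded from the build): the ack inference above,
 * replayed over a plain array instead of an mbuf chain.  `struct dsn_map' is
 * hypothetical; it stands in for the per-mbuf (mp_dsn, mp_rlen) mapping, and
 * each entry is assumed to hold exactly `len' bytes of send-buffer data.
 */
#if 0
#include <stddef.h>
#include <stdint.h>

struct dsn_map {
	uint64_t dsn;	/* MPTCP data sequence number of this mapping */
	int len;	/* bytes covered by this mapping */
};

static uint64_t
infer_data_ack(const struct dsn_map *maps, size_t n, int len)
{
	uint64_t data_ack = 0, dsn = 0;
	size_t i;

	/* Optimistically assume every fully walked mapping is acked in full */
	for (i = 0; i < n && len > 0; i++) {
		data_ack = maps[i].dsn + maps[i].len;
		dsn = maps[i].dsn;
		len -= maps[i].len;
	}

	/* A mapping is left over: only bytes up to its start are surely acked */
	if (i < n && len == 0) {
		data_ack = maps[i].dsn;
	}

	/* Acked mid-mapping: only up to that mapping's own start is acked */
	if (len < 0) {
		data_ack = dsn;
	}

	return data_ack;
}
#endif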
void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0) {
				m->m_pkthdr.mp_dsn += len;
			}
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0) {
					m->m_pkthdr.mp_rseq += len;
				}
			}
			mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
			    __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
			    m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fallback to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	uint64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;
	int off_orig = off;

	VERIFY(off >= 0);

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(m);
	VERIFY(off >= 0);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc()
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data tcp_input calls m_adj() which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed after trim, because data can be in
 * the reassembly queue for a while and the DSN option info in tp will be
 * overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap()
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		if (tp->t_rcv_map.mpt_dfin) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	} else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (th->th_flags & TH_FIN) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
	}
}
static int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0) {
		return 0;
	}

	if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		if (off && (dsn != m->m_pkthdr.mp_dsn ||
		    rseq != m->m_pkthdr.mp_rseq ||
		    dlen != m->m_pkthdr.mp_rlen)) {
			mptcplog((LOG_ERR, "%s: Received incorrect second mapping: %llu - %llu , %u - %u, %u - %u\n",
			    __func__, dsn, m->m_pkthdr.mp_dsn,
			    rseq, m->m_pkthdr.mp_rseq,
			    dlen, m->m_pkthdr.mp_rlen),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
			return -1;
		}
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else {
		if (!(mpts->mpts_flags & MPTSF_CONFIRMED)) {
			/* data arrived without an DSS option mapping */

			/* initial subflow can fallback right after SYN handshake */
			mptcp_notify_mpfail(so);
		}
	}

	mpts->mpts_flags |= MPTSF_CONFIRMED;

	return 0;
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	u_int64_t dsn;
	int off = 0;
	u_int32_t datalen;

	if (m == NULL) {
		return -1;
	}

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = dsn_fail - dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
			    dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			return 0;
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */

	mptcplog((LOG_ERR, "MPTCP Sender: "
	    "%s: %llu not found \n", __func__, dsn_fail),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
	return -1;
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * seq has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST) {
		mdss_subflow_off--;
	}

	if (off < mdss_subflow_off) {
		printf("%s off %d mdss_subflow_off %d mdss_subflow_seq %u iss %u suna %u\n", __func__,
		    off, mdss_subflow_off, mdss_subflow_seq, mpts->mpts_iss, tp->snd_una);
	}
	VERIFY(off >= mdss_subflow_off);

	mptcplog((LOG_DEBUG, "%s dlen %u off %d sub_off %d sub_seq %u iss %u suna %u\n",
	    __func__, mdss_data_len, off, mdss_subflow_off, mdss_subflow_seq,
	    mpts->mpts_iss, tp->snd_una), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	return mdss_data_len - (off - mdss_subflow_off);
}
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->t_maxseg > maxseg) {
			maxseg = tp->t_maxseg;
		}
	}

	return maxseg;
}
static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->rcv_scale < rcvscale) {
			rcvscale = tp->rcv_scale;
		}
	}

	return rcvscale;
}
/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/* Again check the limit set by the advertised
		 * window size
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}
void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and receive side is being
	 *   throttled
	 * - if there are segments in reassembly queue indicating loss,
	 *   do not need to increase recv window during recovery as more
	 *   data is not going to be sent. A duplicate ack sent during
	 *   recovery should not change the receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Can not resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
		return;
	}

	if (tcp_do_autorcvbuf == 1 &&
	    tcp_cansbgrow(sb) &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}
/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0) {
		space = 0;
	}

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space) {
		space = 0;
	} else {
		space -= pending;
	}

	return space;
}
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		return;
	}

	if (tp->t_mpflags & TMPF_MPTCP_READY) {
		return;
	}

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		return;
	}

	tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return ret;
}
/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define MPTCP_COMPUTE_LEN {                             \
	mss_lower = sizeof (struct mptcp_dss_ack_opt);  \
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)         \
	        mss_lower += 2;                         \
	else                                            \
	        /* adjust to 32-bit boundary + EOL */   \
	        mss_lower += 2;                         \
}
	if (mp_tp == NULL) {
		return 0;
	}

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return mss_lower;
}
/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	if (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}
	so_update_policy(so);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
#if INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else
#endif /* INET6 */
	if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n / 8) * sizeof(mptcp_flow_t);
		return 0;
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		mpte_lock(mpp->mpp_pcbe);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mpte_lock_assert_held(mpte);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				mpte_unlock(mpte);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			mpte_unlock(mpte);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		mpte_unlock(mpte);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error) {
				break;
			}
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return error;
}
SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp != NULL) {
		mp_tp->mpt_notsent_lowat = optval;
	} else {
		error = EINVAL;
	}

	return error;
}
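
/*
 * Editor's sketch (hedged, excluded from the build): from user space this
 * mark is presumably set through the TCP_NOTSENT_LOWAT socket option on the
 * multipath socket; that option routing lives in the options code, not in
 * this file, so treat it as an assumption.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int
set_notsent_lowat(int mp_fd, int lowat_bytes)
{
	/* Request write wakeups only once unsent data drops below the mark */
	return setsockopt(mp_fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT,
	    &lowat_bytes, sizeof(lowat_bytes));
}
#endif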
u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp != NULL) {
		return mp_tp->mpt_notsent_lowat;
	} else {
		return 0;
	}
}
int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return 0;
	}

	mpte = mptompte(mpp);
	mpte_lock_assert_held(mpte);
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %d \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		return 1;
	}

	/* When Nagle's algorithm is not disabled, it is better
	 * to wakeup the client even before there is at least one
	 * maxseg of data to write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
			    " nodelay false \n",
			    mp_tp->mpt_notsent_lowat, notsent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			return retval;
		}
	}
	return 0;
}
/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
symptoms_advisory_t mptcp_advisory;
static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
		os_log_error(mptcp_log_handle, "%s MPTCP kernel-control socket for Symptoms already open!", __func__);
	}

	mptcp_kern_skt_unit = sac->sc_unit;

	return 0;
}
static void
mptcp_allow_uuid(uuid_t uuid)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid)) {
			goto next;
		} else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid)) {
			goto next;
		}

		os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~MPTE_ACCESS_GRANTED;

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		/* Only handover-mode is purely driven by Symptom's Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER) {
			goto next;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		os_log_error(mptcp_log_handle, "%s skt_unit is still 0\n", __func__);
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED) {
		pid = mp_so->e_pid;
	} else {
		pid = mp_so->last_pid;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s Couldn't find proc for pid %u\n", __func__, pid);
		return;
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	if (mp_so->so_flags & SOF_DELEGATED) {
		uuid_copy(ask.uuid, mp_so->e_uuid);
	} else {
		uuid_copy(ask.uuid, mp_so->last_uuid);
	}

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	} else if (prio == TASK_FOREGROUND_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	} else {
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
	}

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);

	os_log_debug(mptcp_log_handle, "%s asked symptoms about pid %u, prio %u, err %d\n",
	    __func__, pid, ask.priority, err);

	proc_rele(p);
}
static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return 0;
}
static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit) {
		os_log_error(mptcp_log_handle, "%s kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit);
	}

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return EINVAL;
	}

	if (mbuf_len(m) < sizeof(*sa)) {
		os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
		    __func__, mbuf_len(m), sizeof(*sa));
		mbuf_freem(m);
		return EINVAL;
	}

	sa = mbuf_data(m);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT &&
	    sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		uint8_t old_wifi_status = mptcp_advisory.sa_wifi_status;

		mptcplog((LOG_DEBUG, "%s: wifi %d,%d\n",
		    __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

		if ((sa->sa_wifi_status &
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) !=
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) {
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
		}

		if (old_wifi_status != mptcp_advisory.sa_wifi_status) {
			mptcp_wifi_status_changed();
		}
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_NOCOMMENT) {
		mptcplog((LOG_DEBUG, "%s: NOCOMMENT wifi %d\n", __func__,
		    mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_USEAPP) {
		uuid_t uuid;
		errno_t err;

		if (mbuf_len(m) < sizeof(uuid_t) + sizeof(*sa)) {
			os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
			    __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa));
			mbuf_freem(m);
			return EINVAL;
		}

		err = mbuf_copydata(m, sizeof(*sa), sizeof(uuid_t), uuid);
		if (err) {
			os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
			mbuf_freem(m);
			return err;
		}

		mptcp_allow_uuid(uuid);
	}

	mbuf_freem(m);
	return 0;
}
void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}
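
/*
 * Editor's sketch (hedged, excluded from the build): a privileged user-space
 * agent would attach to the control socket registered above by resolving the
 * control name to an ID and connecting a PF_SYSTEM socket.  The name string
 * is whatever MPTCP_KERN_CTL_NAME expands to; CTL_FLAG_PRIVILEGED above
 * means the connect requires root.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/kern_control.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <string.h>
#include <unistd.h>

static int
connect_kern_ctl(const char *ctl_name)
{
	struct sockaddr_ctl addr;
	struct ctl_info info;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		return -1;
	}

	/* Translate the registered control name into a kernel-control ID */
	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, ctl_name, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) < 0) {
		close(fd);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sc_len = sizeof(addr);
	addr.sc_family = AF_SYSTEM;
	addr.ss_sysaddr = AF_SYS_CONTROL;
	addr.sc_id = info.ctl_id;
	addr.sc_unit = 0;	/* let the kernel assign the unit */

	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}
#endif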
/*
 * Three return-values:
 * 1  : WiFi is bad
 * 0  : WiFi is good
 * -1 : WiFi-state is unknown, use subflow-only heuristics
 */
int
mptcp_is_wifi_unusable(struct mptses *mpte)
{
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		if (mptcp_advisory.sa_wifi_status) {
			return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) ? 1 : 0;
		}

		/*
		 * If it's a first-party app and we don't have any info
		 * about the Wi-Fi state, let's be pessimistic.
		 */
		return -1;
	}

	return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) ? 1 : 0;
}
boolean_t
mptcp_subflow_is_bad(struct mptses *mpte, struct mptsub *mpts)
{
	struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
	int fail_thresh = mptcp_fail_thresh;

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		fail_thresh *= 2;
	}

	return tp->t_rxtshift >= fail_thresh &&
	       (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
}
/* If TFO data is successfully acked, it must be dropped from the mptcp so */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	/* If data was sent with SYN, rewind state */
	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* handle partial TCP ack */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			/* all data on SYN was acked */
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}
		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d TFO tcp len %d mptcp len %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid, tcp_droplen, mp_droplen),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
int
mptcp_freeq(struct mptcb *mp_tp)
{
	struct tseg_qent *q;
	int rv = 0;

	while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);
		rv = 1;
	}
	mp_tp->mpt_reassqlen = 0;
	return rv;
}
static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	event_data.value = value;

	ev_msg.dv[0].data_ptr = &event_data;
	ev_msg.dv[0].data_length = sizeof(event_data);

	return kev_post_msg(&ev_msg);
}
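
/*
 * Editor's sketch (hedged, excluded from the build): kev_post_msg() fans out
 * to PF_SYSTEM/SYSPROTO_EVENT sockets, so a hypothetical listener for the
 * cell-icon events below would install a class/subclass filter and then read
 * struct kern_event_msg records.  KEV_MPTCP_SUBCLASS is assumed visible to
 * user space here.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/kern_event.h>
#include <sys/socket.h>
#include <string.h>
#include <unistd.h>

static int
open_mptcp_event_socket(void)
{
	struct kev_request req;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
	if (fd < 0) {
		return -1;
	}

	/* Only pass KEV_NETWORK_CLASS / KEV_MPTCP_SUBCLASS events through */
	memset(&req, 0, sizeof(req));
	req.vendor_code = KEV_VENDOR_APPLE;
	req.kev_class = KEV_NETWORK_CLASS;
	req.kev_subclass = KEV_MPTCP_SUBCLASS;
	if (ioctl(fd, SIOCSKEVFILT, &req) < 0) {
		close(fd);
		return -1;
	}

	return fd;	/* read(2) now yields struct kern_event_msg records */
}
#endif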
void
mptcp_set_cellicon(struct mptses *mpte)
{
	int error;

	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	/* Remember the last time we set the cellicon (see mptcp_unset_cellicon) */
	mptcp_last_cellicon_set = tcp_now;

	/* If cellicon is already set, get out of here! */
	if (OSTestAndSet(7, &mptcp_cellicon_is_set)) {
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

	if (error) {
		mptcplog((LOG_ERR, "%s: Setting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	} else {
		mptcplog((LOG_DEBUG, "%s successfully set the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
void
mptcp_unset_cellicon(void)
{
	int error;

	/* If cellicon is already unset, get out of here! */
	if (OSTestAndClear(7, &mptcp_cellicon_is_set)) {
		return;
	}

	/*
	 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
	 * explicitly set the cellicon (see mptcp_set_cellicon()), then we unset
	 * it again.
	 */
	if (TSTMP_GT(mptcp_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE,
	    tcp_now)) {
		OSTestAndSet(7, &mptcp_cellicon_is_set);
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

	if (error) {
		mptcplog((LOG_ERR, "%s: Unsetting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	} else {
		mptcplog((LOG_DEBUG, "%s successfully unset the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL) {
		return;
	}

	so = inp->inp_socket;
	if (so == NULL) {
		return;
	}

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}
void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}