/*
 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>

#include <libkern/crypto/sha1.h>

#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>

#include <dev/random/randomdev.h>

/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain.  The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain.  It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP).  Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB.  All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones.  The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets.  Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure.  Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow.  This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock.  Incoming data on a subflow also ends up taking this single lock.  To
 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
 * of the MPTCP-socket.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector.  This process will take place once all
 * of the subflows have been destroyed.
 */

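/*
 * For orientation, an illustrative userspace sketch (not part of this file;
 * names of the local variables are hypothetical): an application reaches
 * this stack by opening a multipath socket and connecting it via
 * connectx(2); everything below this point is then driven from within the
 * kernel.
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *
 *	sa_endpoints_t eps = { 0 };
 *	eps.sae_dstaddr = (struct sockaddr *)&dst;	// destination sockaddr
 *	eps.sae_dstaddrlen = dst.sin_len;
 *
 *	sae_connid_t cid;
 *	connectx(fd, &eps, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, &cid);
 */
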
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *so, void *arg, long events);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
static int mptcp_freeq(struct mptcb *mp_tp);

/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE               = 1,    /* delete this subflow */
	MPTS_EVRET_OK                   = 2,    /* OK */
	MPTS_EVRET_CONNECT_PENDING      = 3,    /* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK  = 4,    /* abort all but preferred */
} ev_ret_t;

static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpsuberror_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, long *, long);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);

static ZONE_DECLARE(mptsub_zone, "mptsub", sizeof(struct mptsub), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(mptopt_zone, "mptopt", sizeof(struct mptopt), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(mpt_subauth_zone, "mptauth",
    sizeof(struct mptcp_subf_auth_entry), ZC_NONE);

struct mppcbinfo mtcbinfo;

SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;           /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");

static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");

static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;

static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
	long        sofilt_hint_mask;
	ev_ret_t    (*sofilt_hint_ev_hdlr)(
		struct mptses *mpte,
		struct mptsub *mpts,
		long *p_mpsofilt_hint,
		long event);
} mptsub_ev_entry_t;

/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
static symptoms_advisory_t mptcp_advisory;

uint32_t mptcp_cellicon_refcount = 0;

/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};

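/*
 * Sketch of how the table above is consumed (a simplified illustration;
 * the real dispatch lives in the subflow event-handling code further down
 * in this file, and the variable names here are hypothetical):
 *
 *	for (i = 0; i < sizeof(mpsub_ev_entry_tbl) / sizeof(mpsub_ev_entry_tbl[0]); i++) {
 *		if (events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) {
 *			ev_ret_t ret = mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(
 *			    mpte, mpts, p_mpsofilt_hint,
 *			    mpsub_ev_entry_tbl[i].sofilt_hint_mask);
 *			if (ret < MPTS_EVRET_OK)
 *				break;	// error-class result: stop this round
 *		}
 *	}
 *
 * This is why the ordering of entries matters: an earlier, more severe
 * event can pre-empt the handling of the later, less severe ones.
 */
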
os_log_t mptcp_log_handle;

/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
	struct ip6protosw *prp6;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;

	mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
	mtcbinfo.mppi_zone = zone_create("mptc", mtcbinfo.mppi_size,
	    ZC_NONE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}

int
mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, u_short ifindex, boolean_t create)
{
	int i, index = -1;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (create && stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0) {
				index = i;
			}
			continue;
		}

		if (stats[i].ifindex == ifindex) {
			index = i;
			return index;
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifindex;
	}

	return index;
}

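/*
 * Usage note (hypothetical values): with stats = { {.ifindex = 4}, {0}, ... },
 * mptcpstats_get_index_by_ifindex(stats, 4, false) returns 0 (existing slot),
 * mptcpstats_get_index_by_ifindex(stats, 7, false) returns -1 (no slot and no
 * create), and mptcpstats_get_index_by_ifindex(stats, 7, true) claims the
 * first free slot and stamps it with ifindex 7.
 */
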
static int
mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int index;

	if (ifp == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
		    sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags);
		return -1;
	}

	index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);

	if (index != -1) {
		if (stats[index].is_expensive == 0) {
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
		}
	}

	return index;
}

void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	if (index != -1) {
		mpte->mpte_itfstats[index].switches++;
	}
}

/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}

/*
 * Create an MPTCP session, called as a result of opening a MPTCP socket.
 */
int
mptcp_session_create(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof(*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mptcp_init_urgency_timer(mpte);

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	if (mptcp_alternate_port > 0 && mptcp_alternate_port < UINT16_MAX) {
		mpte->mpte_alternate_port = htons((uint16_t)mptcp_alternate_port);
	}

	mpte->mpte_last_cellicon_set = tcp_now;

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof(*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return 0;
}

struct sockaddr *
mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
{
	if (ipv6 && mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_sub_dst_v6;
	}

	if (ipv4 && mpte->mpte_sub_dst_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_sub_dst_v4;
	}

	/* The interface has neither IPv4 nor IPv6 routes. Give our best guess,
	 * meaning we prefer IPv6 over IPv4.
	 */
	if (mpte->mpte_sub_dst_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_sub_dst_v6;
	}

	if (mpte->mpte_sub_dst_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_sub_dst_v4;
	}

	/* We don't yet have a unicast IP */
	return NULL;
}

static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes);

		mycellbytes = 0;
	}

	*cellbytes = mycellbytes;
	*allbytes = myallbytes;
}

static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_aggregate_success++;
			}
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
		tcpstat.tcps_mptcp_back_to_wifi++;
	}

	if (mpte->mpte_triggered_cell) {
		tcpstat.tcps_mptcp_triggered_cell++;
	}
}

/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	VERIFY(mp_tp != NULL);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	mptcpstats_session_wrapup(mpte);
	mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
	mptcp_flush_sopts(mpte);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}
	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
}

boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
	       mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
	       !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}

static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len,
    const struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00},
	};
	const char *ptrv4 = (const char *)addrv4;
	char *ptr = (char *)addr;

	if (IN_ZERONET(ntohl(addrv4->s_addr)) ||            // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(ntohl(addrv4->s_addr)) ||           // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(ntohl(addrv4->s_addr)) ||          // 169.254.0.0/16 Link Local
	    IN_DS_LITE(ntohl(addrv4->s_addr)) ||            // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(ntohl(addrv4->s_addr)) ||          // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) {           // 255.255.255.255/32 Limited Broadcast
		return -1;
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(ntohl(addrv4->s_addr)) ||    // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
			return -1;
		}
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	return 0;
}

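/*
 * Worked example (per RFC 6052): embedding 192.0.2.33 (0xc0000221) into the
 * well-known 96-bit prefix 64:ff9b::/96 copies the four IPv4 bytes at byte
 * offset 12, yielding 64:ff9b::c000:221.  For a /64 prefix the IPv4 bytes
 * land at offset 9 instead, skipping the reserved 'u' octet at byte 8; the
 * /56, /48 and /40 cases above likewise split the four bytes around it.
 */
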
static void
mptcp_trigger_cell_bringup(struct mptses *mpte)
{
	struct socket *mp_so = mptetoso(mpte);

	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		uuid_string_t uuidstr;
		int err;

		socket_unlock(mp_so, 0);
		err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
		    TRUE);
		socket_lock(mp_so, 0);

		if (err == 0) {
			mpte->mpte_triggered_cell = 1;
		}

		uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
		os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
	} else {
		os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}

static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
	if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
		return true;
	}

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) {
		return true;
	}

	if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) {
		return true;
	}

	return false;
}

/*
 * In Handover mode, only create cell subflow if
 * - Symptoms marked WiFi as weak:
 *   Here, if we are sending data, then we can check the RTO-state. That is a
 *   stronger signal of WiFi quality than the Symptoms indicator.
 *   If however we are not sending any data, the only thing we can do is guess
 *   and thus bring up Cell.
 *
 * - Symptoms marked WiFi as unknown:
 *   In this state we don't know what the situation is and thus remain
 *   conservative, only bringing up cell if there are retransmissions going on.
 */
static boolean_t
mptcp_handover_use_cellular(struct mptses *mpte, struct tcpcb *tp)
{
	int unusable_state = mptcp_is_wifi_unusable_for_session(mpte);

	if (unusable_state == 0) {
		/* WiFi is good - don't use cell */
		return false;
	}

	if (unusable_state == -1) {
		/*
		 * We are in unknown state, only use Cell if we have confirmed
		 * that WiFi is bad through ongoing retransmissions.
		 */
		if (mptetoso(mpte)->so_snd.sb_cc != 0 && tp->t_rxtshift >= mptcp_fail_thresh * 2) {
			return true;
		}

		return false;
	}

	if (unusable_state == 1) {
		/*
		 * WiFi is confirmed to be bad from Symptoms-Framework.
		 * If we are sending data, check the RTOs.
		 * Otherwise, be pessimistic and use Cell.
		 */
		if (mptetoso(mpte)->so_snd.sb_cc != 0) {
			if (tp->t_rxtshift >= mptcp_fail_thresh * 2) {
				return true;
			}

			return false;
		}

		return true;
	}

	return false;
}

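/*
 * Decision summary for mptcp_handover_use_cellular(), derived from the
 * branches above (informal, for quick reference):
 *
 *	Wi-Fi state	send buffer	rxtshift >= thresh*2	use cell?
 *	good (0)	any		any			no
 *	unknown (-1)	non-empty	yes			yes
 *	unknown (-1)	otherwise	any			no
 *	bad (1)		non-empty	yes			yes
 *	bad (1)		non-empty	no			no
 *	bad (1)		empty		any			yes
 */
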
void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	boolean_t cellular_viable = FALSE;
	boolean_t want_cellular = TRUE;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	/* Just to see if we have an IP-address available */
	if (mptcp_get_session_dst(mpte, false, false) == NULL) {
		return;
	}

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		boolean_t need_to_ask_symptoms = FALSE, found = FALSE;
		struct mpt_itf_info *info;
		struct sockaddr_in6 nat64pre;
		struct sockaddr *dst;
		struct mptsub *mpts;
		struct ifnet *ifp;
		uint32_t ifindex;

		info = &mpte->mpte_itfinfo[i];

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u has v6 %u hasnat64 %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support,
		    info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn);

		if (info->no_mptcp_support) {
			continue;
		}

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			cellular_viable = TRUE;

			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
			    mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
				if (!mptcp_is_wifi_unusable_for_session(mpte)) {
					continue;
				}
			}
		}

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
			struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

			if (subifp == NULL) {
				continue;
			}

			/*
			 * If there is at least one functioning subflow on WiFi
			 * and we are checking for the cell interface, then
			 * we always need to ask symptoms for permission as
			 * cell is triggered even if WiFi is available.
			 */
			if (!IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    IFNET_IS_CELLULAR(ifp)) {
				need_to_ask_symptoms = TRUE;
			}

			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
				os_log(mptcp_log_handle,
				    "%s - %lx: %s: cell %u wifi-state %d flags %#x rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ? "handover" : "pure-handover",
				    IFNET_IS_CELLULAR(subifp),
				    mptcp_is_wifi_unusable_for_session(mpte),
				    mpts->mpts_flags,
				    tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
				    mptetoso(mpte)->so_snd.sb_cc,
				    ifindex, subifp->if_index,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);

				if (!IFNET_IS_CELLULAR(subifp) &&
				    !mptcp_subflow_disconnecting(mpts) &&
				    (mpts->mpts_flags & MPTSF_CONNECTED) &&
				    !mptcp_handover_use_cellular(mpte, tp)) {
					found = TRUE;

					/* We found a proper subflow on WiFi - no need for cell */
					want_cellular = FALSE;
					break;
				}
			} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
				uint64_t time_now = mach_continuous_time();

				os_log(mptcp_log_handle,
				    "%s - %lx: target-based: %llu now %llu unusable? %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target,
				    time_now, mptcp_is_wifi_unusable_for_session(mpte),
				    IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state,
				    mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state);

				if (!IFNET_IS_CELLULAR(subifp) &&
				    !mptcp_subflow_disconnecting(mpts) &&
				    (mpte->mpte_time_target == 0 ||
				    (int64_t)(mpte->mpte_time_target - time_now) > 0 ||
				    !mptcp_is_wifi_unusable_for_session(mpte))) {
					found = TRUE;

					want_cellular = FALSE;
					break;
				}
			}

			if (subifp->if_index == ifindex &&
			    !mptcp_subflow_disconnecting(mpts)) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = TRUE;
				break;
			}
		}

		if (found) {
			continue;
		}

		if (need_to_ask_symptoms &&
		    !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			return;
		}

		dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);

		if (dst->sa_family == AF_INET &&
		    !info->has_v4_conn && info->has_nat64_conn) {
			struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
			int error, j;

			bzero(&nat64pre, sizeof(struct sockaddr_in6));

			error = ifnet_get_nat64prefix(ifp, nat64prefixes);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error);
				continue;
			}

			for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
				if (nat64prefixes[j].prefix_len != 0) {
					break;
				}
			}

			VERIFY(j < NAT64_MAX_NUM_PREFIXES);

			error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
			    nat64prefixes[j].prefix_len,
			    &((struct sockaddr_in *)(void *)dst)->sin_addr);
			if (error != 0) {
				os_log_error(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
				continue;
			}

			memcpy(&nat64pre.sin6_addr,
			    &nat64prefixes[j].ipv6_prefix,
			    sizeof(nat64pre.sin6_addr));
			nat64pre.sin6_len = sizeof(struct sockaddr_in6);
			nat64pre.sin6_family = AF_INET6;
			nat64pre.sin6_port = ((struct sockaddr_in *)(void *)dst)->sin_port;
			nat64pre.sin6_flowinfo = 0;
			nat64pre.sin6_scope_id = 0;

			dst = (struct sockaddr *)&nat64pre;
		}

		if (dst->sa_family == AF_INET && !info->has_v4_conn) {
			continue;
		}
		if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
			continue;
		}

		mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
	}

	if (!cellular_viable && want_cellular) {
		/* Trigger Cell Bringup */
		mptcp_trigger_cell_bringup(mpte);
	}
}

static void
mptcp_remove_cell_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}
}

static void
mptcp_remove_wifi_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: removing wifi subflow\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}
}

static void
mptcp_pure_handover_subflows_remove(struct mptses *mpte)
{
	int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
	boolean_t found_working_wifi_subflow = false;
	boolean_t found_working_cell_subflow = false;

	struct mptsub *mpts;

	/*
	 * Look for a subflow that is on a non-cellular interface in connected
	 * state.
	 *
	 * In that case, remove all cellular subflows.
	 *
	 * If however there is no connected Wi-Fi subflow while Wi-Fi is
	 * unusable, remove the Wi-Fi subflows instead - provided a working
	 * cellular subflow exists.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED ||
		    mptcp_subflow_disconnecting(mpts)) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			found_working_cell_subflow = true;
		} else {
			os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);
			if (!mptcp_handover_use_cellular(mpte, tp)) {
				found_working_wifi_subflow = true;
			}
		}
	}

	/*
	 * Couldn't find a working Wi-Fi subflow, so don't remove those on a
	 * cellular interface.
	 */
	os_log_debug(mptcp_log_handle, "%s - %lx: Found Wi-Fi: %u Found Cellular %u",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    found_working_wifi_subflow, found_working_cell_subflow);
	if (!found_working_wifi_subflow && wifi_unusable) {
		if (found_working_cell_subflow) {
			mptcp_remove_wifi_subflows(mpte);
		}
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}

static void
mptcp_handover_subflows_remove(struct mptses *mpte)
{
	int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
	boolean_t found_working_subflow = false;
	struct mptsub *mpts;

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED) {
			continue;
		}

		os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);

		if (!mptcp_handover_use_cellular(mpte, tp)) {
			found_working_subflow = true;
			break;
		}
	}

	/*
	 * Couldn't find a working subflow, so don't remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow) {
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}

static void
mptcp_targetbased_subflows_remove(struct mptses *mpte)
{
	uint64_t time_now = mach_continuous_time();
	struct mptsub *mpts;

	if (mpte->mpte_time_target != 0 &&
	    (int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
	    mptcp_is_wifi_unusable_for_session(mpte)) {
		/* WiFi is bad and we are below the target - don't remove any subflows */
		return;
	}

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		/* We have a functioning subflow on WiFi. No need for cell! */
		if (mpts->mpts_flags & MPTSF_CONNECTED &&
		    !mptcp_subflow_disconnecting(mpts)) {
			mptcp_remove_cell_subflows(mpte);
			break;
		}
	}
}

/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	socket_lock_assert_owned(mptetoso(mpte));

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
		mptcp_pure_handover_subflows_remove(mpte);
	}

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		mptcp_handover_subflows_remove(mpte);
	}

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
		mptcp_targetbased_subflows_remove(mpte);
	}
}

static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		boolean_t found = false;
		uint32_t ifindex;
		uint32_t i;

		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
			    ifp ? ifp->if_index : -1);
			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);

			continue;
		}

		if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) {
			continue;
		}

		if (ifp) {
			ifindex = ifp->if_index;
		} else {
			ifindex = mpts->mpts_ifscope;
		}

		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
				continue;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				if (mpts->mpts_dst.sa_family == AF_INET6 &&
				    (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) {
					found = true;
					break;
				}

				if (mpts->mpts_dst.sa_family == AF_INET &&
				    mpte->mpte_itfinfo[i].has_v4_conn) {
					found = true;
					break;
				}
			}
		}

		if (!found) {
			os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    ifindex, mpts->mpts_flags);

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}

static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/*
	 * Start with clearing, because we might be processing connections
	 * while a new event comes in.
	 */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
		os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
	}

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
			continue;
		}

		socket_lock(mp_so, 1);
		VERIFY(mp_so->so_usecount > 0);

		mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

/*
 * We need this because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
 * So, we would deadlock there as we already hold the NECP-lock.
 *
 * So, let's schedule this separately. It also gives NECP the chance to make
 * progress, without having to wait for MPTCP to finish its subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		return;
	}

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz / 10);
}

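/*
 * Illustrative call flow (a simplified sketch of the deferral described
 * above): the NECP callback only marks the session and schedules the work,
 * so no NECP lock is held anymore by the time the subflows are actually
 * created or torn down on the timeout thread:
 *
 *	mptcp_subflow_necp_cb()                  // NECP locks held
 *	    -> mptcp_sched_create_subflows()     // set flag, bump so_usecount
 *	        -> timeout(mptcp_create_subflows, NULL, hz / 10)
 *	...
 *	mptcp_create_subflows()                  // NECP locks NOT held
 *	    -> mptcp_check_subflows_and_add()
 *	    -> mptcp_remove_subflows()
 */
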
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(zalloc_flags_t how)
{
	return zalloc_flags(mptopt_zone, how | Z_ZERO);
}

/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}

/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}

/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}

/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	socket_lock_assert_owned(mptetoso(mpte));

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name) {
			break;
		}
	}
	return mpo;
}

/*
 * Allocate a MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	return zalloc_flags(mptsub_zone, Z_WAITOK | Z_ZERO);
}

/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released.  This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}

static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0) {
		return;
	}

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}

static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP-connection.
	 * Locking,... happens now at the MPTCP-layer
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket.  From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;

	mptcp_subflow_addref(mpts); /* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts); /* for subflow socket */
}

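/*
 * Reference accounting after a successful attach (informal summary): the
 * mptsub now holds two references - one for its membership in the subflow
 * list, one for the socket back-pointer - and the MPTCP socket's
 * so_usecount was bumped once for this subflow.  mptcp_subflow_del() is
 * the only sanctioned way to undo all of this.
 */
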
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused uint32_t interface_index,
    uint32_t necp_flags, bool *viable)
{
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
		return;
	}

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;
	}

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		goto out;
	}

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
	    mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER ||
	    mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
	    viable != NULL) {
		*viable = 1;
	}

out:
	socket_unlock(so, 1);
}

/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct socket **so)
{
	lck_mtx_t *subflow_mtx;
	struct mptopt smpo, *mpo, *tmpo;
	struct proc *p;
	struct socket *mp_so;
	int error;

	*so = NULL;

	mp_so = mptetoso(mpte);

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		mptcp_subflow_free(mpts);
		return ESRCH;
	}

	/*
	 * Create the subflow socket (multipath subflow, non-blocking.)
	 *
	 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
	 * socket; it will be cleared when the socket is peeled off or closed.
	 * It also indicates to the underlying TCP to handle MPTCP options.
	 * A multipath subflow socket implies SS_NOFDREF state.
	 */

	/*
	 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
	 * the ipi-lock. We cannot hold the socket-lock at that point.
	 */
	socket_unlock(mp_so, 0);
	error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
	    SOCF_MPTCP, PROC_NULL);
	socket_lock(mp_so, 0);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

		proc_rele(p);

		mptcp_subflow_free(mpts);
		return error;
	}

	/*
	 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
	 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
	 * Which is why we also need to get the lock with pr_getlock, as after
	 * setting the flag, socket_unlock will work on the MPTCP-level lock.
	 */
	subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
	lck_mtx_lock(subflow_mtx);

	/*
	 * Must be the first thing we do, to make sure all pointers for this
	 * subflow are set.
	 */
	mptcp_subflow_attach(mpte, mpts, *so);

	/*
	 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file desciptor associated by
	 * default.
	 */
	(*so)->so_state |= SS_NOFDREF;

	lck_mtx_unlock(subflow_mtx);

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	/* Inherit preconnect and TFO data flags */
	if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
		(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
	}
	if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
	}
	if (mp_so->so_flags1 & SOF1_DATA_AUTHENTICATED) {
		(*so)->so_flags1 |= SOF1_DATA_AUTHENTICATED;
	}

	/* Inherit uuid and create the related flow. */
	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;

		/*
		 * A note on the unlock: With MPTCP, we do multiple times a
		 * necp_client_register_socket_flow. This is problematic,
		 * because now the lock-ordering guarantee (first necp-locks,
		 * then socket-locks) is no more respected. So, we need to
		 * unlock here.
		 */
		socket_unlock(mp_so, 0);
		error = necp_client_register_socket_flow(mp_so->last_pid,
		    mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
		socket_lock(mp_so, 0);

		if (error) {
			os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

			goto out_err;
		}

		/* Possible state-change during the unlock above */
		if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
		    (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
			os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mp_tp->mpt_state, mp_tp->mpt_flags);

			error = EINVAL;
			goto out_err;
		}

		uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
	}

	/* Needs to happen prior to the delegation! */
	(*so)->last_pid = mp_so->last_pid;

	if (mp_so->so_flags & SOF_DELEGATED) {
		if (mpte->mpte_epid) {
			error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
				goto out_err;
			}
		}
		if (!uuid_is_null(mpte->mpte_euuid)) {
			error = so_set_effective_uuid(*so, mpte->mpte_euuid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
				goto out_err;
			}
		}
	}

	/* inherit the other socket options */
	bzero(&smpo, sizeof(smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* disable SIGPIPE */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	/* find out if the subflow's source address goes away */
	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
		/*
		 * On secondary subflows we might need to set the cell-fallback
		 * flag (see conditions in mptcp_subflow_sosetopt).
		 */
		smpo.mpo_level = SOL_SOCKET;
		smpo.mpo_name = SO_MARK_CELLFALLBACK;
		smpo.mpo_intval = 1;
		if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
			goto out_err;
		}
	}

	/* replay setsockopt(2) on the subflow sockets for eligible options */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
			continue;
		}

		/*
		 * Skip those that are handled internally; these options
		 * should not have been recorded and marked with the
		 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
		 */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE)) {
			continue;
		}

		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
			os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
			    mpo->mpo_intval);
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
			continue;
		}
	}

	/*
	 * We need to receive everything that the subflow socket has,
	 * so use a customized socket receive function.  We will undo
	 * this when the socket is peeled off or closed.
	 */
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
	    int, dom, int, error);

	return 0;

out_err:
	mptcp_subflow_abort(mpts, error);

	proc_rele(p);

	return error;
}

/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED) {
		return;
	}

	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt == 0) {
		soclose_locked(so);

		return;
	} else {
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}

	return;
}

/*
 * Connect an MPTCP subflow socket.
 *
 * Note that in the pending connect case, the subflow socket may have been
 * bound to an interface and/or a source IP address which may no longer be
 * around by the time this routine is called; in that case the connect attempt
 * will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
	char dbuf[MAX_IPv6_STR_LEN];
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	struct sockaddr *dst;
	struct proc *p;
	int af, error, dport;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	af = mpts->mpts_dst.sa_family;
	dst = &mpts->mpts_dst;

	VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
	VERIFY(mpts->mpts_socket != NULL);
	VERIFY(af == AF_INET || af == AF_INET6);

	if (af == AF_INET) {
		inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN(dst)->sin_port);
	} else {
		inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN6(dst)->sin6_port);
	}

	os_log(mptcp_log_handle,
	    "%s - %lx: ifindex %u dst %s:%d pended %u\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		return ESRCH;
	}

	mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

	mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);

	/* connect the subflow socket */
	error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
	    p, mpts->mpts_ifscope,
	    mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);

	mpts->mpts_iss = sototcpcb(so)->iss;

	/* See tcp_connect_complete */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
	    (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
	}

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
	    struct mptsub *, mpts, int, error);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope);
	}

	return error;
}


static int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen, uint8_t dfin)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0) {
		return 0;
	}

	if (!(m->m_flags & M_PKTHDR)) {
		return 0;
	}

	if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
		if (off && (dsn != m->m_pkthdr.mp_dsn ||
		    rseq != m->m_pkthdr.mp_rseq ||
		    dlen != m->m_pkthdr.mp_rlen ||
		    dfin != !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN))) {
			os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: DSN: %u - %u , SSN: %u - %u, DLEN: %u - %u, DFIN: %u - %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
			    (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
			    rseq, m->m_pkthdr.mp_rseq,
			    dlen, m->m_pkthdr.mp_rlen,
			    dfin, !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN));

			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			return -1;
		}
	}

	/* If mbuf is beyond right edge of the mapping, we need to split */
	if (m_pktlen(m) > dlen - dfin - off) {
		struct mbuf *new = m_split(m, dlen - dfin - off, M_DONTWAIT);
		if (new == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_split failed dlen %u dfin %u off %d pktlen %d, killing subflow %d",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
			    dlen, dfin, off, m_pktlen(m),
			    mpts->mpts_connid);

			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			return -1;
		}

		m->m_next = new;
		sballoc(&so->so_rcv, new);
		/* Undo, as sballoc will add to it as well */
		so->so_rcv.sb_cc -= new->m_len;

		if (so->so_rcv.sb_mbtail == m) {
			so->so_rcv.sb_mbtail = new;
		}
	}

	m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
	m->m_pkthdr.mp_dsn = dsn + off;
	m->m_pkthdr.mp_rseq = rseq + off;

	VERIFY(m_pktlen(m) < UINT16_MAX);
	m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);

	/* Only put the DATA_FIN-flag on the last mbuf of this mapping */
	if (dfin) {
		if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen < dsn + dlen - dfin) {
			m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
		} else {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
	}

	mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;

	return 0;
}
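
/*
 * Editor's worked example for mptcp_adj_rmap() above (values hypothetical):
 * for a DSS mapping with dlen = 1000, a DATA_FIN (dfin = 1) and off = 400
 * bytes already consumed, the current mbuf may keep at most
 * dlen - dfin - off = 599 payload bytes.  A larger m_pktlen(m) straddles
 * the next mapping, so the excess goes into the mbuf returned by
 * m_split() and this mbuf is re-stamped with mp_dsn = dsn + off and
 * mp_rseq = rseq + off.
 */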

/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	int flags, error = 0;
	struct mbuf *m, **mp = mp0;

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);
		/* NOTREACHED */
	}
#endif
	/*
	 * We return all that is there in the subflow's socket receive buffer
	 * to the MPTCP layer, so we require that the caller passes in the
	 * expected parameters.
	 */
	if (mp == NULL || controlp != NULL) {
		return EINVAL;
	}

	*mp = NULL;

	if (psa != NULL) {
		*psa = NULL;
	}

	if (flagsp != NULL) {
		flags = *flagsp & ~MSG_EOR;
	} else {
		flags = 0;
	}

	if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
		return EOPNOTSUPP;
	}

	flags |= (MSG_DONTWAIT | MSG_NBIO);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);
		}
		return error;
	}

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller. This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed. The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		return 0;
	}

	/*
	 * For consistency with soreceive() semantics, we need to obey
	 * SB_LOCK in case some other code path has locked the buffer.
	 */
	error = sblock(&so->so_rcv, 0);
	if (error != 0) {
		return error;
	}

	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error != 0) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}

		if (so->so_state & SS_CANTRCVMORE) {
			goto release;
		}

		if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
			error = ENOTCONN;
			goto release;
		}

		/*
		 * MSG_DONTWAIT is implicitly defined and this routine will
		 * never block, so return EWOULDBLOCK when there is nothing.
		 */
		error = EWOULDBLOCK;
		goto release;
	}

	mptcp_update_last_owner(so, mp_so);

	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

	while (m != NULL) {
		int dlen = 0, error_out = 0, off = 0;
		uint8_t dfin = 0;
		struct mbuf *start = m;
		uint64_t dsn;
		uint32_t sseq;
		uint16_t orig_dlen;
		uint16_t csum;

		VERIFY(m->m_nextpkt == NULL);

		if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
fallback:
			/* Just move mbuf to MPTCP-level */

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (m != NULL) {
				so->so_rcv.sb_lastrecord = m;
			} else {
				SB_EMPTY_FIXUP(&so->so_rcv);
			}

			continue;
		} else if (!(m->m_flags & M_PKTHDR) || !(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			struct mptsub *mpts = sototcpcb(so)->t_mpsub;
			boolean_t found_mapping = false;
			int parsed_length = 0;
			struct mbuf *m_iter;

			/*
			 * No MPTCP-option in the header. Either fallback or
			 * wait for additional mappings.
			 */
			if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) {
				/* data arrived without a DSS option mapping */

				/* initial subflow can fallback right after SYN handshake */
				if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
					mptcp_notify_mpfail(so);

					goto fallback;
				} else {
					os_log_error(mptcp_log_handle, "%s - %lx: No DSS on secondary subflow. Killing %d\n",
					    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
					    mpts->mpts_connid);
					soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

					error = EIO;
					*mp0 = NULL;
					goto release;
				}
			}

			/* Thus, let's look for an mbuf with the mapping */
			m_iter = m->m_next;
			parsed_length = m->m_len;
			while (m_iter != NULL && parsed_length < UINT16_MAX) {
				if (!(m_iter->m_flags & M_PKTHDR) || !(m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
					parsed_length += m_iter->m_len;
					m_iter = m_iter->m_next;
					continue;
				}

				found_mapping = true;

				/* Found an mbuf with a DSS-mapping */
				orig_dlen = dlen = m_iter->m_pkthdr.mp_rlen;
				dsn = m_iter->m_pkthdr.mp_dsn;
				sseq = m_iter->m_pkthdr.mp_rseq;
				csum = m_iter->m_pkthdr.mp_csum;

				if (m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
					dfin = 1;
				}

				break;
			}

			if (!found_mapping && parsed_length < UINT16_MAX) {
				/* Mapping not yet present, we can wait! */
				if (*mp0 == NULL) {
					error = EWOULDBLOCK;
				}
				goto release;
			} else if (!found_mapping && parsed_length >= UINT16_MAX) {
				os_log_error(mptcp_log_handle, "%s - %lx: Received more than 64KB without DSS mapping. Killing %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mpts->mpts_connid);
				/* Received 64KB without DSS-mapping. We should kill the subflow */
				soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

				error = EIO;
				*mp0 = NULL;
				goto release;
			}
		} else {
			orig_dlen = dlen = m->m_pkthdr.mp_rlen;
			dsn = m->m_pkthdr.mp_dsn;
			sseq = m->m_pkthdr.mp_rseq;
			csum = m->m_pkthdr.mp_csum;

			if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
				dfin = 1;
			}
		}

		/*
		 * Check if the full mapping is now present
		 */
		if ((int)so->so_rcv.sb_cc < dlen) {
			if (*mp0 == NULL) {
				error = EWOULDBLOCK;
			}
			goto release;
		}

		/* Now, get the full mapping */
		while (dlen > 0) {
			if (mptcp_adj_rmap(so, m, off, dsn, sseq, orig_dlen, dfin)) {
				error_out = 1;
				error = EIO;
				dlen = 0;
				*mp0 = NULL;
				break;
			}

			dlen -= m->m_len;
			off += m->m_len;

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}
		}

		VERIFY(dlen == 0 || m);

		if (m != NULL) {
			so->so_rcv.sb_lastrecord = m;
		} else {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}

		if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
			error = EIO;
			*mp0 = NULL;
			goto release;
		}

		if (error_out) {
			goto release;
		}
	}

	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");

	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);

	if (flagsp != NULL) {
		*flagsp |= flags;
	}

release:
	sbunlock(&so->so_rcv, TRUE);

	return error;
}
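
/*
 * Editor's sketch (assumed caller pattern, not part of the original
 * source): because mptcp_subflow_soreceive() above reports an incomplete
 * DSS mapping as EWOULDBLOCK, a caller that drains the subflow treats
 * that value as "retry on the next read event" rather than as a failure:
 *
 *	error = sock_receive_internal(so, NULL, &m, 0, NULL);
 *	if (error != 0 && error != EWOULDBLOCK)
 *		handle_hard_error(so);	// hypothetical helper
 */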

/*
 * MPTCP subflow socket send routine, derived from sosend().
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
#pragma unused(flags)
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	boolean_t en_tracing = FALSE, proc_held = FALSE;
	struct proc *p = current_proc();
	int en_tracing_val;
	int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
	int error;

	VERIFY(control == NULL);
	VERIFY(addr == NULL);
	VERIFY(uio == NULL);

	VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);

	VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
	VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);

	/*
	 * trace if tracing & network (vs. unix) sockets & and
	 * non-loopback
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			en_tracing_val = top->m_pkthdr.len;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    (unsigned long)VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)en_tracing_val);
		}
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);

	error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked);
	if (error) {
		goto out;
	}

	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
	top = NULL;

out:
	if (top != NULL) {
		m_freem(top);
	}

	if (proc_held) {
		proc_rele(p);
	}

	soclearfastopen(so);

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    (unsigned long)VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)en_tracing_val);
	}

	return error;
}

/*
 * Establish an initial MPTCP connection (if first subflow and not yet
 * connected), or add a subflow to an existing MPTCP connection.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
    struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
{
	struct socket *mp_so, *so = NULL;
	struct mptcb *mp_tp;
	struct mptsub *mpts = NULL;
	int af, error = 0;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mp_so);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		/* If the remote end sends Data FIN, refuse subflow adds */
		os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state);
		error = ENOTCONN;
		goto out_err;
	}

	if (mpte->mpte_numflows > MPTCP_MAX_NUM_SUBFLOWS) {
		error = EOVERFLOW;
		goto out_err;
	}

	mpts = mptcp_subflow_alloc();
	if (mpts == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		error = ENOMEM;
		goto out_err;
	}

	if (src != NULL) {
		if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto out_err;
		}

		if (src->sa_family == AF_INET &&
		    src->sa_len != sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out_err;
		}

		if (src->sa_family == AF_INET6 &&
		    src->sa_len != sizeof(struct sockaddr_in6)) {
			error = EINVAL;
			goto out_err;
		}

		MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
		    M_WAITOK | M_ZERO);
		if (mpts->mpts_src == NULL) {
			error = ENOMEM;
			goto out_err;
		}
		bcopy(src, mpts->mpts_src, src->sa_len);
	}

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out_err;
	}

	if (dst->sa_family == AF_INET &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
		error = EINVAL;
		goto out_err;
	}

	if (dst->sa_family == AF_INET6 &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
		error = EINVAL;
		goto out_err;
	}

	memcpy(&mpts->mpts_u_dst, dst, dst->sa_len);

	af = mpts->mpts_dst.sa_family;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index)) {
		ifnet_head_done();
		error = ENXIO;
		goto out_err;
	}
	ifnet_head_done();

	mpts->mpts_ifscope = ifscope;

	/* create the subflow socket */
	if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
		/*
		 * Returning (error) and not cleaning up, because up to here
		 * all we did is creating mpts.
		 *
		 * And the contract is that the call to mptcp_subflow_socreate,
		 * moves ownership of mpts to mptcp_subflow_socreate.
		 */
		return error;
	}

	/*
	 * We may be called from within the kernel. Still need to account this
	 * one to the real app.
	 */
	mptcp_update_last_owner(mpts->mpts_socket, mp_so);

	/*
	 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
	 * -1 (SAE_CONNID_ALL).
	 */
	mpte->mpte_connid_last++;
	if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
	    mpte->mpte_connid_last == SAE_CONNID_ANY) {
		mpte->mpte_connid_last++;
	}

	mpts->mpts_connid = mpte->mpte_connid_last;

	mpts->mpts_rel_seq = 1;

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	/* register for subflow socket read/write events */
	sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);

	/* Register for subflow socket control events */
	sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
	    SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
	    SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
	    SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
	    SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
	    SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR);

	/* sanity check */
	VERIFY(!(mpts->mpts_flags &
	    (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));

	/*
	 * Indicate to the TCP subflow whether or not it should establish
	 * the initial MPTCP connection, or join an existing one. Fill
	 * in the connection request structure with additional info needed
	 * by the underlying TCP (to be used in the TCP options, etc.)
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
		mpts->mpts_flags |= MPTSF_INITIAL_SUB;

		if (mp_tp->mpt_state == MPTCPS_CLOSED) {
			mptcp_init_local_parms(mpte);
		}
		soisconnecting(mp_so);

		/* If fastopen is requested, set state in mpts */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			mpts->mpts_flags |= MPTSF_TFO_REQD;
		}
	} else {
		if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) {
			mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
		}
	}

	mpts->mpts_flags |= MPTSF_CONNECTING;

	/* connect right away if first attempt, or if join can be done now */
	if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) {
		error = mptcp_subflow_soconnectx(mpte, mpts);
		if (error) {
			goto out_err_close;
		}
	}

	if (pcid) {
		*pcid = mpts->mpts_connid;
	}

	return 0;

out_err_close:
	mptcp_subflow_abort(mpts, error);

	return error;

out_err:
	if (mpts) {
		mptcp_subflow_free(mpts);
	}

	return error;
}
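
/*
 * Editor's note (illustrative): sae_connid_t reserves two wildcard values,
 * which is why the increment in mptcp_subflow_add() above skips them:
 *
 *	SAE_CONNID_ANY (0)   - "any connection" wildcard
 *	SAE_CONNID_ALL (-1)  - "all connections" wildcard
 *
 * Every subflow therefore gets a connection id that can be passed back
 * unambiguously through the connectx()/disconnectx() interfaces.
 */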

static void
mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	int index = mptcpstats_get_index(stats, mpts);

	if (index != -1) {
		struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

		stats[index].mpis_txbytes += inp->inp_stat->txbytes;
		stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;

		stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes;
		stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes;

		stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes;
		stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes;

		stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes;
		stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes;
	}
}
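
/*
 * Editor's note (illustrative): the four inpcb counter sets accumulated
 * above split one subflow's traffic by link type - inp_stat (total),
 * inp_wstat (Wi-Fi), inp_Wstat (wired) and inp_cstat (cellular).  A
 * subflow that moved 1000 bytes over cellular thus contributes to both
 * mpis_txbytes/mpis_rxbytes and the mpis_cell_* counters of its
 * interface's row in the stats table.
 */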

/*
 * Delete/remove a subflow from an MPTCP. The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);
	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
	VERIFY(mpte->mpte_numflows != 0);
	VERIFY(mp_so->so_usecount > 0);

	mptcpstats_update(mpte->mpte_itfstats, mpts);

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
	mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

	atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows--;
	if (mpte->mpte_active_sub == mpts) {
		mpte->mpte_active_sub = NULL;
	}

	/*
	 * Drop references held by this subflow socket; there
	 * will be no further upcalls made from this point.
	 */
	sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
	sock_catchevents_locked(so, NULL, NULL, 0);

	mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

	mp_so->so_usecount--;           /* for subflow socket */
	mpts->mpts_mpte = NULL;
	mpts->mpts_socket = NULL;

	mptcp_subflow_remref(mpts);     /* for MPTCP subflow list */
	mptcp_subflow_remref(mpts);     /* for subflow socket */

	so->so_flags &= ~SOF_MP_SUBFLOW;
	tp->t_mptcb = NULL;
	tp->t_mpsub = NULL;
}

void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	int send_dfin = 0;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}
		soshutdownlock(so, SHUT_WR);
	}
}

static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
		return;
	}

	mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	if (tp->t_state != TCPS_CLOSED) {
		tcp_drop(tp, error);
	}

	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}

/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so, *mp_so;
	struct mptcb *mp_tp;
	int send_dfin = 0;

	so = mpts->mpts_socket;
	mp_tp = mpte->mpte_mptcb;
	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
		return;
	}

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpts->mpts_flags |= MPTSF_DISCONNECTING;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (mp_so->so_flags & SOF_DEFUNCT) {
		errno_t ret;

		ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
		if (ret == 0) {
			ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

			if (ret != 0) {
				os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
			}
		} else {
			os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
		}
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}

		(void) soshutdownlock(so, SHUT_RD);
		(void) soshutdownlock(so, SHUT_WR);
		(void) sodisconnectlocked(so);
	}

	/*
	 * Generate a disconnect event for this subflow socket, in case
	 * the lower layer doesn't do it; this is needed because the
	 * subflow socket deletion relies on it.
	 */
	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}

/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m = NULL;
	struct socket *so;
	int error, wakeup = 0;

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

	DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
	    struct mptsub *, mpts);

	if (!(mpts->mpts_flags & MPTSF_CONNECTED)) {
		goto out;
	}

	so = mpts->mpts_socket;

	error = sock_receive_internal(so, NULL, &m, 0, NULL);
	if (error != 0 && error != EWOULDBLOCK) {
		os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error);
		if (error == ENODATA) {
			/*
			 * Don't ignore ENODATA so as to discover
			 * nasty middleboxes.
			 */
			mp_so->so_error = ENODATA;

			wakeup = 1;
			goto out;
		}
	} else if (error == 0) {
		mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	/* In fallback, make sure to accept data on all but one subflow */
	if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mpts->mpts_flags & MPTSF_ACTIVE)) {
		mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
		    __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
		m_freem(m);
		goto out;
	}

	if (m != NULL) {
		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		mptcp_input(mpte, m);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
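
/*
 * Editor's worked example (tick values hypothetical): the cellicon logic
 * above keeps the indicator up for MPTCP_CELLICON_TOGGLE_RATE ticks after
 * the last explicit set.  With mpte_last_cellicon_set = 1000, the Wi-Fi
 * path only calls mptcp_unset_cellicon() once
 * TSTMP_LT(1000 + MPTCP_CELLICON_TOGGLE_RATE, tcp_now) holds, i.e. after
 * the hold time has fully elapsed.
 */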

void
mptcp_handle_input(struct socket *so)
{
	struct mptsub *mpts, *tmpts;
	struct mptses *mpte;

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = sototcpcb(so)->t_mpsub;
	mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
		}
		return;
	}

	mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE;
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		mptcp_subflow_input(mpte, mpts);

		mptcp_subflow_remref(mpts);             /* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
}

/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
		}
		return;
	}

	mptcp_output(mpte);
}

static boolean_t
mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
{
	struct mbuf *so_m = so->so_snd.sb_mb;
	uint64_t dsn = m->m_pkthdr.mp_dsn;

	while (so_m) {
		VERIFY(so_m->m_flags & M_PKTHDR);
		VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		/* Part of the segment is covered, don't reinject here */
		if (so_m->m_pkthdr.mp_dsn <= dsn &&
		    so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) {
			return TRUE;
		}

		so_m = so_m->m_next;
	}

	return FALSE;
}
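
/*
 * Editor's worked example for mptcp_search_seq_in_sub() above (values
 * hypothetical): a send-buffer mbuf with mp_dsn = 100 and mp_rlen = 50
 * covers the half-open interval [100, 150).  Reinjection is suppressed
 * for dsn = 100 or dsn = 149, but not for dsn = 150.
 */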

/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
	struct socket *mp_so, *so;
	struct tcpcb *tp;
	uint64_t mpt_dsn = 0, off = 0;
	int sb_cc = 0, error = 0, wakeup = 0;
	uint16_t dss_csum = 0;
	uint16_t tot_sent = 0;
	boolean_t reinjected = FALSE;

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;
	tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

	VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
	VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
	    (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
	    (mpts->mpts_flags & MPTSF_TFO_REQD));
	VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

	mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
	    __func__, mpts->mpts_flags, mpte->mpte_flags,
	    mptcp_subflow_cwnd_space(so)),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
	    struct mptsub *, mpts);

	/* Remove Addr Option is not sent reliably as per I-D */
	if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
		tp->t_rem_aid = mpte->mpte_lost_aid;
		tp->t_mpflags |= TMPF_SND_REM_ADDR;
		mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
	}

	/*
	 * The mbuf chains containing the metadata (as well as pointing to
	 * the user data sitting at the MPTCP output queue) would then be
	 * sent down to the subflow socket.
	 *
	 * Some notes on data sequencing:
	 *
	 *   a. Each mbuf must be a M_PKTHDR.
	 *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
	 *	in the mbuf pkthdr structure.
	 *   c. Each mbuf containing the MPTCP metadata must have its
	 *	pkt_flags marked with the PKTF_MPTCP flag.
	 */

	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
		sb_mb = mp_so->so_snd.sb_mb;
	}

	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
		    (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1);

		/* Fix it to prevent looping */
		if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		goto out;
	}

	VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

	if (sb_mb->m_pkthdr.mp_rlen == 0 &&
	    !(so->so_state & SS_ISCONNECTED) &&
	    (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		tp->t_mpflags |= TMPF_TFO_REQUEST;
		goto zero_len_write;
	}

	mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

	/* First, drop acknowledged data */
	if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
		os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier "
		    "dsn %u suna %u reinject? %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn,
		    (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq);
		if (mpte->mpte_reinjectq) {
			mptcp_clean_reinjectq(mpte);
		} else {
			uint64_t len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;
		}
	}

	/* Check again because of above sbdrop */
	if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is empty\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	/*
	 * In degraded mode, we don't receive data acks, so force free
	 * mbufs less than snd_nxt
	 */
	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
	    mp_so->so_snd.sb_mb) {
		mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
		if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
			uint64_t len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;

			os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna);
		}
	}

	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
		mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
		so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
	}

	/*
	 * Adjust the top level notion of next byte used for retransmissions
	 * and sending FINs.
	 */
	if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
		mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
	}

	/* Now determine the offset from which to start transmitting data */
	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
dont_reinject:
		sb_mb = mp_so->so_snd.sb_mb;
	}
	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	if (sb_mb == mpte->mpte_reinjectq) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;

		if (mptcp_search_seq_in_sub(sb_mb, so)) {
			if (mptcp_can_send_more(mp_tp, TRUE)) {
				goto dont_reinject;
			}

			error = ECANCELED;
			goto out;
		}

		reinjected = TRUE;
	} else if (flags & MPTCP_SUBOUT_PROBING) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;
	} else {
		sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

		/*
		 * With TFO, there might be no data at all, thus still go into this
		 * code-path here.
		 */
		if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
		    MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
			off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
			sb_cc -= off;
		} else {
			os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt,
			    (uint32_t)mp_tp->mpt_sndmax);

			goto out;
		}
	}

	sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
	if (sb_cc <= 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
		    (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
		    mptcp_subflow_cwnd_space(so));
	}

	sb_cc = min(sb_cc, UINT16_MAX);

	/*
	 * Create a DSN mapping for the data we are about to send. It all
	 * has the same mapping.
	 */
	if (reinjected) {
		mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
	} else {
		mpt_dsn = mp_tp->mpt_snduna + off;
	}

	mpt_mbuf = sb_mb;
	while (mpt_mbuf && reinjected == FALSE &&
	    (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
	    mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
		off -= mpt_mbuf->m_pkthdr.mp_rlen;
		mpt_mbuf = mpt_mbuf->m_next;
	}
	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
		    __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
		    mpts->mpts_probecnt),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

	head = tail = NULL;

	while (tot_sent < sb_cc) {
		int32_t mlen;

		mlen = mpt_mbuf->m_len;
		mlen -= off;
		mlen = MIN(mlen, sb_cc - tot_sent);

		if (mlen < 0) {
			os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mlen, mpt_mbuf->m_pkthdr.mp_rlen,
			    (uint32_t)off, sb_cc, tot_sent);
			error = EINVAL;
			break;
		}

		if (mlen == 0) {
			break;
		}

		m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
		    M_COPYM_MUST_COPY_HDR);
		if (m == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
			    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			error = ENOBUFS;
			break;
		}

		/* Create a DSN mapping for the data (m_copym does it) */
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_next == NULL);

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
		m->m_pkthdr.mp_dsn = mpt_dsn;
		m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
		m->m_pkthdr.len = mlen;

		if (head == NULL) {
			head = tail = m;
		} else {
			tail->m_next = m;
			tail = m;
		}

		tot_sent += mlen;
		off = 0;
		mpt_mbuf = mpt_mbuf->m_next;
	}

	if (reinjected) {
		if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
			struct mbuf *n = sb_mb;

			while (n) {
				n->m_pkthdr.mp_dsn += sb_cc;
				n->m_pkthdr.mp_rlen -= sb_cc;
				n = n->m_next;
			}
			m_adj(sb_mb, sb_cc);
		} else {
			mpte->mpte_reinjectq = sb_mb->m_nextpkt;
			m_freem(sb_mb);
		}
	}

	mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
	    __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
	    tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
		dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
		    tot_sent);
	}

	/* Now, let's update rel-seq and the data-level length */
	mpts->mpts_rel_seq += tot_sent;
	m = head;
	while (m) {
		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
			m->m_pkthdr.mp_csum = dss_csum;
		}
		m->m_pkthdr.mp_rlen = tot_sent;
		m = m->m_next;
	}

	if (head != NULL) {
		if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
		    (tp->t_tfo_stats == 0)) {
			tp->t_mpflags |= TMPF_TFO_REQUEST;
		}

		error = sock_sendmbuf(so, NULL, head, 0, NULL);

		DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
		    struct sockbuf *, &so->so_rcv,
		    struct sockbuf *, &so->so_snd,
		    struct mptses *, mpte, struct mptsub *, mpts,
		    size_t, tot_sent);
	}

done_sending:
	if (error == 0 ||
	    (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
		uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

		if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
			tcpstat.tcps_mp_num_probes++;
			if ((uint32_t)tot_sent < mpts->mpts_maxseg) {
				mpts->mpts_probecnt += 1;
			} else {
				mpts->mpts_probecnt +=
				    tot_sent / mpts->mpts_maxseg;
			}
		}

		if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
			if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
			    MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) {
				mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
			}
			mp_tp->mpt_sndnxt = new_sndnxt;
		}

		mptcp_cancel_timer(mp_tp, MPTT_REXMT);

		/* Must be here as mptcp_can_send_more() checks for this */
		soclearfastopen(mp_so);

		if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
		    (mpts->mpts_probesoon != 0)) {
			mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
			    __func__, mpts->mpts_connid,
			    !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
			    tot_sent, (int) sb_cc, mpts->mpts_probecnt,
			    (tcp_now - mpts->mpts_probesoon)),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		}

		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		/*
		 * Don't propagate EWOULDBLOCK - it's already taken care of
		 * in mptcp_usr_send for TFO.
		 */
		error = 0;
	} else {
		/* We need to revert our change to mpts_rel_seq */
		mpts->mpts_rel_seq -= tot_sent;

		os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
	return error;

zero_len_write:
	/* Opting to call pru_send as no mbuf at subflow level */
	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
	    NULL, current_proc());

	goto done_sending;
}
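
/*
 * Editor's sketch (values hypothetical): the minimal stamping that the
 * "notes on data sequencing" in mptcp_subflow_output() above require for
 * every mbuf handed to a subflow socket:
 *
 *	m->m_pkthdr.pkt_flags |= PKTF_MPTCP;		// rule (c)
 *	m->m_pkthdr.mp_dsn  = mpt_dsn;			// 64-bit data seq
 *	m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;	// subflow-relative seq
 *	m->m_pkthdr.mp_rlen = tot_sent;			// mapping length, rule (b)
 *
 * All mbufs of one transmit burst share the same mapping, as the loop
 * above enforces.
 */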

static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
	struct mbuf *n, *prev = NULL;

	mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
	    __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
	    m->m_pkthdr.mp_rseq),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	n = mpte->mpte_reinjectq;

	/* First, look for an mbuf n, whose data-sequence-number is bigger or
	 * equal than m's sequence number.
	 */
	while (n) {
		if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) {
			break;
		}

		prev = n;

		n = n->m_nextpkt;
	}

	if (n) {
		/* m is already fully covered by the next mbuf in the queue */
		if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
		    n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
			os_log(mptcp_log_handle, "%s - %lx: dsn %u dlen %u rseq %u fully covered with len %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
			    m->m_pkthdr.mp_rseq, n->m_pkthdr.mp_rlen);
			goto dont_queue;
		}

		/* m is covering the next mbuf entirely, thus we remove this guy */
		if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
			struct mbuf *tmp = n->m_nextpkt;

			os_log(mptcp_log_handle, "%s - %lx: m (dsn %u len %u) is covering existing mbuf (dsn %u len %u)\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
			    (uint32_t)n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen);

			m->m_nextpkt = NULL;
			if (prev == NULL) {
				mpte->mpte_reinjectq = tmp;
			} else {
				prev->m_nextpkt = tmp;
			}

			m_freem(n);
			n = tmp;
		}
	}

	if (prev) {
		/* m is already fully covered by the previous mbuf in the queue */
		if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
			os_log(mptcp_log_handle, "%s - %lx: prev (dsn %u len %u) covers us (dsn %u len %u)\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen,
			    (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen);
			goto dont_queue;
		}
	}

	if (prev == NULL) {
		mpte->mpte_reinjectq = m;
	} else {
		prev->m_nextpkt = m;
	}

	m->m_nextpkt = n;

	return;

dont_queue:
	m_freem(m);
	return;
}
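
/*
 * Editor's worked example for mptcp_add_reinjectq() above (values
 * hypothetical): with a queued mapping [dsn 100, len 100), an incoming m
 * of [dsn 100, len 50) is fully covered and freed via dont_queue; an
 * incoming m of [dsn 50, len 200) covers the queued mbuf entirely, so the
 * queued one is unlinked and freed before m is inserted in DSN order.
 */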

static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m;

	m = mp_so->so_snd.sb_mb;

	while (m) {
		/* If this segment covers what we are looking for, return it. */
		if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) {
			break;
		}

		/* Segment is no more in the queue */
		if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) {
			return NULL;
		}

		m = m->m_next;
	}

	return m;
}

static struct mbuf *
mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len)
{
	struct mbuf *top = NULL, *tail = NULL;
	uint64_t dsn;
	uint32_t dlen, rseq;

	dsn = m->m_pkthdr.mp_dsn;
	dlen = m->m_pkthdr.mp_rlen;
	rseq = m->m_pkthdr.mp_rseq;

	while (len > 0) {
		struct mbuf *n;

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
		if (n == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			goto err;
		}

		VERIFY(n->m_flags & M_PKTHDR);
		VERIFY(n->m_next == NULL);
		VERIFY(n->m_pkthdr.mp_dsn == dsn);
		VERIFY(n->m_pkthdr.mp_rlen == dlen);
		VERIFY(n->m_pkthdr.mp_rseq == rseq);
		VERIFY(n->m_len == m->m_len);

		n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

		if (top == NULL) {
			top = n;
		}

		if (tail != NULL) {
			tail->m_next = n;
		}

		tail = n;

		len -= m->m_len;
		m = m->m_next;
	}

	return top;

err:
	if (top) {
		m_freem(top);
	}

	return NULL;
}

void
mptcp_reinject_mbufs(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct sockbuf *sb = &so->so_snd;
	struct mbuf *m;

	m = sb->sb_mb;
	while (m) {
		struct mbuf *n = m->m_next, *orig = m;
		bool set_reinject_flag = false;

		mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
		    __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
		    m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) {
			goto next;
		}

		/* Has it all already been acknowledged at the data-level? */
		if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) {
			goto next;
		}

		/* Part of this has already been acknowledged - lookup in the
		 * MPTCP-socket for the segment.
		 */
		if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
			m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
			if (m == NULL) {
				goto next;
			}
		}

		/* Copy the mbuf with headers (aka, DSN-numbers) */
		m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
		if (m == NULL) {
			break;
		}

		VERIFY(m->m_nextpkt == NULL);

		/* Now, add to the reinject-queue, eliminating overlapping
		 * segments
		 */
		mptcp_add_reinjectq(mpte, m);

		set_reinject_flag = true;
		orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
		/* mp_rlen can cover multiple mbufs, so advance to the end of it. */
		while (n) {
			VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

			if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) {
				break;
			}

			if (set_reinject_flag) {
				n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
			}
			n = n->m_next;
		}

		m = n;
	}
}

void
mptcp_clean_reinjectq(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mptetoso(mpte));

	while (mpte->mpte_reinjectq) {
		struct mbuf *m = mpte->mpte_reinjectq;

		if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) {
			break;
		}

		mpte->mpte_reinjectq = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
}

/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, long events)
{
#pragma unused(so)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if ((mpts->mpts_evctl & events) == events) {
		return;
	}

	mpts->mpts_evctl |= events;

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
		return;
	}

	mptcp_subflow_workloop(mpte);
}

/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint)
{
	ev_ret_t ret = MPTS_EVRET_OK;
	int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
	    sizeof(mpsub_ev_entry_tbl[0]);

	/* bail if there's nothing to process */
	if (!mpts->mpts_evctl) {
		return ret;
	}

	if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
	    SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
	    SO_FILT_HINT_DISCONNECTED)) {
		mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
	}

	DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
	    struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

	/*
	 * Process all the socket filter hints and reset the hint
	 * once it is handled
	 */
	for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
		/*
		 * Always execute the DISCONNECTED event, because it will wakeup
		 * the app.
		 */
		if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
		    (ret >= MPTS_EVRET_OK ||
		    mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
			mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
			ev_ret_t error =
			    mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
			ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
		}
	}

	return ret;
}
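
/*
 * Editor's sketch (entry shown is hypothetical; the shape follows from the
 * loop in mptcp_subflow_events() above): each mpsub_ev_entry_tbl element
 * pairs one filter hint with its handler, roughly:
 *
 *	{
 *		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
 *		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
 *	},
 */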

static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/*
	 * We got an event for this subflow that might need to be propagated,
	 * based on the state of the MPTCP connection.
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mp_so->so_error = so->so_error;
		*p_mpsofilt_hint |= event;
	}

	return MPTS_EVRET_OK;
}

/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(p_mpsofilt_hint, event)
	struct socket *mp_so;
	struct tcpcb *tp;

	mp_so = mptetoso(mpte);
	tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

	/*
	 * This overwrites any previous mpte_lost_aid to avoid storing
	 * too much state when the typical case has only two subflows.
	 */
	mpte->mpte_flags |= MPTE_SND_REM_ADDR;
	mpte->mpte_lost_aid = tp->t_local_aid;

	mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection has lost its source address.
	 */
	mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

	if (mp_so->so_flags & SOF_NOADDRAVAIL) {
		mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
	}

	return MPTS_EVRET_DELETE;
}

static ev_ret_t
mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *so, *mp_so;

	so = mpts->mpts_socket;

	if (so->so_error != ENODATA) {
		return MPTS_EVRET_OK;
	}

	mp_so = mptetoso(mpte);

	mp_so->so_error = ENODATA;

	sorwakeup(mp_so);
	sowwakeup(mp_so);

	return MPTS_EVRET_OK;
}

/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event)
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got a Data FIN for the MPTCP connection.
	 * The FIN may arrive with data. The data is handed up to the
	 * mptcp socket and the user is notified so that it may close
	 * the socket if needed.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
	}

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}

/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct mptsub *mpts_alt = NULL;
	struct socket *alt_so = NULL;
	struct socket *mp_so;
	int altpath_exists = 0;

	mp_so = mptetoso(mpte);
	os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

	mptcp_reinject_mbufs(mpts->mpts_socket);

	mpts_alt = mptcp_get_subflow(mpte, NULL);

	/* If there is no alternate eligible subflow, ignore the failover hint. */
	if (mpts_alt == NULL || mpts_alt == mpts) {
		os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		goto done;
	}

	altpath_exists = 1;
	alt_so = mpts_alt->mpts_socket;
	if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
		/* All data acknowledged and no RTT spike */
		if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
			mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
		} else {
			/* no alternate path available */
			altpath_exists = 0;
		}
	}

	if (altpath_exists) {
		mpts_alt->mpts_flags |= MPTSF_ACTIVE;

		mpte->mpte_active_sub = mpts_alt;
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;

		os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid);

		mptcpstats_inc_switch(mpte, mpts);

		sowwakeup(alt_so);
	} else {
		mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
	}

	return MPTS_EVRET_OK;
}

/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection cannot use the outgoing interface, let's
	 * close this subflow.
	 */
	mptcp_subflow_abort(mpts, EPERM);

	mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return MPTS_EVRET_DELETE;
}

/*
 * https://tools.ietf.org/html/rfc6052#section-2
 * https://tools.ietf.org/html/rfc6147#section-5.2
 */
static boolean_t
mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
    const struct ipv6_prefix *prefix,
    struct in_addr *addrv4)
{
	char buf[MAX_IPv4_STR_LEN];
	char *ptrv4 = (char *)addrv4;
	const char *ptr = (const char *)addr;

	if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
		return FALSE;
	}

	switch (prefix->prefix_len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptrv4, ptr + 12, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptrv4, ptr + 9, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptrv4, ptr + 7, 1);
		memcpy(ptrv4 + 1, ptr + 9, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptrv4, ptr + 6, 2);
		memcpy(ptrv4 + 2, ptr + 9, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptrv4, ptr + 5, 3);
		memcpy(ptrv4 + 3, ptr + 9, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptrv4, ptr + 4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n",
		    prefix->prefix_len);
	}

	os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
	    inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));

	return TRUE;
}
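
/*
 * Editor's worked example: with the RFC 6052 well-known /96 prefix
 * 64:ff9b::/96 (NAT64_PREFIX_LEN_96, a 12-byte prefix), the IPv4 address
 * sits in the last four bytes, so 64:ff9b::c000:221 desynthesizes to
 * 192.0.2.33 via the memcpy(ptrv4, ptr + 12, 4) case above.
 */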

static void
mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
{
	struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
	struct socket *so = mpts->mpts_socket;
	struct ifnet *ifp;
	int j;

	/* Subflow IPs will be steered directly by the server - no need to
	 * desynthesize.
	 */
	if (mpte->mpte_flags & MPTE_UNICAST_IP) {
		return;
	}

	ifp = sotoinpcb(so)->inp_last_outifp;

	if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
		return;
	}

	for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
		int success;

		if (nat64prefixes[j].prefix_len == 0) {
			continue;
		}

		success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
		    &nat64prefixes[j],
		    &mpte->mpte_sub_dst_v4.sin_addr);
		if (success) {
			mpte->mpte_sub_dst_v4.sin_len = sizeof(mpte->mpte_sub_dst_v4);
			mpte->mpte_sub_dst_v4.sin_family = AF_INET;
			mpte->mpte_sub_dst_v4.sin_port = mpte->__mpte_dst_v6.sin6_port;
			break;
		}
	}
}

static void
mptcp_try_alternate_port(struct mptses *mpte, struct mptsub *mpts)
{
	struct inpcb *inp;

	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	inp = sotoinpcb(mpts->mpts_socket);
	if (inp == NULL) {
		return;
	}

	/* Should we try the alternate port? */
	if (mpte->mpte_alternate_port &&
	    inp->inp_fport != mpte->mpte_alternate_port) {
		union sockaddr_in_4_6 dst;
		struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;

		memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);

		dst_in->sin_port = mpte->mpte_alternate_port;

		mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
		    mpts->mpts_ifscope, NULL);
	} else { /* Else, we tried all we could, mark this interface as non-MPTCP */
		unsigned int i;

		if (inp->inp_last_outifp == NULL) {
			return;
		}

		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

			if (inp->inp_last_outifp->if_index == info->ifindex) {
				info->no_mptcp_support = 1;
				break;
			}
		}
	}
}

/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mptcb *mp_tp;
	int af;
	boolean_t mpok = FALSE;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	tp = sototcpcb(so);
	af = mpts->mpts_dst.sa_family;

	if (mpts->mpts_flags & MPTSF_CONNECTED) {
		return MPTS_EVRET_OK;
	}

	if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
	    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
		if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
		    (so->so_state & SS_ISCONNECTED)) {
			mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
			    __func__, mpts->mpts_connid),
			    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
			(void) soshutdownlock(so, SHUT_RD);
			(void) soshutdownlock(so, SHUT_WR);
			(void) sodisconnectlocked(so);
		}
		return MPTS_EVRET_OK;
	}

	/*
	 * The subflow connection has been connected. Find out whether it
	 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
	 *
	 *   a. If MPTCP connection is not yet established, then this must be
	 *	the first subflow connection. If MPTCP failed to negotiate,
	 *	fallback to regular TCP by degrading this subflow.
	 *
	 *   b. If MPTCP connection has been established, then this must be
	 *	one of the subsequent subflow connections. If MPTCP failed
	 *	to negotiate, disconnect the connection.
	 *
	 * Right now, we simply unblock any waiters at the MPTCP socket layer
	 * if the MPTCP connection has not been established.
	 */

	if (so->so_state & SS_ISDISCONNECTED) {
		/*
		 * With MPTCP joins, a connection is connected at the subflow
		 * level, but the 4th ACK from the server elevates the MPTCP
		 * subflow to connected state. So there is a small window
		 * where the subflow could get disconnected before the
		 * connected event is processed.
		 */
		return MPTS_EVRET_OK;
	}

	if (mpts->mpts_flags & MPTSF_TFO_REQD) {
		mptcp_drop_tfo_data(mpte, mpts);
	}

	mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
	mpts->mpts_flags |= MPTSF_CONNECTED;

	if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	}

	tp->t_mpflags &= ~TMPF_TFO_REQUEST;

	/* get/verify the outbound interface */
	inp = sotoinpcb(so);

	mpts->mpts_maxseg = tp->t_maxseg;

	mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
	    ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
	    ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
	    (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

	mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mp_tp->mpt_state = MPTCPS_ESTABLISHED;
		mpte->mpte_associd = mpts->mpts_connid;
		DTRACE_MPTCP2(state__change,
		    struct mptcb *, mp_tp,
		    uint32_t, 0 /* event */);

		if (SOCK_DOM(so) == AF_INET) {
			in_getsockaddr_s(so, &mpte->__mpte_src_v4);
		} else {
			in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
		}

		mpts->mpts_flags |= MPTSF_ACTIVE;

		/* case (a) above */
		if (!mpok) {
			tcpstat.tcps_mpcap_fallback++;

			tp->t_mpflags |= TMPF_INFIN_SENT;
			mptcp_notify_mpfail(so);
		} else {
			if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
			    mptcp_subflows_need_backup_flag(mpte)) {
				tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			} else {
				mpts->mpts_flags |= MPTSF_PREFERRED;
			}
			mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
			mpte->mpte_nummpcapflows++;

			if (SOCK_DOM(so) == AF_INET6) {
				mptcp_handle_ipv6_connection(mpte, mpts);
			}

			mptcp_check_subflows_and_add(mpte);

			if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
				mpte->mpte_initial_cell = 1;
			}

			mpte->mpte_handshake_success = 1;
		}

		mp_tp->mpt_sndwnd = tp->snd_wnd;
		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
		soisconnected(mp_so);
	} else if (mpok) {
		/*
		 * In case of additional flows, the MPTCP socket is not
		 * MPTSF_MP_CAPABLE until an ACK is received from server
		 * for 3-way handshake. TCP would have guaranteed that this
		 * is an MPTCP subflow.
		 */
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
		    !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
		    mptcp_subflows_need_backup_flag(mpte)) {
			tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			mpts->mpts_flags &= ~MPTSF_PREFERRED;
		} else {
			mpts->mpts_flags |= MPTSF_PREFERRED;
		}

		mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
		mpte->mpte_nummpcapflows++;

		mpts->mpts_rel_seq = 1;

		mptcp_check_subflows_and_remove(mpte);
	} else {
		mptcp_try_alternate_port(mpte, mpts);

		tcpstat.tcps_join_fallback++;
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
			tcpstat.tcps_mptcp_cell_proxy++;
		} else {
			tcpstat.tcps_mptcp_wifi_proxy++;
		}

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

		return MPTS_EVRET_OK;
	}

	/* This call, just to "book" an entry in the stats-table for this ifindex */
	mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	mptcp_output(mpte);

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}

/*
 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
	    __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
	    !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
	    !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
		return MPTS_EVRET_DELETE;
	}

	mpts->mpts_flags |= MPTSF_DISCONNECTED;

	/* The subflow connection has been disconnected. */

	if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
		mpte->mpte_nummpcapflows--;
		if (mpte->mpte_active_sub == mpts) {
			mpte->mpte_active_sub = NULL;
			mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
			    __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
		}
		mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
	} else {
		if (so->so_flags & SOF_MP_SEC_SUBFLOW &&
		    !(mpts->mpts_flags & MPTSF_CONNECTED)) {
			mptcp_try_alternate_port(mpte, mpts);
		}
	}

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mptcp_drop(mpte, mp_tp, so->so_error);
	}

	/*
	 * Clear flags that are used by getconninfo to return state.
	 * Retain like MPTSF_DELETEOK for internal purposes.
	 */
	mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_CONNECT_PENDING |
	    MPTSF_CONNECTED | MPTSF_DISCONNECTING | MPTSF_PREFERRED |
	    MPTSF_MP_CAPABLE | MPTSF_MP_READY | MPTSF_MP_DEGRADED | MPTSF_ACTIVE);

	return MPTS_EVRET_DELETE;
}
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event, p_mpsofilt_hint)
	ev_ret_t ret = MPTS_EVRET_OK;
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			goto done;
		}
		mpts->mpts_flags |= MPTSF_MP_DEGRADED;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
		mpts->mpts_flags |= MPTSF_MP_READY;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_READY;
	}

	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
		mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
	}

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		ret = MPTS_EVRET_DISCONNECT_FALLBACK;

		m_freem_list(mpte->mpte_reinjectq);
		mpte->mpte_reinjectq = NULL;
	} else if (mpts->mpts_flags & MPTSF_MP_READY) {
		mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
		ret = MPTS_EVRET_CONNECT_PENDING;
	}

done:
	return ret;
}
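
/*
 * Summary added for illustration (not in the original source): the event
 * handler above mirrors the subflow's TCP-level flags into subflow-level
 * state, roughly
 *
 *   TMPF_MPTCP_TRUE   -> MPTSF_MP_CAPABLE
 *   TMPF_TCP_FALLBACK -> MPTSF_MP_DEGRADED (and MPTCPF_FALLBACK_TO_TCP
 *                        at the connection level)
 *   TMPF_MPTCP_READY  -> MPTSF_MP_READY   (and MPTCPF_JOIN_READY)
 *
 * and then translates the result into MPTS_EVRET_DISCONNECT_FALLBACK or
 * MPTS_EVRET_CONNECT_PENDING for the workloop to act upon.
 */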
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	boolean_t is_fastclose;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/* We got an invalid option or a fast close */
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;

	tp = intotcpcb(inp);
	so->so_error = ECONNABORTED;

	is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

	tp->t_mpflags |= TMPF_RESET;

	if (tp->t_state != TCPS_CLOSED) {
		struct tcptemp *t_template = tcp_maketemplate(tp);

		if (t_template) {
			struct tcp_respond_args tra;

			bzero(&tra, sizeof(tra));
			if (inp->inp_flags & INP_BOUND_IF) {
				tra.ifscope = inp->inp_boundifp->if_index;
			} else {
				tra.ifscope = IFSCOPE_NONE;
			}
			tra.awdl_unrestricted = 1;

			tcp_respond(tp, t_template->tt_ipgen,
			    &t_template->tt_t, (struct mbuf *)NULL,
			    tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
			(void) m_free(dtom(t_template));
		}
	}

	if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
		struct mptsub *iter, *tmp;

		*p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

		mp_so->so_error = ECONNRESET;

		TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) {
			if (iter == mpts) {
				continue;
			}
			mptcp_subflow_abort(iter, ECONNABORTED);
		}

		/*
		 * mptcp_drop is being called after processing the events, to fully
		 * close the MPTCP connection
		 */
		mptcp_drop(mpte, mp_tp, mp_so->so_error);
	}

	mptcp_subflow_abort(mpts, ECONNABORTED);

	if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) {
		mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
	}

	return MPTS_EVRET_DELETE;
}
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_READ_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
	}

	return MPTS_EVRET_OK;
}
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    long *p_mpsofilt_hint, long event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_WRITE_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    tp->t_state > TCPS_CLOSE_WAIT) {
			continue;
		}

		if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
	}

	return MPTS_EVRET_OK;
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
	struct socket *mp_so, *so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;

	socket_lock_assert_owned(mp_so);

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
	    mpo->mpo_level == SOL_SOCKET &&
	    mpo->mpo_name == SO_MARK_CELLFALLBACK) {
		struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];

		mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
		    __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable_for_session(mpte),
		    sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
		    mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		/*
		 * When we open a new subflow, mark it as cell fallback, if
		 * this subflow goes over cell.
		 *
		 * (except for first-party apps)
		 */

		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			return 0;
		}

		if (sotoinpcb(so)->inp_last_outifp &&
		    !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			return 0;
		}

		/*
		 * This here is an OR, because if the app is not binding to the
		 * interface, then it definitely is not a cell-fallback
		 * connection.
		 */
		if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
		    !IFNET_IS_CELLULAR(ifp)) {
			return 0;
		}
	}

	mpo->mpo_flags &= ~MPOF_INTERIM;

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof(int);
	sopt.sopt_p = kernproc;

	error = sosetoptlock(so, &sopt, 0);     /* already locked */
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
		    "val %d set error %d\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval, error);
	}
	return error;
}
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof(int);
	sopt.sopt_p = kernproc;

	error = sogetoptlock(so, &sopt, 0);     /* already locked */
	if (error) {
		os_log_error(mptcp_log_handle,
		    "%s - %lx: sopt %s get error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error);
	}
	return error;
}
/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed.  The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
	struct mppcb *mpp, *tmpp;
	uint32_t active = 0;

	LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
		struct socket *mp_so;
		struct mptses *mpte;
		struct mptcb *mp_tp;

		mp_so = mpp->mpp_socket;
		mpte = mptompte(mpp);
		mp_tp = mpte->mpte_mptcb;

		if (!mpp_try_lock(mpp)) {
			active++;
			continue;
		}

		VERIFY(mpp->mpp_flags & MPP_ATTACHED);

		/* check again under the lock */
		if (mp_so->so_usecount > 0) {
			boolean_t wakeup = FALSE;
			struct mptsub *mpts, *tmpts;

			if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
				if (mp_tp->mpt_gc_ticks > 0) {
					mp_tp->mpt_gc_ticks--;
				}
				if (mp_tp->mpt_gc_ticks == 0) {
					wakeup = TRUE;
				}
			}
			if (wakeup) {
				TAILQ_FOREACH_SAFE(mpts,
				    &mpte->mpte_subflows, mpts_entry, tmpts) {
					mptcp_subflow_eupcall1(mpts->mpts_socket,
					    mpts, SO_FILT_HINT_DISCONNECTED);
				}
			}
			socket_unlock(mp_so, 0);
			active++;
			continue;
		}

		if (mpp->mpp_state != MPPCB_STATE_DEAD) {
			panic("%s - %lx: skipped state "
			    "[u=%d,r=%d,s=%d]\n", __func__,
			    (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mp_so->so_usecount, mp_so->so_retaincnt,
			    mpp->mpp_state);
		}

		if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) {
			mptcp_close(mpte, mp_tp);
		}

		mptcp_session_destroy(mpte);

		DTRACE_MPTCP4(dispose, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mppcb *, mpp);

		mp_pcbdispose(mpp);
		sodealloc(mp_so);
	}

	return active;
}
/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, u_short errno)
{
	struct socket *mp_so = mptetoso(mpte);

	VERIFY(mpte->mpte_mptcb == mp_tp);

	socket_lock_assert_owned(mp_so);

	mp_tp->mpt_state = MPTCPS_TERMINATE;
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 /* event */);

	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
		errno = mp_tp->mpt_softerror;
	}
	mp_so->so_error = errno;

	return mptcp_close(mpte, mp_tp);
}
/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct mptsub *mpts = NULL, *tmpts = NULL;
	struct socket *mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);
	VERIFY(mpte->mpte_mptcb == mp_tp);

	mp_tp->mpt_state = MPTCPS_TERMINATE;

	mptcp_freeq(mp_tp);

	soisdisconnected(mp_so);

	/* Clean up all subflows */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		mptcp_subflow_disconnect(mpte, mpts);
	}

	return NULL;
}

void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * MPTCP workloop.
 */
void
mptcp_subflow_workloop(struct mptses *mpte)
{
	boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
	long mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
	struct mptsub *mpts, *tmpts;
	struct socket *mp_so;

	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
		mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
		return;
	}
	mpte->mpte_flags |= MPTE_IN_WORKLOOP;

relaunch:
	mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		ev_ret_t ret;

		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

		/*
		 * If MPTCP socket is closed, disconnect all subflows.
		 * This will generate a disconnect event which will
		 * be handled during the next iteration, causing a
		 * non-zero error to be returned above.
		 */
		if (mp_so->so_flags & SOF_PCBCLEARING) {
			mptcp_subflow_disconnect(mpte, mpts);
		}

		switch (ret) {
		case MPTS_EVRET_OK:
			/* nothing to do */
			break;
		case MPTS_EVRET_DELETE:
			mptcp_subflow_soclose(mpts);
			break;
		case MPTS_EVRET_CONNECT_PENDING:
			connect_pending = TRUE;
			break;
		case MPTS_EVRET_DISCONNECT_FALLBACK:
			disconnect_fallback = TRUE;
			break;
		default:
			mptcplog((LOG_DEBUG,
			    "MPTCP Socket: %s: mptcp_subflow_events "
			    "returned invalid value: %d\n", __func__,
			    ret),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			break;
		}
		mptcp_subflow_remref(mpts);             /* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
		VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

		if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
			mp_so->so_state |= SS_CANTRCVMORE;
			sorwakeup(mp_so);
		}

		soevent(mp_so, mpsofilt_hint_mask);
	}

	if (!connect_pending && !disconnect_fallback) {
		goto exit;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (disconnect_fallback) {
			struct socket *so = NULL;
			struct inpcb *inp = NULL;
			struct tcpcb *tp = NULL;

			if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
				continue;
			}

			mpts->mpts_flags |= MPTSF_MP_DEGRADED;

			if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
			    MPTSF_DISCONNECTED)) {
				continue;
			}

			so = mpts->mpts_socket;

			/*
			 * The MPTCP connection has degraded to a fallback
			 * mode, so there is no point in keeping this subflow
			 * regardless of its MPTCP-readiness state, unless it
			 * is the primary one which we use for fallback.  This
			 * assumes that the subflow used for fallback is the
			 * ACTIVE one.
			 */

			inp = sotoinpcb(so);
			tp = intotcpcb(inp);
			tp->t_mpflags &=
			    ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
			tp->t_mpflags |= TMPF_TCP_FALLBACK;

			if (mpts->mpts_flags & MPTSF_ACTIVE) {
				continue;
			}
			soevent(so, SO_FILT_HINT_MUSTRST);
		} else if (connect_pending) {
			/*
			 * The MPTCP connection has progressed to a state
			 * where it supports full multipath semantics; allow
			 * additional joins to be attempted for all subflows
			 * that are in the PENDING state.
			 */
			if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
				int error = mptcp_subflow_soconnectx(mpte, mpts);

				if (error) {
					mptcp_subflow_abort(mpts, error);
				}
			}
		}
	}

exit:
	if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
		goto relaunch;
	}

	mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
}
/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0) {
		mp_so->so_usecount++;
		mpp->mpp_inside++;
	}
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return 0;
}
/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	socket_lock_assert_owned(mp_so);

	if (refcount != 0) {
		mp_so->so_usecount--;
		mpp->mpp_inside--;
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mpp->mpp_inside < 0) {
		panic("%s: mpp=%p inside=%x lrh= %s\n", __func__,
		    mpp, mpp->mpp_inside, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	mpp_unlock(mpp);

	return 0;
}
/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return mpp_getlock(mpp, flags);
}
/*
 * MPTCP Join support
 */

void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;

	/*
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0) {
		goto try_again;
	}
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}

static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = NULL;
	int found = 0;

	tp = sototcpcb(so);
	if (tp == NULL) {
		return;
	}

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
}
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand) {
				*lrand = sauth_entry->msae_laddr_rand;
			}
			if (rrand) {
				*rrand = sauth_entry->msae_raddr_rand;
			}
			break;
		}
	}
}

void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
				    " address ids %d %d \n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
				    raddr_id, sauth_entry->msae_raddr_id);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				os_log_error(mptcp_log_handle, "%s - %lx: "
				    "dup SYN_ACK %d %d \n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
				    raddr_rand, sauth_entry->msae_raddr_rand);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			return;
		}
	}
}
/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof(mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};  /* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};  /* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is 512 block length, so no need to compute hash */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t lrand, rrand;

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
	    digest);
}
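
/*
 * Illustrative sketch (not part of the original file): per RFC 6824, the
 * MP_JOIN SYN/ACK carries only the most-significant 64 bits of the HMAC,
 * while the final ACK carries the full 160-bit digest. A hypothetical
 * helper for the truncated form could look like this.
 */
#if 0   /* example only */
static inline uint64_t
mptcp_hmac_truncate64_sketch(const u_char digest[SHA1_RESULTLEN])
{
	uint64_t mac64;

	/* First 8 of the 20 SHA1 output bytes, kept in wire order */
	bcopy(digest, &mac64, sizeof(mac64));
	return mac64;
}
#endif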
/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof(u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof(u_int32_t));
}

static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof(u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */

	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
}
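
/*
 * Worked example with a hypothetical digest (illustration only): if
 * SHA1(key) = 0x0102030405060708090a0b0c0d0e0f1011121314, the token is
 * bytes 0..3, i.e. 0x01020304 (most-significant 32 bits), and the IDSN is
 * built from bytes 12..19 so that, on a little-endian machine, the
 * resulting 64-bit value is 0x0d0e0f1011121314 (idsn[0] above receives
 * sha_digest[19] = 0x14 as the least-significant byte).
 */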
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
void
mptcp_init_local_parms(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[SHA1_RESULTLEN];

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

	mptcp_generate_token(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}
int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char remote_digest[SHA1_RESULTLEN];

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
		return -1;
	}

	/* Setup local and remote tokens and Initial DSNs */
	mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
	mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;

	return 0;
}
static void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp) {
		return;
	}

	tp = intotcpcb(inp);
	if (!tp) {
		return;
	}

	if (!(tp->t_mpflags & TMPF_RESET)) {
		tp->t_mpflags |= TMPF_SEND_DFIN;
	}
}
/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL) {
		return;
	}

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		VERIFY(m_pktlen(m) >= 0 && m_pktlen(m) < UINT16_MAX);
		m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	if (!m || len == 0) {
		return;
	}

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_pkthdr.mp_rlen;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means, we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * really acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	}

	if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	/* We can have data in the subflow's send-queue that is being acked,
	 * while the DATA_ACK has already advanced. Thus, we should check whether
	 * or not the DATA_ACK is actually new here.
	 */
	if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
	    MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
		mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
	}
}
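
/*
 * Worked example (illustration only, numbers hypothetical): assume two
 * mappings of 100 bytes each, at DSN 1000 and DSN 1100, and a TCP ack
 * that drops len = 150 bytes. The loop above first sets data_ack to 1100
 * (len becomes 50), then optimistically to 1200 (len becomes -50). Since
 * len ended up negative, the ack landed in the middle of the second
 * mapping, so data_ack is corrected back down to that mapping's dsn,
 * 1100: only bytes 1000..1099 can be considered DATA_ACK'ed.
 */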
void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0) {
				m->m_pkthdr.mp_dsn += len;
			}
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0) {
					m->m_pkthdr.mp_rseq += len;
				}
			}
			mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
			    __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
			    m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fallback to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}

void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;
	int off_orig = off;

	if (m == NULL && (so->so_flags & SOF_DEFUNCT)) {
		*dsn = 0;
		*relseq = 0;
		*data_len = 0;
		*dss_csum = 0;
		return;
	}

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(m);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc().
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data, tcp_input() calls m_adj(), which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed until after the trim, because data
 * can sit in the reassembly queue for a while and the DSN option info in tp
 * will be overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to the subflow sockbuf
 * with mptcp_adj_rmap().
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		if (tp->t_rcv_map.mpt_dfin) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	} else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (th->th_flags & TH_FIN) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
	}
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, uint64_t dsn_fail, uint32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	uint64_t dsn;
	int off = 0;
	uint32_t datalen;

	if (m == NULL) {
		return -1;
	}

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = (int)(dsn_fail - dsn);
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			return 0;
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */
	os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
	return -1;
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	if (so->so_snd.sb_mb == NULL && (so->so_flags & SOF_DEFUNCT)) {
		return 0;
	}

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * sequence number has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST) {
		mdss_subflow_off--;
	}

	VERIFY(off >= mdss_subflow_off);

	return mdss_data_len - (off - mdss_subflow_off);
}
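
/*
 * Worked example (illustration only, numbers hypothetical): suppose the
 * mapping found for this offset is 500 bytes long (mdss_data_len) and
 * starts 100 bytes into the send buffer (mdss_subflow_off = 100). For
 * off = 200, the first 100 bytes of the mapping are already consumed, so
 * the function returns 500 - (200 - 100) = 400 contiguous bytes that may
 * still be sent under this mapping.
 */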
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->t_maxseg > maxseg) {
			maxseg = tp->t_maxseg;
		}
	}

	return maxseg;
}
static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->rcv_scale < rcvscale) {
			rcvscale = tp->rcv_scale;
		}
	}

	return rcvscale;
}
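
/*
 * Note added for illustration (not in the original source): the minimum
 * rcv_scale across established subflows is used because the MPTCP-level
 * window is advertised through each subflow's TCP header. With subflows
 * at scale 6 and 8, for example, computations must use 6, since a window
 * sized for shift 8 could not be represented on the scale-6 subflow (see
 * the TCP_MAXWIN << rcvscale clamp in mptcp_sbrcv_reserve() below).
 */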
/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/* Again check the limit set by the advertised
		 * window scale
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}
void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and receive side is being
	 *   throttled
	 * - if there are segments in reassembly queue indicating loss,
	 *   do not need to increase recv window during recovery as more
	 *   data is not going to be sent. A duplicate ack sent during
	 *   recovery should not change the receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Can not resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
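
/*
 * Worked example for the formula above (illustration only, numbers
 * hypothetical): with two subflows receiving at 10 MB/s and 2 MB/s and a
 * maximum RTT of 100 ms, the ideal receive buffer would be
 * (10 + 2) MB/s * 0.1 s * 2 = 2.4 MB. The sum-of-subflow-buffers
 * placeholder used here will typically come out lower than that.
 */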
/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
		return;
	}

	if (tcp_do_autorcvbuf == 1 &&
	    tcp_cansbgrow(sb) &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}
/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0) {
		space = 0;
	}

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space) {
		space = 0;
	} else {
		space -= pending;
	}

	return space;
}
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		return;
	}

	if (tp->t_mpflags & TMPF_MPTCP_READY) {
		return;
	}

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		return;
	}

	tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return ret;
}
/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define MPTCP_COMPUTE_LEN {                             \
	mss_lower = sizeof (struct mptcp_dss_ack_opt);  \
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)         \
	        mss_lower += 2;                         \
	else                                            \
	        /* adjust to 32-bit boundary + EOL */   \
	        mss_lower += 2;                         \
}
	if (mp_tp == NULL) {
		return 0;
	}

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return mss_lower;
}
/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	if (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}
	so_update_policy(so);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n / 8) * sizeof(mptcp_flow_t);
		return 0;
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		socket_lock(mpp->mpp_socket, 1);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);

		socket_lock_assert_owned(mptetoso(mpte));
		mp_tp = mpte->mpte_mptcb;

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				socket_unlock(mpp->mpp_socket, 1);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			socket_unlock(mpp->mpp_socket, 1);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		socket_unlock(mpp->mpp_socket, 1);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error) {
				break;
			}
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return error;
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		mp_tp->mpt_notsent_lowat = optval;
	} else {
		error = EINVAL;
	}

	return error;
}

u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		return mp_tp->mpt_notsent_lowat;
	} else {
		return 0;
	}
}
int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return 0;
	}

	mpte = mptompte(mpp);
	socket_lock_assert_owned(mptetoso(mpte));
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %llu \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		return 1;
	}

	/* When Nagle's algorithm is not disabled, it is better
	 * to wake up the client even before there is at least one
	 * maxseg of data to write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
			    " nodelay false \n",
			    mp_tp->mpt_notsent_lowat, notsent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			return retval;
		}
	}
	return 0;
}
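
/*
 * Worked example (illustration only, numbers hypothetical): with a lowat
 * of 16 KB, 20 KB sitting in the send buffer (notsent) and 6 KB of it
 * already handed to subflows (mpt_sndnxt - mpt_snduna), the effectively
 * unsent amount is 14 KB <= 16 KB, so the check above reports the socket
 * as writable.
 */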
static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
		os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
	}

	mptcp_kern_skt_unit = sac->sc_unit;

	return 0;
}
static void
mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid)) {
			goto next;
		} else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid)) {
			goto next;
		}

		os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
			mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		/* Only handover- and urgency-mode are purely driven by Symptom's Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_PURE_HANDOVER &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
			goto next;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
struct mptcp_uuid_search_info {
	uuid_t target_uuid;
	proc_t found_proc;
	boolean_t is_proc_found;
};

static int
mptcp_find_proc_filter(proc_t p, void *arg)
{
	struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;
	int found;

	if (info->is_proc_found) {
		return 0;
	}

	/*
	 * uuid_compare returns 0 if the uuids are matching, but the proc-filter
	 * expects != 0 for a matching filter.
	 */
	found = uuid_compare(p->p_uuid, info->target_uuid) == 0;
	if (found) {
		info->is_proc_found = true;
	}

	return found;
}

static int
mptcp_find_proc_callout(proc_t p, void * arg)
{
	struct mptcp_uuid_search_info *info = (struct mptcp_uuid_search_info *)arg;

	if (uuid_compare(p->p_uuid, info->target_uuid) == 0) {
		info->found_proc = p;
		return PROC_CLAIMED_DONE;
	}

	return PROC_RETURNED;
}

static proc_t
mptcp_find_proc(const uuid_t uuid)
{
	struct mptcp_uuid_search_info info;

	uuid_copy(info.target_uuid, uuid);
	info.found_proc = PROC_NULL;
	info.is_proc_found = false;

	proc_iterate(PROC_ALLPROCLIST, mptcp_find_proc_callout, &info,
	    mptcp_find_proc_filter, &info);

	return info.found_proc;
}
static void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p = PROC_NULL;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED) {
		if (mpte->mpte_epid != 0) {
			p = proc_find(mpte->mpte_epid);
			if (p != PROC_NULL) {
				/* We found a pid, check its UUID */
				if (uuid_compare(mp_so->e_uuid, p->p_uuid)) {
					/* It's not the same - we need to look for the real proc */
					proc_rele(p);
					p = PROC_NULL;
				}
			}
		}

		if (p == PROC_NULL) {
			p = mptcp_find_proc(mp_so->e_uuid);
			if (p == PROC_NULL) {
				uuid_string_t uuid_string;
				uuid_unparse(mp_so->e_uuid, uuid_string);

				os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for uuid %s\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuid_string);
				return;
			}
			mpte->mpte_epid = proc_pid(p);
		}

		pid = mpte->mpte_epid;
		uuid_copy(ask.uuid, mp_so->e_uuid);
	} else {
		pid = mp_so->last_pid;

		p = proc_find(pid);
		if (p == PROC_NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
			return;
		}

		uuid_copy(ask.uuid, mp_so->last_uuid);
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
	    prio == TASK_DARWINBG_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	} else if (prio == TASK_FOREGROUND_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	} else {
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
	}

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);

	os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);

	proc_rele(p);
}
static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return 0;
}

static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit) {
		os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit);
	}

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return EINVAL;
	}

	if (mbuf_len(m) < sizeof(*sa)) {
		os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
		    __func__, mbuf_len(m), sizeof(*sa));
		mbuf_freem(m);
		return EINVAL;
	}

	sa = mbuf_data(m);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new, old: %d,%d\n", __func__,
		    sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
		    sa->sa_cell_status, mptcp_advisory.sa_cell_status);

		if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
			mptcp_wifi_status_changed();
		}
	} else {
		struct mptcp_symptoms_answer answer;
		errno_t err;

		/* We temporarily allow different sizes for ease of submission */
		if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
		    mbuf_len(m) != sizeof(answer)) {
			os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
			    __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
			    sizeof(answer));
			mbuf_freem(m);
			return EINVAL;
		}

		memset(&answer, 0, sizeof(answer));

		err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
		if (err) {
			os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
			mbuf_freem(m);
			return err;
		}

		mptcp_allow_uuid(answer.uuid, answer.rssi);
	}

	mbuf_freem(m);
	return 0;
}
void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}
/*
 * Three return-values:
 *  1  : WiFi is bad / unusable
 *  0  : WiFi is usable
 * -1  : WiFi-state is unknown
 */
int
mptcp_is_wifi_unusable_for_session(struct mptses *mpte)
{
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
		    mptcp_advisory.sa_wifi_status) {
			return symptoms_is_wifi_lossy() ? 1 : 0;
		}

		/*
		 * If it's a first-party app and we don't have any info
		 * about the Wi-Fi state, let's be pessimistic.
		 */
		return -1;
	}

	if (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) {
		return 1;
	}

	/*
	 * If we are target-based, we allow being more lax on the "unusable"
	 * determination. We only *know* about the state once we got the
	 * allowance from Symptoms (MPTE_ACCESS_GRANTED).
	 *
	 * If RSSI is not bad enough, MPTE_CELL_PROHIBITED will then
	 * be set.
	 *
	 * In any other case (while in target-mode), consider WiFi bad
	 * and we are going to ask for allowance from Symptoms anyway.
	 */
	if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
		if (mpte->mpte_flags & MPTE_ACCESS_GRANTED &&
		    mpte->mpte_flags & MPTE_CELL_PROHIBITED) {
			return 0;
		}

		return 1;
	}

	return 0;
}

boolean_t
symptoms_is_wifi_lossy(void)
{
	return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true;
}
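
/*
 * Summary added for illustration (not in the original source):
 * symptoms_is_wifi_lossy() reduces to
 *
 *   sa_wifi_status has SYMPTOMS_ADVISORY_WIFI_OK  -> false (not lossy)
 *   anything else, including an empty advisory    -> true  (lossy)
 *
 * i.e. in the absence of information Wi-Fi is presumed lossy, which
 * matches the pessimistic first-party default in the function above.
 */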
6681 /* If TFO data is succesfully acked, it must be dropped from the mptcp so */
6683 mptcp_drop_tfo_data(struct mptses
*mpte
, struct mptsub
*mpts
)
6685 struct socket
*mp_so
= mptetoso(mpte
);
6686 struct socket
*so
= mpts
->mpts_socket
;
6687 struct tcpcb
*tp
= intotcpcb(sotoinpcb(so
));
6688 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;

	/* If data was sent with SYN, rewind state */
	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* handle partial TCP ack */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			/* all data on SYN was acked */
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}
	}
}
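
/*
 * Worked example of the rewind above, with illustrative numbers: if 100
 * bytes sit between mpt_snduna and mpt_sndnxt when the SYN carries data,
 * and the peer acknowledges 40 of them (tcp_droplen = snd_una - iss - 1
 * = 40), then mpt_sndnxt is rewound past the 60 unacknowledged bytes so
 * they will be resent, and only the 40 acknowledged bytes are dropped
 * from the MPTCP send buffer via sbdrop().
 */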

int
mptcp_freeq(struct mptcb *mp_tp)
{
	struct tseg_qent *q;
	int rv = 0;

	while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);
		rv = 1;
	}
	mp_tp->mpt_reassqlen = 0;
	return rv;
}
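
/*
 * The nonzero return above tells callers that reassembly segments were
 * actually freed, as opposed to the queue having been empty already.
 */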

static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	event_data.value = value;

	ev_msg.dv[0].data_ptr = &event_data;
	ev_msg.dv[0].data_length = sizeof(event_data);

	return kev_post_msg(&ev_msg);
}
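
/*
 * For reference: userspace can observe these events through a PF_SYSTEM
 * event socket filtered on the MPTCP subclass. A minimal sketch, not
 * part of this file (assumes the standard sys/kern_event.h interface):
 *
 *	int fd = socket(PF_SYSTEM, SOCK_RAW, SYSPROTO_EVENT);
 *	struct kev_request req;
 *	bzero(&req, sizeof(req));
 *	req.vendor_code = KEV_VENDOR_APPLE;
 *	req.kev_class = KEV_NETWORK_CLASS;
 *	req.kev_subclass = KEV_MPTCP_SUBCLASS;
 *	ioctl(fd, SIOCSKEVFILTER, &req);
 *
 * Each recv(2) on fd then yields a struct kern_event_msg whose
 * event_code is, e.g., KEV_MPTCP_CELLUSE as posted above.
 */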

static void
mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
{
	struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
	int error;

	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	/* Subflow is disappearing - don't set it on this one */
	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
		return;
	}

	/* Fallen-back connections don't trigger the cellicon */
	if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		return;
	}

	/* Remember the last time we set the cellicon. Needed for debouncing */
	mpte->mpte_last_cellicon_set = tcp_now;

	tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
	tcp_sched_timers(tp);

	if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
	    mpte->mpte_cellicon_increments != 0) {
		if (mptcp_cellicon_refcount == 0) {
			os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

			/* Continue, so that the icon gets set... */
		} else {
			/*
			 * In this case, the cellicon is already set. No need
			 * to bump it even higher.
			 */
			return;
		}
	}

	/* When tearing down this subflow, we need to decrement the
	 * reference counter.
	 */
	mpts->mpts_flags |= MPTSF_CELLICON_SET;

	/* This counter is bumped so that, when a session gets destroyed, we
	 * decrement the reference counter by whatever is left.
	 */
	mpte->mpte_cellicon_increments++;

	if (OSIncrementAtomic(&mptcp_cellicon_refcount)) {
		/* If cellicon is already set, get out of here! */
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
	} else {
		os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}
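
/*
 * Cellicon state is tracked at three levels: MPTSF_CELLICON_SET marks a
 * subflow, mpte_cellicon_increments counts how many references this
 * session holds, and the global mptcp_cellicon_refcount aggregates all
 * sessions. The KEV_MPTCP_CELLUSE event only fires on the global
 * counter's transitions between zero and nonzero.
 */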

void
mptcp_clear_cellicon(void)
{
	int error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

	if (error) {
		os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n",
		    __func__, error);
	} else {
		os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n",
		    __func__);
	}
}

/*
 * Returns true if the icon has been flipped to WiFi.
 */
static boolean_t
__mptcp_unset_cellicon(uint32_t val)
{
	VERIFY(val < INT32_MAX);
	if (OSAddAtomic((int32_t)-val, &mptcp_cellicon_refcount) != 1) {
		return false;
	}

	mptcp_clear_cellicon();

	return true;
}
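
/*
 * Note on the check above: OSAddAtomic() returns the value the counter
 * held *before* the subtraction, so the icon is cleared only when the
 * counter was exactly 1 going into this decrement.
 */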

void
mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
{
	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	if (mpte->mpte_cellicon_increments == 0) {
		/* This flow never used cell - get out of here! */
		return;
	}

	if (mptcp_cellicon_refcount == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

		return;
	}

	if (mpts) {
		if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
			return;
		}

		mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
	}

	if (mpte->mpte_cellicon_increments < val) {
		os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
		val = mpte->mpte_cellicon_increments;
	}

	mpte->mpte_cellicon_increments -= val;

	if (__mptcp_unset_cellicon(val) == false) {
		return;
	}

	/* All flows are gone - our counter should be at zero too! */
	if (mpte->mpte_cellicon_increments != 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
	}
}
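
/*
 * The clamp of val above keeps the global refcount consistent even if a
 * caller over-counts. Session teardown is expected to pass the session's
 * remaining mpte_cellicon_increments in a single call (with mpts ==
 * NULL), while per-subflow teardown passes 1.
 */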

void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL) {
		return;
	}

	so = inp->inp_socket;
	if (so == NULL) {
		return;
	}

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}

void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}