/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <net/content_filter.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>

#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>

#include <dev/random/randomdev.h>
/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain.  The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain.  It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP).  Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB.  All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones.  The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets.  Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure.  Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow.  This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock.  Incoming data on a subflow also ends up taking this single lock.  To
 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
 * of the MPTCP-socket.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector.  This process will take place once all
 * of the subflows have been destroyed.
 */
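/*
 * For orientation, a minimal userspace sketch of the flow described above
 * (hypothetical illustration, not part of the original file): an MPTCP
 * socket is opened and connected via connectx(2), with "res" standing in
 * for a destination resolved through getaddrinfo(3).
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	sa_endpoints_t sae = {
 *		.sae_dstaddr = res->ai_addr,
 *		.sae_dstaddrlen = res->ai_addrlen,
 *	};
 *	connectx(fd, &sae, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, NULL);
 *
 * Each such socket corresponds to one mpp_mtp block (Multipath PCB, MPTCP
 * Session, MPTCP PCB) managed by the code below.
 */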
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
static void mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val);
static int mptcp_freeq(struct mptcb *mp_tp);
/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE               = 1,    /* delete this subflow */
	MPTS_EVRET_OK                   = 2,    /* OK */
	MPTS_EVRET_CONNECT_PENDING      = 3,    /* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK  = 4,    /* abort all but preferred */
} ev_ret_t;
static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpsuberror_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);
static unsigned int mptsub_zone_size;           /* size of mptsub */
static struct zone *mptsub_zone;                /* zone for mptsub */

static unsigned int mptopt_zone_size;           /* size of mptopt */
static struct zone *mptopt_zone;                /* zone for mptopt */

static unsigned int mpt_subauth_entry_size;     /* size of subf auth entry */
static struct zone *mpt_subauth_zone;           /* zone of subf auth entry */

struct mppcbinfo mtcbinfo;
SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;           /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");

static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");
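/*
 * The tunables above surface under the net.inet.mptcp sysctl node; for
 * example (illustrative shell usage, assuming the standard sysctl(8) tool):
 *
 *	sysctl net.inet.mptcp.pcbcount          # read-only PCB count
 *	sysctl -w net.inet.mptcp.dbg_level=2    # raise debug verbosity
 */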
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;

static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;

static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
	uint64_t        sofilt_hint_mask;
	ev_ret_t        (*sofilt_hint_ev_hdlr)(
		struct mptses *mpte,
		struct mptsub *mpts,
		uint64_t *p_mpsofilt_hint,
		uint64_t event);
} mptsub_ev_entry_t;
/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
static symptoms_advisory_t mptcp_advisory;

uint32_t mptcp_cellicon_refcount = 0;
#define MPTCP_CELLICON_TOGGLE_RATE      (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};
os_log_t mptcp_log_handle;
/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
	struct ip6protosw *prp6;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;

	mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
	if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptsub_zone_size = sizeof(struct mptsub);
	if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub")) == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	mptopt_zone_size = sizeof(struct mptopt);
	if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt")) == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	mpt_subauth_entry_size = sizeof(struct mptcp_subf_auth_entry);
	if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
		/* NOTREACHED */
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}
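/*
 * Find the slot for 'ifindex' in the per-session interface stats array.
 * With 'create' set, the first free slot (ifindex == IFSCOPE_NONE) may be
 * claimed for the interface if it has no slot yet. Returns the slot index,
 * or -1 if the interface is unknown and no slot could be allocated.
 */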
int
mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, int ifindex, boolean_t create)
{
	int i, index = -1;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (create && stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0) {
				index = i;
			}
			continue;
		}

		if (stats[i].ifindex == ifindex) {
			index = i;
			return index;
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifindex;
	}

	return index;
}
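/*
 * Resolve the stats slot for a subflow via its last outbound interface,
 * marking the slot as expensive for cellular interfaces. Returns -1 if the
 * subflow has no interface yet.
 */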
static int
mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int index;

	if (ifp == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
		    sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags);
		return -1;
	}

	index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);

	if (index != -1) {
		if (stats[index].is_expensive == 0) {
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
		}
	}

	return index;
}
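/*
 * Account one subflow switch, both globally (tcpstat) and in the session's
 * per-interface counters.
 */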
void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	if (index != -1) {
		mpte->mpte_itfstats[index].switches++;
	}
}
/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
int
mptcp_session_create(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof(*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mptcp_init_urgency_timer(mpte);

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	if (mptcp_alternate_port) {
		mpte->mpte_alternate_port = htons(mptcp_alternate_port);
	}

	mpte->mpte_last_cellicon_set = tcp_now;

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof(*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return 0;
}
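/*
 * Pick the destination address for a new subflow. Unless the session is in
 * unicast-IP mode, this is simply the MPTCP-level destination; otherwise
 * prefer a family matching the caller's (ipv6/ipv4) hint, falling back to
 * IPv6 over IPv4, or NULL if no unicast IP is known yet.
 */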
struct sockaddr *
mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
{
	if (!(mpte->mpte_flags & MPTE_UNICAST_IP)) {
		return &mpte->mpte_dst;
	}

	if (ipv6 && mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (ipv4 && mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/* The interface has neither IPv4 nor IPv6 routes. Give our best guess,
	 * meaning we prefer IPv6 over IPv4.
	 */
	if (mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/* We don't yet have a unicast IP */
	return NULL;
}
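/*
 * Sum transferred bytes across all interfaces of the session, reporting
 * both the cellular-only and the total byte counts.
 */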
static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes);

		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}
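/*
 * Fold the session's outcome into the global MPTCP statistics when it is
 * torn down, keyed by service type (handover, interactive, aggregate).
 */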
static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_aggregate_success++;
			}
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
		tcpstat.tcps_mptcp_back_to_wifi++;
	}

	if (mpte->mpte_triggered_cell) {
		tcpstat.tcps_mptcp_triggered_cell++;
	}
}
/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	VERIFY(mp_tp != NULL);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	mptcpstats_session_wrapup(mpte);
	mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
	mptcp_flush_sopts(mpte);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}
	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
}
boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
	       mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
	       !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}
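/*
 * The address synthesis below embeds an IPv4 address into an IPv6 prefix
 * at the offset dictated by the prefix length, per the RFC 6052 layout.
 * As an illustration: with the 96-bit well-known prefix 64:ff9b::/96,
 * 192.0.2.1 (hex c0.00.02.01) maps to 64:ff9b::c000:201.
 */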
static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len,
    const struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00},
	};
	const char *ptrv4 = (const char *)addrv4;
	char buf[MAX_IPv6_STR_LEN];
	char *ptr = (char *)addr;

	if (IN_ZERONET(ntohl(addrv4->s_addr)) || // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(ntohl(addrv4->s_addr)) || // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(ntohl(addrv4->s_addr)) || // 169.254.0.0/16 Link Local
	    IN_DS_LITE(ntohl(addrv4->s_addr)) || // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(ntohl(addrv4->s_addr)) || // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) { // 255.255.255.255/32 Limited Broadcast
		return -1;
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(ntohl(addrv4->s_addr)) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
			return -1;
		}
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	os_log_info(mptcp_log_handle, "%s: nat64prefix-len %u synthesized %s\n",
	    __func__, len,
	    inet_ntop(AF_INET6, (void *)addr, buf, sizeof(buf)));

	return 0;
}
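/*
 * Ask the cellular radio manager (via NECP) to bring up the cell interface
 * for this session's NECP client UUID.
 */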
static void
mptcp_trigger_cell_bringup(struct mptses *mpte)
{
	struct socket *mp_so = mptetoso(mpte);

	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		uuid_string_t uuidstr;
		int err;

		socket_unlock(mp_so, 0);
		err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
		    TRUE);
		socket_lock(mp_so, 0);

		if (err == 0) {
			mpte->mpte_triggered_cell = 1;
		}

		uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
		os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
	} else {
		os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}
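/*
 * A subflow counts as disconnecting if its socket state, its MPTCP subflow
 * flags, or its TCP state indicate that it is going (or has gone) away.
 */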
static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
	/* Split out in if-statements for readability. Compiler should
	 * optimize that.
	 */
	if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
		return true;
	}

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) {
		return true;
	}

	if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) {
		return true;
	}

	return false;
}
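/*
 * Walk the session's interface list and create subflows on interfaces that
 * should carry one but do not, deciding along the way whether a cellular
 * subflow is wanted (and triggering cell bringup if cell is wanted but not
 * viable).
 */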
void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	boolean_t cellular_viable = FALSE;
	boolean_t want_cellular = TRUE;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (mptcp_get_session_dst(mpte, false, false) == NULL) {
		return;
	}

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		boolean_t need_to_ask_symptoms = FALSE, found = FALSE;
		struct mpt_itf_info *info;
		struct sockaddr_in6 nat64pre;
		struct sockaddr *dst;
		struct mptsub *mpts;
		struct ifnet *ifp;
		uint32_t ifindex;

		info = &mpte->mpte_itfinfo[i];

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u has v6 %u hasnat64 %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support,
		    info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn);

		if (info->no_mptcp_support) {
			continue;
		}

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			cellular_viable = TRUE;
		}

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
			struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

			if (subifp == NULL) {
				continue;
			}

			/*
			 * If there is at least one functioning subflow on WiFi
			 * and we are checking for the cell interface, then
			 * we always need to ask symptoms for permission as
			 * cell is triggered even if WiFi is available.
			 */
			if (!IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    IFNET_IS_CELLULAR(ifp)) {
				need_to_ask_symptoms = TRUE;
			}

			/*
			 * In Handover mode, only create cell subflow if
			 * 1. Wi-Fi Assist is active
			 * 2. Symptoms marked WiFi as weak
			 * 3. We are experiencing RTOs or we are not sending data.
			 *
			 * This covers the scenario, where:
			 * 1. We send and get retransmission timeouts (thus,
			 *    we confirmed that WiFi is indeed bad).
			 * 2. We are not sending and the server tries to send.
			 *    Establishing a cell-subflow gives the server a
			 *    chance to send us some data over cell if WiFi
			 *    is dead. We establish the subflow with the
			 *    backup-bit set, so the server is not allowed to
			 *    send on this subflow as long as WiFi is providing
			 *    good performance.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    (mptcp_is_wifi_unusable_for_session(mpte) == 0 ||
			    (tp->t_rxtshift < mptcp_fail_thresh * 2 && mptetoso(mpte)->so_snd.sb_cc))) {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: handover, wifi state %d rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mptcp_is_wifi_unusable_for_session(mpte),
				    tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
				    mptetoso(mpte)->so_snd.sb_cc,
				    ifindex, subifp->if_index,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);

				/* We found a proper subflow on WiFi - no need for cell */
				want_cellular = FALSE;
				break;
			} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
				uint64_t time_now = mach_continuous_time();

				os_log(mptcp_log_handle,
				    "%s - %lx: target-based: %llu now %llu unusable? %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target,
				    time_now, mptcp_is_wifi_unusable_for_session(mpte),
				    IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state,
				    mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state);

				if (!IFNET_IS_CELLULAR(subifp) &&
				    !mptcp_subflow_disconnecting(mpts) &&
				    (mpte->mpte_time_target == 0 ||
				    (int64_t)(mpte->mpte_time_target - time_now) > 0 ||
				    !mptcp_is_wifi_unusable_for_session(mpte))) {
					found = TRUE;

					want_cellular = FALSE;
					break;
				}
			} else {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: svc %u cell %u flags %#x unusable %d rtx %u first %u sbcc %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mpte->mpte_svctype, IFNET_IS_CELLULAR(subifp), mpts->mpts_flags,
				    mptcp_is_wifi_unusable_for_session(mpte), tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY), mptetoso(mpte)->so_snd.sb_cc,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);
			}

			if (subifp->if_index == ifindex &&
			    !mptcp_subflow_disconnecting(mpts)) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = TRUE;
				break;
			}
		}

		if (found) {
			continue;
		}

		if (need_to_ask_symptoms &&
		    !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			continue;
		}

		dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);

		if (dst->sa_family == AF_INET &&
		    !info->has_v4_conn && info->has_nat64_conn) {
			struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
			int error, j;

			bzero(&nat64pre, sizeof(struct sockaddr_in6));

			error = ifnet_get_nat64prefix(ifp, nat64prefixes);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error);
				continue;
			}

			for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
				if (nat64prefixes[j].prefix_len != 0) {
					break;
				}
			}

			VERIFY(j < NAT64_MAX_NUM_PREFIXES);

			error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
			    nat64prefixes[j].prefix_len,
			    &((struct sockaddr_in *)(void *)dst)->sin_addr);
			if (error != 0) {
				os_log_info(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
				continue;
			}

			memcpy(&nat64pre.sin6_addr,
			    &nat64prefixes[j].ipv6_prefix,
			    sizeof(nat64pre.sin6_addr));
			nat64pre.sin6_len = sizeof(struct sockaddr_in6);
			nat64pre.sin6_family = AF_INET6;
			nat64pre.sin6_port = ((struct sockaddr_in *)(void *)dst)->sin_port;
			nat64pre.sin6_flowinfo = 0;
			nat64pre.sin6_scope_id = 0;

			dst = (struct sockaddr *)&nat64pre;
		}

		/* Initial subflow started on a NAT64'd address? */
		if (!(mpte->mpte_flags & MPTE_UNICAST_IP) &&
		    mpte->mpte_dst.sa_family == AF_INET6 &&
		    mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
			dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
		}

		if (dst->sa_family == AF_INET && !info->has_v4_conn) {
			continue;
		}
		if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
			continue;
		}

		mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
	}

	if (!cellular_viable && want_cellular) {
		/* Trigger Cell Bringup */
		mptcp_trigger_cell_bringup(mpte);
	}
}
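/*
 * If at least one connected, non-disconnecting subflow exists on WiFi,
 * reset all cellular subflows; otherwise leave cell alone.
 */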
static void
mptcp_remove_cell_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;
	boolean_t found = false;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		/* We have a functioning subflow on WiFi. No need for cell! */
		if (mpts->mpts_flags & MPTSF_CONNECTED &&
		    !mptcp_subflow_disconnecting(mpts)) {
			found = true;
		}
	}

	/* Didn't find a functional subflow on WiFi - stay on cell */
	if (!found) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		/* Only remove cellular subflows */
		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}
}
/* Returns true if it removed a subflow on cell */
static void
mptcp_handover_subflows_remove(struct mptses *mpte)
{
	int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
	boolean_t found_working_subflow = false;
	struct mptsub *mpts;

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED) {
			continue;
		}

		os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);

		/* Is this subflow in good condition? */
		if (tp->t_rxtshift == 0 && mptetoso(mpte)->so_snd.sb_cc) {
			found_working_subflow = true;
		}

		/* Or WiFi is fine */
		if (!wifi_unusable) {
			found_working_subflow = true;
		}
	}

	/*
	 * Couldn't find a working subflow, let's not remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow) {
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}
static void
mptcp_targetbased_subflows_remove(struct mptses *mpte)
{
	uint64_t time_now = mach_continuous_time();

	if (mpte->mpte_time_target != 0 &&
	    (int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
	    mptcp_is_wifi_unusable_for_session(mpte)) {
		/* WiFi is bad and we are below the target - don't remove any subflows */
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}
/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	socket_lock_assert_owned(mptetoso(mpte));

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		mptcp_handover_subflows_remove(mpte);
	}

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
		mptcp_targetbased_subflows_remove(mpte);
	}
}
static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		boolean_t found = false;
		uint32_t ifindex;
		uint32_t i;

		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
			    ifp ? ifp->if_index : -1);
			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);

			continue;
		}

		if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) {
			continue;
		}

		if (ifp != NULL) {
			ifindex = ifp->if_index;
		} else {
			ifindex = mpts->mpts_ifscope;
		}

		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
				continue;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				if (mpts->mpts_dst.sa_family == AF_INET6 &&
				    (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) {
					found = true;
					break;
				}

				if (mpts->mpts_dst.sa_family == AF_INET &&
				    mpte->mpte_itfinfo[i].has_v4_conn) {
					found = true;
					break;
				}
			}
		}

		if (!found) {
			os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    ifindex, mpts->mpts_flags);

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}
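/*
 * Timeout callback scheduled by mptcp_sched_create_subflows(): walk all
 * MPTCP PCBs flagged MPP_CREATE_SUBFLOWS and reconsider their subflows.
 */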
static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/*
	 * Start with clearing, because we might be processing connections
	 * while a new event comes in.
	 */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
		os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
	}

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
			continue;
		}

		socket_lock(mp_so, 1);
		VERIFY(mp_so->so_usecount > 0);

		mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * We need this because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
 * So, we would deadlock there as we already hold the NECP-lock.
 *
 * So, let's schedule this separately. It also gives NECP the chance to make
 * progress, without having to wait for MPTCP to finish its subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		return;
	}

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz / 10);
}
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
	    zalloc_noblock(mptopt_zone);
	if (mpo != NULL) {
		bzero(mpo, mptopt_zone_size);
	}

	return mpo;
}
/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}
/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	socket_lock_assert_owned(mptetoso(mpte));

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name) {
			break;
		}
	}
	return mpo;
}
/*
 * Allocate an MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts = zalloc(mptsub_zone);

	if (mpts == NULL) {
		return NULL;
	}

	bzero(mpts, mptsub_zone_size);
	return mpts;
}
/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released.  This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}
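/*
 * Reference counting for subflow structures; dropping the final reference
 * in mptcp_subflow_remref() frees the subflow via mptcp_subflow_free().
 */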
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0) {
		return;
	}

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}
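/*
 * Link a freshly created subflow socket into the MPTCP session: wire up
 * the TCP PCB, bump the MPTCP socket's use count, and take the two subflow
 * references (list membership and socket back-pointer).
 */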
static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP-connection.
	 * Locking,... happens now at the MPTCP-layer
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket.  From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;
	tp->t_mpsub = mpts;
	mptcp_subflow_addref(mpts);     /* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts);     /* for subflow socket */
}
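/*
 * NECP callback for subflow sockets: when a subflow's path becomes
 * non-viable (or enters low-power mode), request its closure and schedule
 * the creation of replacement subflows.
 */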
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused uint32_t interface_index,
    uint32_t necp_flags, bool *viable)
{
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
		return;
	}

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;
	}

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		goto out;
	}

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
	    mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
	    viable != NULL) {
		*viable = 1;
	}

out:
	socket_unlock(so, 1);
}
/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct socket **so)
{
	lck_mtx_t *subflow_mtx;
	struct mptopt smpo, *mpo, *tmpo;
	struct proc *p;
	struct socket *mp_so;
	int error;

	*so = NULL;

	mp_so = mptetoso(mpte);

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		mptcp_subflow_free(mpts);
		return ESRCH;
	}

	/*
	 * Create the subflow socket (multipath subflow, non-blocking.)
	 *
	 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
	 * socket; it will be cleared when the socket is peeled off or closed.
	 * It also indicates to the underlying TCP to handle MPTCP options.
	 * A multipath subflow socket implies SS_NOFDREF state.
	 */

	/*
	 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
	 * the ipi-lock. We cannot hold the socket-lock at that point.
	 */
	socket_unlock(mp_so, 0);
	error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
	    SOCF_MPTCP, PROC_NULL);
	socket_lock(mp_so, 0);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

		proc_rele(p);

		mptcp_subflow_free(mpts);
		return error;
	}

	/*
	 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
	 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
	 * Which is why we also need to get the lock with pr_getlock, as after
	 * setting the flag, socket_unlock will work on the MPTCP-level lock.
	 */
	subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
	lck_mtx_lock(subflow_mtx);

	/*
	 * Must be the first thing we do, to make sure all pointers for this
	 * subflow are set.
	 */
	mptcp_subflow_attach(mpte, mpts, *so);

	/*
	 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file descriptor associated by
	 * default.
	 */
	(*so)->so_state |= SS_NOFDREF;

	lck_mtx_unlock(subflow_mtx);

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	/* Inherit preconnect and TFO data flags */
	if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
		(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
	}
	if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
	}

	/* Inherit uuid and create the related flow. */
	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;

		/*
		 * A note on the unlock: With MPTCP, we do multiple times a
		 * necp_client_register_socket_flow. This is problematic,
		 * because now the lock-ordering guarantee (first necp-locks,
		 * then socket-locks) is no longer respected. So, we need to
		 * unlock here.
		 */
		socket_unlock(mp_so, 0);
		error = necp_client_register_socket_flow(mp_so->last_pid,
		    mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
		socket_lock(mp_so, 0);

		if (error) {
			os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

			goto out_err;
		}

		/* Possible state-change during the unlock above */
		if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
		    (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
			os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mp_tp->mpt_state, mp_tp->mpt_flags);

			error = EINVAL;
			goto out_err;
		}

		uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
	}

	/* Needs to happen prior to the delegation! */
	(*so)->last_pid = mp_so->last_pid;

	if (mp_so->so_flags & SOF_DELEGATED) {
		if (mpte->mpte_epid) {
			error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

				goto out_err;
			}
		}
		if (!uuid_is_null(mpte->mpte_euuid)) {
			error = so_set_effective_uuid(*so, mpte->mpte_euuid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

				goto out_err;
			}
		}
	}

	/* inherit the other socket options */
	bzero(&smpo, sizeof(smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* disable SIGPIPE */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	/* find out if the subflow's source address goes away */
	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
		/*
		 * On secondary subflows we might need to set the cell-fallback
		 * flag (see conditions in mptcp_subflow_sosetopt).
		 */
		smpo.mpo_level = SOL_SOCKET;
		smpo.mpo_name = SO_MARK_CELLFALLBACK;
		smpo.mpo_intval = 1;
		if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
			goto out_err;
		}
	}

	/* replay setsockopt(2) on the subflow sockets for eligible options */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
			continue;
		}

		/*
		 * Skip those that are handled internally; these options
		 * should not have been recorded and marked with the
		 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
		 */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE)) {
			continue;
		}

		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
			os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
			    mpo->mpo_intval);
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
		}
	}

	/*
	 * We need to receive everything that the subflow socket has,
	 * so use a customized socket receive function.  We will undo
	 * this when the socket is peeled off or closed.
	 */
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
	    int, dom, int, error);

	return 0;

out_err:
	mptcp_subflow_abort(mpts, error);

	proc_rele(p);

	return error;
}
/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED) {
		return;
	}

	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt == 0) {
		soclose_locked(so);
	} else {
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}
}
/*
 * Connect an MPTCP subflow socket.
 *
 * Note that in the pending connect case, the subflow socket may have been
 * bound to an interface and/or a source IP address which may no longer be
 * around by the time this routine is called; in that case the connect attempt
 * will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
	char dbuf[MAX_IPv6_STR_LEN];
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	struct sockaddr *dst;
	struct proc *p;
	int af, error, dport;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	af = mpts->mpts_dst.sa_family;
	dst = &mpts->mpts_dst;

	VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
	VERIFY(mpts->mpts_socket != NULL);
	VERIFY(af == AF_INET || af == AF_INET6);

	if (af == AF_INET) {
		inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN(dst)->sin_port);
	} else {
		inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN6(dst)->sin6_port);
	}

	os_log_info(mptcp_log_handle,
	    "%s - %lx: ifindex %u dst %s:%d pended %u\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		return ESRCH;
	}

	mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

	mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);

	/* connect the subflow socket */
	error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
	    p, mpts->mpts_ifscope,
	    mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);

	mpts->mpts_iss = sototcpcb(so)->iss;

	/* See tcp_connect_complete */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
	    (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
	}

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
	    struct mptsub *, mpts, int, error);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope);
	}

	return error;
}
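/*
 * Adjust (or synthesize) the DSS mapping carried in the mbuf packet header
 * after 'off' bytes have already been consumed, so that data-sequence
 * bookkeeping stays consistent; an inconsistent second mapping causes the
 * subflow to be reset.
 */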
static int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0) {
		return 0;
	}

	if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		if (off && (dsn != m->m_pkthdr.mp_dsn ||
		    rseq != m->m_pkthdr.mp_rseq ||
		    dlen != m->m_pkthdr.mp_rlen)) {
			os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: %u - %u , %u - %u, %u - %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
			    (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
			    rseq, m->m_pkthdr.mp_rseq,
			    dlen, m->m_pkthdr.mp_rlen);

			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			return -1;
		}
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else {
		if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) {
			/* data arrived without a DSS option mapping */

			/* initial subflow can fallback right after SYN handshake */
			if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
				mptcp_notify_mpfail(so);
			} else {
				soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

				return -1;
			}
		} else if (m->m_flags & M_PKTHDR) {
			/* We need to fake the DATA-mapping */
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
			m->m_pkthdr.mp_dsn = dsn + off;
			m->m_pkthdr.mp_rseq = rseq + off;
			m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
		}
	}

	mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;

	return 0;
}
/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	int flags, error = 0;
	struct proc *p = current_proc();
	struct mbuf *m, **mp = mp0;
	boolean_t proc_held = FALSE;

	VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);
		/* NOTREACHED */
	}
#endif
	/*
	 * We return all that is there in the subflow's socket receive buffer
	 * to the MPTCP layer, so we require that the caller passes in the
	 * expected parameters.
	 */
	if (mp == NULL || controlp != NULL) {
		return EINVAL;
	}

	*mp = NULL;
	if (psa != NULL) {
		*psa = NULL;
	}

	if (flagsp != NULL) {
		flags = *flagsp & ~MSG_EOR;
	} else {
		flags = 0;
	}

	if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
		return EOPNOTSUPP;
	}

	flags |= (MSG_DONTWAIT | MSG_NBIO);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);
		}
		return error;
	}

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		return 0;
	}

	/*
	 * For consistency with soreceive() semantics, we need to obey
	 * SB_LOCK in case some other code path has locked the buffer.
	 */
	error = sblock(&so->so_rcv, 0);
	if (error != 0) {
		return error;
	}

	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error != 0) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}

		if (so->so_state & SS_CANTRCVMORE) {
			goto release;
		}

		if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
			error = ENOTCONN;
			goto release;
		}

		/*
		 * MSG_DONTWAIT is implicitly defined and this routine will
		 * never block, so return EWOULDBLOCK when there is nothing.
		 */
		error = EWOULDBLOCK;
		goto release;
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

	while (m != NULL) {
		int dlen = 0, dfin = 0, error_out = 0;
		struct mbuf *start = m;
		uint64_t dsn;
		uint32_t sseq;
		uint16_t orig_dlen;
		uint16_t csum;

		VERIFY(m->m_nextpkt == NULL);

		if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			orig_dlen = dlen = m->m_pkthdr.mp_rlen;
			dsn = m->m_pkthdr.mp_dsn;
			sseq = m->m_pkthdr.mp_rseq;
			csum = m->m_pkthdr.mp_csum;
		} else {
			/* We did fallback */
			if (mptcp_adj_rmap(so, m, 0, 0, 0, 0)) {
				error = EIO;
				*mp0 = NULL;
				goto release;
			}

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (m != NULL) {
				so->so_rcv.sb_lastrecord = m;
			} else {
				SB_EMPTY_FIXUP(&so->so_rcv);
			}

			continue;
		}

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
			dfin = 1;
		}

		/*
		 * Check if the full mapping is now present
		 */
		if ((int)so->so_rcv.sb_cc < dlen - dfin) {
			mptcplog((LOG_INFO, "%s not enough data (%u) need %u for dsn %u\n",
			    __func__, so->so_rcv.sb_cc, dlen, (uint32_t)dsn),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);

			if (*mp0 == NULL) {
				error = EWOULDBLOCK;
			}
			goto release;
		}

		/* Now, get the full mapping */
		while (dlen > 0) {
			if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
				error_out = 1;
				error = EIO;
				dlen = 0;
				*mp0 = NULL;
				break;
			}

			dlen -= m->m_len;
			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (dlen - dfin == 0) {
				dlen = 0;
			}

			VERIFY(dlen <= 0 || m);
		}

		VERIFY(dlen == 0);

		if (m != NULL) {
			so->so_rcv.sb_lastrecord = m;
		} else {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}

		if (error_out) {
			goto release;
		}

		if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
			error = EIO;
			*mp0 = NULL;
			goto release;
		}

		SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
		SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
	}

	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);

	if (flagsp != NULL) {
		*flagsp |= flags;
	}

release:
	sbunlock(&so->so_rcv, TRUE);

	if (proc_held) {
		proc_rele(p);
	}

	return error;
}
/*
 * MPTCP subflow socket send routine, derived from sosend().
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	struct proc *p = current_proc();
	boolean_t en_tracing = FALSE, proc_held = FALSE;
	int en_tracing_val;
	int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
	int error;

	VERIFY(control == NULL);
	VERIFY(addr == NULL);
	VERIFY(uio == NULL);
	VERIFY(flags == 0);
	VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);

	VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
	VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);

	/*
	 * trace if tracing & network (vs. unix) sockets & and
	 * non-loopback
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			en_tracing_val = top->m_pkthdr.len;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    (unsigned long)VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)en_tracing_val);
		}
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

#if NECP
	inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
#endif /* NECP */

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
	if (error) {
		goto out;
	}

	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
	top = NULL;

out:
	if (top != NULL) {
		m_freem(top);
	}

	if (proc_held) {
		proc_rele(p);
	}

	soclearfastopen(so);

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    (unsigned long)VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)en_tracing_val);
	}

	return error;
}
/*
 * Establish an initial MPTCP connection (if first subflow and not yet
 * connected), or add a subflow to an existing MPTCP connection.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
    struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
{
	struct socket *mp_so, *so = NULL;
	struct mptcb *mp_tp;
	struct mptsub *mpts = NULL;
	int af, error = 0;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mp_so);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		/* If the remote end sends Data FIN, refuse subflow adds */
		os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state);
		error = ENOTCONN;
		goto out_err;
	}

	mpts = mptcp_subflow_alloc();
	if (mpts == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		error = ENOMEM;
		goto out_err;
	}

	if (src != NULL) {
		if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto out_err;
		}

		if (src->sa_family == AF_INET &&
		    src->sa_len != sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out_err;
		}

		if (src->sa_family == AF_INET6 &&
		    src->sa_len != sizeof(struct sockaddr_in6)) {
			error = EINVAL;
			goto out_err;
		}

		MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
		    M_WAITOK | M_ZERO);
		if (mpts->mpts_src == NULL) {
			error = ENOMEM;
			goto out_err;
		}
		bcopy(src, mpts->mpts_src, src->sa_len);
	}

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out_err;
	}

	if (dst->sa_family == AF_INET &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
		error = EINVAL;
		goto out_err;
	}

	if (dst->sa_family == AF_INET6 &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
		error = EINVAL;
		goto out_err;
	}

	memcpy(&mpts->mpts_u_dst, dst, dst->sa_len);

	af = mpts->mpts_dst.sa_family;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index)) {
		ifnet_head_done();
		error = ENXIO;
		goto out_err;
	}
	ifnet_head_done();

	mpts->mpts_ifscope = ifscope;

	/* create the subflow socket */
	if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
		/*
		 * Returning (error) and not cleaning up, because up to here
		 * all we did is creating mpts.
		 *
		 * And the contract is that the call to mptcp_subflow_socreate,
		 * moves ownership of mpts to mptcp_subflow_socreate.
		 */
		return error;
	}

	/*
	 * We may be called from within the kernel. Still need to account this
	 * one to the real app.
	 */
	mptcp_update_last_owner(mpts->mpts_socket, mp_so);

	/*
	 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
	 * -1 (SAE_CONNID_ALL).
	 */
	mpte->mpte_connid_last++;
	if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
	    mpte->mpte_connid_last == SAE_CONNID_ANY) {
		mpte->mpte_connid_last++;
	}

	mpts->mpts_connid = mpte->mpte_connid_last;

	mpts->mpts_rel_seq = 1;

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	/* register for subflow socket read/write events */
	sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);

	/* Register for subflow socket control events */
	sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
	    SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
	    SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
	    SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
	    SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
	    SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR);

	/* sanity check */
	VERIFY(!(mpts->mpts_flags &
	    (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));

	/*
	 * Indicate to the TCP subflow whether or not it should establish
	 * the initial MPTCP connection, or join an existing one.  Fill
	 * in the connection request structure with additional info needed
	 * by the underlying TCP (to be used in the TCP options, etc.)
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
		mpts->mpts_flags |= MPTSF_INITIAL_SUB;

		if (mp_tp->mpt_state == MPTCPS_CLOSED) {
			mptcp_init_local_parms(mpte);
		}
		soisconnecting(mp_so);

		/* If fastopen is requested, set state in mpts */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			mpts->mpts_flags |= MPTSF_TFO_REQD;
		}
	} else {
		if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) {
			mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
		}
	}

	mpts->mpts_flags |= MPTSF_CONNECTING;

	/* connect right away if first attempt, or if join can be done now */
	if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) {
		error = mptcp_subflow_soconnectx(mpte, mpts);
		if (error) {
			goto out_err_close;
		}
	}

	if (pcid) {
		*pcid = mpts->mpts_connid;
	}

	return 0;

out_err_close:
	mptcp_subflow_abort(mpts, error);

	return error;

out_err:
	if (mpts) {
		mptcp_subflow_free(mpts);
	}

	return error;
}
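/*
 * Fold the subflow's inpcb byte counters (total, wifi, wired and cellular)
 * into the MPTCP session's per-interface stats slot for this subflow.
 */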
static void
mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	int index = mptcpstats_get_index(stats, mpts);

	if (index != -1) {
		struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

		stats[index].mpis_txbytes += inp->inp_stat->txbytes;
		stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;

		stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes;
		stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes;
		stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes;
		stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes;
		stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes;
		stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes;
	}
}
/*
 * Delete/remove a subflow from an MPTCP.  The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);
	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
	VERIFY(mpte->mpte_numflows != 0);
	VERIFY(mp_so->so_usecount > 0);

	mptcpstats_update(mpte->mpte_itfstats, mpts);

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
	mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

	atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows--;
	if (mpte->mpte_active_sub == mpts) {
		mpte->mpte_active_sub = NULL;
	}

	/*
	 * Drop references held by this subflow socket; there
	 * will be no further upcalls made from this point.
	 */
	sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
	sock_catchevents_locked(so, NULL, NULL, 0);

	mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

	mp_so->so_usecount--;           /* for subflow socket */
	mpts->mpts_mpte = NULL;
	mpts->mpts_socket = NULL;

	mptcp_subflow_remref(mpts);     /* for MPTCP subflow list */
	mptcp_subflow_remref(mpts);     /* for subflow socket */

	so->so_flags &= ~SOF_MP_SUBFLOW;
	tp->t_mptcb = NULL;
	tp->t_mpsub = NULL;
}
static void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	int send_dfin = 0;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}
		soshutdownlock(so, SHUT_WR);
	}
}
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
		return;
	}

	mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	if (tp->t_state != TCPS_CLOSED) {
		tcp_drop(tp, error);
	}

	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so;
	struct mptcb *mp_tp;
	int send_dfin = 0;

	socket_lock_assert_owned(mptetoso(mpte));

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
		return;
	}

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpts->mpts_flags |= MPTSF_DISCONNECTING;

	so = mpts->mpts_socket;
	mp_tp = mpte->mpte_mptcb;
	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}

		(void) soshutdownlock(so, SHUT_RD);
		(void) soshutdownlock(so, SHUT_WR);
		(void) sodisconnectlocked(so);
	}

	/*
	 * Generate a disconnect event for this subflow socket, in case
	 * the lower layer doesn't do it; this is needed because the
	 * subflow socket deletion relies on it.
	 */
	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m = NULL;
	struct socket *so;
	int error, wakeup = 0;

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

	DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
	    struct mptsub *, mpts);

	if (!(mpts->mpts_flags & MPTSF_CONNECTED)) {
		goto out;
	}

	so = mpts->mpts_socket;

	error = sock_receive_internal(so, NULL, &m, 0, NULL);
	if (error != 0 && error != EWOULDBLOCK) {
		os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error);
		if (error == ENODATA) {
			/*
			 * Don't ignore ENODATA so as to discover
			 * nasty middleboxes.
			 */
			mp_so->so_error = ENODATA;

			wakeup = 1;
			goto out;
		}
	} else if (error == 0) {
		mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	/* In fallback, make sure to accept data on all but one subflow */
	if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mpts->mpts_flags & MPTSF_ACTIVE)) {
		mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
		    __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
		m_freem(m);
		goto out;
	}

	if (m != NULL) {
		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		mptcp_input(mpte, m);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
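/*
 * Drain input on all subflows of the MPTCP session that owns this subflow
 * socket.  If upcalls are currently deferred, only record that a read
 * wakeup is needed; the deferred-upcall handler re-runs the input path.
 */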
void
mptcp_handle_input(struct socket *so)
{
	struct mptsub *mpts, *tmpts;
	struct mptses *mpte;

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = sototcpcb(so)->t_mpsub;
	mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
		}
		return;
	}

	mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE;
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		mptcp_subflow_input(mpte, mpts);

		mptcp_subflow_remref(mpts);             /* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
}
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
		}
		return;
	}

	mptcp_output(mpte);
}
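/*
 * Check whether the data-sequence number of mbuf m is (partially) covered
 * by a segment still sitting in the subflow's send buffer, in which case
 * it must not be reinjected on another subflow yet.
 */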
static boolean_t
mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
{
	struct mbuf *so_m = so->so_snd.sb_mb;
	uint64_t dsn = m->m_pkthdr.mp_dsn;

	while (so_m) {
		VERIFY(so_m->m_flags & M_PKTHDR);
		VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		/* Part of the segment is covered, don't reinject here */
		if (so_m->m_pkthdr.mp_dsn <= dsn &&
		    so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) {
			return TRUE;
		}

		so_m = so_m->m_next;
	}

	return FALSE;
}
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
	struct socket *mp_so, *so;
	struct tcpcb *tp;
	uint64_t mpt_dsn = 0, off = 0;
	int sb_cc = 0, error = 0, wakeup = 0;
	uint16_t dss_csum = 0;
	uint16_t tot_sent = 0;
	boolean_t reinjected = FALSE;

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;
	tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

	VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
	VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
	    (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
	    (mpts->mpts_flags & MPTSF_TFO_REQD));
	VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

	mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
	    __func__, mpts->mpts_flags, mpte->mpte_flags,
	    mptcp_subflow_cwnd_space(so)),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
	    struct mptsub *, mpts);

	/* Remove Addr Option is not sent reliably as per I-D */
	if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
		tp->t_rem_aid = mpte->mpte_lost_aid;
		tp->t_mpflags |= TMPF_SND_REM_ADDR;
		mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
	}

	/*
	 * The mbuf chains containing the metadata (as well as pointing to
	 * the user data sitting at the MPTCP output queue) would then be
	 * sent down to the subflow socket.
	 *
	 * Some notes on data sequencing:
	 *
	 *   a. Each mbuf must be a M_PKTHDR.
	 *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
	 *	in the mbuf pkthdr structure.
	 *   c. Each mbuf containing the MPTCP metadata must have its
	 *	pkt_flags marked with the PKTF_MPTCP flag.
	 */

	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
		sb_mb = mp_so->so_snd.sb_mb;
	}

	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
		    (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1);

		/* Fix it to prevent looping */
		if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		goto out;
	}

	VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

	if (sb_mb->m_pkthdr.mp_rlen == 0 &&
	    !(so->so_state & SS_ISCONNECTED) &&
	    (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		tp->t_mpflags |= TMPF_TFO_REQUEST;
		goto zero_len_write;
	}

	mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

	/* First, drop acknowledged data */
	if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
		os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier "
		    "dsn %u suna %u reinject? %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn,
		    (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq);
		if (mpte->mpte_reinjectq) {
			mptcp_clean_reinjectq(mpte);
		} else {
			uint64_t len = 0;
			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;
		}
	}

	/* Check again because of above sbdrop */
	if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
		os_log_error(mptcp_log_handle, "%s - $%lx: send-buffer is empty\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	/*
	 * In degraded mode, we don't receive data acks, so force free
	 * mbufs less than snd_nxt
	 */
	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
	    mp_so->so_snd.sb_mb) {
		mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
		if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
			uint64_t len = 0;
			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;

			os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna);
		}
	}

	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
		mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
		so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
	}

	/*
	 * Adjust the top level notion of next byte used for retransmissions
	 * and sending FINs.
	 */
	if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
		mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
	}

	/* Now determine the offset from which to start transmitting data */
	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
dont_reinject:
		sb_mb = mp_so->so_snd.sb_mb;
	}
	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	if (sb_mb == mpte->mpte_reinjectq) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;

		if (mptcp_search_seq_in_sub(sb_mb, so)) {
			if (mptcp_can_send_more(mp_tp, TRUE)) {
				goto dont_reinject;
			}

			error = ECANCELED;
			goto out;
		}

		reinjected = TRUE;
	} else if (flags & MPTCP_SUBOUT_PROBING) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;
	} else {
		sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

		/*
		 * With TFO, there might be no data at all, thus still go into this
		 * code-path here.
		 */
		if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
		    MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
			off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
			sb_cc -= off;
		} else {
			os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt,
			    (uint32_t)mp_tp->mpt_sndmax);

			goto out;
		}
	}

	sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
	if (sb_cc <= 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
		    (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
		    mptcp_subflow_cwnd_space(so));
	}

	sb_cc = min(sb_cc, UINT16_MAX);

	/*
	 * Create a DSN mapping for the data we are about to send.  It all
	 * has the same mapping.
	 */
	if (reinjected) {
		mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
	} else {
		mpt_dsn = mp_tp->mpt_snduna + off;
	}

	mpt_mbuf = sb_mb;
	while (mpt_mbuf && reinjected == FALSE &&
	    (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
	    mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
		off -= mpt_mbuf->m_pkthdr.mp_rlen;
		mpt_mbuf = mpt_mbuf->m_next;
	}
	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
		    __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
		    mpts->mpts_probecnt),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

	head = tail = NULL;

	while (tot_sent < sb_cc) {
		ssize_t mlen;

		mlen = mpt_mbuf->m_len;
		mlen -= off;
		mlen = min(mlen, sb_cc - tot_sent);

		if (mlen < 0) {
			os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
			    (uint32_t)off, sb_cc, tot_sent);
			goto out;
		}

		if (mlen == 0) {
			break;
		}

		m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
		    M_COPYM_MUST_COPY_HDR);
		if (m == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
			    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			error = ENOBUFS;
			break;
		}

		/* Create a DSN mapping for the data (m_copym does it) */
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_next == NULL);

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
		m->m_pkthdr.mp_dsn = mpt_dsn;
		m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
		m->m_pkthdr.len = mlen;

		if (head == NULL) {
			head = tail = m;
		} else {
			tail->m_next = m;
			tail = m;
		}

		tot_sent += mlen;
		off = 0;
		mpt_mbuf = mpt_mbuf->m_next;
	}

	if (reinjected) {
		if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
			struct mbuf *n = sb_mb;

			while (n) {
				n->m_pkthdr.mp_dsn += sb_cc;
				n->m_pkthdr.mp_rlen -= sb_cc;
				n = n->m_next;
			}
			m_adj(sb_mb, sb_cc);
		} else {
			mpte->mpte_reinjectq = sb_mb->m_nextpkt;
			m_freem(sb_mb);
		}
	}

	mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
	    __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
	    tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
		dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
		    tot_sent);
	}

	/* Now, let's update rel-seq and the data-level length */
	mpts->mpts_rel_seq += tot_sent;
	m = head;
	while (m) {
		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
			m->m_pkthdr.mp_csum = dss_csum;
		}
		m->m_pkthdr.mp_rlen = tot_sent;
		m = m->m_next;
	}

	if (head != NULL) {
		if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
		    (tp->t_tfo_stats == 0)) {
			tp->t_mpflags |= TMPF_TFO_REQUEST;
		}

		error = sock_sendmbuf(so, NULL, head, 0, NULL);

		DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
		    struct sockbuf *, &so->so_rcv,
		    struct sockbuf *, &so->so_snd,
		    struct mptses *, mpte, struct mptsub *, mpts,
		    size_t, tot_sent);
	}

done_sending:
	if (error == 0 ||
	    (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
		uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

		if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
			tcpstat.tcps_mp_num_probes++;
			if ((uint32_t)tot_sent < mpts->mpts_maxseg) {
				mpts->mpts_probecnt += 1;
			} else {
				mpts->mpts_probecnt +=
				    tot_sent / mpts->mpts_maxseg;
			}
		}

		if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
			if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
			    MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) {
				mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
			}
			mp_tp->mpt_sndnxt = new_sndnxt;
		}

		mptcp_cancel_timer(mp_tp, MPTT_REXMT);

		/* Must be here as mptcp_can_send_more() checks for this */
		soclearfastopen(mp_so);

		if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
		    (mpts->mpts_probesoon != 0)) {
			mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
			    __func__, mpts->mpts_connid,
			    !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
			    tot_sent, (int) sb_cc, mpts->mpts_probecnt,
			    (tcp_now - mpts->mpts_probesoon)),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		}

		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		/*
		 * Don't propagate EWOULDBLOCK - it's already taken care of
		 * in mptcp_usr_send for TFO.
		 */
		error = 0;
	} else {
		os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
	return error;

zero_len_write:
	/* Opting to call pru_send as no mbuf at subflow level */
	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
	    NULL, current_proc());

	goto done_sending;
}
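/*
 * Insert m into the reinject queue, which is kept sorted by data-sequence
 * number.  A segment that is fully covered by an existing entry is freed,
 * and existing entries fully covered by m are removed from the queue.
 */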
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
	struct mbuf *n, *prev = NULL;

	mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
	    __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
	    m->m_pkthdr.mp_rseq),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	n = mpte->mpte_reinjectq;

	/* First, look for an mbuf n, whose data-sequence-number is bigger or
	 * equal than m's sequence number.
	 */
	while (n) {
		if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) {
			break;
		}

		prev = n;

		n = n->m_nextpkt;
	}

	if (n) {
		/* m is already fully covered by the next mbuf in the queue */
		if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
		    n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
			mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
			    __func__, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}

		/* m is covering the next mbuf entirely, thus we remove this guy */
		if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
			struct mbuf *tmp = n->m_nextpkt;

			mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
			    __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
			    n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

			m->m_nextpkt = NULL;
			if (prev == NULL) {
				mpte->mpte_reinjectq = tmp;
			} else {
				prev->m_nextpkt = tmp;
			}

			m_freem(n);
			n = tmp;
		}
	}

	if (prev) {
		/* m is already fully covered by the previous mbuf in the queue */
		if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
			mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
			    __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}
	}

	if (prev == NULL) {
		mpte->mpte_reinjectq = m;
	} else {
		prev->m_nextpkt = m;
	}

	m->m_nextpkt = n;

	return;

dont_queue:
	m_freem(m);
	return;
}
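/*
 * Walk the MPTCP-level send buffer for the segment whose mapping covers
 * the given data-sequence number; returns NULL if it is no longer queued.
 */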
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m;

	m = mp_so->so_snd.sb_mb;

	while (m) {
		/* If this segment covers what we are looking for, return it. */
		if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) {
			break;
		}

		/* Segment is no more in the queue */
		if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) {
			return NULL;
		}

		m = m->m_next;
	}

	return m;
}
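/*
 * Duplicate len bytes worth of mbufs, preserving the DSS mapping metadata
 * (mp_dsn/mp_rlen/mp_rseq) in each copied packet header.
 */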
static struct mbuf *
mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len)
{
	struct mbuf *top = NULL, *tail = NULL;
	uint64_t dsn;
	uint32_t dlen, rseq;

	dsn = m->m_pkthdr.mp_dsn;
	dlen = m->m_pkthdr.mp_rlen;
	rseq = m->m_pkthdr.mp_rseq;

	while (len > 0) {
		struct mbuf *n;

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
		if (n == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			goto err;
		}

		VERIFY(n->m_flags & M_PKTHDR);
		VERIFY(n->m_next == NULL);
		VERIFY(n->m_pkthdr.mp_dsn == dsn);
		VERIFY(n->m_pkthdr.mp_rlen == dlen);
		VERIFY(n->m_pkthdr.mp_rseq == rseq);
		VERIFY(n->m_len == m->m_len);

		n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

		if (top == NULL) {
			top = n;
		}

		if (tail != NULL) {
			tail->m_next = n;
		}

		tail = n;

		len -= m->m_len;
		m = m->m_next;
	}

	return top;

err:
	if (top) {
		m_freem(top);
	}

	return NULL;
}
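/*
 * Scan the subflow's send buffer and queue every segment that has not yet
 * been acknowledged at the MPTCP data level on the session's reinject
 * queue, marking the originals with PKTF_MPTCP_REINJ so that they are not
 * queued twice.
 */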
static void
mptcp_reinject_mbufs(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct sockbuf *sb = &so->so_snd;
	struct mbuf *m;

	m = sb->sb_mb;
	while (m) {
		struct mbuf *n = m->m_next, *orig = m;

		mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
		    __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
		    m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) {
			goto next;
		}

		/* Has it all already been acknowledged at the data-level? */
		if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) {
			goto next;
		}

		/* Part of this has already been acknowledged - lookup in the
		 * MPTCP-socket for the segment.
		 */
		if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
			m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
			if (m == NULL) {
				goto next;
			}
		}

		/* Copy the mbuf with headers (aka, DSN-numbers) */
		m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
		if (m == NULL) {
			break;
		}

		VERIFY(m->m_nextpkt == NULL);

		/* Now, add to the reinject-queue, eliminating overlapping
		 * segments
		 */
		mptcp_add_reinjectq(mpte, m);

		orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
		/* mp_rlen can cover multiple mbufs, so advance to the end of it. */
		while (n) {
			VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

			if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) {
				break;
			}

			n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
			n = n->m_next;
		}

		m = n;
	}
}
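/*
 * Drop entries from the head of the reinject queue that have been fully
 * acknowledged at the MPTCP data level (i.e., lie entirely below
 * mpt_snduna).
 */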
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mptetoso(mpte));

	while (mpte->mpte_reinjectq) {
		struct mbuf *m = mpte->mpte_reinjectq;

		if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) {
			break;
		}

		mpte->mpte_reinjectq = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
}
/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if ((mpts->mpts_evctl & events) == events) {
		return;
	}

	mpts->mpts_evctl |= events;

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
		return;
	}

	mptcp_subflow_workloop(mpte);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint)
{
	ev_ret_t ret = MPTS_EVRET_OK;
	int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
	    sizeof(mpsub_ev_entry_tbl[0]);

	/* bail if there's nothing to process */
	if (!mpts->mpts_evctl) {
		return ret;
	}

	if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
	    SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
	    SO_FILT_HINT_DISCONNECTED)) {
		mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
	}

	DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
	    struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

	mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
	    mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

	/*
	 * Process all the socket filter hints and reset the hint
	 * once it is handled
	 */
	for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
		/*
		 * Always execute the DISCONNECTED event, because it will wakeup
		 * the app.
		 */
		if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
		    (ret >= MPTS_EVRET_OK ||
		    mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
			mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
			ev_ret_t error =
			    mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
			ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
		}
	}

	/*
	 * We should be getting only events specified via sock_catchevents(),
	 * so loudly complain if we have any unprocessed one(s).
	 */
	if (mpts->mpts_evctl || ret < MPTS_EVRET_OK) {
		mptcplog((LOG_WARNING, "%s%s: cid %d evret %d unhandled events=%b\n", __func__,
		    (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
		    mpts->mpts_connid,
		    ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
	} else {
		mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
		    mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	return ret;
}
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
	    mpts->mpts_connid, event),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got an event for this subflow that might need to be propagated,
	 * based on the state of the MPTCP connection.
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mp_so->so_error = so->so_error;
		*p_mpsofilt_hint |= event;
	}

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(p_mpsofilt_hint, event)
	struct socket *mp_so;
	struct tcpcb *tp;

	mp_so = mptetoso(mpte);
	tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

	/*
	 * This overwrites any previous mpte_lost_aid to avoid storing
	 * too much state when the typical case has only two subflows.
	 */
	mpte->mpte_flags |= MPTE_SND_REM_ADDR;
	mpte->mpte_lost_aid = tp->t_local_aid;

	mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection has lost its source address.
	 */
	mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

	if (mp_so->so_flags & SOF_NOADDRAVAIL) {
		mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
	}

	return MPTS_EVRET_DELETE;
}
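/*
 * Handle SO_FILT_HINT_MP_SUB_ERROR subflow socket event: propagate an
 * ENODATA subflow error to the MPTCP socket so the application sees it.
 */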
static ev_ret_t
mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *so, *mp_so;

	so = mpts->mpts_socket;

	if (so->so_error != ENODATA) {
		return MPTS_EVRET_OK;
	}

	mp_so = mptetoso(mpte);

	mp_so->so_error = ENODATA;

	sorwakeup(mp_so);
	sowwakeup(mp_so);

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got a Data FIN for the MPTCP connection.
	 * The FIN may arrive with data.  The data is handed up to the
	 * mptcp socket and the user is notified so that it may close
	 * the socket if needed.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
	}

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct mptsub *mpts_alt = NULL;
	struct socket *alt_so = NULL;
	struct socket *mp_so;
	int altpath_exists = 0;

	mp_so = mptetoso(mpte);
	os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

	mptcp_reinject_mbufs(mpts->mpts_socket);

	mpts_alt = mptcp_get_subflow(mpte, NULL);

	/* If there is no alternate eligible subflow, ignore the failover hint. */
	if (mpts_alt == NULL || mpts_alt == mpts) {
		os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		goto done;
	}

	altpath_exists = 1;
	alt_so = mpts_alt->mpts_socket;
	if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
		/* All data acknowledged and no RTT spike */
		if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
			mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
		} else {
			/* no alternate path available */
			altpath_exists = 0;
		}
	}

	if (altpath_exists) {
		mpts_alt->mpts_flags |= MPTSF_ACTIVE;

		mpte->mpte_active_sub = mpts_alt;
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;

		os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid);

		mptcpstats_inc_switch(mpte, mpts);

		sowwakeup(alt_so);
	} else {
		mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
	}

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection cannot use the outgoing interface, let's
	 * close this subflow.
	 */
	mptcp_subflow_abort(mpts, EPERM);

	mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return MPTS_EVRET_DELETE;
}
/*
 * https://tools.ietf.org/html/rfc6052#section-2
 * https://tools.ietf.org/html/rfc6147#section-5.2
 */
static boolean_t
mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
    const struct ipv6_prefix *prefix,
    struct in_addr *addrv4)
{
	char buf[MAX_IPv4_STR_LEN];
	char *ptrv4 = (char *)addrv4;
	const char *ptr = (const char *)addr;

	if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
		return FALSE;
	}

	switch (prefix->prefix_len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptrv4, ptr + 12, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptrv4, ptr + 9, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptrv4, ptr + 7, 1);
		memcpy(ptrv4 + 1, ptr + 9, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptrv4, ptr + 6, 2);
		memcpy(ptrv4 + 2, ptr + 9, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptrv4, ptr + 5, 3);
		memcpy(ptrv4 + 3, ptr + 9, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptrv4, ptr + 4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n",
		    prefix->prefix_len);
	}

	os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
	    inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));

	return TRUE;
}
static void
mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
{
	struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
	struct socket *so = mpts->mpts_socket;
	struct ifnet *ifp;
	int j;

	/* Subflow IPs will be steered directly by the server - no need to
	 * desynthesize.
	 */
	if (mpte->mpte_flags & MPTE_UNICAST_IP) {
		return;
	}

	ifp = sotoinpcb(so)->inp_last_outifp;

	if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
		mptcp_ask_for_nat64(ifp);
		return;
	}

	for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
		int success;

		if (nat64prefixes[j].prefix_len == 0) {
			continue;
		}

		success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
		    &nat64prefixes[j],
		    &mpte->mpte_dst_v4_nat64.sin_addr);
		if (success) {
			mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
			mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
			mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
			break;
		}
	}
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mptcb *mp_tp;
	int af;
	boolean_t mpok = FALSE;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	tp = sototcpcb(so);
	af = mpts->mpts_dst.sa_family;

	if (mpts->mpts_flags & MPTSF_CONNECTED) {
		return MPTS_EVRET_OK;
	}

	if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
	    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
		if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
		    (so->so_state & SS_ISCONNECTED)) {
			mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
			    __func__, mpts->mpts_connid),
			    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
			(void) soshutdownlock(so, SHUT_RD);
			(void) soshutdownlock(so, SHUT_WR);
			(void) sodisconnectlocked(so);
		}
		return MPTS_EVRET_OK;
	}

	/*
	 * The subflow connection has been connected.  Find out whether it
	 * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
	 *
	 *   a. If MPTCP connection is not yet established, then this must be
	 *	the first subflow connection.  If MPTCP failed to negotiate,
	 *	fallback to regular TCP by degrading this subflow.
	 *
	 *   b. If MPTCP connection has been established, then this must be
	 *	one of the subsequent subflow connections.  If MPTCP failed
	 *	to negotiate, disconnect the connection.
	 *
	 * Right now, we simply unblock any waiters at the MPTCP socket layer
	 * if the MPTCP connection has not been established.
	 */

	if (so->so_state & SS_ISDISCONNECTED) {
		/*
		 * With MPTCP joins, a connection is connected at the subflow
		 * level, but the 4th ACK from the server elevates the MPTCP
		 * subflow to connected state.  So there is a small window
		 * where the subflow could get disconnected before the
		 * connected event is processed.
		 */
		return MPTS_EVRET_OK;
	}

	if (mpts->mpts_flags & MPTSF_TFO_REQD) {
		mptcp_drop_tfo_data(mpte, mpts);
	}

	mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
	mpts->mpts_flags |= MPTSF_CONNECTED;

	if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	}

	tp->t_mpflags &= ~TMPF_TFO_REQUEST;

	/* get/verify the outbound interface */
	inp = sotoinpcb(so);

	mpts->mpts_maxseg = tp->t_maxseg;

	mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
	    ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
	    ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
	    (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

	mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mp_tp->mpt_state = MPTCPS_ESTABLISHED;
		mpte->mpte_associd = mpts->mpts_connid;
		DTRACE_MPTCP2(state__change,
		    struct mptcb *, mp_tp,
		    uint32_t, 0 /* event */);

		if (SOCK_DOM(so) == AF_INET) {
			in_getsockaddr_s(so, &mpte->__mpte_src_v4);
		} else {
			in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
		}

		mpts->mpts_flags |= MPTSF_ACTIVE;

		/* case (a) above */
		if (!mpok) {
			tcpstat.tcps_mpcap_fallback++;

			tp->t_mpflags |= TMPF_INFIN_SENT;
			mptcp_notify_mpfail(so);
		} else {
			if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
			    mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
				tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			} else {
				mpts->mpts_flags |= MPTSF_PREFERRED;
			}
			mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
			mpte->mpte_nummpcapflows++;

			if (SOCK_DOM(so) == AF_INET6) {
				mptcp_handle_ipv6_connection(mpte, mpts);
			}

			mptcp_check_subflows_and_add(mpte);

			if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
				mpte->mpte_initial_cell = 1;
			}

			mpte->mpte_handshake_success = 1;
		}

		mp_tp->mpt_sndwnd = tp->snd_wnd;
		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
		soisconnected(mp_so);
	} else if (mpok) {
		/*
		 * case (b) above
		 * In case of additional flows, the MPTCP socket is not
		 * MPTSF_MP_CAPABLE until an ACK is received from server
		 * for 3-way handshake.  TCP would have guaranteed that this
		 * is an MPTCP subflow.
		 */
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
		    !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
		    mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
			tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			mpts->mpts_flags &= ~MPTSF_PREFERRED;
		} else {
			mpts->mpts_flags |= MPTSF_PREFERRED;
		}

		mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
		mpte->mpte_nummpcapflows++;

		mpts->mpts_rel_seq = 1;

		mptcp_check_subflows_and_remove(mpte);
	} else {
		unsigned int i;

		/* Should we try the alternate port? */
		if (mpte->mpte_alternate_port &&
		    inp->inp_fport != mpte->mpte_alternate_port) {
			union sockaddr_in_4_6 dst;
			struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;

			memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);

			dst_in->sin_port = mpte->mpte_alternate_port;

			mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
			    mpts->mpts_ifscope, NULL);
		} else { /* Else, we tried all we could, mark this interface as non-MPTCP */
			for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
				struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

				if (inp->inp_last_outifp->if_index == info->ifindex) {
					info->no_mptcp_support = 1;
					break;
				}
			}
		}

		tcpstat.tcps_join_fallback++;
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
			tcpstat.tcps_mptcp_cell_proxy++;
		} else {
			tcpstat.tcps_mptcp_wifi_proxy++;
		}

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

		return MPTS_EVRET_OK;
	}

	/* This call, just to "book" an entry in the stats-table for this ifindex */
	mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	mptcp_output(mpte);

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}
4187 mptcp_subflow_disconnected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
4188 uint64_t *p_mpsofilt_hint
, uint64_t event
)
4190 #pragma unused(event, p_mpsofilt_hint)
4191 struct socket
*mp_so
, *so
;
4192 struct mptcb
*mp_tp
;
4194 mp_so
= mptetoso(mpte
);
4195 mp_tp
= mpte
->mpte_mptcb
;
4196 so
= mpts
->mpts_socket
;
4198 mptcplog((LOG_DEBUG
, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
4199 __func__
, mpts
->mpts_connid
, so
->so_error
, mp_tp
->mpt_state
,
4200 !!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
),
4201 !!(mpts
->mpts_flags
& MPTSF_ACTIVE
), sototcpcb(so
)->t_mpflags
),
4202 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
4204 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
) {
4205 return MPTS_EVRET_DELETE
;
4208 mpts
->mpts_flags
|= MPTSF_DISCONNECTED
;
4210 /* The subflow connection has been disconnected. */
4212 if (mpts
->mpts_flags
& MPTSF_MPCAP_CTRSET
) {
4213 mpte
->mpte_nummpcapflows
--;
4214 if (mpte
->mpte_active_sub
== mpts
) {
4215 mpte
->mpte_active_sub
= NULL
;
4216 mptcplog((LOG_DEBUG
, "%s: resetting active subflow \n",
4217 __func__
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
4219 mpts
->mpts_flags
&= ~MPTSF_MPCAP_CTRSET
;
4222 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
||
4223 ((mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) && (mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
4224 mptcp_drop(mpte
, mp_tp
, so
->so_error
);
4228 * Clear flags that are used by getconninfo to return state.
4229 * Retain like MPTSF_DELETEOK for internal purposes.
4231 mpts
->mpts_flags
&= ~(MPTSF_CONNECTING
| MPTSF_CONNECT_PENDING
|
4232 MPTSF_CONNECTED
| MPTSF_DISCONNECTING
| MPTSF_PREFERRED
|
4233 MPTSF_MP_CAPABLE
| MPTSF_MP_READY
| MPTSF_MP_DEGRADED
| MPTSF_ACTIVE
);
4235 return MPTS_EVRET_DELETE
;

/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	ev_ret_t ret = MPTS_EVRET_OK;
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			goto done;
		}
		mpts->mpts_flags |= MPTSF_MP_DEGRADED;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
		mpts->mpts_flags |= MPTSF_MP_READY;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_READY;
	}

	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
		mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
	}

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		ret = MPTS_EVRET_DISCONNECT_FALLBACK;

		m_freem_list(mpte->mpte_reinjectq);
		mpte->mpte_reinjectq = NULL;
	} else if (mpts->mpts_flags & MPTSF_MP_READY) {
		mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
		ret = MPTS_EVRET_CONNECT_PENDING;
	}

done:
	return ret;
}
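
/*
 * The ev_ret_t values produced by the event handlers above are consumed by
 * mptcp_subflow_workloop() below: MPTS_EVRET_DELETE closes the subflow
 * socket, MPTS_EVRET_CONNECT_PENDING triggers joins of subflows still in
 * the PENDING state, and MPTS_EVRET_DISCONNECT_FALLBACK degrades the other
 * subflows to plain TCP.
 */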

/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	boolean_t is_fastclose;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/* We got an invalid option or a fast close */
	struct tcptemp *t_template;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;

	tp = intotcpcb(inp);
	so->so_error = ECONNABORTED;

	is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

	tp->t_mpflags |= TMPF_RESET;

	t_template = tcp_maketemplate(tp);
	if (t_template) {
		struct tcp_respond_args tra;

		bzero(&tra, sizeof(tra));
		if (inp->inp_flags & INP_BOUND_IF) {
			tra.ifscope = inp->inp_boundifp->if_index;
		} else {
			tra.ifscope = IFSCOPE_NONE;
		}
		tra.awdl_unrestricted = 1;

		tcp_respond(tp, t_template->tt_ipgen,
		    &t_template->tt_t, (struct mbuf *)NULL,
		    tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
		(void) m_free(dtom(t_template));
	}

	if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
		struct mptsub *iter, *tmp;

		*p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

		mp_so->so_error = ECONNRESET;

		TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) {
			if (iter == mpts) {
				continue;
			}
			mptcp_subflow_abort(iter, ECONNABORTED);
		}

		/*
		 * mptcp_drop is being called after processing the events, to fully
		 * close the MPTCP connection
		 */
		mptcp_drop(mpte, mp_tp, mp_so->so_error);
	} else {
		mptcp_subflow_abort(mpts, ECONNABORTED);
	}

	if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) {
		mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
	}

	return MPTS_EVRET_DELETE;
}

static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_READ_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
	}

	return MPTS_EVRET_OK;
}

static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_WRITE_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    tp->t_state > TCPS_CLOSE_WAIT) {
			continue;
		}

		if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active) {
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
	}

	return MPTS_EVRET_OK;
}
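
/*
 * Both adaptive-timeout handlers above follow the same pattern: mark the
 * subflow that timed out as stalled, then scan all established subflows;
 * only if none of them is still making progress is the stall hint
 * propagated up to the MPTCP socket.
 */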

/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
	struct socket *mp_so, *so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;

	socket_lock_assert_owned(mp_so);

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
	    mpo->mpo_level == SOL_SOCKET &&
	    mpo->mpo_name == SO_MARK_CELLFALLBACK) {
		struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];

		mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
		    __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable_for_session(mpte),
		    sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
		    mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		/*
		 * When we open a new subflow, mark it as cell fallback, if
		 * this subflow goes over cell.
		 *
		 * (except for first-party apps)
		 */

		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			return 0;
		}

		if (sotoinpcb(so)->inp_last_outifp &&
		    !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			return 0;
		}

		/*
		 * This here is an OR, because if the app is not binding to the
		 * interface, then it definitely is not a cell-fallback
		 * connection.
		 */
		if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
		    !IFNET_IS_CELLULAR(ifp)) {
			return 0;
		}
	}

	mpo->mpo_flags &= ~MPOF_INTERIM;

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof(int);
	sopt.sopt_p = kernproc;

	error = sosetoptlock(so, &sopt, 0);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
		    "val %d set error %d\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval, error);
	}
	return error;
}
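
/*
 * Note on the SO_MARK_CELLFALLBACK path above: the option is suppressed
 * (early return) whenever the subflow is provably not a cell fallback,
 * i.e. for first-party apps, for subflows whose last output interface is
 * non-cellular, or when the subflow is not bound to a cellular ifscope.
 */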

/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	bzero(&sopt, sizeof(sopt));
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof(int);
	sopt.sopt_p = kernproc;

	error = sogetoptlock(so, &sopt, 0);	/* already locked */
	if (error) {
		os_log_error(mptcp_log_handle,
		    "%s - %lx: sopt %s get error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error);
	}
	return error;
}

/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed. The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
	struct mppcb *mpp, *tmpp;
	uint32_t active = 0;

	LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
		struct socket *mp_so;
		struct mptses *mpte;
		struct mptcb *mp_tp;

		mp_so = mpp->mpp_socket;
		mpte = mptompte(mpp);
		mp_tp = mpte->mpte_mptcb;

		if (!mpp_try_lock(mpp)) {
			active++;
			continue;
		}

		VERIFY(mpp->mpp_flags & MPP_ATTACHED);

		/* check again under the lock */
		if (mp_so->so_usecount > 0) {
			boolean_t wakeup = FALSE;
			struct mptsub *mpts, *tmpts;

			if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
				if (mp_tp->mpt_gc_ticks > 0) {
					mp_tp->mpt_gc_ticks--;
				}
				if (mp_tp->mpt_gc_ticks == 0) {
					wakeup = TRUE;
				}
			}
			if (wakeup) {
				TAILQ_FOREACH_SAFE(mpts,
				    &mpte->mpte_subflows, mpts_entry, tmpts) {
					mptcp_subflow_eupcall1(mpts->mpts_socket,
					    mpts, SO_FILT_HINT_DISCONNECTED);
				}
			}
			socket_unlock(mp_so, 0);
			active++;
			continue;
		}

		if (mpp->mpp_state != MPPCB_STATE_DEAD) {
			panic("%s - %lx: skipped state "
			    "[u=%d,r=%d,s=%d]\n", __func__,
			    (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mp_so->so_usecount, mp_so->so_retaincnt,
			    mpp->mpp_state);
		}

		if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) {
			mptcp_close(mpte, mp_tp);
		}

		mptcp_session_destroy(mpte);

		DTRACE_MPTCP4(dispose, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mppcb *, mpp);

		mp_pcbdispose(mpp);
		sodealloc(mp_so);
	}

	return active;
}
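
/*
 * While the MPTCP socket still holds references, mptcp_gc() only counts it
 * as active and, once mpt_gc_ticks has counted down to zero in FIN_WAIT_1
 * or later, kicks the subflows with a DISCONNECTED hint. Only a PCB that
 * reached MPPCB_STATE_DEAD with a zero use count is actually disposed of.
 */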

/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so = mptetoso(mpte);

	VERIFY(mpte->mpte_mptcb == mp_tp);

	socket_lock_assert_owned(mp_so);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 /* event */);

	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
		errno = mp_tp->mpt_softerror;
	}
	mp_so->so_error = errno;

	return mptcp_close(mpte, mp_tp);
}

/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct mptsub *mpts = NULL, *tmpts = NULL;
	struct socket *mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);
	VERIFY(mpte->mpte_mptcb == mp_tp);

	mp_tp->mpt_state = MPTCPS_TERMINATE;

	mptcp_freeq(mp_tp);

	soisdisconnected(mp_so);

	/* Clean up all subflows */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		mptcp_subflow_disconnect(mpte, mpts);
	}

	return NULL;
}

void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}

void
mptcp_subflow_workloop(struct mptses *mpte)
{
	boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
	uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
	struct mptsub *mpts, *tmpts;
	struct socket *mp_so;

	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
		mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
		return;
	}
	mpte->mpte_flags |= MPTE_IN_WORKLOOP;

relaunch:
	mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		ev_ret_t ret;

		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

		/*
		 * If MPTCP socket is closed, disconnect all subflows.
		 * This will generate a disconnect event which will
		 * be handled during the next iteration, causing a
		 * non-zero error to be returned above.
		 */
		if (mp_so->so_flags & SOF_PCBCLEARING) {
			mptcp_subflow_disconnect(mpte, mpts);
		}

		switch (ret) {
		case MPTS_EVRET_OK:
			break;
		case MPTS_EVRET_DELETE:
			mptcp_subflow_soclose(mpts);
			break;
		case MPTS_EVRET_CONNECT_PENDING:
			connect_pending = TRUE;
			break;
		case MPTS_EVRET_DISCONNECT_FALLBACK:
			disconnect_fallback = TRUE;
			break;
		default:
			mptcplog((LOG_DEBUG,
			    "MPTCP Socket: %s: mptcp_subflow_events "
			    "returned invalid value: %d\n", __func__,
			    ret),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			break;
		}
		mptcp_subflow_remref(mpts);	/* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
		VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

		if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
			mp_so->so_state |= SS_CANTRCVMORE;
			sorwakeup(mp_so);
		}

		soevent(mp_so, mpsofilt_hint_mask);
	}

	if (!connect_pending && !disconnect_fallback) {
		goto exit;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (disconnect_fallback) {
			struct socket *so = NULL;
			struct inpcb *inp = NULL;
			struct tcpcb *tp = NULL;

			if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
				continue;
			}

			mpts->mpts_flags |= MPTSF_MP_DEGRADED;

			if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
			    MPTSF_DISCONNECTED | MPTSF_CONNECT_PENDING)) {
				continue;
			}

			so = mpts->mpts_socket;

			/*
			 * The MPTCP connection has degraded to a fallback
			 * mode, so there is no point in keeping this subflow
			 * regardless of its MPTCP-readiness state, unless it
			 * is the primary one which we use for fallback. This
			 * assumes that the subflow used for fallback is the
			 * ACTIVE one.
			 */

			inp = sotoinpcb(so);
			tp = intotcpcb(inp);
			tp->t_mpflags &=
			    ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
			tp->t_mpflags |= TMPF_TCP_FALLBACK;

			soevent(so, SO_FILT_HINT_MUSTRST);
		} else if (connect_pending) {
			/*
			 * The MPTCP connection has progressed to a state
			 * where it supports full multipath semantics; allow
			 * additional joins to be attempted for all subflows
			 * that are in the PENDING state.
			 */
			if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
				int error = mptcp_subflow_soconnectx(mpte, mpts);

				if (error) {
					mptcp_subflow_abort(mpts, error);
				}
			}
		}
	}

exit:
	if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
		goto relaunch;
	}

	mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
}
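
/*
 * MPTE_IN_WORKLOOP and MPTE_WORKLOOP_RELAUNCH make the workloop
 * non-reentrant: a nested invocation merely sets the relaunch flag and
 * returns, and the outer invocation loops back to the relaunch label
 * until no more relaunch requests are pending.
 */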

/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0) {
		mp_so->so_usecount++;
	}
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return 0;
}

/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL) {
		lr_saved = __builtin_return_address(0);
	} else {
		lr_saved = lr;
	}

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	socket_lock_assert_owned(mp_so);

	if (refcount != 0) {
		mp_so->so_usecount--;
	}

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mpp->mpp_inside < 0) {
		panic("%s: mpp=%p inside=%x lrh= %s\n", __func__,
		    mpp, mpp->mpp_inside, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	mpp_unlock(mpp);

	return 0;
}

/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return mpp_getlock(mpp, flags);
}

/*
 * MPTCP Join support
 */

static void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;

	/*
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0) {
		goto try_again;
	}
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}

static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = NULL;
	int found = 0;

	tp = sototcpcb(so);
	if (tp == NULL) {
		return;
	}

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
}

static void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand) {
				*lrand = sauth_entry->msae_laddr_rand;
			}
			if (rrand) {
				*rrand = sauth_entry->msae_raddr_rand;
			}
			break;
		}
	}
}

static void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
				    " address ids %d %d \n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
				    raddr_id, sauth_entry->msae_raddr_id);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				os_log_error(mptcp_log_handle, "%s - %lx: "
				    "dup SYN_ACK %d %d \n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
				    raddr_rand, sauth_entry->msae_raddr_rand);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			return;
		}
	}
}

/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof(mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}

void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};	/* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};	/* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is 512 block length, so no need to compute hash */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
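
/*
 * The above is standard HMAC-SHA1 (RFC 2104) with a 512-bit block: the two
 * 64-bit keys occupy the first 16 bytes of the 64-byte block and the
 * remaining 48 bytes are zero, so XORing the whole block with the
 * 0x36.../0x5c... constants yields the ipad/opad values directly, with no
 * need to pre-hash the key. The message is the concatenation of the two
 * 32-bit randoms, as exchanged during the MP_JOIN handshake.
 */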

/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t lrand, rrand;

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
	    digest);
}

/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof(u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof(u_int32_t));
}

static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof(u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */

	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
}
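
/*
 * Example: with digest bytes d[0..19], the IDSN above is assembled from
 * d[12..19] with d[19] landing in idsn[0]; on the little-endian hosts XNU
 * runs on, that places d[19] in the low-order byte of the 64-bit IDSN,
 * i.e. the least significant 64 bits of the digest, byte-reversed.
 */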

static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum) {
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
	}

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}

static void
mptcp_init_local_parms(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[SHA1_RESULTLEN];

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

	mptcp_generate_token(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}

int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char remote_digest[SHA1_RESULTLEN];

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
		return -1;
	}

	/* Setup local and remote tokens and Initial DSNs */
	mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
	mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;

	return 0;
}

static void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp) {
		return;
	}

	tp = intotcpcb(inp);
	if (!tp) {
		return;
	}

	if (!(tp->t_mpflags & TMPF_RESET)) {
		tp->t_mpflags |= TMPF_SEND_DFIN;
	}
}

/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL) {
		return;
	}

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}

void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	if (!m || len == 0) {
		return;
	}

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_len;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means, we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * really acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	} else if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	/* We can have data in the subflow's send-queue that is being acked,
	 * while the DATA_ACK has already advanced. Thus, we should check whether
	 * or not the DATA_ACK is actually new here.
	 */
	if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
	    MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
		mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
	}
}
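
/*
 * Worked example for the correction above: say the send queue holds
 * mappings [dsn 100, rlen 50] and [dsn 150, rlen 50], and 50 bytes are
 * acked at the TCP level. The loop optimistically sets data_ack to
 * 100 + 50 = 150 while crossing the first mbuf; because an mbuf is left
 * over when len reaches 0, data_ack is then re-derived from that mbuf's
 * mp_dsn (also 150 here). The two only differ when a mapping was not
 * fully acked, which is exactly the case the correction guards against.
 */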

void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0) {
				m->m_pkthdr.mp_dsn += len;
			}
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0) {
					m->m_pkthdr.mp_rseq += len;
				}
			}
			mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
			    __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
			    m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fallback to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}

/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}

void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;
	int off_orig = off;

	VERIFY(off >= 0);

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(m);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
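
/*
 * The walk above relies on the subflow send buffer being contiguous in
 * subflow-sequence space: off is a byte offset into the send buffer, so
 * skipping whole mbufs until off < m_len lands on the mbuf whose DSN
 * mapping covers the byte being (re)transmitted.
 */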

/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc()
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data tcp_input calls m_adj() which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed after trim, because data can be in
 * the reassembly queue for a while and the DSN option info in tp will be
 * overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap()
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		if (tp->t_rcv_map.mpt_dfin) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	} else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (th->th_flags & TH_FIN) {
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
		}
	}
}

/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL) {
		return;
	}

	tp = intotcpcb(inp);
	if (tp == NULL) {
		return;
	}

	if (so->so_flags & SOF_MP_TRYFAILOVER) {
		return;
	}

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}

/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	u_int64_t dsn;
	int off = 0;
	u_int32_t datalen;

	if (m == NULL) {
		return -1;
	}

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = dsn_fail - dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
			    dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			return 0;
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */
	os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
	return -1;
}

/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * seq has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST) {
		mdss_subflow_off--;
	}

	if (off < mdss_subflow_off) {
		printf("%s off %d mdss_subflow_off %d mdss_subflow_seq %u iss %u suna %u\n", __func__,
		    off, mdss_subflow_off, mdss_subflow_seq, mpts->mpts_iss, tp->snd_una);
	}
	VERIFY(off >= mdss_subflow_off);

	mptcplog((LOG_DEBUG, "%s dlen %u off %d sub_off %d sub_seq %u iss %u suna %u\n",
	    __func__, mdss_data_len, off, mdss_subflow_off, mdss_subflow_seq,
	    mpts->mpts_iss, tp->snd_una), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	return mdss_data_len - (off - mdss_subflow_off);
}
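
/*
 * Worked example: suppose the mapping returned above is
 * [sub_seq 1000, len 1460], mpts_iss = 0 and snd_una = 1000, and the
 * caller asks about off = 500. Then mdss_subflow_off =
 * (1000 + 0) - 1000 = 0, and the remaining usable length of the mapping
 * is 1460 - (500 - 0) = 960 bytes, i.e. the portion of the DSS mapping
 * that still lies at or beyond the requested send offset.
 */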

static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->t_maxseg > maxseg) {
			maxseg = tp->t_maxseg;
		}
	}

	return maxseg;
}

static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state)) {
			continue;
		}

		if (tp->rcv_scale < rcvscale) {
			rcvscale = tp->rcv_scale;
		}
	}

	return rcvscale;
}

/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/* Again check the limit set by the advertised
		 * window scale
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}

void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and receive side is being
	 *   throttled
	 * - if there are segments in reassembly queue indicating loss,
	 *   do not need to increase recv window during recovery as more
	 *   data is not going to be sent. A duplicate ack sent during
	 *   recovery should not change the receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Can not resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}

/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
		return;
	}

	if (tcp_do_autorcvbuf == 1 &&
	    tcp_cansbgrow(sb) &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}

/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0) {
		space = 0;
	}

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space) {
		space = 0;
	} else {
		space -= pending;
	}

	return space;
}
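
/*
 * As in tcp_sbspace(), the usable space above is the smaller of the byte
 * head room (hiwat - cc) and the mbuf head room (mbmax - mbcnt), clamped
 * at zero, with any bytes still held by content filters subtracted so the
 * advertised MPTCP receive window never overcommits the buffer.
 */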

/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
		return;
	}

	if (tp->t_mpflags & TMPF_MPTCP_READY) {
		return;
	}

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL) {
		return;
	}

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL) {
		return;
	}

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		return;
	}

	tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return ret;
}

/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define MPTCP_COMPUTE_LEN {                             \
	mss_lower = sizeof (struct mptcp_dss_ack_opt);  \
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)         \
	        mss_lower += 2;                         \
	else                                            \
	        /* adjust to 32-bit boundary + EOL */   \
	        mss_lower += 2;                         \
}
	if (mp_tp == NULL) {
		return 0;
	}

	socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return mss_lower;
}

/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	if (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}
	so_update_policy(so);
}

static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
#if INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else
#endif /* INET6 */
	if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}

static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL) {
		return EPERM;
	}

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n / 8) * sizeof(mptcp_flow_t);
		return 0;
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		socket_lock(mpp->mpp_socket, 1);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);

		socket_lock_assert_owned(mptetoso(mpte));
		mp_tp = mpte->mpte_mptcb;

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				socket_unlock(mpp->mpp_socket, 1);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			socket_unlock(mpp->mpp_socket, 1);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		socket_unlock(mpp->mpp_socket, 1);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error) {
				break;
			}
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return error;
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");

/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		mp_tp->mpt_notsent_lowat = optval;
	} else {
		error = EINVAL;
	}

	return error;
}

u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
		mp_tp = mpte->mpte_mptcb;
	}

	if (mp_tp) {
		return mp_tp->mpt_notsent_lowat;
	} else {
		return 0;
	}
}

int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return 0;
	}

	mpte = mptompte(mpp);
	socket_lock_assert_owned(mptetoso(mpte));
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %d \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		return 1;
	}

	/* When Nagle's algorithm is not disabled, it is better
	 * to wake up the client even before there is at least one
	 * maxseg of data to write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
			    " nodelay false \n",
			    mp_tp->mpt_notsent_lowat, notsent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			return retval;
		}
	}
	return 0;
}

static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
		os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
	}

	mptcp_kern_skt_unit = sac->sc_unit;

	return 0;
}

void
mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid)) {
			goto next;
		} else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid)) {
			goto next;
		}

		os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
			mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		socket_lock(mp_so, 1);

		/* Only handover- and urgency-mode are purely driven by Symptom's Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
			goto next;
		}

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED) {
		pid = mp_so->e_pid;
	} else {
		pid = mp_so->last_pid;
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
		return;
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	if (mp_so->so_flags & SOF_DELEGATED) {
		uuid_copy(ask.uuid, mp_so->e_uuid);
	} else {
		uuid_copy(ask.uuid, mp_so->last_uuid);
	}

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
	    prio == TASK_DARWINBG_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	} else if (prio == TASK_FOREGROUND_APPLICATION) {
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	} else {
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
	}

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);

	os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);

	proc_rele(p);
}

static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return 0;
}

static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit) {
		os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit);
	}

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return EINVAL;
	}

	if (mbuf_len(m) < sizeof(*sa)) {
		os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
		    __func__, mbuf_len(m), sizeof(*sa));
		mbuf_freem(m);
		return EINVAL;
	}

	sa = mbuf_data(m);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new, old: %d,%d\n", __func__,
		    sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
		    sa->sa_cell_status, mptcp_advisory.sa_cell_status);

		if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
			mptcp_wifi_status_changed();
		}
	} else {
		struct mptcp_symptoms_answer answer;
		errno_t err;

		/* We temporarily allow different sizes for ease of submission */
		if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
		    mbuf_len(m) != sizeof(answer)) {
			os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
			    __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
			    sizeof(answer));
			mbuf_freem(m);
			return EINVAL;
		}

		memset(&answer, 0, sizeof(answer));

		err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
		if (err) {
			os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
			mbuf_freem(m);
			return err;
		}

		mptcp_allow_uuid(answer.uuid, answer.rssi);
	}

	mbuf_freem(m);
	return 0;
}

void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}

/*
 * Three return-values:
 * 1  : WiFi is bad
 * 0  : WiFi is good
 * -1 : WiFi-state is unknown
 */
int
mptcp_is_wifi_unusable_for_session(struct mptses *mpte)
{
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		if (mptcp_advisory.sa_wifi_status) {
			return symptoms_is_wifi_lossy() ? 1 : 0;
		}

		/*
		 * If it's a first-party app and we don't have any info
		 * about the Wi-Fi state, let's be pessimistic.
		 */
		return -1;
	} else {
		if (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) {
			return 1;
		}

		/*
		 * If we are target-based (meaning, we allow to be more lax on
		 * the "unusable" target), we only *know* about the state once
		 * we got the allowance from Symptoms (MPTE_ACCESS_GRANTED).
		 *
		 * If RSSI is not bad enough, MPTE_CELL_PROHIBITED will then
		 * be set.
		 *
		 * In any other case (while in target-mode), consider WiFi bad
		 * and we are going to ask for allowance from Symptoms anyway.
		 */
		if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
			if (mpte->mpte_flags & MPTE_ACCESS_GRANTED &&
			    mpte->mpte_flags & MPTE_CELL_PROHIBITED) {
				return 0;
			}

			return 1;
		}

		return 0;
	}
}

boolean_t
symptoms_is_wifi_lossy(void)
{
	return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true;
}

/* If TFO data is successfully acked, it must be dropped from the mptcp so */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	/* If data was sent with SYN, rewind state */
	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* handle partial TCP ack */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			/* all data on SYN was acked */
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}
	}
}
*mp_tp
)
6491 struct tseg_qent
*q
;
6494 while ((q
= LIST_FIRST(&mp_tp
->mpt_segq
)) != NULL
) {
6495 LIST_REMOVE(q
, tqe_q
);
6497 zfree(tcp_reass_zone
, q
);
6500 mp_tp
->mpt_reassqlen
= 0;

static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	event_data.value = value;

	ev_msg.dv[0].data_ptr = &event_data;
	ev_msg.dv[0].data_length = sizeof(event_data);

	return kev_post_msg(&ev_msg);
}

static void
mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
{
	int error;

	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	/* Subflow is disappearing - don't set it on this one */
	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
		return;
	}

	/* Remember the last time we set the cellicon. Needed for debouncing */
	mpte->mpte_last_cellicon_set = tcp_now;

	if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
	    mpte->mpte_cellicon_increments != 0) {
		if (mptcp_cellicon_refcount == 0) {
			os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

			/* Continue, so that the icon gets set... */
		} else {
			/*
			 * In this case, the cellicon is already set. No need to bump it
			 * even higher
			 */
			return;
		}
	}

	/* When tearing down this subflow, we need to decrement the
	 * reference counter
	 */
	mpts->mpts_flags |= MPTSF_CELLICON_SET;

	/* This counter, so that when a session gets destroyed we decrement
	 * the reference counter by whatever is left
	 */
	mpte->mpte_cellicon_increments++;

	if (OSIncrementAtomic(&mptcp_cellicon_refcount)) {
		/* If cellicon is already set, get out of here! */
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
	} else {
		os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}
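
/*
 * The global mptcp_cellicon_refcount counts cell-using subflows across all
 * sessions, while mpte_cellicon_increments tracks this session's
 * contribution; the KEV_MPTCP_CELLUSE event is posted only on the 0 -> 1
 * transition, and mptcp_unset_cellicon() below reverses both counters.
 */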

void
mptcp_clear_cellicon(void)
{
	int error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

	if (error) {
		os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n",
		    __func__, error);
	} else {
		os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n",
		    __func__);
	}
}

/*
 * Returns true if the icon has been flipped to WiFi.
 */
static boolean_t
__mptcp_unset_cellicon(long val)
{
	if (OSAddAtomic(-val, &mptcp_cellicon_refcount) != 1) {
		return false;
	}

	mptcp_clear_cellicon();

	return true;
}

void
mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, long val)
{
	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
		return;
	}

	if (mpte->mpte_cellicon_increments == 0) {
		/* This flow never used cell - get out of here! */
		return;
	}

	if (mptcp_cellicon_refcount == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

		return;
	}

	if (mpts) {
		if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
			return;
		}

		mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
	}

	mpte->mpte_cellicon_increments--;

	if (__mptcp_unset_cellicon(val) == false) {
		return;
	}

	/* All flows are gone - our counter should be at zero too! */
	if (mpte->mpte_cellicon_increments != 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
	}
}

void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL) {
		return;
	}

	so = inp->inp_socket;
	if (so == NULL) {
		return;
	}

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}

void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}