/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#include <dev/random/randomdev.h>
/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as a <SOCK_STREAM,IPPROTO_TCP> protocol in the
 * PF_MULTIPATH communication domain. The structure mtcbinfo describes the
 * MPTCP instance of a Multipath protocol in that domain. It is used to keep
 * track of all MPTCP PCB instances in the system, and is protected by the
 * global lock (mppi_lock).
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets. Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure. Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow. This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock. Incoming data on a subflow also ends up taking this single lock. To
 * achieve the latter, tcp_lock/unlock has been changed to use the lock of the
 * MPTCP socket instead.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector. This process will take place once all
 * of the subflows have been destroyed.
 */
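/*
 * Illustrative userspace sketch (not part of this file): per the notes
 * above, an MPTCP socket is opened through the PF_MULTIPATH domain and
 * connected with connectx(2). Error handling is elided and "res" is
 * assumed to be a struct addrinfo obtained from getaddrinfo(3).
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	sa_endpoints_t eps = {
 *		.sae_dstaddr = res->ai_addr,
 *		.sae_dstaddrlen = res->ai_addrlen,
 *	};
 *	connectx(fd, &eps, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, NULL);
 */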
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
static int mptcp_freeq(struct mptcb *mp_tp);
/*
 * Possible return values for subflow event handlers. Note that success
 * values must be greater than or equal to MPTS_EVRET_OK. Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of event processing.
 */
typedef enum {
	MPTS_EVRET_DELETE               = 1,    /* delete this subflow */
	MPTS_EVRET_OK                   = 2,    /* OK */
	MPTS_EVRET_CONNECT_PENDING      = 3,    /* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK  = 4,    /* abort all but preferred */
} ev_ret_t;
static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpsuberror_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);
static unsigned int mptsub_zone_size;           /* size of mptsub */
static struct zone *mptsub_zone;                /* zone for mptsub */

static unsigned int mptopt_zone_size;           /* size of mptopt */
static struct zone *mptopt_zone;                /* zone for mptopt */

static unsigned int mpt_subauth_entry_size;     /* size of subf auth entry */
static struct zone *mpt_subauth_zone;           /* zone of subf auth entry */

struct mppcbinfo mtcbinfo;
SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;           /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");

static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");
static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;

static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;

static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
	uint64_t        sofilt_hint_mask;
	ev_ret_t        (*sofilt_hint_ev_hdlr)(
		struct mptses *mpte,
		struct mptsub *mpts,
		uint64_t *p_mpsofilt_hint,
		uint64_t event);
} mptsub_ev_entry_t;

/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
static symptoms_advisory_t mptcp_advisory;

uint32_t mptcp_cellicon_refcount = 0;
/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};

os_log_t mptcp_log_handle;
/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
	struct ip6protosw *prp6;

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;

	mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
	if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptsub_zone_size = sizeof(struct mptsub);
	if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub")) == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	mptopt_zone_size = sizeof(struct mptopt);
	if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt")) == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	mpt_subauth_entry_size = sizeof(struct mptcp_subf_auth_entry);
	if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
		/* NOTREACHED */
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}
int
mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, int ifindex, boolean_t create)
{
	int i, index = -1;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (create && stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0) {
				index = i;
			}
			continue;
		}

		if (stats[i].ifindex == ifindex) {
			index = i;
			return index;
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifindex;
	}

	return index;
}
static int
mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int index;

	if (ifp == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
		    sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags);
		return -1;
	}

	index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);

	if (index != -1) {
		if (stats[index].is_expensive == 0) {
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
		}
	}

	return index;
}
void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	if (index != -1) {
		mpte->mpte_itfstats[index].switches++;
	}
}
/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
int
mptcp_session_create(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof(*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mptcp_init_urgency_timer(mpte);

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	if (mptcp_alternate_port) {
		mpte->mpte_alternate_port = htons(mptcp_alternate_port);
	}

	mpte->mpte_last_cellicon_set = tcp_now;

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof(*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return 0;
}
struct sockaddr *
mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
{
	if (!(mpte->mpte_flags & MPTE_UNICAST_IP)) {
		return &mpte->mpte_dst;
	}

	if (ipv6 && mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (ipv4 && mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/*
	 * The interface has neither IPv4 nor IPv6 routes. Give our best guess,
	 * meaning we prefer IPv6 over IPv4.
	 */
	if (mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/* We don't yet have a unicast IP */
	return NULL;
}
static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes);

		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}
static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_aggregate_success++;
			}
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
		tcpstat.tcps_mptcp_back_to_wifi++;
	}

	if (mpte->mpte_triggered_cell) {
		tcpstat.tcps_mptcp_triggered_cell++;
	}
}
/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	VERIFY(mp_tp != NULL);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	mptcpstats_session_wrapup(mpte);
	mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
	mptcp_flush_sopts(mpte);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}
	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
}
boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
	       mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
	       !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}
static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len,
    const struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00},
	};
	const char *ptrv4 = (const char *)addrv4;
	char buf[MAX_IPv6_STR_LEN];
	char *ptr = (char *)addr;

	if (IN_ZERONET(ntohl(addrv4->s_addr)) ||        // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(ntohl(addrv4->s_addr)) ||       // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(ntohl(addrv4->s_addr)) ||      // 169.254.0.0/16 Link Local
	    IN_DS_LITE(ntohl(addrv4->s_addr)) ||        // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(ntohl(addrv4->s_addr)) ||      // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) {       // 255.255.255.255/32 Limited Broadcast
		return -1;
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(ntohl(addrv4->s_addr)) ||    // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
			return -1;
		}
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	os_log_info(mptcp_log_handle, "%s: nat64prefix-len %u synthesized %s\n",
	    __func__, len,
	    inet_ntop(AF_INET6, (void *)addr, buf, sizeof(buf)));

	return 0;
}
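/*
 * Worked example (assuming the RFC 6052 well-known 96-bit prefix
 * 64:ff9b::/96): synthesizing 192.0.2.33 (0xc0000221) copies the four IPv4
 * bytes into bytes 12..15 of the prefix, yielding 64:ff9b::c000:221.
 * Shorter prefix lengths split the IPv4 bytes around byte 8 (the "u" octet
 * of RFC 6052), which is why the 56/48/40-bit cases above copy in two
 * pieces.
 */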
static void
mptcp_trigger_cell_bringup(struct mptses *mpte)
{
	struct socket *mp_so = mptetoso(mpte);

	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		uuid_string_t uuidstr;
		int err;

		socket_unlock(mp_so, 0);
		err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
		    TRUE);
		socket_lock(mp_so, 0);

		if (err == 0) {
			mpte->mpte_triggered_cell = 1;
		}

		uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
		os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
	} else {
		os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}
static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
	if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
		return true;
	}

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) {
		return true;
	}

	if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) {
		return true;
	}

	return false;
}
void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	boolean_t cellular_viable = FALSE;
	boolean_t want_cellular = TRUE;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (mptcp_get_session_dst(mpte, false, false) == NULL) {
		return;
	}

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		boolean_t need_to_ask_symptoms = FALSE, found = FALSE;
		struct mpt_itf_info *info;
		struct sockaddr_in6 nat64pre;
		struct sockaddr *dst;
		struct mptsub *mpts;
		struct ifnet *ifp;
		uint32_t ifindex;

		info = &mpte->mpte_itfinfo[i];

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u has v6 %u hasnat64 %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support,
		    info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn);

		if (info->no_mptcp_support) {
			continue;
		}

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			cellular_viable = TRUE;
		}

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
			struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

			if (subifp == NULL) {
				continue;
			}

			/*
			 * If there is at least one functioning subflow on WiFi
			 * and we are checking for the cell interface, then
			 * we always need to ask symptoms for permission as
			 * cell is triggered even if WiFi is available.
			 */
			if (!IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    IFNET_IS_CELLULAR(ifp)) {
				need_to_ask_symptoms = TRUE;
			}

			/*
			 * In Handover mode, only create cell subflow if
			 * 1. Wi-Fi Assist is active
			 * 2. Symptoms marked WiFi as weak
			 * 3. We are experiencing RTOs or we are not sending data.
			 *
			 * This covers the scenario, where:
			 * 1. We send and get retransmission timeouts (thus,
			 *    we confirmed that WiFi is indeed bad).
			 * 2. We are not sending and the server tries to send.
			 *    Establishing a cell-subflow gives the server a
			 *    chance to send us some data over cell if WiFi
			 *    is dead. We establish the subflow with the
			 *    backup-bit set, so the server is not allowed to
			 *    send on this subflow as long as WiFi is providing
			 *    good performance.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    (mptcp_is_wifi_unusable_for_session(mpte) == 0 ||
			    (tp->t_rxtshift < mptcp_fail_thresh * 2 && mptetoso(mpte)->so_snd.sb_cc))) {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: handover, wifi state %d rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mptcp_is_wifi_unusable_for_session(mpte),
				    tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
				    mptetoso(mpte)->so_snd.sb_cc,
				    ifindex, subifp->if_index,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);

				/* We found a proper subflow on WiFi - no need for cell */
				want_cellular = FALSE;
				break;
			} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
				uint64_t time_now = mach_continuous_time();

				os_log(mptcp_log_handle,
				    "%s - %lx: target-based: %llu now %llu unusable? %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target,
				    time_now, mptcp_is_wifi_unusable_for_session(mpte),
				    IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state,
				    mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state);

				if (!IFNET_IS_CELLULAR(subifp) &&
				    !mptcp_subflow_disconnecting(mpts) &&
				    (mpte->mpte_time_target == 0 ||
				    (int64_t)(mpte->mpte_time_target - time_now) > 0 ||
				    !mptcp_is_wifi_unusable_for_session(mpte))) {
					found = TRUE;

					want_cellular = FALSE;
					break;
				}
			} else {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: svc %u cell %u flags %#x unusable %d rtx %u first %u sbcc %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mpte->mpte_svctype, IFNET_IS_CELLULAR(subifp), mpts->mpts_flags,
				    mptcp_is_wifi_unusable_for_session(mpte), tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY), mptetoso(mpte)->so_snd.sb_cc,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);
			}

			if (subifp->if_index == ifindex &&
			    !mptcp_subflow_disconnecting(mpts)) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = TRUE;
				break;
			}
		}

		if (found) {
			continue;
		}

		if (need_to_ask_symptoms &&
		    !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			continue;
		}

		dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);

		if (dst->sa_family == AF_INET &&
		    !info->has_v4_conn && info->has_nat64_conn) {
			struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
			int error, j;

			bzero(&nat64pre, sizeof(struct sockaddr_in6));

			error = ifnet_get_nat64prefix(ifp, nat64prefixes);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error);
				continue;
			}

			for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
				if (nat64prefixes[j].prefix_len != 0) {
					break;
				}
			}

			VERIFY(j < NAT64_MAX_NUM_PREFIXES);

			error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
			    nat64prefixes[j].prefix_len,
			    &((struct sockaddr_in *)(void *)dst)->sin_addr);
			if (error != 0) {
				os_log_info(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
				continue;
			}

			memcpy(&nat64pre.sin6_addr,
			    &nat64prefixes[j].ipv6_prefix,
			    sizeof(nat64pre.sin6_addr));
			nat64pre.sin6_len = sizeof(struct sockaddr_in6);
			nat64pre.sin6_family = AF_INET6;
			nat64pre.sin6_port = ((struct sockaddr_in *)(void *)dst)->sin_port;
			nat64pre.sin6_flowinfo = 0;
			nat64pre.sin6_scope_id = 0;

			dst = (struct sockaddr *)&nat64pre;
		}

		/* Initial subflow started on a NAT64'd address? */
		if (!(mpte->mpte_flags & MPTE_UNICAST_IP) &&
		    mpte->mpte_dst.sa_family == AF_INET6 &&
		    mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
			dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
		}

		if (dst->sa_family == AF_INET && !info->has_v4_conn) {
			continue;
		}
		if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
			continue;
		}

		mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
	}

	if (!cellular_viable && want_cellular) {
		/* Trigger Cell Bringup */
		mptcp_trigger_cell_bringup(mpte);
	}
}
static void
mptcp_remove_cell_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;
	boolean_t found = false;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		/* We have a functioning subflow on WiFi. No need for cell! */
		if (mpts->mpts_flags & MPTSF_CONNECTED &&
		    !mptcp_subflow_disconnecting(mpts)) {
			found = true;
		}
	}

	/* Didn't find a functional subflow on WiFi - stay on cell */
	if (!found) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		/* Only remove cellular subflows */
		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}
}
/* Returns true if it removed a subflow on cell */
static void
mptcp_handover_subflows_remove(struct mptses *mpte)
{
	int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
	boolean_t found_working_subflow = false;
	struct mptsub *mpts;

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED) {
			continue;
		}

		os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);

		/* Is this subflow in good condition? */
		if (tp->t_rxtshift == 0 && mptetoso(mpte)->so_snd.sb_cc) {
			found_working_subflow = true;
		}

		/* Or WiFi is fine */
		if (!wifi_unusable) {
			found_working_subflow = true;
		}
	}

	/*
	 * Couldn't find a working subflow, let's not remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow) {
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}
static void
mptcp_targetbased_subflows_remove(struct mptses *mpte)
{
	uint64_t time_now = mach_continuous_time();

	if (mpte->mpte_time_target != 0 &&
	    (int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
	    mptcp_is_wifi_unusable_for_session(mpte)) {
		/* WiFi is bad and we are below the target - don't remove any subflows */
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}
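/*
 * A note on the time comparison above (illustrative, not normative):
 * casting the difference of two mach_continuous_time() values to int64_t
 * gives a wraparound-safe signed comparison. For example, with
 * mpte_time_target = 1000 and time_now = 1500,
 * (int64_t)(1000 - 1500) == -500 <= 0, meaning the target time has passed;
 * if WiFi is also unusable, the cell subflows are deliberately kept.
 */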
/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	socket_lock_assert_owned(mptetoso(mpte));

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		mptcp_handover_subflows_remove(mpte);
	}

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
		mptcp_targetbased_subflows_remove(mpte);
	}
}
static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		boolean_t found = false;
		uint32_t ifindex;
		uint32_t i;

		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
			    ifp ? ifp->if_index : -1);
			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);

			continue;
		}

		if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) {
			continue;
		}

		if (ifp) {
			ifindex = ifp->if_index;
		} else {
			ifindex = mpts->mpts_ifscope;
		}

		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
				continue;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				if (mpts->mpts_dst.sa_family == AF_INET6 &&
				    (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) {
					found = true;
					break;
				}

				if (mpts->mpts_dst.sa_family == AF_INET &&
				    mpte->mpte_itfinfo[i].has_v4_conn) {
					found = true;
					break;
				}
			}
		}

		if (!found) {
			os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    ifindex, mpts->mpts_flags);

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}
static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/*
	 * Start with clearing, because we might be processing connections
	 * while a new event comes in.
	 */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
		os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
	}

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
			continue;
		}

		socket_lock(mp_so, 1);
		VERIFY(mp_so->so_usecount > 0);

		mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * We need this, because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
 * So, we would deadlock there as we already hold the NECP-lock.
 *
 * So, let's schedule this separately. It also gives NECP the chance to make
 * progress, without having to wait for MPTCP to finish its subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		return;
	}

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz / 10);
}
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
	    zalloc_noblock(mptopt_zone);
	if (mpo != NULL) {
		bzero(mpo, mptopt_zone_size);
	}

	return mpo;
}
/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}
/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	socket_lock_assert_owned(mptetoso(mpte));

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name) {
			break;
		}
	}
	return mpo;
}
/*
 * Allocate an MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts = zalloc(mptsub_zone);

	if (mpts == NULL) {
		return NULL;
	}

	bzero(mpts, mptsub_zone_size);
	return mpts;
}
/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released. This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0) {
		return;
	}

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}
static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP-connection.
	 * Locking,... happens now at the MPTCP-layer
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket. From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;

	mptcp_subflow_addref(mpts); /* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts); /* for subflow socket */
}
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused uint32_t interface_index,
    uint32_t necp_flags, bool *viable)
{
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
		return;
	}

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;
	}

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		goto out;
	}

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
	    mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
	    viable != NULL) {
		*viable = 1;
	}

out:
	socket_unlock(so, 1);
}
/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct socket **so)
{
	lck_mtx_t *subflow_mtx;
	struct mptopt smpo, *mpo, *tmpo;
	struct proc *p;
	struct socket *mp_so;
	int error;

	*so = NULL;

	mp_so = mptetoso(mpte);

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		mptcp_subflow_free(mpts);
		return ESRCH;
	}

	/*
	 * Create the subflow socket (multipath subflow, non-blocking.)
	 *
	 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
	 * socket; it will be cleared when the socket is peeled off or closed.
	 * It also indicates to the underlying TCP to handle MPTCP options.
	 * A multipath subflow socket implies SS_NOFDREF state.
	 */

	/*
	 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
	 * the ipi-lock. We cannot hold the socket-lock at that point.
	 */
	socket_unlock(mp_so, 0);
	error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
	    SOCF_MPTCP, PROC_NULL);
	socket_lock(mp_so, 0);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

		proc_rele(p);

		mptcp_subflow_free(mpts);
		return error;
	}

	/*
	 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
	 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
	 * Which is why we also need to get the lock with pr_getlock, as after
	 * setting the flag, socket_unlock will work on the MPTCP-level lock.
	 */
	subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
	lck_mtx_lock(subflow_mtx);

	/*
	 * Must be the first thing we do, to make sure all pointers for this
	 * subflow are set.
	 */
	mptcp_subflow_attach(mpte, mpts, *so);

	/*
	 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file descriptor associated by
	 * default.
	 */
	(*so)->so_state |= SS_NOFDREF;

	lck_mtx_unlock(subflow_mtx);

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	/* Inherit preconnect and TFO data flags */
	if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
		(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
	}
	if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
		(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
	}

	/* Inherit uuid and create the related flow. */
	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;

		/*
		 * A note on the unlock: With MPTCP, we do multiple times a
		 * necp_client_register_socket_flow. This is problematic,
		 * because now the lock-ordering guarantee (first necp-locks,
		 * then socket-locks) is no more respected. So, we need to
		 * unlock here.
		 */
		socket_unlock(mp_so, 0);
		error = necp_client_register_socket_flow(mp_so->last_pid,
		    mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
		socket_lock(mp_so, 0);

		if (error) {
			os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);

			goto out_err;
		}

		/* Possible state-change during the unlock above */
		if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
		    (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
			os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mp_tp->mpt_state, mp_tp->mpt_flags);

			error = EINVAL;
			goto out_err;
		}

		uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
	}

	/* Needs to happen prior to the delegation! */
	(*so)->last_pid = mp_so->last_pid;

	if (mp_so->so_flags & SOF_DELEGATED) {
		if (mpte->mpte_epid) {
			error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
				goto out_err;
			}
		}
		if (!uuid_is_null(mpte->mpte_euuid)) {
			error = so_set_effective_uuid(*so, mpte->mpte_euuid, p, false);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
				goto out_err;
			}
		}
	}

	/* inherit the other socket options */
	bzero(&smpo, sizeof(smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* disable SIGPIPE */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	/* find out if the subflow's source address goes away */
	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
		goto out_err;
	}

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
		/*
		 * On secondary subflows we might need to set the cell-fallback
		 * flag (see conditions in mptcp_subflow_sosetopt).
		 */
		smpo.mpo_level = SOL_SOCKET;
		smpo.mpo_name = SO_MARK_CELLFALLBACK;
		smpo.mpo_intval = 1;
		if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
			goto out_err;
		}
	}

	/* replay setsockopt(2) on the subflow sockets for eligible options */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
			continue;
		}

		/*
		 * Skip those that are handled internally; these options
		 * should not have been recorded and marked with the
		 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
		 */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE)) {
			continue;
		}

		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
			os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
			    mpo->mpo_intval);
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
			continue;
		}
	}

	/*
	 * We need to receive everything that the subflow socket has,
	 * so use a customized socket receive function. We will undo
	 * this when the socket is peeled off or closed.
	 */
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
	    int, dom, int, error);

	return 0;

out_err:
	mptcp_subflow_abort(mpts, error);

	proc_rele(p);

	return error;
}
/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED) {
		return;
	}

	VERIFY(so != NULL);
	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt == 0) {
		soclose_locked(so);

		return;
	} else {
		VERIFY(so->so_usecount > 0);
		so->so_usecount--;
	}
}
/*
 * Connect an MPTCP subflow socket.
 *
 * Note that in the pending connect case, the subflow socket may have been
 * bound to an interface and/or a source IP address which may no longer be
 * around by the time this routine is called; in that case the connect attempt
 * will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
	char dbuf[MAX_IPv6_STR_LEN];
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	struct sockaddr *dst;
	struct proc *p;
	int af, error, dport;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	af = mpts->mpts_dst.sa_family;
	dst = &mpts->mpts_dst;

	VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
	VERIFY(mpts->mpts_socket != NULL);
	VERIFY(af == AF_INET || af == AF_INET6);

	if (af == AF_INET) {
		inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN(dst)->sin_port);
	} else {
		inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
		dport = ntohs(SIN6(dst)->sin6_port);
	}

	os_log_info(mptcp_log_handle,
	    "%s - %lx: ifindex %u dst %s:%d pended %u\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
	    mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);

		return ESRCH;
	}

	mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

	mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);

	/* connect the subflow socket */
	error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
	    p, mpts->mpts_ifscope,
	    mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);

	mpts->mpts_iss = sototcpcb(so)->iss;

	/* See tcp_connect_complete */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
	    (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
	}

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
	    struct mptsub *, mpts, int, error);
	if (error) {
		os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope);
	}

	return error;
}
static int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0) {
		return 0;
	}

	if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		if (off && (dsn != m->m_pkthdr.mp_dsn ||
		    rseq != m->m_pkthdr.mp_rseq ||
		    dlen != m->m_pkthdr.mp_rlen)) {
			os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: %u - %u , %u - %u, %u - %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
			    (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
			    rseq, m->m_pkthdr.mp_rseq,
			    dlen, m->m_pkthdr.mp_rlen);

			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
			return -1;
		}
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) {
		/* data arrived without a DSS option mapping */

		/* initial subflow can fallback right after SYN handshake */
		if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
			mptcp_notify_mpfail(so);
		} else {
			soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

			return -1;
		}
	} else if (m->m_flags & M_PKTHDR) {
		/* We need to fake the DATA-mapping */
		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		m->m_pkthdr.mp_dsn = dsn + off;
		m->m_pkthdr.mp_rseq = rseq + off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	}

	mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;

	return 0;
}
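/*
 * Worked example (illustrative): suppose a DSS mapping covers DSN 1000,
 * relative subflow sequence 500, length 100, and the first 40 bytes of the
 * mapping were already consumed (off = 40). The adjusted header then
 * advertises mp_dsn = 1040 and mp_rseq = 540, with mp_rlen set to the bytes
 * that actually remain in this mbuf chain (m_pkthdr.len).
 */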
/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	int flags, error = 0;
	struct proc *p = current_proc();
	struct mbuf *m, **mp = mp0;
	boolean_t proc_held = FALSE;

	VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);
		/* NOTREACHED */
	}
#endif
	/*
	 * We return all that is there in the subflow's socket receive buffer
	 * to the MPTCP layer, so we require that the caller passes in the
	 * expected parameters.
	 */
	if (mp == NULL || controlp != NULL) {
		return EINVAL;
	}

	*mp = NULL;
	if (psa != NULL) {
		*psa = NULL;
	}
	if (flagsp != NULL) {
		flags = *flagsp & ~MSG_EOR;
	} else {
		flags = 0;
	}

	if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
		return EOPNOTSUPP;
	}

	flags |= (MSG_DONTWAIT | MSG_NBIO);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT) {
			sb_empty_assert(sb, __func__);
		}
		return error;
	}

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
		return 0;
	}

	/*
	 * For consistency with soreceive() semantics, we need to obey
	 * SB_LOCK in case some other code path has locked the buffer.
	 */
	error = sblock(&so->so_rcv, 0);
	if (error != 0) {
		return error;
	}

	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error != 0) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}

		if (so->so_state & SS_CANTRCVMORE) {
			goto release;
		}

		if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
			error = ENOTCONN;
			goto release;
		}

		/*
		 * MSG_DONTWAIT is implicitly defined and this routine will
		 * never block, so return EWOULDBLOCK when there is nothing.
		 */
		error = EWOULDBLOCK;
		goto release;
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

	while (m != NULL) {
		int dlen = 0, dfin = 0, error_out = 0;
		struct mbuf *start = m;
		uint64_t dsn;
		uint32_t sseq;
		uint16_t orig_dlen;
		uint16_t csum;

		VERIFY(m->m_nextpkt == NULL);

		if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			orig_dlen = dlen = m->m_pkthdr.mp_rlen;
			dsn = m->m_pkthdr.mp_dsn;
			sseq = m->m_pkthdr.mp_rseq;
			csum = m->m_pkthdr.mp_csum;
		} else {
			/* We did fallback */
			if (mptcp_adj_rmap(so, m, 0, 0, 0, 0)) {
				error = EIO;
				*mp0 = NULL;
				goto release;
			}

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (m != NULL) {
				so->so_rcv.sb_lastrecord = m;
			} else {
				SB_EMPTY_FIXUP(&so->so_rcv);
			}

			continue;
		}

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
			dfin = 1;
		}

		/*
		 * Check if the full mapping is now present
		 */
		if ((int)so->so_rcv.sb_cc < dlen - dfin) {
			mptcplog((LOG_INFO, "%s not enough data (%u) need %u for dsn %u\n",
			    __func__, so->so_rcv.sb_cc, dlen, (uint32_t)dsn),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);

			if (*mp0 == NULL) {
				error = EWOULDBLOCK;
			}
			goto release;
		}

		/* Now, get the full mapping */
		while (dlen > 0) {
			if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
				error_out = 1;
				error = EIO;
				dlen = 0;
				*mp0 = NULL;
				break;
			}

			dlen -= m->m_len;
			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (dlen - dfin == 0) {
				dlen = 0;
			}

			VERIFY(dlen <= 0 || m);
		}

		VERIFY(dlen == 0);

		if (m != NULL) {
			so->so_rcv.sb_lastrecord = m;
		} else {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}

		if (error_out) {
			goto release;
		}

		if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
			error = EIO;
			*mp0 = NULL;
			goto release;
		}

		SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
		SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
	}

	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);

	if (flagsp != NULL) {
		*flagsp |= flags;
	}

release:
	sbunlock(&so->so_rcv, TRUE);

	if (proc_held) {
		proc_rele(p);
	}

	return error;
}
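/*
 * Illustrative sketch (not part of the build): the "full mapping present?"
 * test above.  A DSS mapping of dlen bytes that carries a DATA_FIN
 * contributes one sequence number with no wire byte, so the receive
 * buffer only needs dlen - dfin bytes before the record can be handed up
 * in one piece.  Hypothetical standalone check:
 */
#if 0
static int
example_mapping_complete(uint32_t bytes_buffered, uint16_t dlen, int dfin)
{
	return (int)bytes_buffered >= (int)dlen - dfin;
}
#endif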
/*
 * MPTCP subflow socket send routine, derived from sosend().
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	struct proc *p = current_proc();
	boolean_t en_tracing = FALSE, proc_held = FALSE;
	int en_tracing_val;
	int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
	int error;

	VERIFY(control == NULL);
	VERIFY(addr == NULL);
	VERIFY(uio == NULL);
	VERIFY(flags == 0);
	VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);

	VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
	VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);

	/*
	 * Trace if tracing is enabled, this is a network (vs. unix)
	 * socket, and it is not loopback.
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			en_tracing_val = top->m_pkthdr.len;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    (unsigned long)VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)en_tracing_val);
		}
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

#if NECP
	inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
#endif /* NECP */

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
	if (error) {
		goto out;
	}

	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
	top = NULL;

out:
	if (top != NULL) {
		m_freem(top);
	}

	if (proc_held) {
		proc_rele(p);
	}

	soclearfastopen(so);

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    (unsigned long)VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)en_tracing_val);
	}

	return error;
}
/*
 * Establish an initial MPTCP connection (if first subflow and not yet
 * connected), or add a subflow to an existing MPTCP connection.
 */
int
mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
    struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
{
	struct socket *mp_so, *so = NULL;
	struct mptcb *mp_tp;
	struct mptsub *mpts = NULL;
	int af, error = 0;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mp_so);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		/* If the remote end sends Data FIN, refuse subflow adds */
		os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state);
		error = ENOTCONN;
		goto out_err;
	}

	mpts = mptcp_subflow_alloc();
	if (mpts == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		error = ENOMEM;
		goto out_err;
	}

	if (src != NULL) {
		if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
			error = EAFNOSUPPORT;
			goto out_err;
		}

		if (src->sa_family == AF_INET &&
		    src->sa_len != sizeof(struct sockaddr_in)) {
			error = EINVAL;
			goto out_err;
		}

		if (src->sa_family == AF_INET6 &&
		    src->sa_len != sizeof(struct sockaddr_in6)) {
			error = EINVAL;
			goto out_err;
		}

		MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
		    M_WAITOK | M_ZERO);
		if (mpts->mpts_src == NULL) {
			error = ENOMEM;
			goto out_err;
		}
		bcopy(src, mpts->mpts_src, src->sa_len);
	}

	if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out_err;
	}

	if (dst->sa_family == AF_INET &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
		error = EINVAL;
		goto out_err;
	}

	if (dst->sa_family == AF_INET6 &&
	    dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
		error = EINVAL;
		goto out_err;
	}

	memcpy(&mpts->mpts_u_dst, dst, dst->sa_len);

	af = mpts->mpts_dst.sa_family;

	ifnet_head_lock_shared();
	if ((ifscope > (unsigned)if_index)) {
		ifnet_head_done();
		error = ENXIO;
		goto out_err;
	}
	ifnet_head_done();

	mpts->mpts_ifscope = ifscope;

	/* create the subflow socket */
	if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
		/*
		 * Returning (error) and not cleaning up, because up to here
		 * all we did is creating mpts.
		 *
		 * And the contract is that the call to mptcp_subflow_socreate,
		 * moves ownership of mpts to mptcp_subflow_socreate.
		 */
		return error;
	}

	/*
	 * We may be called from within the kernel. Still need to account this
	 * one to the real app.
	 */
	mptcp_update_last_owner(mpts->mpts_socket, mp_so);

	/*
	 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
	 * -1 (SAE_CONNID_ALL).
	 */
	mpte->mpte_connid_last++;
	if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
	    mpte->mpte_connid_last == SAE_CONNID_ANY) {
		mpte->mpte_connid_last++;
	}

	mpts->mpts_connid = mpte->mpte_connid_last;

	mpts->mpts_rel_seq = 1;

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0) {
		mpte->mpte_addrid_last++;
	}

	/* register for subflow socket read/write events */
	sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);

	/* Register for subflow socket control events */
	sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
	    SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
	    SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
	    SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
	    SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
	    SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR);

	/* sanity check */
	VERIFY(!(mpts->mpts_flags &
	    (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));

	/*
	 * Indicate to the TCP subflow whether or not it should establish
	 * the initial MPTCP connection, or join an existing one.  Fill
	 * in the connection request structure with additional info needed
	 * by the underlying TCP (to be used in the TCP options, etc.)
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
		mpts->mpts_flags |= MPTSF_INITIAL_SUB;

		if (mp_tp->mpt_state == MPTCPS_CLOSED) {
			mptcp_init_local_parms(mpte);
		}
		soisconnecting(mp_so);

		/* If fastopen is requested, set state in mpts */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			mpts->mpts_flags |= MPTSF_TFO_REQD;
		}
	} else {
		if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) {
			mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
		}
	}

	mpts->mpts_flags |= MPTSF_CONNECTING;

	/* connect right away if first attempt, or if join can be done now */
	if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) {
		error = mptcp_subflow_soconnectx(mpte, mpts);
		if (error) {
			goto out_err_close;
		}
	}

	if (pcid) {
		*pcid = mpts->mpts_connid;
	}

	return 0;

out_err_close:
	mptcp_subflow_abort(mpts, error);

	return error;

out_err:
	if (mpts) {
		mptcp_subflow_free(mpts);
	}

	return error;
}
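/*
 * Illustrative sketch (not part of the build): the connection-ID
 * allocation above must never return SAE_CONNID_ANY (0) or
 * SAE_CONNID_ALL (-1), since those are wildcards in the connectx(2)
 * API.  A hypothetical helper; a loop (rather than a single
 * re-increment) also covers the wrap where SAE_CONNID_ALL is
 * immediately followed by SAE_CONNID_ANY:
 */
#if 0
static sae_connid_t
example_next_connid(sae_connid_t *last)
{
	do {
		(*last)++;
	} while (*last == SAE_CONNID_ANY || *last == SAE_CONNID_ALL);
	return *last;
}
#endif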
static void
mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	int index = mptcpstats_get_index(stats, mpts);

	if (index != -1) {
		struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

		stats[index].mpis_txbytes += inp->inp_stat->txbytes;
		stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;

		stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes;
		stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes;

		stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes;
		stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes;

		stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes;
		stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes;
	}
}
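/*
 * Illustrative sketch (not part of the build): the fold done above.  The
 * subflow's inpcb keeps cumulative byte counts per link type (total, wifi,
 * wired, cellular); right before the subflow goes away they are added into
 * the session's per-interface slot so the totals outlive the subflow.
 * Hypothetical miniature version for one counter pair:
 */
#if 0
struct example_slot { uint64_t txbytes, rxbytes; };

static void
example_fold_stats(struct example_slot *slot, uint64_t tx, uint64_t rx)
{
	/* counters are monotonic totals, so plain addition is enough */
	slot->txbytes += tx;
	slot->rxbytes += rx;
}
#endif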
/*
 * Delete/remove a subflow from an MPTCP session.  The underlying subflow
 * socket will no longer be accessible after a subflow is deleted, thus
 * this should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);
	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
	VERIFY(mpte->mpte_numflows != 0);
	VERIFY(mp_so->so_usecount > 0);

	mptcpstats_update(mpte->mpte_itfstats, mpts);

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
	mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

	atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows--;
	if (mpte->mpte_active_sub == mpts) {
		mpte->mpte_active_sub = NULL;
	}

	/*
	 * Drop references held by this subflow socket; there
	 * will be no further upcalls made from this point on.
	 */
	sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
	sock_catchevents_locked(so, NULL, NULL, 0);

	mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

	mp_so->so_usecount--;           /* for subflow socket */
	mpts->mpts_mpte = NULL;
	mpts->mpts_socket = NULL;

	mptcp_subflow_remref(mpts);     /* for MPTCP subflow list */
	mptcp_subflow_remref(mpts);     /* for subflow socket */

	so->so_flags &= ~SOF_MP_SUBFLOW;
	tp->t_mptcb = NULL;
	tp->t_mpsub = NULL;
}
static void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	int send_dfin = 0;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}
		soshutdownlock(so, SHUT_WR);
	}
}
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
		return;
	}

	mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	if (tp->t_state != TCPS_CLOSED) {
		tcp_drop(tp, error);
	}

	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so, *mp_so;
	struct mptcb *mp_tp;
	int send_dfin = 0;

	so = mpts->mpts_socket;
	mp_tp = mpte->mpte_mptcb;
	mp_so = mptetoso(mpte);

	socket_lock_assert_owned(mp_so);

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
		return;
	}

	mptcp_unset_cellicon(mpte, mpts, 1);

	mpts->mpts_flags |= MPTSF_DISCONNECTING;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		send_dfin = 1;
	}

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin) {
			mptcp_send_dfin(so);
		}

		if (mp_so->so_flags & SOF_DEFUNCT) {
			int ret;

			ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
			if (ret == 0) {
				ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);

				if (ret != 0) {
					os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
					    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
				}
			} else {
				os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
			}
		} else {
			(void) soshutdownlock(so, SHUT_RD);
			(void) soshutdownlock(so, SHUT_WR);
			(void) sodisconnectlocked(so);
		}
	}

	/*
	 * Generate a disconnect event for this subflow socket, in case
	 * the lower layer doesn't do it; this is needed because the
	 * subflow socket deletion relies on it.
	 */
	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m = NULL;
	struct socket *so;
	int error, wakeup = 0;

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

	DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
	    struct mptsub *, mpts);

	if (!(mpts->mpts_flags & MPTSF_CONNECTED)) {
		goto out;
	}

	so = mpts->mpts_socket;

	error = sock_receive_internal(so, NULL, &m, 0, NULL);
	if (error != 0 && error != EWOULDBLOCK) {
		os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error);
		if (error == ENODATA) {
			/*
			 * Don't ignore ENODATA so as to discover
			 * nasty middleboxes.
			 */
			mp_so->so_error = ENODATA;

			wakeup = 1;
			goto out;
		}
	} else if (error == 0) {
		mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	/* In fallback, make sure to accept data on all but one subflow */
	if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mpts->mpts_flags & MPTSF_ACTIVE)) {
		mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
		    __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
		m_freem(m);
		goto out;
	}

	if (m != NULL) {
		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		mptcp_input(mpte, m);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
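/*
 * Illustrative sketch (not part of the build): the cellicon toggle above
 * rate-limits with TSTMP_LT(), tcp's wraparound-safe timestamp compare.
 * The classic trick is a signed subtraction, so values on either side of
 * a 32-bit wrap still order correctly.  Hypothetical stand-in:
 */
#if 0
static int
example_tstmp_lt(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;    /* a "before" b, modulo 2^32 */
}
#endif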
void
mptcp_handle_input(struct socket *so)
{
	struct mptsub *mpts, *tmpts;
	struct mptses *mpte;

	if (!(so->so_flags & SOF_MP_SUBFLOW)) {
		return;
	}

	mpts = sototcpcb(so)->t_mpsub;
	mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
		}
		return;
	}

	mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE;
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		mptcp_subflow_input(mpte, mpts);

		mptcp_subflow_remref(mpts);             /* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
}
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
		}

		return;
	}

	mptcp_output(mpte);
}
static boolean_t
mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
{
	struct mbuf *so_m = so->so_snd.sb_mb;
	uint64_t dsn = m->m_pkthdr.mp_dsn;

	while (so_m) {
		VERIFY(so_m->m_flags & M_PKTHDR);
		VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		/* Part of the segment is covered, don't reinject here */
		if (so_m->m_pkthdr.mp_dsn <= dsn &&
		    so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) {
			return TRUE;
		}

		so_m = so_m->m_next;
	}

	return FALSE;
}
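/*
 * Illustrative sketch (not part of the build): the coverage test above is
 * a half-open interval check -- dsn is covered by a mapping starting at
 * map_dsn with length map_len iff map_dsn <= dsn < map_dsn + map_len.
 * Hypothetical version over plain integers:
 */
#if 0
static int
example_dsn_covered(uint64_t map_dsn, uint16_t map_len, uint64_t dsn)
{
	return map_dsn <= dsn && dsn < map_dsn + map_len;
}
#endif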
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
static int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
	struct socket *mp_so, *so;
	struct tcpcb *tp;
	uint64_t mpt_dsn = 0, off = 0;
	int sb_cc = 0, error = 0, wakeup = 0;
	uint16_t dss_csum;
	uint16_t tot_sent = 0;
	boolean_t reinjected = FALSE;

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;
	tp = sototcpcb(so);

	socket_lock_assert_owned(mp_so);

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

	VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
	VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
	    (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
	    (mpts->mpts_flags & MPTSF_TFO_REQD));
	VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

	mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
	    __func__, mpts->mpts_flags, mpte->mpte_flags,
	    mptcp_subflow_cwnd_space(so)),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
	    struct mptsub *, mpts);

	/* Remove Addr Option is not sent reliably as per I-D */
	if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
		tp->t_rem_aid = mpte->mpte_lost_aid;
		tp->t_mpflags |= TMPF_SND_REM_ADDR;
		mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
	}

	/*
	 * The mbuf chains containing the metadata (as well as pointing to
	 * the user data sitting at the MPTCP output queue) would then be
	 * sent down to the subflow socket.
	 *
	 * Some notes on data sequencing:
	 *
	 *   a. Each mbuf must be a M_PKTHDR.
	 *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
	 *	in the mbuf pkthdr structure.
	 *   c. Each mbuf containing the MPTCP metadata must have its
	 *	pkt_flags marked with the PKTF_MPTCP flag.
	 */

	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
		sb_mb = mp_so->so_snd.sb_mb;
	}

	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
		    (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
		    (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1);

		/* Fix it to prevent looping */
		if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		goto out;
	}

	VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

	if (sb_mb->m_pkthdr.mp_rlen == 0 &&
	    !(so->so_state & SS_ISCONNECTED) &&
	    (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		tp->t_mpflags |= TMPF_TFO_REQUEST;
		goto zero_len_write;
	}

	mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

	/* First, drop acknowledged data */
	if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
		os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier "
		    "dsn %u suna %u reinject? %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn,
		    (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq);
		if (mpte->mpte_reinjectq) {
			mptcp_clean_reinjectq(mpte);
		} else {
			uint64_t len = 0;
			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;
		}
	}

	/* Check again because of above sbdrop */
	if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
		os_log_error(mptcp_log_handle, "%s - $%lx: send-buffer is empty\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	/*
	 * In degraded mode, we don't receive data acks, so force free
	 * mbufs less than snd_nxt
	 */
	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
	    mp_so->so_snd.sb_mb) {
		mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
		if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
			uint64_t len = 0;
			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;

			os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna);
		}
	}

	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
		mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
		so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
	}

	/*
	 * Adjust the top level notion of next byte used for retransmissions
	 * and sending FINs.
	 */
	if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
		mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
	}

	/* Now determine the offset from which to start transmitting data */
	if (mpte->mpte_reinjectq) {
		sb_mb = mpte->mpte_reinjectq;
	} else {
dont_reinject:
		sb_mb = mp_so->so_snd.sb_mb;
	}
	if (sb_mb == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
		goto out;
	}

	if (sb_mb == mpte->mpte_reinjectq) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;

		if (mptcp_search_seq_in_sub(sb_mb, so)) {
			if (mptcp_can_send_more(mp_tp, TRUE)) {
				goto dont_reinject;
			}

			goto out;
		}

		reinjected = TRUE;
	} else if (flags & MPTCP_SUBOUT_PROBING) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
		off = 0;
	} else {
		sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

		/*
		 * With TFO, there might be no data at all, thus still go into this
		 * code-path here.
		 */
		if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
		    MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
			off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
			sb_cc -= off;
		} else {
			os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt,
			    (uint32_t)mp_tp->mpt_sndmax);

			goto out;
		}
	}

	sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
	if (sb_cc <= 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
		    (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
		    mptcp_subflow_cwnd_space(so));
	}

	sb_cc = min(sb_cc, UINT16_MAX);

	/*
	 * Create a DSN mapping for the data we are about to send. It all
	 * has the same mapping.
	 */
	if (reinjected) {
		mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
	} else {
		mpt_dsn = mp_tp->mpt_snduna + off;
	}

	mpt_mbuf = sb_mb;
	while (mpt_mbuf && reinjected == FALSE &&
	    (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
	    mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
		off -= mpt_mbuf->m_pkthdr.mp_rlen;
		mpt_mbuf = mpt_mbuf->m_next;
	}
	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
		    __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
		    mpts->mpts_probecnt),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

	head = tail = NULL;

	while (tot_sent < sb_cc) {
		int32_t mlen;

		mlen = mpt_mbuf->m_len;
		mlen -= off;
		mlen = min(mlen, sb_cc - tot_sent);

		if (mlen < 0) {
			os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
			    (uint32_t)off, sb_cc, tot_sent);
			goto out;
		}

		if (mlen == 0) {
			break;
		}

		m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
		    M_COPYM_MUST_COPY_HDR);
		if (m == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
			    (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			error = ENOBUFS;
			break;
		}

		/* Create a DSN mapping for the data (m_copym does it) */
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_next == NULL);

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
		m->m_pkthdr.mp_dsn = mpt_dsn;
		m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
		m->m_pkthdr.len = mlen;

		if (head == NULL) {
			head = tail = m;
		} else {
			tail->m_next = m;
			tail = m;
		}

		tot_sent += mlen;
		off = 0;
		mpt_mbuf = mpt_mbuf->m_next;
	}

	if (reinjected) {
		if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
			struct mbuf *n = sb_mb;

			while (n) {
				n->m_pkthdr.mp_dsn += sb_cc;
				n->m_pkthdr.mp_rlen -= sb_cc;
				n = n->m_next;
			}
			m_adj(sb_mb, sb_cc);
		} else {
			mpte->mpte_reinjectq = sb_mb->m_nextpkt;
			m_freem(sb_mb);
		}
	}

	mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
	    __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
	    tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
		dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
		    tot_sent);
	}

	/* Now, let's update rel-seq and the data-level length */
	mpts->mpts_rel_seq += tot_sent;
	m = head;
	while (m) {
		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
			m->m_pkthdr.mp_csum = dss_csum;
		}
		m->m_pkthdr.mp_rlen = tot_sent;
		m = m->m_next;
	}

	if (head != NULL) {
		if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
		    (tp->t_tfo_stats == 0)) {
			tp->t_mpflags |= TMPF_TFO_REQUEST;
		}

		error = sock_sendmbuf(so, NULL, head, 0, NULL);

		DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
		    struct sockbuf *, &so->so_rcv,
		    struct sockbuf *, &so->so_snd,
		    struct mptses *, mpte, struct mptsub *, mpts,
		    size_t, tot_sent);
	}

done_sending:
	if (error == 0 ||
	    (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
		uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

		if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
			tcpstat.tcps_mp_num_probes++;
			if ((uint32_t)tot_sent < mpts->mpts_maxseg) {
				mpts->mpts_probecnt += 1;
			} else {
				mpts->mpts_probecnt +=
				    tot_sent / mpts->mpts_maxseg;
			}
		}

		if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
			if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
			    MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) {
				mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
			}
			mp_tp->mpt_sndnxt = new_sndnxt;
		}

		mptcp_cancel_timer(mp_tp, MPTT_REXMT);

		/* Must be here as mptcp_can_send_more() checks for this */
		soclearfastopen(mp_so);

		if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
		    (mpts->mpts_probesoon != 0)) {
			mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
			    __func__, mpts->mpts_connid,
			    !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
			    tot_sent, (int) sb_cc, mpts->mpts_probecnt,
			    (tcp_now - mpts->mpts_probesoon)),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		}

		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mptcp_set_cellicon(mpte, mpts);

			mpte->mpte_used_cell = 1;
		} else {
			/*
			 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
			 * explicitly set the cellicon, then we unset it again.
			 */
			if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
				mptcp_unset_cellicon(mpte, NULL, 1);
			}

			mpte->mpte_used_wifi = 1;
		}

		/*
		 * Don't propagate EWOULDBLOCK - it's already taken care of
		 * in mptcp_usr_send for TFO.
		 */
		error = 0;
	} else {
		os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
	}

out:
	if (wakeup) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
	return error;

zero_len_write:
	/* Opting to call pru_send as no mbuf at subflow level */
	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
	    NULL, current_proc());

	goto done_sending;
}
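/*
 * Illustrative sketch (not part of the build): the three invariants from
 * the "notes on data sequencing" above, expressed as a hypothetical
 * checker over an already-built chain.  Every mbuf handed to the subflow
 * must be an M_PKTHDR, must carry PKTF_MPTCP, and all mbufs of one send
 * share a single DSS mapping ("It all has the same mapping").
 */
#if 0
static void
example_assert_dss_chain(struct mbuf *head)
{
	struct mbuf *m;

	for (m = head; m != NULL; m = m->m_next) {
		VERIFY(m->m_flags & M_PKTHDR);                          /* note (a) */
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);             /* note (c) */
		VERIFY(m->m_pkthdr.mp_dsn == head->m_pkthdr.mp_dsn);    /* note (b) */
	}
}
#endif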
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
	struct mbuf *n, *prev = NULL;

	mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
	    __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
	    m->m_pkthdr.mp_rseq),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	n = mpte->mpte_reinjectq;

	/*
	 * First, look for an mbuf n, whose data-sequence-number is greater
	 * than or equal to m's sequence number.
	 */
	while (n) {
		if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) {
			break;
		}

		prev = n;

		n = n->m_nextpkt;
	}

	if (n) {
		/* m is already fully covered by the next mbuf in the queue */
		if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
		    n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
			mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
			    __func__, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}

		/* m is covering the next mbuf entirely, thus we remove this guy */
		if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
			struct mbuf *tmp = n->m_nextpkt;

			mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
			    __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
			    n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

			m->m_nextpkt = NULL;
			if (prev == NULL) {
				mpte->mpte_reinjectq = tmp;
			} else {
				prev->m_nextpkt = tmp;
			}

			m_freem(n);
			n = tmp;
		}
	}

	if (prev) {
		/* m is already fully covered by the previous mbuf in the queue */
		if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
			mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
			    __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}
	}

	if (prev == NULL) {
		mpte->mpte_reinjectq = m;
	} else {
		prev->m_nextpkt = m;
	}

	m->m_nextpkt = n;

	return;

dont_queue:
	m_freem(m);
	return;
}
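/*
 * Illustrative sketch (not part of the build): the reinject queue above is
 * a singly-linked list kept sorted by data sequence number, with fully
 * covered entries dropped on insert.  A hypothetical minimal version of
 * the sorted-insert step (coverage elimination omitted):
 */
#if 0
struct example_pkt { uint64_t dsn; struct example_pkt *next; };

static void
example_sorted_insert(struct example_pkt **head, struct example_pkt *m)
{
	struct example_pkt **pp = head;

	/* advance to the first entry with a dsn >= m->dsn */
	while (*pp != NULL && (*pp)->dsn < m->dsn) {
		pp = &(*pp)->next;
	}
	m->next = *pp;
	*pp = m;
}
#endif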
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m;

	m = mp_so->so_snd.sb_mb;

	while (m) {
		/* If this segment covers what we are looking for, return it. */
		if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) {
			break;
		}

		/* Segment is no more in the queue */
		if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) {
			return NULL;
		}

		m = m->m_next;
	}

	return m;
}
static struct mbuf *
mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len)
{
	struct mbuf *top = NULL, *tail = NULL;
	uint64_t dsn;
	uint32_t dlen, rseq;

	dsn = m->m_pkthdr.mp_dsn;
	dlen = m->m_pkthdr.mp_rlen;
	rseq = m->m_pkthdr.mp_rseq;

	while (len > 0) {
		struct mbuf *n;

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
		if (n == NULL) {
			os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
			goto err;
		}

		VERIFY(n->m_flags & M_PKTHDR);
		VERIFY(n->m_next == NULL);
		VERIFY(n->m_pkthdr.mp_dsn == dsn);
		VERIFY(n->m_pkthdr.mp_rlen == dlen);
		VERIFY(n->m_pkthdr.mp_rseq == rseq);
		VERIFY(n->m_len == m->m_len);

		n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

		if (top == NULL) {
			top = n;
		}

		if (tail != NULL) {
			tail->m_next = n;
		}

		tail = n;

		len -= m->m_len;
		m = m->m_next;
	}

	return top;

err:
	if (top) {
		m_freem(top);
	}

	return NULL;
}
void
mptcp_reinject_mbufs(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct sockbuf *sb = &so->so_snd;
	struct mbuf *m;

	m = sb->sb_mb;
	while (m) {
		struct mbuf *n = m->m_next, *orig = m;

		mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
		    __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
		    m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) {
			goto next;
		}

		/* Has it all already been acknowledged at the data-level? */
		if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) {
			goto next;
		}

		/* Part of this has already been acknowledged - lookup in the
		 * MPTCP-socket for the segment.
		 */
		if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
			m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
			if (m == NULL) {
				goto next;
			}
		}

		/* Copy the mbuf with headers (aka, DSN-numbers) */
		m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
		if (m == NULL) {
			break;
		}

		VERIFY(m->m_nextpkt == NULL);

		/* Now, add to the reinject-queue, eliminating overlapping
		 * segments
		 */
		mptcp_add_reinjectq(mpte, m);

		orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
		/* mp_rlen can cover multiple mbufs, so advance to the end of it. */
		while (n) {
			VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

			if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) {
				break;
			}

			n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
			n = n->m_next;
		}

		m = n;
	}
}
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mptetoso(mpte));

	while (mpte->mpte_reinjectq) {
		struct mbuf *m = mpte->mpte_reinjectq;

		if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) {
			break;
		}

		mpte->mpte_reinjectq = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
}
/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	socket_lock_assert_owned(mptetoso(mpte));

	if ((mpts->mpts_evctl & events) == events) {
		return;
	}

	mpts->mpts_evctl |= events;

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
		return;
	}

	mptcp_subflow_workloop(mpte);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint)
{
	ev_ret_t ret = MPTS_EVRET_OK;
	int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
	    sizeof(mpsub_ev_entry_tbl[0]);

	/* bail if there's nothing to process */
	if (!mpts->mpts_evctl) {
		return ret;
	}

	if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
	    SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
	    SO_FILT_HINT_DISCONNECTED)) {
		mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
	}

	DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
	    struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

	mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
	    mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

	/*
	 * Process all the socket filter hints and reset the hint
	 * once it is handled
	 */
	for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
		/*
		 * Always execute the DISCONNECTED event, because it will wakeup
		 * the app.
		 */
		if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
		    (ret >= MPTS_EVRET_OK ||
		    mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
			mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
			ev_ret_t error =
			    mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
			ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
		}
	}

	/*
	 * We should be getting only events specified via sock_catchevents(),
	 * so loudly complain if we have any unprocessed one(s).
	 */
	if (mpts->mpts_evctl || ret < MPTS_EVRET_OK) {
		mptcplog((LOG_WARNING, "%s%s: cid %d evret %d unhandled events=%b\n", __func__,
		    (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
		    mpts->mpts_connid,
		    ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
	} else {
		mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
		    mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	return ret;
}
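/*
 * Illustrative sketch (not part of the build): the table-driven dispatch
 * used above.  Each entry pairs an event bit with a handler; the loop
 * clears the bit before calling the handler, so a hint is processed at
 * most once per pass.  Hypothetical miniature version:
 */
#if 0
struct example_ev_entry {
	uint64_t mask;
	int (*handler)(void *ctx);
};

static void
example_dispatch(struct example_ev_entry *tbl, int n, uint64_t *pending, void *ctx)
{
	int i;

	for (i = 0; i < n && *pending; i++) {
		if (*pending & tbl[i].mask) {
			*pending &= ~tbl[i].mask;       /* reset before handling */
			tbl[i].handler(ctx);
		}
	}
}
#endif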
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
	    mpts->mpts_connid, event),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got an event for this subflow that might need to be propagated,
	 * based on the state of the MPTCP connection.
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mp_so->so_error = so->so_error;
		*p_mpsofilt_hint |= event;
	}

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(p_mpsofilt_hint, event)
	struct socket *mp_so;
	struct tcpcb *tp;

	mp_so = mptetoso(mpte);
	tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

	/*
	 * This overwrites any previous mpte_lost_aid to avoid storing
	 * too much state when the typical case has only two subflows.
	 */
	mpte->mpte_flags |= MPTE_SND_REM_ADDR;
	mpte->mpte_lost_aid = tp->t_local_aid;

	mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection has lost its source address.
	 */
	mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

	if (mp_so->so_flags & SOF_NOADDRAVAIL) {
		mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
	}

	return MPTS_EVRET_DELETE;
}
static ev_ret_t
mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *so, *mp_so;

	so = mpts->mpts_socket;

	if (so->so_error != ENODATA) {
		return MPTS_EVRET_OK;
	}

	mp_so = mptetoso(mpte);

	mp_so->so_error = ENODATA;

	sorwakeup(mp_so);
	sowwakeup(mp_so);

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got a Data FIN for the MPTCP connection.
	 * The FIN may arrive with data.  The data is handed up to the
	 * mptcp socket and the user is notified so that it may close
	 * the socket if needed.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
	}

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct mptsub *mpts_alt = NULL;
	struct socket *alt_so = NULL;
	struct socket *mp_so;
	int altpath_exists = 0;

	mp_so = mptetoso(mpte);
	os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

	mptcp_reinject_mbufs(mpts->mpts_socket);

	mpts_alt = mptcp_get_subflow(mpte, NULL);

	/* If there is no alternate eligible subflow, ignore the failover hint. */
	if (mpts_alt == NULL || mpts_alt == mpts) {
		os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
		    (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		goto done;
	}

	altpath_exists = 1;
	alt_so = mpts_alt->mpts_socket;
	if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
		/* All data acknowledged and no RTT spike */
		if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
			mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
		} else {
			/* no alternate path available */
			altpath_exists = 0;
		}
	}

	if (altpath_exists) {
		mpts_alt->mpts_flags |= MPTSF_ACTIVE;

		mpte->mpte_active_sub = mpts_alt;
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;

		os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid);

		mptcpstats_inc_switch(mpte, mpts);

		sowwakeup(alt_so);
	} else {
		mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
	}

	return MPTS_EVRET_OK;
}
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection cannot use the outgoing interface, let's
	 * close this subflow.
	 */
	mptcp_subflow_abort(mpts, EPERM);

	mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return MPTS_EVRET_DELETE;
}
/*
 * https://tools.ietf.org/html/rfc6052#section-2
 * https://tools.ietf.org/html/rfc6147#section-5.2
 */
static bool
mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
    const struct ipv6_prefix *prefix,
    struct in_addr *addrv4)
{
	char buf[MAX_IPv4_STR_LEN];
	char *ptrv4 = (char *)addrv4;
	const char *ptr = (const char *)addr;

	if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
		return false;
	}

	switch (prefix->prefix_len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptrv4, ptr + 12, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptrv4, ptr + 9, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptrv4, ptr + 7, 1);
		memcpy(ptrv4 + 1, ptr + 9, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptrv4, ptr + 6, 2);
		memcpy(ptrv4 + 2, ptr + 9, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptrv4, ptr + 5, 3);
		memcpy(ptrv4 + 3, ptr + 9, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptrv4, ptr + 4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n",
		    prefix->prefix_len);
	}

	os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
	    inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));

	return true;
}
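/*
 * Illustrative note (not part of the build): per RFC 6052 section 2.2,
 * the embedded IPv4 address sits at a byte offset determined by the NAT64
 * prefix length, and bits 64..71 of the IPv6 address (byte 8) are always
 * zero, which is why the copies above skip byte 8 and resume at byte 9.
 * The layout mirrored by the switch above:
 *
 *   prefix bits   IPv4 bytes taken from IPv6 bytes
 *        32       4..7
 *        40       5..7 and 9
 *        48       6..7 and 9..10
 *        56       7    and 9..11
 *        64       9..12
 *        96       12..15
 */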
static void
mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
{
	struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
	struct socket *so = mpts->mpts_socket;
	struct ifnet *ifp;
	int j;

	/* Subflow IPs will be steered directly by the server - no need to
	 * desynthesize.
	 */
	if (mpte->mpte_flags & MPTE_UNICAST_IP) {
		return;
	}

	ifp = sotoinpcb(so)->inp_last_outifp;

	if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
		mptcp_ask_for_nat64(ifp);
		return;
	}

	for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
		bool success;

		if (nat64prefixes[j].prefix_len == 0) {
			continue;
		}

		success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
		    &nat64prefixes[j],
		    &mpte->mpte_dst_v4_nat64.sin_addr);
		if (success) {
			mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
			mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
			mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
			break;
		}
	}
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mptcb *mp_tp;
	int af;
	boolean_t mpok = FALSE;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	tp = sototcpcb(so);
	af = mpts->mpts_dst.sa_family;

	if (mpts->mpts_flags & MPTSF_CONNECTED) {
		return MPTS_EVRET_OK;
	}

	if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
	    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
		if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
		    (so->so_state & SS_ISCONNECTED)) {
			mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
			    __func__, mpts->mpts_connid),
			    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
			(void) soshutdownlock(so, SHUT_RD);
			(void) soshutdownlock(so, SHUT_WR);
			(void) sodisconnectlocked(so);
		}
		return MPTS_EVRET_OK;
	}

	/*
	 * The subflow connection has been connected.  Find out whether it
	 * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
	 *
	 *   a. If MPTCP connection is not yet established, then this must be
	 *	the first subflow connection.  If MPTCP failed to negotiate,
	 *	fallback to regular TCP by degrading this subflow.
	 *
	 *   b. If MPTCP connection has been established, then this must be
	 *	one of the subsequent subflow connections.  If MPTCP failed
	 *	to negotiate, disconnect the connection.
	 *
	 * Right now, we simply unblock any waiters at the MPTCP socket layer
	 * if the MPTCP connection has not been established.
	 */

	if (so->so_state & SS_ISDISCONNECTED) {
		/*
		 * With MPTCP joins, a connection is connected at the subflow
		 * level, but the 4th ACK from the server elevates the MPTCP
		 * subflow to connected state.  So there is a small window
		 * where the subflow could get disconnected before the
		 * connected event is processed.
		 */
		return MPTS_EVRET_OK;
	}

	if (mpts->mpts_flags & MPTSF_TFO_REQD) {
		mptcp_drop_tfo_data(mpte, mpts);
	}

	mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
	mpts->mpts_flags |= MPTSF_CONNECTED;

	if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	}

	tp->t_mpflags &= ~TMPF_TFO_REQUEST;

	/* get/verify the outbound interface */
	inp = sotoinpcb(so);

	mpts->mpts_maxseg = tp->t_maxseg;

	mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
	    ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
	    ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
	    (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

	mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mp_tp->mpt_state = MPTCPS_ESTABLISHED;
		mpte->mpte_associd = mpts->mpts_connid;
		DTRACE_MPTCP2(state__change,
		    struct mptcb *, mp_tp,
		    uint32_t, 0 /* event */);

		if (SOCK_DOM(so) == AF_INET) {
			in_getsockaddr_s(so, &mpte->__mpte_src_v4);
		} else {
			in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
		}

		mpts->mpts_flags |= MPTSF_ACTIVE;

		/* case (a) above */
		if (!mpok) {
			tcpstat.tcps_mpcap_fallback++;

			tp->t_mpflags |= TMPF_INFIN_SENT;
			mptcp_notify_mpfail(so);
		} else {
			if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
			    mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
				tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			} else {
				mpts->mpts_flags |= MPTSF_PREFERRED;
			}
			mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
			mpte->mpte_nummpcapflows++;

			if (SOCK_DOM(so) == AF_INET6) {
				mptcp_handle_ipv6_connection(mpte, mpts);
			}

			mptcp_check_subflows_and_add(mpte);

			if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
				mpte->mpte_initial_cell = 1;
			}

			mpte->mpte_handshake_success = 1;
		}

		mp_tp->mpt_sndwnd = tp->snd_wnd;
		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
		soisconnected(mp_so);
	} else if (mpok) {
		/*
		 * In case of additional flows, the MPTCP socket is not
		 * MPTSF_MP_CAPABLE until an ACK is received from server
		 * for 3-way handshake.  TCP would have guaranteed that this
		 * is an MPTCP subflow.
		 */
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
		    !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
		    mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
			tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			mpts->mpts_flags &= ~MPTSF_PREFERRED;
		} else {
			mpts->mpts_flags |= MPTSF_PREFERRED;
		}

		mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
		mpte->mpte_nummpcapflows++;

		mpts->mpts_rel_seq = 1;

		mptcp_check_subflows_and_remove(mpte);
	} else {
		int i;

		/* Should we try the alternate port? */
		if (mpte->mpte_alternate_port &&
		    inp->inp_fport != mpte->mpte_alternate_port) {
			union sockaddr_in_4_6 dst;
			struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;

			memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);

			dst_in->sin_port = mpte->mpte_alternate_port;

			mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
			    mpts->mpts_ifscope, NULL);
		} else { /* Else, we tried all we could, mark this interface as non-MPTCP */
			for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
				struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

				if (inp->inp_last_outifp->if_index == info->ifindex) {
					info->no_mptcp_support = 1;
					break;
				}
			}
		}

		tcpstat.tcps_join_fallback++;
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
			tcpstat.tcps_mptcp_cell_proxy++;
		} else {
			tcpstat.tcps_mptcp_wifi_proxy++;
		}

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

		return MPTS_EVRET_OK;
	}

	/* This call, just to "book" an entry in the stats-table for this ifindex */
	mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	mptcp_output(mpte);

	return MPTS_EVRET_OK; /* keep the subflow socket around */
}
4203 mptcp_subflow_disconnected_ev(struct mptses
*mpte
, struct mptsub
*mpts
,
4204 uint64_t *p_mpsofilt_hint
, uint64_t event
)
4206 #pragma unused(event, p_mpsofilt_hint)
4207 struct socket
*mp_so
, *so
;
4208 struct mptcb
*mp_tp
;
4210 mp_so
= mptetoso(mpte
);
4211 mp_tp
= mpte
->mpte_mptcb
;
4212 so
= mpts
->mpts_socket
;
4214 mptcplog((LOG_DEBUG
, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
4215 __func__
, mpts
->mpts_connid
, so
->so_error
, mp_tp
->mpt_state
,
4216 !!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
),
4217 !!(mpts
->mpts_flags
& MPTSF_ACTIVE
), sototcpcb(so
)->t_mpflags
),
4218 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
4220 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
) {
4221 return MPTS_EVRET_DELETE
;
4224 mpts
->mpts_flags
|= MPTSF_DISCONNECTED
;
4226 /* The subflow connection has been disconnected. */
4228 if (mpts
->mpts_flags
& MPTSF_MPCAP_CTRSET
) {
4229 mpte
->mpte_nummpcapflows
--;
4230 if (mpte
->mpte_active_sub
== mpts
) {
4231 mpte
->mpte_active_sub
= NULL
;
4232 mptcplog((LOG_DEBUG
, "%s: resetting active subflow \n",
4233 __func__
), MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_LOG
);
4235 mpts
->mpts_flags
&= ~MPTSF_MPCAP_CTRSET
;
4238 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
||
4239 ((mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) && (mpts
->mpts_flags
& MPTSF_ACTIVE
))) {
4240 mptcp_drop(mpte
, mp_tp
, so
->so_error
);
4244 * Clear flags that are used by getconninfo to return state.
4245 * Retain like MPTSF_DELETEOK for internal purposes.
4247 mpts
->mpts_flags
&= ~(MPTSF_CONNECTING
| MPTSF_CONNECT_PENDING
|
4248 MPTSF_CONNECTED
| MPTSF_DISCONNECTING
| MPTSF_PREFERRED
|
4249 MPTSF_MP_CAPABLE
| MPTSF_MP_READY
| MPTSF_MP_DEGRADED
| MPTSF_ACTIVE
);
4251 return MPTS_EVRET_DELETE
;
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	ev_ret_t ret = MPTS_EVRET_OK;
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			goto done;
		}
		mpts->mpts_flags |= MPTSF_MP_DEGRADED;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
		mpts->mpts_flags |= MPTSF_MP_READY;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_READY;
	}

	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
		mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
	}

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		ret = MPTS_EVRET_DISCONNECT_FALLBACK;

		m_freem_list(mpte->mpte_reinjectq);
		mpte->mpte_reinjectq = NULL;
	} else if (mpts->mpts_flags & MPTSF_MP_READY) {
		mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
		ret = MPTS_EVRET_CONNECT_PENDING;
	}

done:
	return ret;
}
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;
    boolean_t is_fastclose;

    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    /* We got an invalid option or a fast close */
    struct tcptemp *t_template;
    struct inpcb *inp = sotoinpcb(so);
    struct tcpcb *tp = NULL;

    tp = intotcpcb(inp);
    so->so_error = ECONNABORTED;

    is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

    tp->t_mpflags |= TMPF_RESET;

    t_template = tcp_maketemplate(tp);
    if (t_template) {
        struct tcp_respond_args tra;

        bzero(&tra, sizeof(tra));
        if (inp->inp_flags & INP_BOUND_IF) {
            tra.ifscope = inp->inp_boundifp->if_index;
        } else {
            tra.ifscope = IFSCOPE_NONE;
        }
        tra.awdl_unrestricted = 1;

        tcp_respond(tp, t_template->tt_ipgen,
            &t_template->tt_t, (struct mbuf *)NULL,
            tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
        (void) m_free(dtom(t_template));
    }

    if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
        struct mptsub *iter, *tmp;

        *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

        mp_so->so_error = ECONNRESET;

        TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) {
            if (iter == mpts) {
                continue;
            }
            mptcp_subflow_abort(iter, ECONNABORTED);
        }

        /*
         * mptcp_drop is being called after processing the events, to fully
         * close the MPTCP connection
         */
        mptcp_drop(mpte, mp_tp, mp_so->so_error);
    }

    mptcp_subflow_abort(mpts, ECONNABORTED);

    if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) {
        mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
    }

    return MPTS_EVRET_DELETE;
}
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_READ_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            TCPS_HAVERCVDFIN2(tp->t_state)) {
            continue;
        }

        if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active) {
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
    }

    return MPTS_EVRET_OK;
}
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_WRITE_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            tp->t_state > TCPS_CLOSE_WAIT) {
            continue;
        }

        if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active) {
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
    }

    return MPTS_EVRET_OK;
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
    struct socket *mp_so, *so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);

    mp_so = mptetoso(mpte);
    so = mpts->mpts_socket;

    socket_lock_assert_owned(mp_so);

    if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
        mpo->mpo_level == SOL_SOCKET &&
        mpo->mpo_name == SO_MARK_CELLFALLBACK) {
        struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];

        mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
            __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable_for_session(mpte),
            sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
            mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        /*
         * When we open a new subflow, mark it as cell fallback, if
         * this subflow goes over cell.
         *
         * (except for first-party apps)
         */

        if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
            return 0;
        }

        if (sotoinpcb(so)->inp_last_outifp &&
            !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
            return 0;
        }

        /*
         * This here is an OR, because if the app is not binding to the
         * interface, then it definitely is not a cell-fallback
         * connection.
         */
        if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
            !IFNET_IS_CELLULAR(ifp)) {
            return 0;
        }
    }

    mpo->mpo_flags &= ~MPOF_INTERIM;

    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof(int);
    sopt.sopt_p = kernproc;

    error = sosetoptlock(so, &sopt, 0);
    if (error) {
        os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
            "val %d set error %d\n", __func__,
            (unsigned long)VM_KERNEL_ADDRPERM(mpte),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval, error);
    }
    return error;
}
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
    struct socket *mp_so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
    mp_so = mptetoso(mpte);

    socket_lock_assert_owned(mp_so);

    bzero(&sopt, sizeof(sopt));
    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof(int);
    sopt.sopt_p = kernproc;

    error = sogetoptlock(so, &sopt, 0); /* already locked */
    if (error) {
        os_log_error(mptcp_log_handle,
            "%s - %lx: sopt %s get error %d\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error);
    }
    return error;
}
/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed. The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
    struct mppcb *mpp, *tmpp;
    uint32_t active = 0;

    LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

    TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
        struct socket *mp_so;
        struct mptses *mpte;
        struct mptcb *mp_tp;

        mp_so = mpp->mpp_socket;
        mpte = mptompte(mpp);
        mp_tp = mpte->mpte_mptcb;

        if (!mpp_try_lock(mpp)) {
            active++;
            continue;
        }

        VERIFY(mpp->mpp_flags & MPP_ATTACHED);

        /* check again under the lock */
        if (mp_so->so_usecount > 0) {
            boolean_t wakeup = FALSE;
            struct mptsub *mpts, *tmpts;

            if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
                if (mp_tp->mpt_gc_ticks > 0) {
                    mp_tp->mpt_gc_ticks--;
                }
                if (mp_tp->mpt_gc_ticks == 0) {
                    wakeup = TRUE;
                }
            }
            if (wakeup) {
                TAILQ_FOREACH_SAFE(mpts,
                    &mpte->mpte_subflows, mpts_entry, tmpts) {
                    mptcp_subflow_eupcall1(mpts->mpts_socket,
                        mpts, SO_FILT_HINT_DISCONNECTED);
                }
            }
            socket_unlock(mp_so, 0);
            active++;
            continue;
        }

        if (mpp->mpp_state != MPPCB_STATE_DEAD) {
            panic("%s - %lx: skipped state "
                "[u=%d,r=%d,s=%d]\n", __func__,
                (unsigned long)VM_KERNEL_ADDRPERM(mpte),
                mp_so->so_usecount, mp_so->so_retaincnt,
                mpp->mpp_state);
        }

        if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) {
            mptcp_close(mpte, mp_tp);
        }

        mptcp_session_destroy(mpte);

        DTRACE_MPTCP4(dispose, struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mppcb *, mpp);

        mp_pcbdispose(mpp);
        sodealloc(mp_so);
    }

    return active;
}
/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
    struct socket *mp_so = mptetoso(mpte);

    VERIFY(mpte->mpte_mptcb == mp_tp);

    socket_lock_assert_owned(mp_so);

    DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
        uint32_t, 0 /* event */);

    if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
        errno = mp_tp->mpt_softerror;
    }
    mp_so->so_error = errno;

    return mptcp_close(mpte, mp_tp);
}
/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
    struct mptsub *mpts = NULL, *tmpts = NULL;
    struct socket *mp_so = mptetoso(mpte);

    socket_lock_assert_owned(mp_so);
    VERIFY(mpte->mpte_mptcb == mp_tp);

    mp_tp->mpt_state = MPTCPS_TERMINATE;

    mptcp_freeq(mp_tp);

    soisdisconnected(mp_so);

    /* Clean up all subflows */
    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        mptcp_subflow_disconnect(mpte, mpts);
    }

    return NULL;
}
void
mptcp_notify_close(struct socket *so)
{
    soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * MPTCP workloop.
 */
static void
mptcp_subflow_workloop(struct mptses *mpte)
{
    boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
    uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
    struct mptsub *mpts, *tmpts;
    struct socket *mp_so;

    mp_so = mptetoso(mpte);

    socket_lock_assert_owned(mp_so);

    if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
        mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
        return;
    }
    mpte->mpte_flags |= MPTE_IN_WORKLOOP;

relaunch:
    mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;

    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        ev_ret_t ret;

        if (mpts->mpts_socket->so_usecount == 0) {
            /* Will be removed soon by tcp_garbage_collect */
            continue;
        }

        mptcp_subflow_addref(mpts);
        mpts->mpts_socket->so_usecount++;

        ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

        /*
         * If MPTCP socket is closed, disconnect all subflows.
         * This will generate a disconnect event which will
         * be handled during the next iteration, causing a
         * non-zero error to be returned above.
         */
        if (mp_so->so_flags & SOF_PCBCLEARING) {
            mptcp_subflow_disconnect(mpte, mpts);
        }

        switch (ret) {
        case MPTS_EVRET_OK:
            break;
        case MPTS_EVRET_DELETE:
            mptcp_subflow_soclose(mpts);
            break;
        case MPTS_EVRET_CONNECT_PENDING:
            connect_pending = TRUE;
            break;
        case MPTS_EVRET_DISCONNECT_FALLBACK:
            disconnect_fallback = TRUE;
            break;
        default:
            mptcplog((LOG_DEBUG,
                "MPTCP Socket: %s: mptcp_subflow_events "
                "returned invalid value: %d\n", __func__,
                ret),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            break;
        }
        mptcp_subflow_remref(mpts); /* ours */

        VERIFY(mpts->mpts_socket->so_usecount != 0);
        mpts->mpts_socket->so_usecount--;
    }

    if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
        VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

        if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
            mp_so->so_state |= SS_CANTRCVMORE;
            sorwakeup(mp_so);
        }

        soevent(mp_so, mpsofilt_hint_mask);
    }

    if (!connect_pending && !disconnect_fallback) {
        goto exit;
    }

    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        if (disconnect_fallback) {
            struct socket *so = NULL;
            struct inpcb *inp = NULL;
            struct tcpcb *tp = NULL;

            if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
                continue;
            }

            mpts->mpts_flags |= MPTSF_MP_DEGRADED;

            if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
                MPTSF_DISCONNECTED | MPTSF_CONNECT_PENDING)) {
                continue;
            }

            so = mpts->mpts_socket;

            /*
             * The MPTCP connection has degraded to a fallback
             * mode, so there is no point in keeping this subflow
             * regardless of its MPTCP-readiness state, unless it
             * is the primary one which we use for fallback. This
             * assumes that the subflow used for fallback is the
             * ACTIVE one.
             */

            inp = sotoinpcb(so);
            tp = intotcpcb(inp);
            tp->t_mpflags &=
                ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
            tp->t_mpflags |= TMPF_TCP_FALLBACK;

            soevent(so, SO_FILT_HINT_MUSTRST);
        } else if (connect_pending) {
            /*
             * The MPTCP connection has progressed to a state
             * where it supports full multipath semantics; allow
             * additional joins to be attempted for all subflows
             * that are in the PENDING state.
             */
            if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
                int error = mptcp_subflow_soconnectx(mpte, mpts);

                if (error) {
                    mptcp_subflow_abort(mpts, error);
                }
            }
        }
    }

exit:
    if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
        goto relaunch;
    }

    mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
}
/*
 * Protocol pr_lock callback.
 */
static int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
    struct mppcb *mpp = mpsotomppcb(mp_so);
    void *lr_saved;

    if (lr == NULL) {
        lr_saved = __builtin_return_address(0);
    } else {
        lr_saved = lr;
    }

    if (mpp == NULL) {
        panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
            mp_so, lr_saved, solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    mpp_lock(mpp);

    if (mp_so->so_usecount < 0) {
        panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
            mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
            solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    if (refcount != 0) {
        mp_so->so_usecount++;
        mpp->mpp_inside++;
    }
    mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
    mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

    return 0;
}
/*
 * Protocol pr_unlock callback.
 */
static int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
    struct mppcb *mpp = mpsotomppcb(mp_so);
    void *lr_saved;

    if (lr == NULL) {
        lr_saved = __builtin_return_address(0);
    } else {
        lr_saved = lr;
    }

    if (mpp == NULL) {
        panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
            mp_so, mp_so->so_usecount, lr_saved,
            solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    socket_lock_assert_owned(mp_so);

    if (refcount != 0) {
        mp_so->so_usecount--;
        mpp->mpp_inside--;
    }

    if (mp_so->so_usecount < 0) {
        panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
            mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    if (mpp->mpp_inside < 0) {
        panic("%s: mpp=%p inside=%x lrh= %s\n", __func__,
            mpp, mpp->mpp_inside, solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
    mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
    mpp_unlock(mpp);

    return 0;
}
/*
 * Protocol pr_getlock callback.
 */
static lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
    struct mppcb *mpp = mpsotomppcb(mp_so);

    if (mpp == NULL) {
        panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
            solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    if (mp_so->so_usecount < 0) {
        panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
            mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
        /* NOTREACHED */
    }
    return mpp_getlock(mpp, flags);
}
/*
 * MPTCP Join support
 */

void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id)
{
    struct tcpcb *tp = sototcpcb(so);
    struct mptcp_subf_auth_entry *sauth_entry;

    /*
     * The address ID of the first flow is implicitly 0.
     */
    if (mp_tp->mpt_state == MPTCPS_CLOSED) {
        tp->t_local_aid = 0;
    } else {
        tp->t_local_aid = addr_id;
        tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
        so->so_flags |= SOF_MP_SEC_SUBFLOW;
    }
    sauth_entry = zalloc(mpt_subauth_zone);
    sauth_entry->msae_laddr_id = tp->t_local_aid;
    sauth_entry->msae_raddr_id = 0;
    sauth_entry->msae_raddr_rand = 0;
try_again:
    sauth_entry->msae_laddr_rand = RandomULong();
    if (sauth_entry->msae_laddr_rand == 0) {
        goto try_again;
    }
    LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}
static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
    struct mptcp_subf_auth_entry *sauth_entry;
    struct tcpcb *tp = NULL;
    int found = 0;

    tp = sototcpcb(so);
    if (tp == NULL) {
        return;
    }

    LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
        if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
            found = 1;
            break;
        }
    }
    if (found) {
        LIST_REMOVE(sauth_entry, msae_next);
        zfree(mpt_subauth_zone, sauth_entry);
    }
}
void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
    struct mptcp_subf_auth_entry *sauth_entry;

    LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
        if (sauth_entry->msae_laddr_id == addr_id) {
            if (lrand) {
                *lrand = sauth_entry->msae_laddr_rand;
            }
            if (rrand) {
                *rrand = sauth_entry->msae_raddr_rand;
            }
            break;
        }
    }
}
void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
    struct mptcp_subf_auth_entry *sauth_entry;

    LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
        if (sauth_entry->msae_laddr_id == laddr_id) {
            if ((sauth_entry->msae_raddr_id != 0) &&
                (sauth_entry->msae_raddr_id != raddr_id)) {
                os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
                    " address ids %d %d \n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
                    raddr_id, sauth_entry->msae_raddr_id);
                return;
            }
            sauth_entry->msae_raddr_id = raddr_id;
            if ((sauth_entry->msae_raddr_rand != 0) &&
                (sauth_entry->msae_raddr_rand != raddr_rand)) {
                os_log_error(mptcp_log_handle, "%s - %lx: "
                    "dup SYN_ACK %d %d \n",
                    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
                    raddr_rand, sauth_entry->msae_raddr_rand);
                return;
            }
            sauth_entry->msae_raddr_rand = raddr_rand;
            return;
        }
    }
}
/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
    SHA1_CTX sha1ctxt;
    const unsigned char *sha1_base;
    int sha1_size;

    sha1_base = (const unsigned char *) key;
    sha1_size = sizeof(mptcp_key_t);
    SHA1Init(&sha1ctxt);
    SHA1Update(&sha1ctxt, sha1_base, sha1_size);
    SHA1Final(sha_digest, &sha1ctxt);
}
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
    SHA1_CTX sha1ctxt;
    mptcp_key_t key_ipad[8] = {0}; /* key XOR'd with inner pad */
    mptcp_key_t key_opad[8] = {0}; /* key XOR'd with outer pad */
    u_int32_t data[2];
    int i;

    bzero(digest, SHA1_RESULTLEN);

    /* Set up the Key for HMAC */
    key_ipad[0] = key1;
    key_ipad[1] = key2;

    key_opad[0] = key1;
    key_opad[1] = key2;

    /* Set up the message for HMAC */
    data[0] = rand1;
    data[1] = rand2;

    /* Key is 512 block length, so no need to compute hash */

    /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

    for (i = 0; i < 8; i++) {
        key_ipad[i] ^= 0x3636363636363636;
        key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
    }

    /* Perform inner SHA1 */
    SHA1Init(&sha1ctxt);
    SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
    SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
    SHA1Final(digest, &sha1ctxt);

    /* Perform outer SHA1 */
    SHA1Init(&sha1ctxt);
    SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
    SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
    SHA1Final(digest, &sha1ctxt);
}
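
/*
 * Illustrative sketch (not part of the original source): how the HMAC above
 * is fed during an MP_JOIN exchange per RFC 6824. The two 64-bit keys form
 * the HMAC key and the two 32-bit nonces form the message; per the RFC, the
 * responder's SYN/ACK carries the leftmost 64 bits of its MAC while the
 * initiator's final ACK carries the full 160-bit MAC. The local variable
 * names below are hypothetical.
 */
#if 0 /* example only */
{
    u_char mac_a[SHA1_RESULTLEN];

    /* MAC-A = HMAC(Key=(Key-A + Key-B), Msg=(R-A + R-B)) */
    mptcp_hmac_sha1(local_key, remote_key, local_rand, remote_rand, mac_a);
}
#endif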
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
    uint32_t lrand, rrand;

    lrand = rrand = 0;
    mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
    mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
        digest);
}
/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
    VERIFY(token_len == sizeof(u_int32_t));
    VERIFY(sha_digest_len == SHA1_RESULTLEN);

    /* Most significant 32 bits of the SHA1 hash */
    bcopy(sha_digest, token, sizeof(u_int32_t));
}
static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
    VERIFY(idsn_len == sizeof(u_int64_t));
    VERIFY(sha_digest_len == SHA1_RESULTLEN);

    /*
     * Least significant 64 bits of the SHA1 hash
     */

    idsn[7] = sha_digest[12];
    idsn[6] = sha_digest[13];
    idsn[5] = sha_digest[14];
    idsn[4] = sha_digest[15];
    idsn[3] = sha_digest[16];
    idsn[2] = sha_digest[17];
    idsn[1] = sha_digest[18];
    idsn[0] = sha_digest[19];
}
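
/*
 * Worked example (illustrative, not part of the original source): token and
 * IDSN both come from one SHA1 digest of the 64-bit key. With digest bytes
 * d[0..19], the token is d[0..3] (the most significant 32 bits) and the IDSN
 * is d[12..19], byte-reversed above so that d[19], the last digest byte,
 * lands in the least significant byte of the 64-bit IDSN. The variable `key`
 * below is a hypothetical placeholder.
 */
#if 0 /* example only */
{
    char d[SHA1_RESULTLEN];
    u_int32_t token;
    u_int64_t idsn;

    mptcp_do_sha1(&key, d);
    mptcp_generate_token(d, SHA1_RESULTLEN, (caddr_t)&token, sizeof(token));
    mptcp_generate_idsn(d, SHA1_RESULTLEN, (caddr_t)&idsn, sizeof(idsn));
}
#endif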
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
    /* There is only Version 0 at this time */
    mp_tp->mpt_version = MPTCP_STD_VERSION_0;

    /* Set DSS checksum flag */
    if (mptcp_dss_csum) {
        mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
    }

    /* Set up receive window */
    mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

    /* Set up gc ticks */
    mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
static void
mptcp_init_local_parms(struct mptses *mpte)
{
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    char key_digest[SHA1_RESULTLEN];

    read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
    mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

    mptcp_generate_token(key_digest, SHA1_RESULTLEN,
        (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
    mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
        (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));

    /* The subflow SYN is also first MPTCP byte */
    mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
    mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

    mptcp_conn_properties(mp_tp);
}
int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
    char remote_digest[SHA1_RESULTLEN];

    /* Only Version 0 is supported for auth purposes */
    if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
        return -1;
    }

    /* Setup local and remote tokens and Initial DSNs */
    mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
    mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
        (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
    mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
        (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
    mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
    mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;

    return 0;
}
static void
mptcp_send_dfin(struct socket *so)
{
    struct tcpcb *tp = NULL;
    struct inpcb *inp = NULL;

    inp = sotoinpcb(so);
    if (!inp) {
        return;
    }

    tp = intotcpcb(inp);
    if (!tp) {
        return;
    }

    if (!(tp->t_mpflags & TMPF_RESET)) {
        tp->t_mpflags |= TMPF_SEND_DFIN;
    }
}
/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
    struct mptcb *mp_tp;

    if (m == NULL) {
        return;
    }

    __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

    while (m) {
        VERIFY(m->m_flags & M_PKTHDR);
        m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
        m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
        m->m_pkthdr.mp_rlen = m_pktlen(m);
        mp_tp->mpt_sndmax += m_pktlen(m);
        m = m->m_next;
    }
}
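
/*
 * Worked example (illustrative, not part of the original source): if
 * mpt_sndmax is 1000 and two packets of 100 and 50 bytes are stamped in
 * turn, the first gets mp_dsn 1000 / mp_rlen 100, the second mp_dsn 1100 /
 * mp_rlen 50, and mpt_sndmax ends up at 1150 - i.e. each mapping starts at
 * the running data-sequence high-water mark and advances it by its length.
 */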
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
    struct mptcb *mp_tp = tptomptp(sototcpcb(so));
    uint64_t data_ack;
    uint64_t dsn;

    if (!m || len == 0) {
        return;
    }

    while (m && len > 0) {
        VERIFY(m->m_flags & M_PKTHDR);
        VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

        data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
        dsn = m->m_pkthdr.mp_dsn;

        len -= m->m_pkthdr.mp_rlen;
        m = m->m_next;
    }

    if (m && len == 0) {
        /*
         * If there is one more mbuf in the chain, it automatically means
         * that up to m->mp_dsn has been ack'ed.
         *
         * This means, we actually correct data_ack back down (compared
         * to what we set inside the loop - dsn + data_len). Because in
         * the loop we are "optimistic" and assume that the full mapping
         * will be acked. If that's not the case and we get out of the
         * loop with m != NULL, it means only up to m->mp_dsn has been
         * acked.
         */
        data_ack = m->m_pkthdr.mp_dsn;
    }

    if (len < 0) {
        /*
         * If len is negative, meaning we acked in the middle of an mbuf,
         * only up to this mbuf's data-sequence number has been acked
         * at the MPTCP-level.
         */
        data_ack = dsn;
    }

    mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    /* We can have data in the subflow's send-queue that is being acked,
     * while the DATA_ACK has already advanced. Thus, we should check whether
     * or not the DATA_ACK is actually new here.
     */
    if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
        MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
        mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
    }
}
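
/*
 * Worked example (illustrative, not part of the original source): suppose
 * the send queue holds two mappings, [dsn 100, len 50] and [dsn 150, len 50],
 * and 60 bytes were acked. The loop walks past the first mbuf (data_ack
 * becomes 150), then enters the second and optimistically sets data_ack to
 * 200, leaving len == -40. Since len < 0, the ack landed mid-mbuf and
 * data_ack is corrected back down to dsn == 150: only the first mapping is
 * known to be fully acked at the MPTCP level.
 */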
void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
    int rewinding = 0;

    /* TFO makes things complicated. */
    if (so->so_flags1 & SOF1_TFO_REWIND) {
        rewinding = 1;
        so->so_flags1 &= ~SOF1_TFO_REWIND;
    }

    while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
        u_int32_t sub_len;
        VERIFY(m->m_flags & M_PKTHDR);
        VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

        sub_len = m->m_pkthdr.mp_rlen;

        if (sub_len < len) {
            m->m_pkthdr.mp_dsn += sub_len;
            if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
                m->m_pkthdr.mp_rseq += sub_len;
            }
            m->m_pkthdr.mp_rlen = 0;
            len -= sub_len;
        } else {
            /* sub_len >= len */
            if (rewinding == 0) {
                m->m_pkthdr.mp_dsn += len;
            }
            if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
                if (rewinding == 0) {
                    m->m_pkthdr.mp_rseq += len;
                }
            }
            mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
                __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
                m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
            m->m_pkthdr.mp_rlen -= len;
            break;
        }
        m = m->m_next;
    }

    if (so->so_flags & SOF_MP_SUBFLOW &&
        !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
        !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
        /*
         * Received an ack without receiving a DATA_ACK.
         * Need to fallback to regular TCP (or destroy this subflow).
         */
        sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
        mptcp_notify_mpfail(so);
    }
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
    u_int64_t dsn64;

    mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
    *dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
    struct mbuf *m = so->so_snd.sb_mb;
    int off_orig = off;

    VERIFY(off >= 0);

    if (m == NULL && (so->so_flags & SOF_DEFUNCT)) {
        *dsn = 0;
        *relseq = 0;
        *data_len = 0;
        *dss_csum = 0;
        return;
    }

    /*
     * In the subflow socket, the DSN sequencing can be discontiguous,
     * but the subflow sequence mapping is contiguous. Use the subflow
     * sequence property to find the right mbuf and corresponding dsn
     * mapping.
     */

    while (m) {
        VERIFY(m->m_flags & M_PKTHDR);
        VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

        if (off >= m->m_len) {
            off -= m->m_len;
            m = m->m_next;
        } else {
            break;
        }
    }

    VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

    *dsn = m->m_pkthdr.mp_dsn;
    *relseq = m->m_pkthdr.mp_rseq;
    *data_len = m->m_pkthdr.mp_rlen;
    *dss_csum = m->m_pkthdr.mp_csum;

    mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
        __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
        MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc()
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data tcp_input calls m_adj() which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed after trim, because data can be in
 * the reassembly queue for a while and the DSN option info in tp will be
 * overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap()
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
    VERIFY(m->m_flags & M_PKTHDR);
    VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

    if (tp->t_mpflags & TMPF_EMBED_DSN) {
        m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
        m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
        m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
        m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
        if (tp->t_rcv_map.mpt_dfin) {
            m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
        }

        m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

        tp->t_mpflags &= ~TMPF_EMBED_DSN;
        tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
    } else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
        if (th->th_flags & TH_FIN) {
            m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
        }
    }
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
    struct tcpcb *tp = NULL;
    struct inpcb *inp = sotoinpcb(so);

    if (inp == NULL) {
        return;
    }

    tp = intotcpcb(inp);
    if (tp == NULL) {
        return;
    }

    if (so->so_flags & SOF_MP_TRYFAILOVER) {
        return;
    }

    so->so_flags |= SOF_MP_TRYFAILOVER;
    soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
    struct mbuf *m = so->so_snd.sb_mb;
    u_int64_t dsn;
    int off = 0;
    u_int32_t datalen;

    if (m == NULL) {
        return -1;
    }

    while (m != NULL) {
        VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
        VERIFY(m->m_flags & M_PKTHDR);
        dsn = m->m_pkthdr.mp_dsn;
        datalen = m->m_pkthdr.mp_rlen;
        if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
            (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
            off = dsn_fail - dsn;
            *tcp_seq = m->m_pkthdr.mp_rseq + off;
            mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
                dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
            return 0;
        }

        m = m->m_next;
    }

    /*
     * If there was no mbuf data and a fallback to TCP occurred, there's
     * not much else to do.
     */
    os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
    return -1;
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
    struct tcpcb *tp = sototcpcb(so);
    struct mptsub *mpts = tp->t_mpsub;
    uint64_t mdss_dsn;
    uint32_t mdss_subflow_seq;
    int mdss_subflow_off;
    uint16_t mdss_data_len;
    uint16_t dss_csum;

    if (so->so_snd.sb_mb == NULL && (so->so_flags & SOF_DEFUNCT)) {
        return 0;
    }

    mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
        &mdss_data_len, &dss_csum);

    /*
     * We need to compute how much of the mapping still remains.
     * So, we compute the offset in the send-buffer of the dss-sub-seq.
     */
    mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

    /*
     * When TFO is used, we are sending the mpts->mpts_iss although the relative
     * seq has been set to 1 (while it should be 0).
     */
    if (tp->t_mpflags & TMPF_TFO_REQUEST) {
        mdss_subflow_off--;
    }

    VERIFY(off >= mdss_subflow_off);

    return mdss_data_len - (off - mdss_subflow_off);
}
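
/*
 * Worked example (illustrative, not part of the original source): if the
 * mapping found at offset `off` spans mdss_data_len == 1000 bytes and starts
 * mdss_subflow_off == 200 bytes into the unacked send buffer, then for
 * off == 500 the bytes remaining in that mapping are 1000 - (500 - 200) ==
 * 700; at most 700 contiguous bytes may be sent before a new DSS mapping
 * must be emitted.
 */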
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
    struct mptsub *mpts;
    uint32_t maxseg = 0;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            TCPS_HAVERCVDFIN2(tp->t_state)) {
            continue;
        }

        if (tp->t_maxseg > maxseg) {
            maxseg = tp->t_maxseg;
        }
    }

    return maxseg;
}
static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
    struct mptsub *mpts;
    uint8_t rcvscale = UINT8_MAX;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            TCPS_HAVERCVDFIN2(tp->t_state)) {
            continue;
        }

        if (tp->rcv_scale < rcvscale) {
            rcvscale = tp->rcv_scale;
        }
    }

    return rcvscale;
}
/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
    uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

    /* newsize should not exceed max */
    newsize = min(newsize, tcp_autorcvbuf_max);

    /* The receive window scale negotiated at the
     * beginning of the connection will also set a
     * limit on the socket buffer size
     */
    newsize = min(newsize, TCP_MAXWIN << rcvscale);

    /* Set new socket buffer size */
    if (newsize > sbrcv->sb_hiwat &&
        (sbreserve(sbrcv, newsize) == 1)) {
        sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
            (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

        /* Again check the limit set by the advertised
         * window scale
         */
        sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
            TCP_MAXWIN << rcvscale);
    }
}
void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
    struct mptses *mpte = mp_tp->mpt_mpte;
    struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
    struct sockbuf *sbrcv = &mp_so->so_rcv;
    uint32_t hiwat_sum = 0;
    uint32_t ideal_sum = 0;
    struct mptsub *mpts;

    /*
     * Do not grow the receive socket buffer if
     * - auto resizing is disabled, globally or on this socket
     * - the high water mark already reached the maximum
     * - the stream is in background and receive side is being
     *   throttled
     * - if there are segments in reassembly queue indicating loss,
     *   do not need to increase recv window during recovery as more
     *   data is not going to be sent. A duplicate ack sent during
     *   recovery should not change the receive window
     */
    if (tcp_do_autorcvbuf == 0 ||
        (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
        tcp_cansbgrow(sbrcv) == 0 ||
        sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
        (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
        !LIST_EMPTY(&mp_tp->mpt_segq)) {
        /* Can not resize the socket buffer, just return */
        return;
    }

    /*
     * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
     *
     * But, for this we first need accurate receiver-RTT estimations, which
     * we currently don't have.
     *
     * Let's use a dummy algorithm for now, just taking the sum of all
     * subflow's receive-buffers. It's too low, but that's all we can get
     * for now.
     */

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
        ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
    }

    mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
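
/*
 * Example (illustrative, not part of the original source): with two
 * established subflows whose receive buffers sit at hiwat 256 KB and
 * idealsize 128 KB each, the session requests newsize == 512 KB and
 * idealsize == 256 KB; mptcp_sbrcv_reserve() then clamps both against
 * tcp_autorcvbuf_max and TCP_MAXWIN << rcvscale before resizing.
 */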
/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
    struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
    u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
    u_int32_t rcvbuf = sb->sb_hiwat;

    if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
        return;
    }

    if (tcp_do_autorcvbuf == 1 &&
        tcp_cansbgrow(sb) &&
        /* Diff to tcp_sbrcv_grow_rwin */
        (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
        (rcvbuf - sb->sb_cc) < rcvbufinc &&
        rcvbuf < tcp_autorcvbuf_max &&
        (sb->sb_idealsize > 0 &&
        sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
        sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
    }
}
/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
    struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
    uint32_t rcvbuf;
    int32_t space;
    int32_t pending = 0;

    socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

    mptcp_sbrcv_grow_rwin(mp_tp, sb);

    /* hiwat might have changed */
    rcvbuf = sb->sb_hiwat;

    space = ((int32_t) imin((rcvbuf - sb->sb_cc),
        (sb->sb_mbmax - sb->sb_mbcnt)));
    if (space < 0) {
        space = 0;
    }

#if CONTENT_FILTER
    /* Compensate for data being processed by content filters */
    pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
    if (pending > space) {
        space = 0;
    } else {
        space -= pending;
    }

    return space;
}
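
/*
 * Worked example (illustrative, not part of the original source): with
 * sb_hiwat == 64 KB, sb_cc == 16 KB and ample mbuf headroom, space starts at
 * 48 KB. If a content filter still holds 4 KB (pending), the advertised
 * space drops to 44 KB; a pending amount larger than space clamps it to
 * zero instead of going negative.
 */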
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
    struct tcpcb *tp = NULL;

    if (so == NULL) {
        return;
    }

    tp = intotcpcb(sotoinpcb(so));

    if (tp == NULL) {
        return;
    }

    DTRACE_MPTCP4(multipath__ready, struct socket *, so,
        struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
        struct tcpcb *, tp);

    if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
        return;
    }

    if (tp->t_mpflags & TMPF_MPTCP_READY) {
        return;
    }

    tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
    tp->t_mpflags |= TMPF_MPTCP_READY;

    soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
void
mptcp_notify_mpfail(struct socket *so)
{
    struct tcpcb *tp = NULL;

    if (so == NULL) {
        return;
    }

    tp = intotcpcb(sotoinpcb(so));

    if (tp == NULL) {
        return;
    }

    DTRACE_MPTCP4(multipath__failed, struct socket *, so,
        struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
        struct tcpcb *, tp);

    if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
        return;
    }

    tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
    tp->t_mpflags |= TMPF_TCP_FALLBACK;

    soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
    boolean_t ret = 1;

    socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

    if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
        ret = 0;
    }
    return ret;
}
/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
    int mss_lower = 0;
    struct mptcb *mp_tp = tptomptp(tp);

#define MPTCP_COMPUTE_LEN {                             \
    mss_lower = sizeof (struct mptcp_dss_ack_opt);      \
    if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)             \
            mss_lower += 2;                             \
    else                                                \
            /* adjust to 32-bit boundary + EOL */       \
            mss_lower += 2;                             \
}
    if (mp_tp == NULL) {
        return 0;
    }

    socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));

    /*
     * For the first subflow and subsequent subflows, adjust mss for
     * most common MPTCP option size, for case where tcp_mss is called
     * during option processing and MTU discovery.
     */
    if (!mtudisc) {
        if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
            !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
            MPTCP_COMPUTE_LEN;
        }

        if (tp->t_mpflags & TMPF_PREESTABLISHED &&
            tp->t_mpflags & TMPF_SENT_JOIN) {
            MPTCP_COMPUTE_LEN;
        }
    } else {
        if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
            MPTCP_COMPUTE_LEN;
        }
    }

    return mss_lower;
}
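
/*
 * Usage sketch (illustrative, not part of the original source): callers
 * subtract the value returned above from the subflow's maximum segment size
 * so the most common MPTCP option (a DSS+DATA_ACK block, plus two bytes for
 * the DSS checksum or for 32-bit alignment and EOL) still fits in every
 * segment:
 */
#if 0 /* example only */
    mss -= mptcp_adj_mss(tp, FALSE);
#endif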
/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
    if (so->last_pid != mp_so->last_pid ||
        so->last_upid != mp_so->last_upid) {
        so->last_upid = mp_so->last_upid;
        so->last_pid = mp_so->last_pid;
        uuid_copy(so->last_uuid, mp_so->last_uuid);
    }
    so_update_policy(so);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
    struct inpcb *inp;

    tcp_getconninfo(so, &flow->flow_ci);
    inp = sotoinpcb(so);
#if INET6
    if ((inp->inp_vflag & INP_IPV6) != 0) {
        flow->flow_src.ss_family = AF_INET6;
        flow->flow_dst.ss_family = AF_INET6;
        flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
        flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
        SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
        SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
        SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
        SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
    } else
#endif /* INET6 */
    if ((inp->inp_vflag & INP_IPV4) != 0) {
        flow->flow_src.ss_family = AF_INET;
        flow->flow_dst.ss_family = AF_INET;
        flow->flow_src.ss_len = sizeof(struct sockaddr_in);
        flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
        SIN(&flow->flow_src)->sin_port = inp->inp_lport;
        SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
        SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
        SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
    }
    flow->flow_len = sizeof(*flow);
    flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
    flow->flow_flags = mpts->mpts_flags;
    flow->flow_cid = mpts->mpts_connid;
    flow->flow_relseq = mpts->mpts_rel_seq;
    flow->flow_soerror = mpts->mpts_socket->so_error;
    flow->flow_probecnt = mpts->mpts_probecnt;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
    int error = 0, f;
    size_t len;
    struct mppcb *mpp;
    struct mptses *mpte;
    struct mptcb *mp_tp;
    struct mptsub *mpts;
    struct socket *so;
    conninfo_mptcp_t mptcpci;
    mptcp_flow_t *flows = NULL;

    if (req->newptr != USER_ADDR_NULL) {
        return EPERM;
    }

    lck_mtx_lock(&mtcbinfo.mppi_lock);
    if (req->oldptr == USER_ADDR_NULL) {
        size_t n = mtcbinfo.mppi_count;
        lck_mtx_unlock(&mtcbinfo.mppi_lock);
        req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
            4 * (n + n / 8) * sizeof(mptcp_flow_t);
        return 0;
    }
    TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
        flows = NULL;
        socket_lock(mpp->mpp_socket, 1);
        VERIFY(mpp->mpp_flags & MPP_ATTACHED);
        mpte = mptompte(mpp);

        socket_lock_assert_owned(mptetoso(mpte));
        mp_tp = mpte->mpte_mptcb;

        bzero(&mptcpci, sizeof(mptcpci));
        mptcpci.mptcpci_state = mp_tp->mpt_state;
        mptcpci.mptcpci_flags = mp_tp->mpt_flags;
        mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
        mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
        mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
        mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
        mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
        mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
        mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
        mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
        mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
        mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
        mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
        mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

        mptcpci.mptcpci_nflows = mpte->mpte_numflows;
        mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
        mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
        mptcpci.mptcpci_flow_offset =
            offsetof(conninfo_mptcp_t, mptcpci_flows);

        len = sizeof(*flows) * mpte->mpte_numflows;
        if (mpte->mpte_numflows != 0) {
            flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
            if (flows == NULL) {
                socket_unlock(mpp->mpp_socket, 1);
                break;
            }
            mptcpci.mptcpci_len = sizeof(mptcpci) +
                sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
            error = SYSCTL_OUT(req, &mptcpci,
                sizeof(mptcpci) - sizeof(mptcp_flow_t));
        } else {
            mptcpci.mptcpci_len = sizeof(mptcpci);
            error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
        }
        if (error) {
            socket_unlock(mpp->mpp_socket, 1);
            FREE(flows, M_TEMP);
            break;
        }
        f = 0;
        TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
            so = mpts->mpts_socket;
            fill_mptcp_subflow(so, &flows[f], mpts);
            f++;
        }
        socket_unlock(mpp->mpp_socket, 1);
        if (flows) {
            error = SYSCTL_OUT(req, flows, len);
            FREE(flows, M_TEMP);
            if (error) {
                break;
            }
        }
    }
    lck_mtx_unlock(&mtcbinfo.mppi_lock);

    return error;
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
    struct mptcb *mp_tp = NULL;
    int error = 0;

    if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
        mp_tp = mpte->mpte_mptcb;
    }

    if (mp_tp) {
        mp_tp->mpt_notsent_lowat = optval;
    } else {
        error = EINVAL;
    }

    return error;
}
u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
    struct mptcb *mp_tp = NULL;

    if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
        mp_tp = mpte->mpte_mptcb;
    }

    if (mp_tp) {
        return mp_tp->mpt_notsent_lowat;
    } else {
        return 0;
    }
}
int
mptcp_notsent_lowat_check(struct socket *so)
{
    struct mptses *mpte;
    struct mppcb *mpp;
    struct mptcb *mp_tp;
    struct mptsub *mpts;

    int notsent = 0;

    mpp = mpsotomppcb(so);
    if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
        return 0;
    }

    mpte = mptompte(mpp);
    socket_lock_assert_owned(mptetoso(mpte));
    mp_tp = mpte->mpte_mptcb;

    notsent = so->so_snd.sb_cc;

    if ((notsent == 0) ||
        ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
        mp_tp->mpt_notsent_lowat)) {
        mptcplog((LOG_DEBUG, "MPTCP Sender: "
            "lowat %d notsent %d actual %d \n",
            mp_tp->mpt_notsent_lowat, notsent,
            notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
        return 1;
    }

    /* When Nagle's algorithm is not disabled, it is better
     * to wakeup the client even before there is at least one
     * maxseg of data to write.
     */
    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        int retval = 0;
        if (mpts->mpts_flags & MPTSF_ACTIVE) {
            struct socket *subf_so = mpts->mpts_socket;
            struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

            notsent = so->so_snd.sb_cc -
                (tp->snd_nxt - tp->snd_una);

            if ((tp->t_flags & TF_NODELAY) == 0 &&
                notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
                retval = 1;
            }
            mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
                " nodelay false \n",
                mp_tp->mpt_notsent_lowat, notsent),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
            return retval;
        }
    }
    return 0;
}
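
/*
 * Usage sketch (illustrative, not part of the original source; userspace,
 * not kernel code): the lowat threshold checked above is the MPTCP-level
 * counterpart of TCP_NOTSENT_LOWAT, which applications set via
 * setsockopt(2) on the multipath socket:
 */
#if 0 /* example only */
    int lowat = 16 * 1024; /* wake the writer when < 16 KB is unsent */
    setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat));
#endif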
static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

    if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
        os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
    }

    mptcp_kern_skt_unit = sac->sc_unit;

    return 0;
}
static void
mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
{
    struct mppcb *mpp;

    /* Iterate over all MPTCP connections */

    lck_mtx_lock(&mtcbinfo.mppi_lock);

    TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
        struct socket *mp_so = mpp->mpp_socket;
        struct mptses *mpte = mpp->mpp_pcbe;

        socket_lock(mp_so, 1);

        if (mp_so->so_flags & SOF_DELEGATED &&
            uuid_compare(uuid, mp_so->e_uuid)) {
            goto next;
        } else if (!(mp_so->so_flags & SOF_DELEGATED) &&
            uuid_compare(uuid, mp_so->last_uuid)) {
            goto next;
        }

        os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);

        mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

        if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
            mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
        }

        mptcp_check_subflows_and_add(mpte);
        mptcp_remove_subflows(mpte);

        mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);

next:
        socket_unlock(mp_so, 1);
    }

    lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
static void
mptcp_wifi_status_changed(void)
{
    struct mppcb *mpp;

    /* Iterate over all MPTCP connections */

    lck_mtx_lock(&mtcbinfo.mppi_lock);

    TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
        struct socket *mp_so = mpp->mpp_socket;
        struct mptses *mpte = mpp->mpp_pcbe;

        socket_lock(mp_so, 1);

        /* Only handover- and urgency-mode are purely driven by Symptom's Wi-Fi status */
        if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
            mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
            goto next;
        }

        mptcp_check_subflows_and_add(mpte);
        mptcp_check_subflows_and_remove(mpte);

next:
        socket_unlock(mp_so, 1);
    }

    lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
void
mptcp_ask_symptoms(struct mptses *mpte)
{
    struct mptcp_symptoms_ask_uuid ask;
    struct socket *mp_so;
    struct proc *p;
    int pid, prio, err;

    if (mptcp_kern_skt_unit == 0) {
        os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
        return;
    }

    mp_so = mptetoso(mpte);

    if (mp_so->so_flags & SOF_DELEGATED) {
        pid = mp_so->e_pid;
    } else {
        pid = mp_so->last_pid;
    }

    p = proc_find(pid);
    if (p == PROC_NULL) {
        os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
        return;
    }

    ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

    if (mp_so->so_flags & SOF_DELEGATED) {
        uuid_copy(ask.uuid, mp_so->e_uuid);
    } else {
        uuid_copy(ask.uuid, mp_so->last_uuid);
    }

    prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

    if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
        prio == TASK_DARWINBG_APPLICATION) {
        ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
    } else if (prio == TASK_FOREGROUND_APPLICATION) {
        ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
    } else {
        ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
    }

    err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
        &ask, sizeof(ask), CTL_DATA_EOR);

    os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
        __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);

    proc_rele(p);
}
static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

    OSDecrementAtomic(&mptcp_kern_skt_inuse);

    return 0;
}
static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
    symptoms_advisory_t *sa = NULL;

    if (kcunit != mptcp_kern_skt_unit) {
        os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
            __func__, kcunit, mptcp_kern_skt_unit);
    }

    if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
        mbuf_freem(m);
        return EINVAL;
    }

    if (mbuf_len(m) < sizeof(*sa)) {
        os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
            __func__, mbuf_len(m), sizeof(*sa));
        mbuf_freem(m);
        return EINVAL;
    }

    sa = mbuf_data(m);

    if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
        os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new, old: %d,%d\n", __func__,
            sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
            sa->sa_cell_status, mptcp_advisory.sa_cell_status);

        if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
            mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
            mptcp_wifi_status_changed();
        }
    } else {
        struct mptcp_symptoms_answer answer;
        errno_t err;

        /* We temporarily allow different sizes for ease of submission */
        if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
            mbuf_len(m) != sizeof(answer)) {
            os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
                __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
                sizeof(answer));
            mbuf_freem(m);
            return EINVAL;
        }

        memset(&answer, 0, sizeof(answer));

        err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
        if (err) {
            os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
            mbuf_freem(m);
            return err;
        }

        mptcp_allow_uuid(answer.uuid, answer.rssi);
    }

    mbuf_freem(m);
    return 0;
}
void
mptcp_control_register(void)
{
    /* Set up the advisory control socket */
    struct kern_ctl_reg mptcp_kern_ctl;

    bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
    strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
        sizeof(mptcp_kern_ctl.ctl_name));
    mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
    mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
    mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
    mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

    (void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}
/*
 * Three return-values:
 * 1  : WiFi-state is bad
 * 0  : WiFi-state is good
 * -1 : WiFi-state is unknown
 */
int
mptcp_is_wifi_unusable_for_session(struct mptses *mpte)
{
    if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
        if (mptcp_advisory.sa_wifi_status) {
            return symptoms_is_wifi_lossy() ? 1 : 0;
        }

        /*
         * If it's a first-party app and we don't have any info
         * about the Wi-Fi state, let's be pessimistic.
         */
        return -1;
    }

    if (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) {
        return 1;
    }

    /*
     * If we are target-based (meaning, we allow to be more lax on
     * the "unusable" target. We only *know* about the state once
     * we got the allowance from Symptoms (MPTE_ACCESS_GRANTED).
     *
     * If RSSI is not bad enough, MPTE_CELL_PROHIBITED will then
     * be set.
     *
     * In any other case (while in target-mode), consider WiFi bad
     * and we are going to ask for allowance from Symptoms anyway.
     */
    if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
        if (mpte->mpte_flags & MPTE_ACCESS_GRANTED &&
            mpte->mpte_flags & MPTE_CELL_PROHIBITED) {
            return 0;
        }

        return 1;
    }

    return 0;
}
boolean_t
symptoms_is_wifi_lossy(void)
{
    return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true;
}
/* If TFO data is successfully acked, it must be dropped from the mptcp so */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *mp_so = mptetoso(mpte);
    struct socket *so = mpts->mpts_socket;
    struct tcpcb *tp = intotcpcb(sotoinpcb(so));
    struct mptcb *mp_tp = mpte->mpte_mptcb;

    /* If data was sent with SYN, rewind state */
    if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
        u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
        unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

        VERIFY(mp_droplen <= (UINT_MAX));
        VERIFY(mp_droplen >= tcp_droplen);

        mpts->mpts_flags &= ~MPTSF_TFO_REQD;
        mpts->mpts_iss += tcp_droplen;
        tp->t_mpflags &= ~TMPF_TFO_REQUEST;

        if (mp_droplen > tcp_droplen) {
            /* handle partial TCP ack */
            mp_so->so_flags1 |= SOF1_TFO_REWIND;
            mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
            mp_droplen = tcp_droplen;
        } else {
            /* all data on SYN was acked */
            mpts->mpts_rel_seq = 1;
            mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
        }
        mp_tp->mpt_sndmax -= tcp_droplen;

        if (mp_droplen != 0) {
            VERIFY(mp_so->so_snd.sb_mb != NULL);
            sbdrop(&mp_so->so_snd, (int)mp_droplen);
        }
    }
}
int
mptcp_freeq(struct mptcb *mp_tp)
{
    struct tseg_qent *q;
    int rv = 0;

    while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
        LIST_REMOVE(q, tqe_q);
        m_freem(q->tqe_m);
        zfree(tcp_reass_zone, q);
        rv = 1;
    }
    mp_tp->mpt_reassqlen = 0;
    return rv;
}
static int
mptcp_post_event(u_int32_t event_code, int value)
{
    struct kev_mptcp_data event_data;
    struct kev_msg ev_msg;

    memset(&ev_msg, 0, sizeof(ev_msg));

    ev_msg.vendor_code = KEV_VENDOR_APPLE;
    ev_msg.kev_class = KEV_NETWORK_CLASS;
    ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
    ev_msg.event_code = event_code;

    event_data.value = value;

    ev_msg.dv[0].data_ptr = &event_data;
    ev_msg.dv[0].data_length = sizeof(event_data);

    return kev_post_msg(&ev_msg);
}
void
mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
{
    struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
    int error;

    /* First-party apps (Siri) don't flip the cellicon */
    if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
        return;
    }

    /* Subflow is disappearing - don't set it on this one */
    if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
        return;
    }

    /* Fallen back connections are not triggering the cellicon */
    if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
        return;
    }

    /* Remember the last time we set the cellicon. Needed for debouncing */
    mpte->mpte_last_cellicon_set = tcp_now;

    tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
    tcp_sched_timers(tp);

    if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
        mpte->mpte_cellicon_increments != 0) {
        if (mptcp_cellicon_refcount == 0) {
            os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n",
                __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

            /* Continue, so that the icon gets set... */
        } else {
            /*
             * In this case, the cellicon is already set. No need to bump it
             * even higher
             */

            return;
        }
    }

    /* When tearing down this subflow, we need to decrement the
     * reference counter
     */
    mpts->mpts_flags |= MPTSF_CELLICON_SET;

    /* This counter, so that when a session gets destroyed we decrement
     * the reference counter by whatever is left
     */
    mpte->mpte_cellicon_increments++;

    if (OSIncrementAtomic(&mptcp_cellicon_refcount)) {
        /* If cellicon is already set, get out of here! */
        return;
    }

    error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

    if (error) {
        os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
    } else {
        os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
    }
}
void
mptcp_clear_cellicon(void)
{
    int error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

    if (error) {
        os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n",
            __func__, error);
    } else {
        os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n",
            __func__);
    }
}
6633 __mptcp_unset_cellicon(long val
)
6635 if (OSAddAtomic(-val
, &mptcp_cellicon_refcount
) != 1) {
6639 mptcp_clear_cellicon();
void
mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
{
    /* First-party apps (Siri) don't flip the cellicon */
    if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
        return;
    }

    if (mpte->mpte_cellicon_increments == 0) {
        /* This flow never used cell - get out of here! */
        return;
    }

    if (mptcp_cellicon_refcount == 0) {
        os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);

        return;
    }

    if (mpts) {
        if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
            return;
        }

        mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
    }

    if (mpte->mpte_cellicon_increments < val) {
        os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
        val = mpte->mpte_cellicon_increments;
    }

    mpte->mpte_cellicon_increments -= val;

    if (__mptcp_unset_cellicon(val) == false) {
        return;
    }

    /* All flows are gone - our counter should be at zero too! */
    if (mpte->mpte_cellicon_increments != 0) {
        os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
            __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
    }
}
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
    struct mptsub *mpts;
    struct inpcb *inp;
    struct socket *so;

    inp = tp->t_inpcb;
    if (inp == NULL) {
        return;
    }

    so = inp->inp_socket;
    if (so == NULL) {
        return;
    }

    if (!(so->so_flags & SOF_MP_SUBFLOW)) {
        return;
    }

    mpts = tp->t_mpsub;

    mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
    so->so_flags &= ~SOF_MP_TRYFAILOVER;
}
void
mptcp_reset_keepalive(struct tcpcb *tp)
{
    struct mptsub *mpts = tp->t_mpsub;

    mpts->mpts_flags &= ~MPTSF_READ_STALL;
}