2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <kern/locks.h>
30 #include <kern/policy_internal.h>
31 #include <kern/zalloc.h>
35 #include <sys/domain.h>
36 #include <sys/kdebug.h>
37 #include <sys/kern_control.h>
38 #include <sys/kernel.h>
40 #include <sys/mcache.h>
41 #include <sys/param.h>
43 #include <sys/protosw.h>
44 #include <sys/resourcevar.h>
45 #include <sys/socket.h>
46 #include <sys/socketvar.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/systm.h>
51 #include <net/content_filter.h>
53 #include <net/if_var.h>
54 #include <netinet/in.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/in_var.h>
57 #include <netinet/tcp.h>
58 #include <netinet/tcp_fsm.h>
59 #include <netinet/tcp_seq.h>
60 #include <netinet/tcp_var.h>
61 #include <netinet/mptcp_var.h>
62 #include <netinet/mptcp.h>
63 #include <netinet/mptcp_opt.h>
64 #include <netinet/mptcp_seq.h>
65 #include <netinet/mptcp_timer.h>
66 #include <libkern/crypto/sha1.h>
68 #include <netinet6/in6_pcb.h>
69 #include <netinet6/ip6protosw.h>
71 #include <dev/random/randomdev.h>
74 * Notes on MPTCP implementation.
76 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
77 * communication domain. The structure mtcbinfo describes the MPTCP instance
78 * of a Multipath protocol in that domain. It is used to keep track of all
79 * MPTCP PCB instances in the system, and is protected by the global lock
82 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
83 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
84 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
85 * allocated from the same memory block, and each structure has a pointer
86 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
87 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
88 * PCB (mppcb) as well as the MPTCP Session (mptses).
90 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
92 * A functioning MPTCP Session consists of one or more subflow sockets. Each
93 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
94 * represented by the mptsub structure. Because each subflow requires access
95 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
96 * subflow. This gets decremented prior to the subflow's destruction.
98 * To handle events (read, write, control) from the subflows, we do direct
99 * upcalls into the specific function.
101 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
102 * lock. Incoming data on a subflow also ends up taking this single lock. To
103 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
104 * of the MPTCP-socket.
106 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
107 * work is done by the MPTCP garbage collector which is invoked on demand by
108 * the PF_MULTIPATH garbage collector. This process will take place once all
109 * of the subflows have been destroyed.
112 static void mptcp_attach_to_subf(struct socket
*, struct mptcb
*, uint8_t);
113 static void mptcp_detach_mptcb_from_subf(struct mptcb
*, struct socket
*);
115 static uint32_t mptcp_gc(struct mppcbinfo
*);
116 static int mptcp_subflow_soreceive(struct socket
*, struct sockaddr
**,
117 struct uio
*, struct mbuf
**, struct mbuf
**, int *);
118 static int mptcp_subflow_sosend(struct socket
*, struct sockaddr
*,
119 struct uio
*, struct mbuf
*, struct mbuf
*, int);
120 static void mptcp_subflow_rupcall(struct socket
*, void *, int);
121 static void mptcp_subflow_input(struct mptses
*, struct mptsub
*);
122 static void mptcp_subflow_wupcall(struct socket
*, void *, int);
123 static void mptcp_subflow_eupcall1(struct socket
*, void *, uint32_t);
124 static void mptcp_update_last_owner(struct socket
*so
, struct socket
*mp_so
);
125 static void mptcp_drop_tfo_data(struct mptses
*, struct mptsub
*);
127 static void mptcp_subflow_abort(struct mptsub
*, int);
129 static void mptcp_send_dfin(struct socket
*so
);
132 * Possible return values for subflow event handlers. Note that success
133 * values must be greater or equal than MPTS_EVRET_OK. Values less than that
134 * indicate errors or actions which require immediate attention; they will
135 * prevent the rest of the handlers from processing their respective events
136 * until the next round of events processing.
139 MPTS_EVRET_DELETE
= 1, /* delete this subflow */
140 MPTS_EVRET_OK
= 2, /* OK */
141 MPTS_EVRET_CONNECT_PENDING
= 3, /* resume pended connects */
142 MPTS_EVRET_DISCONNECT_FALLBACK
= 4, /* abort all but preferred */
145 static ev_ret_t
mptcp_subflow_events(struct mptses
*, struct mptsub
*, uint64_t *);
146 static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
147 static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
148 static ev_ret_t
mptcp_subflow_failover_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
149 static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
150 static ev_ret_t
mptcp_subflow_connected_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
151 static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
152 static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
153 static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
154 static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
155 static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
156 static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses
*, struct mptsub
*, uint64_t *, uint64_t);
158 static const char *mptcp_evret2str(ev_ret_t
);
160 static void mptcp_do_sha1(mptcp_key_t
*, char *);
161 static void mptcp_init_local_parms(struct mptses
*);
163 static unsigned int mptsub_zone_size
; /* size of mptsub */
164 static struct zone
*mptsub_zone
; /* zone for mptsub */
166 static unsigned int mptopt_zone_size
; /* size of mptopt */
167 static struct zone
*mptopt_zone
; /* zone for mptopt */
169 static unsigned int mpt_subauth_entry_size
; /* size of subf auth entry */
170 static struct zone
*mpt_subauth_zone
; /* zone of subf auth entry */
172 struct mppcbinfo mtcbinfo
;
174 #define MPTCP_SUBFLOW_WRITELEN (8 * 1024) /* bytes to write each time */
175 #define MPTCP_SUBFLOW_READLEN (8 * 1024) /* bytes to read each time */
177 SYSCTL_DECL(_net_inet
);
179 SYSCTL_NODE(_net_inet
, OID_AUTO
, mptcp
, CTLFLAG_RW
|CTLFLAG_LOCKED
, 0, "MPTCP");
181 uint32_t mptcp_dbg_area
= 31; /* more noise if greater than 1 */
182 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, dbg_area
, CTLFLAG_RW
|CTLFLAG_LOCKED
,
183 &mptcp_dbg_area
, 0, "MPTCP debug area");
185 uint32_t mptcp_dbg_level
= 1;
186 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, dbg_level
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
187 &mptcp_dbg_level
, 0, "MPTCP debug level");
189 SYSCTL_UINT(_net_inet_mptcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
|CTLFLAG_LOCKED
,
190 &mtcbinfo
.mppi_count
, 0, "Number of active PCBs");
192 static struct protosw mptcp_subflow_protosw
;
193 static struct pr_usrreqs mptcp_subflow_usrreqs
;
195 static struct ip6protosw mptcp_subflow_protosw6
;
196 static struct pr_usrreqs mptcp_subflow_usrreqs6
;
199 static uint8_t mptcp_create_subflows_scheduled
;
201 typedef struct mptcp_subflow_event_entry
{
202 uint64_t sofilt_hint_mask
;
203 ev_ret_t (*sofilt_hint_ev_hdlr
)(
206 uint64_t *p_mpsofilt_hint
,
210 static uint8_t mptcp_cellicon_is_set
;
211 static uint32_t mptcp_last_cellicon_set
;
212 #define MPTCP_CELLICON_TOGGLE_RATE (5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
215 * XXX The order of the event handlers below is really
216 * really important. Think twice before changing it.
218 static mptsub_ev_entry_t mpsub_ev_entry_tbl
[] = {
220 .sofilt_hint_mask
= SO_FILT_HINT_MPCANTRCVMORE
,
221 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpcantrcvmore_ev
,
224 .sofilt_hint_mask
= SO_FILT_HINT_MPFAILOVER
,
225 .sofilt_hint_ev_hdlr
= mptcp_subflow_failover_ev
,
228 .sofilt_hint_mask
= SO_FILT_HINT_CONNRESET
,
229 .sofilt_hint_ev_hdlr
= mptcp_subflow_propagate_ev
,
232 .sofilt_hint_mask
= SO_FILT_HINT_MUSTRST
,
233 .sofilt_hint_ev_hdlr
= mptcp_subflow_mustrst_ev
,
236 .sofilt_hint_mask
= SO_FILT_HINT_CANTRCVMORE
,
237 .sofilt_hint_ev_hdlr
= mptcp_subflow_propagate_ev
,
240 .sofilt_hint_mask
= SO_FILT_HINT_TIMEOUT
,
241 .sofilt_hint_ev_hdlr
= mptcp_subflow_propagate_ev
,
244 .sofilt_hint_mask
= SO_FILT_HINT_NOSRCADDR
,
245 .sofilt_hint_ev_hdlr
= mptcp_subflow_nosrcaddr_ev
,
248 .sofilt_hint_mask
= SO_FILT_HINT_IFDENIED
,
249 .sofilt_hint_ev_hdlr
= mptcp_subflow_ifdenied_ev
,
252 .sofilt_hint_mask
= SO_FILT_HINT_CONNECTED
,
253 .sofilt_hint_ev_hdlr
= mptcp_subflow_connected_ev
,
256 .sofilt_hint_mask
= SO_FILT_HINT_MPSTATUS
,
257 .sofilt_hint_ev_hdlr
= mptcp_subflow_mpstatus_ev
,
260 .sofilt_hint_mask
= SO_FILT_HINT_DISCONNECTED
,
261 .sofilt_hint_ev_hdlr
= mptcp_subflow_disconnected_ev
,
264 .sofilt_hint_mask
= SO_FILT_HINT_ADAPTIVE_RTIMO
,
265 .sofilt_hint_ev_hdlr
= mptcp_subflow_adaptive_rtimo_ev
,
268 .sofilt_hint_mask
= SO_FILT_HINT_ADAPTIVE_WTIMO
,
269 .sofilt_hint_ev_hdlr
= mptcp_subflow_adaptive_wtimo_ev
,
274 * Protocol pr_init callback.
277 mptcp_init(struct protosw
*pp
, struct domain
*dp
)
280 static int mptcp_initialized
= 0;
283 struct ip6protosw
*prp6
;
286 VERIFY((pp
->pr_flags
& (PR_INITIALIZED
|PR_ATTACHED
)) == PR_ATTACHED
);
288 /* do this only once */
289 if (mptcp_initialized
)
291 mptcp_initialized
= 1;
294 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
295 * we must be able to find IPPROTO_TCP entries for both.
297 prp
= pffindproto_locked(PF_INET
, IPPROTO_TCP
, SOCK_STREAM
);
299 bcopy(prp
, &mptcp_subflow_protosw
, sizeof (*prp
));
300 bcopy(prp
->pr_usrreqs
, &mptcp_subflow_usrreqs
,
301 sizeof (mptcp_subflow_usrreqs
));
302 mptcp_subflow_protosw
.pr_entry
.tqe_next
= NULL
;
303 mptcp_subflow_protosw
.pr_entry
.tqe_prev
= NULL
;
304 mptcp_subflow_protosw
.pr_usrreqs
= &mptcp_subflow_usrreqs
;
305 mptcp_subflow_usrreqs
.pru_soreceive
= mptcp_subflow_soreceive
;
306 mptcp_subflow_usrreqs
.pru_sosend
= mptcp_subflow_sosend
;
307 mptcp_subflow_usrreqs
.pru_rcvoob
= pru_rcvoob_notsupp
;
309 * Socket filters shouldn't attach/detach to/from this protosw
310 * since pr_protosw is to be used instead, which points to the
311 * real protocol; if they do, it is a bug and we should panic.
313 mptcp_subflow_protosw
.pr_filter_head
.tqh_first
=
314 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
315 mptcp_subflow_protosw
.pr_filter_head
.tqh_last
=
316 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
319 prp6
= (struct ip6protosw
*)pffindproto_locked(PF_INET6
,
320 IPPROTO_TCP
, SOCK_STREAM
);
321 VERIFY(prp6
!= NULL
);
322 bcopy(prp6
, &mptcp_subflow_protosw6
, sizeof (*prp6
));
323 bcopy(prp6
->pr_usrreqs
, &mptcp_subflow_usrreqs6
,
324 sizeof (mptcp_subflow_usrreqs6
));
325 mptcp_subflow_protosw6
.pr_entry
.tqe_next
= NULL
;
326 mptcp_subflow_protosw6
.pr_entry
.tqe_prev
= NULL
;
327 mptcp_subflow_protosw6
.pr_usrreqs
= &mptcp_subflow_usrreqs6
;
328 mptcp_subflow_usrreqs6
.pru_soreceive
= mptcp_subflow_soreceive
;
329 mptcp_subflow_usrreqs6
.pru_sosend
= mptcp_subflow_sosend
;
330 mptcp_subflow_usrreqs6
.pru_rcvoob
= pru_rcvoob_notsupp
;
332 * Socket filters shouldn't attach/detach to/from this protosw
333 * since pr_protosw is to be used instead, which points to the
334 * real protocol; if they do, it is a bug and we should panic.
336 mptcp_subflow_protosw6
.pr_filter_head
.tqh_first
=
337 (struct socket_filter
*)(uintptr_t)0xdeadbeefdeadbeef;
338 mptcp_subflow_protosw6
.pr_filter_head
.tqh_last
=
339 (struct socket_filter
**)(uintptr_t)0xdeadbeefdeadbeef;
342 bzero(&mtcbinfo
, sizeof (mtcbinfo
));
343 TAILQ_INIT(&mtcbinfo
.mppi_pcbs
);
344 mtcbinfo
.mppi_size
= sizeof (struct mpp_mtp
);
345 if ((mtcbinfo
.mppi_zone
= zinit(mtcbinfo
.mppi_size
,
346 1024 * mtcbinfo
.mppi_size
, 8192, "mptcb")) == NULL
) {
347 panic("%s: unable to allocate MPTCP PCB zone\n", __func__
);
350 zone_change(mtcbinfo
.mppi_zone
, Z_CALLERACCT
, FALSE
);
351 zone_change(mtcbinfo
.mppi_zone
, Z_EXPAND
, TRUE
);
353 mtcbinfo
.mppi_lock_grp_attr
= lck_grp_attr_alloc_init();
354 mtcbinfo
.mppi_lock_grp
= lck_grp_alloc_init("mppcb",
355 mtcbinfo
.mppi_lock_grp_attr
);
356 mtcbinfo
.mppi_lock_attr
= lck_attr_alloc_init();
357 lck_mtx_init(&mtcbinfo
.mppi_lock
, mtcbinfo
.mppi_lock_grp
,
358 mtcbinfo
.mppi_lock_attr
);
360 mtcbinfo
.mppi_gc
= mptcp_gc
;
361 mtcbinfo
.mppi_timer
= mptcp_timer
;
363 /* attach to MP domain for garbage collection to take place */
364 mp_pcbinfo_attach(&mtcbinfo
);
366 mptsub_zone_size
= sizeof (struct mptsub
);
367 if ((mptsub_zone
= zinit(mptsub_zone_size
, 1024 * mptsub_zone_size
,
368 8192, "mptsub")) == NULL
) {
369 panic("%s: unable to allocate MPTCP subflow zone\n", __func__
);
372 zone_change(mptsub_zone
, Z_CALLERACCT
, FALSE
);
373 zone_change(mptsub_zone
, Z_EXPAND
, TRUE
);
375 mptopt_zone_size
= sizeof (struct mptopt
);
376 if ((mptopt_zone
= zinit(mptopt_zone_size
, 128 * mptopt_zone_size
,
377 1024, "mptopt")) == NULL
) {
378 panic("%s: unable to allocate MPTCP option zone\n", __func__
);
381 zone_change(mptopt_zone
, Z_CALLERACCT
, FALSE
);
382 zone_change(mptopt_zone
, Z_EXPAND
, TRUE
);
384 mpt_subauth_entry_size
= sizeof (struct mptcp_subf_auth_entry
);
385 if ((mpt_subauth_zone
= zinit(mpt_subauth_entry_size
,
386 1024 * mpt_subauth_entry_size
, 8192, "mptauth")) == NULL
) {
387 panic("%s: unable to allocate MPTCP address auth zone \n",
391 zone_change(mpt_subauth_zone
, Z_CALLERACCT
, FALSE
);
392 zone_change(mpt_subauth_zone
, Z_EXPAND
, TRUE
);
394 mptcp_last_cellicon_set
= tcp_now
;
398 mptcp_get_statsindex(struct mptcp_itf_stats
*stats
, const struct mptsub
*mpts
)
400 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
405 mptcplog((LOG_ERR
, "%s: no ifp on subflow\n", __func__
),
406 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
410 for (i
= 0; i
< MPTCP_ITFSTATS_SIZE
; i
++) {
411 if (stats
[i
].ifindex
== IFSCOPE_NONE
) {
417 if (stats
[i
].ifindex
== ifp
->if_index
) {
424 stats
[index
].ifindex
= ifp
->if_index
;
425 if (stats
[index
].is_expensive
== 0)
426 stats
[index
].is_expensive
= IFNET_IS_CELLULAR(ifp
);
433 mptcpstats_inc_switch(struct mptses
*mpte
, const struct mptsub
*mpts
)
437 tcpstat
.tcps_mp_switches
++;
438 mpte
->mpte_subflow_switches
++;
440 index
= mptcp_get_statsindex(mpte
->mpte_itfstats
, mpts
);
443 mpte
->mpte_itfstats
[index
].switches
++;
447 * Flushes all recorded socket options from an MP socket.
450 mptcp_flush_sopts(struct mptses
*mpte
)
452 struct mptopt
*mpo
, *tmpo
;
454 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
455 mptcp_sopt_remove(mpte
, mpo
);
456 mptcp_sopt_free(mpo
);
458 VERIFY(TAILQ_EMPTY(&mpte
->mpte_sopts
));
462 * Create an MPTCP session, called as a result of opening a MPTCP socket.
465 mptcp_sescreate(struct mppcb
*mpp
)
467 struct mppcbinfo
*mppi
;
472 mppi
= mpp
->mpp_pcbinfo
;
473 VERIFY(mppi
!= NULL
);
475 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
476 __IGNORE_WCASTALIGN(mp_tp
= &((struct mpp_mtp
*)mpp
)->mtcb
);
478 /* MPTCP Multipath PCB Extension */
479 bzero(mpte
, sizeof (*mpte
));
480 VERIFY(mpp
->mpp_pcbe
== NULL
);
481 mpp
->mpp_pcbe
= mpte
;
482 mpte
->mpte_mppcb
= mpp
;
483 mpte
->mpte_mptcb
= mp_tp
;
485 TAILQ_INIT(&mpte
->mpte_sopts
);
486 TAILQ_INIT(&mpte
->mpte_subflows
);
487 mpte
->mpte_associd
= SAE_ASSOCID_ANY
;
488 mpte
->mpte_connid_last
= SAE_CONNID_ANY
;
490 mpte
->mpte_itfinfo
= &mpte
->_mpte_itfinfo
[0];
491 mpte
->mpte_itfinfo_size
= MPTE_ITFINFO_SIZE
;
493 /* MPTCP Protocol Control Block */
494 bzero(mp_tp
, sizeof (*mp_tp
));
495 mp_tp
->mpt_mpte
= mpte
;
496 mp_tp
->mpt_state
= MPTCPS_CLOSED
;
498 DTRACE_MPTCP1(session__create
, struct mppcb
*, mpp
);
504 mptcpstats_get_bytes(struct mptses
*mpte
, boolean_t initial_cell
,
505 uint64_t *cellbytes
, uint64_t *allbytes
)
507 int64_t mycellbytes
= 0;
508 uint64_t myallbytes
= 0;
511 for (i
= 0; i
< MPTCP_ITFSTATS_SIZE
; i
++) {
512 if (mpte
->mpte_itfstats
[i
].is_expensive
) {
513 mycellbytes
+= mpte
->mpte_itfstats
[i
].mpis_txbytes
;
514 mycellbytes
+= mpte
->mpte_itfstats
[i
].mpis_rxbytes
;
517 myallbytes
+= mpte
->mpte_itfstats
[i
].mpis_txbytes
;
518 myallbytes
+= mpte
->mpte_itfstats
[i
].mpis_rxbytes
;
522 mycellbytes
-= mpte
->mpte_init_txbytes
;
523 mycellbytes
-= mpte
->mpte_init_txbytes
;
526 if (mycellbytes
< 0) {
527 mptcplog((LOG_ERR
, "%s cellbytes is %d\n", __func__
, mycellbytes
),
528 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
532 *cellbytes
= mycellbytes
;
533 *allbytes
= myallbytes
;
538 mptcpstats_session_wrapup(struct mptses
*mpte
)
540 boolean_t cell
= mpte
->mpte_initial_cell
;
542 switch (mpte
->mpte_svctype
) {
543 case MPTCP_SVCTYPE_HANDOVER
:
544 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
) {
545 tcpstat
.tcps_mptcp_fp_handover_attempt
++;
547 if (cell
&& mpte
->mpte_handshake_success
) {
548 tcpstat
.tcps_mptcp_fp_handover_success_cell
++;
550 if (mpte
->mpte_used_wifi
)
551 tcpstat
.tcps_mptcp_handover_wifi_from_cell
++;
552 } else if (mpte
->mpte_handshake_success
) {
553 tcpstat
.tcps_mptcp_fp_handover_success_wifi
++;
555 if (mpte
->mpte_used_cell
)
556 tcpstat
.tcps_mptcp_handover_cell_from_wifi
++;
559 tcpstat
.tcps_mptcp_handover_attempt
++;
561 if (cell
&& mpte
->mpte_handshake_success
) {
562 tcpstat
.tcps_mptcp_handover_success_cell
++;
564 if (mpte
->mpte_used_wifi
)
565 tcpstat
.tcps_mptcp_handover_wifi_from_cell
++;
566 } else if (mpte
->mpte_handshake_success
) {
567 tcpstat
.tcps_mptcp_handover_success_wifi
++;
569 if (mpte
->mpte_used_cell
)
570 tcpstat
.tcps_mptcp_handover_cell_from_wifi
++;
574 if (mpte
->mpte_handshake_success
) {
578 mptcpstats_get_bytes(mpte
, cell
, &cellbytes
, &allbytes
);
580 tcpstat
.tcps_mptcp_handover_cell_bytes
+= cellbytes
;
581 tcpstat
.tcps_mptcp_handover_all_bytes
+= allbytes
;
584 case MPTCP_SVCTYPE_INTERACTIVE
:
585 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
) {
586 tcpstat
.tcps_mptcp_fp_interactive_attempt
++;
588 if (mpte
->mpte_handshake_success
) {
589 tcpstat
.tcps_mptcp_fp_interactive_success
++;
591 if (!cell
&& mpte
->mpte_used_cell
)
592 tcpstat
.tcps_mptcp_interactive_cell_from_wifi
++;
595 tcpstat
.tcps_mptcp_interactive_attempt
++;
597 if (mpte
->mpte_handshake_success
) {
598 tcpstat
.tcps_mptcp_interactive_success
++;
600 if (!cell
&& mpte
->mpte_used_cell
)
601 tcpstat
.tcps_mptcp_interactive_cell_from_wifi
++;
605 if (mpte
->mpte_handshake_success
) {
609 mptcpstats_get_bytes(mpte
, cell
, &cellbytes
, &allbytes
);
611 tcpstat
.tcps_mptcp_interactive_cell_bytes
+= cellbytes
;
612 tcpstat
.tcps_mptcp_interactive_all_bytes
+= allbytes
;
615 case MPTCP_SVCTYPE_AGGREGATE
:
616 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
) {
617 tcpstat
.tcps_mptcp_fp_aggregate_attempt
++;
619 if (mpte
->mpte_handshake_success
)
620 tcpstat
.tcps_mptcp_fp_aggregate_success
++;
622 tcpstat
.tcps_mptcp_aggregate_attempt
++;
624 if (mpte
->mpte_handshake_success
) {
625 tcpstat
.tcps_mptcp_aggregate_success
++;
629 if (mpte
->mpte_handshake_success
) {
633 mptcpstats_get_bytes(mpte
, cell
, &cellbytes
, &allbytes
);
635 tcpstat
.tcps_mptcp_aggregate_cell_bytes
+= cellbytes
;
636 tcpstat
.tcps_mptcp_aggregate_all_bytes
+= allbytes
;
641 if (cell
&& mpte
->mpte_handshake_success
&& mpte
->mpte_used_wifi
)
642 tcpstat
.tcps_mptcp_back_to_wifi
++;
646 * Destroy an MPTCP session.
649 mptcp_session_destroy(struct mptses
*mpte
)
653 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
655 mp_tp
= mpte
->mpte_mptcb
;
656 VERIFY(mp_tp
!= NULL
);
658 mptcpstats_session_wrapup(mpte
);
660 mptcp_unset_cellicon();
663 * MPTCP Multipath PCB Extension section
665 mptcp_flush_sopts(mpte
);
666 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
) && mpte
->mpte_numflows
== 0);
668 if (mpte
->mpte_itfinfo_size
> MPTE_ITFINFO_SIZE
)
669 _FREE(mpte
->mpte_itfinfo
, M_TEMP
);
671 mpte
->mpte_itfinfo
= NULL
;
673 m_freem_list(mpte
->mpte_reinjectq
);
676 * MPTCP Protocol Control Block section
678 DTRACE_MPTCP2(session__destroy
, struct mptses
*, mpte
,
679 struct mptcb
*, mp_tp
);
683 mptcp_ok_to_create_subflows(struct mptcb
*mp_tp
)
685 return (mp_tp
->mpt_state
>= MPTCPS_ESTABLISHED
&&
686 mp_tp
->mpt_state
< MPTCPS_TIME_WAIT
&&
687 !(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
));
691 mptcp_synthesize_nat64(struct in6_addr
*addr
, uint32_t len
, struct in_addr
*addrv4
)
693 static const struct in6_addr well_known_prefix
= {
694 .__u6_addr
.__u6_addr8
= {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
695 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
696 0x00, 0x00, 0x00, 0x00},
698 char buf
[MAX_IPv6_STR_LEN
];
699 char *ptrv4
= (char *)addrv4
;
700 char *ptr
= (char *)addr
;
702 if (IN_ZERONET(addrv4
->s_addr
) || // 0.0.0.0/8 Source hosts on local network
703 IN_LOOPBACK(addrv4
->s_addr
) || // 127.0.0.0/8 Loopback
704 IN_LINKLOCAL(addrv4
->s_addr
) || // 169.254.0.0/16 Link Local
705 IN_DS_LITE(addrv4
->s_addr
) || // 192.0.0.0/29 DS-Lite
706 IN_6TO4_RELAY_ANYCAST(addrv4
->s_addr
) || // 192.88.99.0/24 6to4 Relay Anycast
707 IN_MULTICAST(addrv4
->s_addr
) || // 224.0.0.0/4 Multicast
708 INADDR_BROADCAST
== addrv4
->s_addr
) { // 255.255.255.255/32 Limited Broadcast
712 /* Check for the well-known prefix */
713 if (len
== NAT64_PREFIX_LEN_96
&&
714 IN6_ARE_ADDR_EQUAL(addr
, &well_known_prefix
)) {
715 if (IN_PRIVATE(addrv4
->s_addr
) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
716 IN_SHARED_ADDRESS_SPACE(addrv4
->s_addr
)) // 100.64.0.0/10 Shared Address Space
721 case NAT64_PREFIX_LEN_96
:
722 memcpy(ptr
+ 12, ptrv4
, 4);
724 case NAT64_PREFIX_LEN_64
:
725 memcpy(ptr
+ 9, ptrv4
, 4);
727 case NAT64_PREFIX_LEN_56
:
728 memcpy(ptr
+ 7, ptrv4
, 1);
729 memcpy(ptr
+ 9, ptrv4
+ 1, 3);
731 case NAT64_PREFIX_LEN_48
:
732 memcpy(ptr
+ 6, ptrv4
, 2);
733 memcpy(ptr
+ 9, ptrv4
+ 2, 2);
735 case NAT64_PREFIX_LEN_40
:
736 memcpy(ptr
+ 5, ptrv4
, 3);
737 memcpy(ptr
+ 9, ptrv4
+ 3, 1);
739 case NAT64_PREFIX_LEN_32
:
740 memcpy(ptr
+ 4, ptrv4
, 4);
743 panic("NAT64-prefix len is wrong: %u\n", len
);
746 mptcplog((LOG_DEBUG
, "%s: nat64prefix-len %u synthesized %s\n", __func__
,
747 len
, inet_ntop(AF_INET6
, (void *)addr
, buf
, sizeof(buf
))),
748 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
754 mptcp_check_subflows_and_add(struct mptses
*mpte
)
756 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
759 if (!mptcp_ok_to_create_subflows(mp_tp
))
762 for (i
= 0; i
< mpte
->mpte_itfinfo_size
; i
++) {
763 struct mpt_itf_info
*info
;
768 info
= &mpte
->mpte_itfinfo
[i
];
770 if (info
->no_mptcp_support
)
773 ifindex
= info
->ifindex
;
774 if (ifindex
== IFSCOPE_NONE
)
777 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
778 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
783 if (ifp
->if_index
== ifindex
&&
784 !(mpts
->mpts_socket
->so_state
& SS_ISDISCONNECTED
)) {
786 * We found a subflow on this interface.
787 * No need to create a new one.
794 * In Handover mode, only create cell subflow if
795 * 1. Wi-Fi Assist is active
796 * 2. Symptoms marked WiFi as weak
797 * 3. We are experiencing RTOs or we are not sending data.
799 * This covers the scenario, where:
800 * 1. We send and get retransmission timeouts (thus,
801 * we confirmed that WiFi is indeed bad).
802 * 2. We are not sending and the server tries to send.
803 * Establshing a cell-subflow gives the server a
804 * chance to send us some data over cell if WiFi
805 * is dead. We establish the subflow with the
806 * backup-bit set, so the server is not allowed to
807 * send on this subflow as long as WiFi is providing
810 if (mpte
->mpte_svctype
== MPTCP_SVCTYPE_HANDOVER
&&
811 !IFNET_IS_CELLULAR(ifp
) &&
812 !(mpts
->mpts_flags
& (MPTSF_DISCONNECTING
| MPTSF_DISCONNECTED
| MPTSF_CLOSE_REQD
)) &&
813 (!mptcp_is_wifi_unusable() ||
814 (sototcpcb(mpts
->mpts_socket
)->t_rxtshift
< mptcp_fail_thresh
&&
815 mptetoso(mpte
)->so_snd
.sb_cc
))) {
816 mptcplog((LOG_DEBUG
, "%s handover, wifi state %u rxt %u ifindex %u this %u\n",
817 __func__
, mptcp_is_wifi_unusable(), sototcpcb(mpts
->mpts_socket
)->t_rxtshift
, ifindex
,
819 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
825 if (!found
&& !(mpte
->mpte_flags
& MPTE_FIRSTPARTY
) &&
826 !(mpte
->mpte_flags
& MPTE_ACCESS_GRANTED
) &&
827 mptcp_developer_mode
== 0) {
828 mptcp_ask_symptoms(mpte
);
833 struct sockaddr
*dst
= &mpte
->mpte_dst
;
834 struct sockaddr_in6 nat64pre
;
836 if (mpte
->mpte_dst
.sa_family
== AF_INET
&&
837 !info
->has_v4_conn
&& info
->has_v6_conn
) {
838 struct ipv6_prefix nat64prefixes
[NAT64_MAX_NUM_PREFIXES
];
842 bzero(&nat64pre
, sizeof(struct sockaddr_in6
));
844 ifnet_head_lock_shared();
845 ifp
= ifindex2ifnet
[ifindex
];
848 error
= ifnet_get_nat64prefix(ifp
, nat64prefixes
);
850 mptcplog((LOG_ERR
, "%s: no NAT64-prefix on itf %s, error %d\n",
851 __func__
, ifp
->if_name
, error
),
852 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
856 for (j
= 0; j
< NAT64_MAX_NUM_PREFIXES
; j
++) {
857 if (nat64prefixes
[j
].prefix_len
!= 0)
861 VERIFY(j
< NAT64_MAX_NUM_PREFIXES
);
863 error
= mptcp_synthesize_nat64(&nat64prefixes
[j
].ipv6_prefix
,
864 nat64prefixes
[j
].prefix_len
,
865 &mpte
->__mpte_dst_v4
.sin_addr
);
867 mptcplog((LOG_INFO
, "%s: cannot synthesize this addr\n", __func__
),
868 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
872 memcpy(&nat64pre
.sin6_addr
,
873 &nat64prefixes
[j
].ipv6_prefix
,
874 sizeof(nat64pre
.sin6_addr
));
875 nat64pre
.sin6_len
= sizeof(struct sockaddr_in6
);
876 nat64pre
.sin6_family
= AF_INET6
;
877 nat64pre
.sin6_port
= mpte
->__mpte_dst_v6
.sin6_port
;
878 nat64pre
.sin6_flowinfo
= 0;
879 nat64pre
.sin6_scope_id
= 0;
881 dst
= (struct sockaddr
*)&nat64pre
;
884 mptcp_subflow_add(mpte
, NULL
, dst
, ifindex
, NULL
);
890 * Based on the MPTCP Service-type and the state of the subflows, we
891 * will destroy subflows here.
894 mptcp_check_subflows_and_remove(struct mptses
*mpte
)
896 struct mptsub
*mpts
, *tmpts
;
897 int found_working_subflow
= 0, removed_some
= 0;
898 int wifi_unusable
= mptcp_is_wifi_unusable();
900 if (mpte
->mpte_svctype
!= MPTCP_SVCTYPE_HANDOVER
)
904 * Look for a subflow that is on a non-cellular interface
905 * and actually works (aka, no retransmission timeout).
907 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
908 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
912 if (ifp
== NULL
|| IFNET_IS_CELLULAR(ifp
))
915 so
= mpts
->mpts_socket
;
918 if (!(mpts
->mpts_flags
& MPTSF_CONNECTED
) ||
919 tp
->t_state
!= TCPS_ESTABLISHED
)
922 /* Either this subflow is in good condition while we try to send */
923 if (tp
->t_rxtshift
== 0 && mptetoso(mpte
)->so_snd
.sb_cc
)
924 found_working_subflow
= 1;
926 /* Or WiFi is fine */
928 found_working_subflow
= 1;
932 * Couldn't find a working subflow, let's not remove those on a cellular
935 if (!found_working_subflow
)
938 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
939 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
941 /* Only remove cellular subflows */
942 if (ifp
== NULL
|| !IFNET_IS_CELLULAR(ifp
))
945 soevent(mpts
->mpts_socket
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
950 mptcp_unset_cellicon();
954 mptcp_remove_subflows(struct mptses
*mpte
)
956 struct mptsub
*mpts
, *tmpts
;
958 TAILQ_FOREACH_SAFE(mpts
, &mpte
->mpte_subflows
, mpts_entry
, tmpts
) {
959 if (mpts
->mpts_flags
& MPTSF_CLOSE_REQD
) {
960 mpts
->mpts_flags
&= ~MPTSF_CLOSE_REQD
;
962 soevent(mpts
->mpts_socket
,
963 SO_FILT_HINT_LOCKED
| SO_FILT_HINT_NOSRCADDR
);
969 mptcp_create_subflows(__unused
void *arg
)
974 * Start with clearing, because we might be processing connections
975 * while a new event comes in.
977 if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled
))
978 mptcplog((LOG_ERR
, "%s: bit was already cleared!\n", __func__
),
979 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
981 /* Iterate over all MPTCP connections */
983 lck_mtx_lock(&mtcbinfo
.mppi_lock
);
985 TAILQ_FOREACH(mpp
, &mtcbinfo
.mppi_pcbs
, mpp_entry
) {
987 struct socket
*mp_so
;
989 if (!(mpp
->mpp_flags
& MPP_CREATE_SUBFLOWS
))
994 mpp
->mpp_flags
&= ~MPP_CREATE_SUBFLOWS
;
996 mpte
= mpp
->mpp_pcbe
;
997 mp_so
= mpp
->mpp_socket
;
999 VERIFY(mp_so
->so_usecount
> 0);
1001 mptcp_check_subflows_and_add(mpte
);
1002 mptcp_remove_subflows(mpte
);
1004 mp_so
->so_usecount
--; /* See mptcp_sched_create_subflows */
1008 lck_mtx_unlock(&mtcbinfo
.mppi_lock
);
1012 * We need this because we are coming from an NECP-event. This event gets posted
1013 * while holding NECP-locks. The creation of the subflow however leads us back
1014 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
1015 * So, we would deadlock there as we already hold the NECP-lock.
1017 * So, let's schedule this separately. It also gives NECP the chance to make
1018 * progress, without having to wait for MPTCP to finish its subflow creation.
1021 mptcp_sched_create_subflows(struct mptses
*mpte
)
1023 struct mppcb
*mpp
= mpte
->mpte_mppcb
;
1024 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1025 struct socket
*mp_so
= mpp
->mpp_socket
;
1027 if (!mptcp_ok_to_create_subflows(mp_tp
)) {
1028 mptcplog((LOG_DEBUG
, "%s: not a good time for subflows, state %u flags %#x",
1029 __func__
, mp_tp
->mpt_state
, mp_tp
->mpt_flags
),
1030 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
1034 if (!(mpp
->mpp_flags
& MPP_CREATE_SUBFLOWS
)) {
1035 mp_so
->so_usecount
++; /* To prevent it from being free'd in-between */
1036 mpp
->mpp_flags
|= MPP_CREATE_SUBFLOWS
;
1039 if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled
))
1042 /* Do the call in 100ms to allow NECP to schedule it on all sockets */
1043 timeout(mptcp_create_subflows
, NULL
, hz
/10);
1047 * Allocate an MPTCP socket option structure.
1050 mptcp_sopt_alloc(int how
)
1054 mpo
= (how
== M_WAITOK
) ? zalloc(mptopt_zone
) :
1055 zalloc_noblock(mptopt_zone
);
1057 bzero(mpo
, mptopt_zone_size
);
1064 * Free an MPTCP socket option structure.
1067 mptcp_sopt_free(struct mptopt
*mpo
)
1069 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
1071 zfree(mptopt_zone
, mpo
);
1075 * Add a socket option to the MPTCP socket option list.
1078 mptcp_sopt_insert(struct mptses
*mpte
, struct mptopt
*mpo
)
1080 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1081 VERIFY(!(mpo
->mpo_flags
& MPOF_ATTACHED
));
1082 mpo
->mpo_flags
|= MPOF_ATTACHED
;
1083 TAILQ_INSERT_TAIL(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
1087 * Remove a socket option from the MPTCP socket option list.
1090 mptcp_sopt_remove(struct mptses
*mpte
, struct mptopt
*mpo
)
1092 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1093 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
1094 mpo
->mpo_flags
&= ~MPOF_ATTACHED
;
1095 TAILQ_REMOVE(&mpte
->mpte_sopts
, mpo
, mpo_entry
);
1099 * Search for an existing <sopt_level,sopt_name> socket option.
1102 mptcp_sopt_find(struct mptses
*mpte
, struct sockopt
*sopt
)
1106 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1108 TAILQ_FOREACH(mpo
, &mpte
->mpte_sopts
, mpo_entry
) {
1109 if (mpo
->mpo_level
== sopt
->sopt_level
&&
1110 mpo
->mpo_name
== sopt
->sopt_name
)
1113 VERIFY(mpo
== NULL
|| sopt
->sopt_valsize
== sizeof (int));
1119 * Allocate a MPTCP subflow structure.
1121 static struct mptsub
*
1122 mptcp_subflow_alloc(void)
1124 struct mptsub
*mpts
= zalloc(mptsub_zone
);
1129 bzero(mpts
, mptsub_zone_size
);
1134 * Deallocate a subflow structure, called when all of the references held
1135 * on it have been released. This implies that the subflow has been deleted.
1138 mptcp_subflow_free(struct mptsub
*mpts
)
1140 VERIFY(mpts
->mpts_refcnt
== 0);
1141 VERIFY(!(mpts
->mpts_flags
& MPTSF_ATTACHED
));
1142 VERIFY(mpts
->mpts_mpte
== NULL
);
1143 VERIFY(mpts
->mpts_socket
== NULL
);
1145 if (mpts
->mpts_src
!= NULL
) {
1146 FREE(mpts
->mpts_src
, M_SONAME
);
1147 mpts
->mpts_src
= NULL
;
1150 zfree(mptsub_zone
, mpts
);
1154 mptcp_subflow_addref(struct mptsub
*mpts
)
1156 if (++mpts
->mpts_refcnt
== 0)
1157 panic("%s: mpts %p wraparound refcnt\n", __func__
, mpts
);
1162 mptcp_subflow_remref(struct mptsub
*mpts
)
1164 if (mpts
->mpts_refcnt
== 0) {
1165 panic("%s: mpts %p negative refcnt\n", __func__
, mpts
);
1168 if (--mpts
->mpts_refcnt
> 0)
1171 /* callee will unlock and destroy lock */
1172 mptcp_subflow_free(mpts
);
1176 mptcp_subflow_attach(struct mptses
*mpte
, struct mptsub
*mpts
, struct socket
*so
)
1178 struct socket
*mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1179 struct tcpcb
*tp
= sototcpcb(so
);
1182 * From this moment on, the subflow is linked to the MPTCP-connection.
1183 * Locking,... happens now at the MPTCP-layer
1185 tp
->t_mptcb
= mpte
->mpte_mptcb
;
1186 so
->so_flags
|= SOF_MP_SUBFLOW
;
1187 mp_so
->so_usecount
++;
1190 * Insert the subflow into the list, and associate the MPTCP PCB
1191 * as well as the the subflow socket. From this point on, removing
1192 * the subflow needs to be done via mptcp_subflow_del().
1194 TAILQ_INSERT_TAIL(&mpte
->mpte_subflows
, mpts
, mpts_entry
);
1195 mpte
->mpte_numflows
++;
1197 atomic_bitset_32(&mpts
->mpts_flags
, MPTSF_ATTACHED
);
1198 mpts
->mpts_mpte
= mpte
;
1199 mpts
->mpts_socket
= so
;
1201 mptcp_subflow_addref(mpts
); /* for being in MPTCP subflow list */
1202 mptcp_subflow_addref(mpts
); /* for subflow socket */
1206 mptcp_subflow_necp_cb(void *handle
, __unused
int action
,
1207 __unused
struct necp_client_flow
*flow
)
1209 struct inpcb
*inp
= (struct inpcb
*)handle
;
1210 struct socket
*so
= inp
->inp_socket
;
1211 struct mptsub
*mpts
;
1212 struct mptses
*mpte
;
1214 if (action
!= NECP_CLIENT_CBACTION_NONVIABLE
)
1218 * The socket is being garbage-collected. There is nothing to be done
1221 if (so
->so_usecount
== 0)
1226 /* Check again after we acquired the lock. */
1227 if (so
->so_usecount
== 0)
1230 mpte
= tptomptp(sototcpcb(so
))->mpt_mpte
;
1231 mpts
= sototcpcb(so
)->t_mpsub
;
1233 mptcplog((LOG_DEBUG
, "%s: Subflow became non-viable", __func__
),
1234 MPTCP_EVENTS_DBG
, MPTCP_LOGLVL_VERBOSE
);
1236 mpts
->mpts_flags
|= MPTSF_CLOSE_REQD
;
1238 mptcp_sched_create_subflows(mpte
);
1240 if (mpte
->mpte_svctype
== MPTCP_SVCTYPE_HANDOVER
)
1244 socket_unlock(so
, 1);
1248 * Create an MPTCP subflow socket.
1251 mptcp_subflow_socreate(struct mptses
*mpte
, struct mptsub
*mpts
, int dom
,
1254 lck_mtx_t
*subflow_mtx
;
1255 struct mptopt smpo
, *mpo
, *tmpo
;
1257 struct socket
*mp_so
;
1261 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1262 mp_so
= mptetoso(mpte
);
1264 p
= proc_find(mp_so
->last_pid
);
1265 if (p
== PROC_NULL
) {
1266 mptcplog((LOG_ERR
, "%s: Couldn't find proc for pid %u\n", __func__
, mp_so
->last_pid
),
1267 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1273 * Create the subflow socket (multipath subflow, non-blocking.)
1275 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
1276 * socket; it will be cleared when the socket is peeled off or closed.
1277 * It also indicates to the underlying TCP to handle MPTCP options.
1278 * A multipath subflow socket implies SS_NOFDREF state.
1282 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
1283 * the ipi-lock. We cannot hold the socket-lock at that point.
1286 error
= socreate_internal(dom
, so
, SOCK_STREAM
, IPPROTO_TCP
, p
,
1287 SOCF_ASYNC
, PROC_NULL
);
1290 mptcplog((LOG_ERR
, "%s: subflow socreate mp_so 0x%llx unable to create subflow socket error %d\n",
1291 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), error
),
1292 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1296 mptcp_subflow_free(mpts
);
1301 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
1302 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
1303 * Which is why we also need to get the lock with pr_getlock, as after
1304 * setting the flag, socket_unlock will work on the MPTCP-level lock.
1306 subflow_mtx
= ((*so
)->so_proto
->pr_getlock
)(*so
, 0);
1307 lck_mtx_lock(subflow_mtx
);
1310 * Must be the first thing we do, to make sure all pointers for this
1313 mptcp_subflow_attach(mpte
, mpts
, *so
);
1316 * A multipath subflow socket is used internally in the kernel,
1317 * therefore it does not have a file desciptor associated by
1320 (*so
)->so_state
|= SS_NOFDREF
;
1322 lck_mtx_unlock(subflow_mtx
);
1324 /* prevent the socket buffers from being compressed */
1325 (*so
)->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
1326 (*so
)->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
1328 /* Inherit preconnect and TFO data flags */
1329 if (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)
1330 (*so
)->so_flags1
|= SOF1_PRECONNECT_DATA
;
1331 if (mp_so
->so_flags1
& SOF1_DATA_IDEMPOTENT
)
1332 (*so
)->so_flags1
|= SOF1_DATA_IDEMPOTENT
;
1334 /* Inherit uuid and create the related flow. */
1335 if (!uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1336 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1338 sotoinpcb(*so
)->necp_cb
= mptcp_subflow_necp_cb
;
1341 * A note on the unlock: With MPTCP, we do multiple times a
1342 * necp_client_register_socket_flow. This is problematic,
1343 * because now the lock-ordering guarantee (first necp-locks,
1344 * then socket-locks) is no more respected. So, we need to
1348 error
= necp_client_register_socket_flow(mp_so
->last_pid
,
1349 mpsotomppcb(mp_so
)->necp_client_uuid
, sotoinpcb(*so
));
1355 /* Possible state-change during the unlock above */
1356 if (mp_tp
->mpt_state
>= MPTCPS_TIME_WAIT
||
1357 (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
))
1360 uuid_copy(sotoinpcb(*so
)->necp_client_uuid
, mpsotomppcb(mp_so
)->necp_client_uuid
);
1362 mptcplog((LOG_NOTICE
, "%s: uuid is not set!\n"),
1363 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1366 /* inherit the other socket options */
1367 bzero(&smpo
, sizeof (smpo
));
1368 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1369 smpo
.mpo_level
= SOL_SOCKET
;
1370 smpo
.mpo_intval
= 1;
1372 /* disable SIGPIPE */
1373 smpo
.mpo_name
= SO_NOSIGPIPE
;
1374 if ((error
= mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
)) != 0)
1377 /* find out if the subflow's source address goes away */
1378 smpo
.mpo_name
= SO_NOADDRERR
;
1379 if ((error
= mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
)) != 0)
1382 /* enable keepalive */
1383 smpo
.mpo_name
= SO_KEEPALIVE
;
1384 if ((error
= mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
)) != 0)
1387 smpo
.mpo_level
= IPPROTO_TCP
;
1388 smpo
.mpo_intval
= mptcp_subflow_keeptime
;
1389 smpo
.mpo_name
= TCP_KEEPALIVE
;
1390 if ((error
= mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
)) != 0)
1393 if (mpte
->mpte_mptcb
->mpt_state
>= MPTCPS_ESTABLISHED
) {
1395 * On secondary subflows we might need to set the cell-fallback
1396 * flag (see conditions in mptcp_subflow_sosetopt).
1398 smpo
.mpo_level
= SOL_SOCKET
;
1399 smpo
.mpo_name
= SO_MARK_CELLFALLBACK
;
1400 smpo
.mpo_intval
= 1;
1401 if ((error
= mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
)) != 0)
1405 /* replay setsockopt(2) on the subflow sockets for eligible options */
1406 TAILQ_FOREACH_SAFE(mpo
, &mpte
->mpte_sopts
, mpo_entry
, tmpo
) {
1409 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
))
1413 * Skip those that are handled internally; these options
1414 * should not have been recorded and marked with the
1415 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1417 if (mpo
->mpo_level
== SOL_SOCKET
&&
1418 (mpo
->mpo_name
== SO_NOSIGPIPE
||
1419 mpo
->mpo_name
== SO_NOADDRERR
||
1420 mpo
->mpo_name
== SO_KEEPALIVE
))
1423 interim
= (mpo
->mpo_flags
& MPOF_INTERIM
);
1424 if (mptcp_subflow_sosetopt(mpte
, mpts
, mpo
) != 0 && interim
) {
1425 mptcplog((LOG_ERR
, "%s: subflow socreate mp_so 0x%llx"
1426 " sopt %s val %d interim record removed\n", __func__
,
1427 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1428 mptcp_sopt2str(mpo
->mpo_level
, mpo
->mpo_name
),
1430 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1431 mptcp_sopt_remove(mpte
, mpo
);
1432 mptcp_sopt_free(mpo
);
1438 * We need to receive everything that the subflow socket has,
1439 * so use a customized socket receive function. We will undo
1440 * this when the socket is peeled off or closed.
1444 (*so
)->so_proto
= &mptcp_subflow_protosw
;
1448 (*so
)->so_proto
= (struct protosw
*)&mptcp_subflow_protosw6
;
1458 DTRACE_MPTCP3(subflow__create
, struct mptses
*, mpte
,
1459 int, dom
, int, error
);
1464 mptcp_subflow_abort(mpts
, error
);
1468 mptcplog((LOG_ERR
, "%s: subflow socreate failed with error %d\n",
1469 __func__
, error
), MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1475 * Close an MPTCP subflow socket.
1477 * Note that this may be called on an embryonic subflow, and the only
1478 * thing that is guaranteed valid is the protocol-user request.
1481 mptcp_subflow_soclose(struct mptsub
*mpts
)
1483 struct socket
*so
= mpts
->mpts_socket
;
1485 if (mpts
->mpts_flags
& MPTSF_CLOSED
)
1489 VERIFY(so
->so_flags
& SOF_MP_SUBFLOW
);
1490 VERIFY((so
->so_state
& (SS_NBIO
|SS_NOFDREF
)) == (SS_NBIO
|SS_NOFDREF
));
1492 DTRACE_MPTCP5(subflow__close
, struct mptsub
*, mpts
,
1493 struct socket
*, so
,
1494 struct sockbuf
*, &so
->so_rcv
,
1495 struct sockbuf
*, &so
->so_snd
,
1496 struct mptses
*, mpts
->mpts_mpte
);
1498 mpts
->mpts_flags
|= MPTSF_CLOSED
;
1500 if (so
->so_retaincnt
== 0) {
1505 VERIFY(so
->so_usecount
> 0);
1513 * Connect an MPTCP subflow socket.
1515 * Note that in the pending connect case, the subflow socket may have been
1516 * bound to an interface and/or a source IP address which may no longer be
1517 * around by the time this routine is called; in that case the connect attempt
1518 * will most likely fail.
1521 mptcp_subflow_soconnectx(struct mptses
*mpte
, struct mptsub
*mpts
)
1523 char dbuf
[MAX_IPv6_STR_LEN
];
1524 struct socket
*mp_so
, *so
;
1525 struct mptcb
*mp_tp
;
1526 struct sockaddr
*dst
;
1530 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1532 mp_so
= mptetoso(mpte
);
1533 mp_tp
= mpte
->mpte_mptcb
;
1535 p
= proc_find(mp_so
->last_pid
);
1536 if (p
== PROC_NULL
) {
1537 mptcplog((LOG_ERR
, "%s: Couldn't find proc for pid %u\n", __func__
, mp_so
->last_pid
),
1538 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1543 so
= mpts
->mpts_socket
;
1544 af
= mpts
->mpts_dst
.sa_family
;
1546 VERIFY((mpts
->mpts_flags
& (MPTSF_CONNECTING
|MPTSF_CONNECTED
)) == MPTSF_CONNECTING
);
1547 VERIFY(mpts
->mpts_socket
!= NULL
);
1548 VERIFY(af
== AF_INET
|| af
== AF_INET6
);
1550 dst
= &mpts
->mpts_dst
;
1551 mptcplog((LOG_DEBUG
, "%s: connectx mp_so 0x%llx dst %s[%d] cid %d [pended %s]\n",
1552 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1553 inet_ntop(af
, ((af
== AF_INET
) ? (void *)&SIN(dst
)->sin_addr
.s_addr
:
1554 (void *)&SIN6(dst
)->sin6_addr
),
1555 dbuf
, sizeof (dbuf
)),
1556 ((af
== AF_INET
) ? ntohs(SIN(dst
)->sin_port
) : ntohs(SIN6(dst
)->sin6_port
)),
1558 ((mpts
->mpts_flags
& MPTSF_CONNECT_PENDING
) ? "YES" : "NO")),
1559 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
1561 mpts
->mpts_flags
&= ~MPTSF_CONNECT_PENDING
;
1563 mptcp_attach_to_subf(so
, mpte
->mpte_mptcb
, mpte
->mpte_addrid_last
);
1565 /* connect the subflow socket */
1566 error
= soconnectxlocked(so
, mpts
->mpts_src
, &mpts
->mpts_dst
,
1567 p
, mpts
->mpts_ifscope
,
1568 mpte
->mpte_associd
, NULL
, 0, NULL
, 0, NULL
, NULL
);
1570 mpts
->mpts_iss
= sototcpcb(so
)->iss
;
1572 /* See tcp_connect_complete */
1573 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
&&
1574 (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)) {
1575 mp_tp
->mpt_sndwnd
= sototcpcb(so
)->snd_wnd
;
1578 /* Allocate a unique address id per subflow */
1579 mpte
->mpte_addrid_last
++;
1580 if (mpte
->mpte_addrid_last
== 0)
1581 mpte
->mpte_addrid_last
++;
1585 DTRACE_MPTCP3(subflow__connect
, struct mptses
*, mpte
,
1586 struct mptsub
*, mpts
, int, error
);
1588 mptcplog((LOG_ERR
, "%s: connectx failed with error %d ifscope %u\n",
1589 __func__
, error
, mpts
->mpts_ifscope
),
1590 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1596 * MPTCP subflow socket receive routine, derived from soreceive().
1599 mptcp_subflow_soreceive(struct socket
*so
, struct sockaddr
**psa
,
1600 struct uio
*uio
, struct mbuf
**mp0
, struct mbuf
**controlp
, int *flagsp
)
1603 struct socket
*mp_so
= mptetoso(tptomptp(sototcpcb(so
))->mpt_mpte
);
1604 int flags
, error
= 0;
1605 struct proc
*p
= current_proc();
1606 struct mbuf
*m
, **mp
= mp0
;
1607 boolean_t proc_held
= FALSE
;
1609 mpte_lock_assert_held(tptomptp(sototcpcb(so
))->mpt_mpte
);
1610 VERIFY(so
->so_proto
->pr_flags
& PR_CONNREQUIRED
);
1612 #ifdef MORE_LOCKING_DEBUG
1613 if (so
->so_usecount
== 1) {
1614 panic("%s: so=%x no other reference on socket\n", __func__
, so
);
1619 * We return all that is there in the subflow's socket receive buffer
1620 * to the MPTCP layer, so we require that the caller passes in the
1621 * expected parameters.
1623 if (mp
== NULL
|| controlp
!= NULL
)
1630 flags
= *flagsp
&~ MSG_EOR
;
1634 if (flags
& (MSG_PEEK
|MSG_OOB
|MSG_NEEDSA
|MSG_WAITALL
|MSG_WAITSTREAM
))
1635 return (EOPNOTSUPP
);
1637 flags
|= (MSG_DONTWAIT
|MSG_NBIO
);
1640 * If a recv attempt is made on a previously-accepted socket
1641 * that has been marked as inactive (disconnected), reject
1644 if (so
->so_flags
& SOF_DEFUNCT
) {
1645 struct sockbuf
*sb
= &so
->so_rcv
;
1649 * This socket should have been disconnected and flushed
1650 * prior to being returned from sodefunct(); there should
1651 * be no data on its receive list, so panic otherwise.
1653 if (so
->so_state
& SS_DEFUNCT
)
1654 sb_empty_assert(sb
, __func__
);
1659 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
1660 * and if so just return to the caller. This could happen when
1661 * soreceive() is called by a socket upcall function during the
1662 * time the socket is freed. The socket buffer would have been
1663 * locked across the upcall, therefore we cannot put this thread
1664 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
1665 * we may livelock), because the lock on the socket buffer will
1666 * only be released when the upcall routine returns to its caller.
1667 * Because the socket has been officially closed, there can be
1668 * no further read on it.
1670 * A multipath subflow socket would have its SS_NOFDREF set by
1671 * default, so check for SOF_MP_SUBFLOW socket flag; when the
1672 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
1674 if ((so
->so_state
& (SS_NOFDREF
| SS_CANTRCVMORE
)) ==
1675 (SS_NOFDREF
| SS_CANTRCVMORE
) && !(so
->so_flags
& SOF_MP_SUBFLOW
))
1679 * For consistency with soreceive() semantics, we need to obey
1680 * SB_LOCK in case some other code path has locked the buffer.
1682 error
= sblock(&so
->so_rcv
, 0);
1686 m
= so
->so_rcv
.sb_mb
;
1689 * Panic if we notice inconsistencies in the socket's
1690 * receive list; both sb_mb and sb_cc should correctly
1691 * reflect the contents of the list, otherwise we may
1692 * end up with false positives during select() or poll()
1693 * which could put the application in a bad state.
1695 SB_MB_CHECK(&so
->so_rcv
);
1697 if (so
->so_error
!= 0) {
1698 error
= so
->so_error
;
1703 if (so
->so_state
& SS_CANTRCVMORE
) {
1707 if (!(so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
))) {
1713 * MSG_DONTWAIT is implicitly defined and this routine will
1714 * never block, so return EWOULDBLOCK when there is nothing.
1716 error
= EWOULDBLOCK
;
1720 mptcp_update_last_owner(so
, mp_so
);
1722 if (mp_so
->last_pid
!= proc_pid(p
)) {
1723 p
= proc_find(mp_so
->last_pid
);
1724 if (p
== PROC_NULL
) {
1731 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgrcv
);
1732 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1733 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 1");
1736 int dlen
= 0, dfin
= 0, error_out
= 0;
1737 struct mbuf
*start
= m
;
1743 VERIFY(m
->m_nextpkt
== NULL
);
1745 if ((m
->m_flags
& M_PKTHDR
) && (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
)) {
1746 orig_dlen
= dlen
= m
->m_pkthdr
.mp_rlen
;
1747 dsn
= m
->m_pkthdr
.mp_dsn
;
1748 sseq
= m
->m_pkthdr
.mp_rseq
;
1749 csum
= m
->m_pkthdr
.mp_csum
;
1751 /* We did fallback */
1752 mptcp_adj_rmap(so
, m
, 0, 0, 0, 0);
1754 sbfree(&so
->so_rcv
, m
);
1759 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
1765 so
->so_rcv
.sb_lastrecord
= m
;
1767 SB_EMPTY_FIXUP(&so
->so_rcv
);
1773 if (m
->m_pkthdr
.pkt_flags
& PKTF_MPTCP_DFIN
)
1777 * Check if the full mapping is now present
1779 if ((int)so
->so_rcv
.sb_cc
< dlen
- dfin
) {
1780 mptcplog((LOG_INFO
, "%s not enough data (%u) need %u\n",
1781 __func__
, so
->so_rcv
.sb_cc
, dlen
),
1782 MPTCP_RECEIVER_DBG
, MPTCP_LOGLVL_LOG
);
1785 error
= EWOULDBLOCK
;
1789 /* Now, get the full mapping */
1791 if (mptcp_adj_rmap(so
, m
, orig_dlen
- dlen
, dsn
, sseq
, orig_dlen
)) {
1795 soevent(so
, SO_FILT_HINT_LOCKED
| SO_FILT_HINT_MUSTRST
);
1800 sbfree(&so
->so_rcv
, m
);
1805 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
1809 if (dlen
- dfin
== 0)
1812 VERIFY(dlen
<= 0 || m
);
1818 so
->so_rcv
.sb_lastrecord
= m
;
1820 SB_EMPTY_FIXUP(&so
->so_rcv
);
1827 if (mptcp_validate_csum(sototcpcb(so
), start
, dsn
, sseq
, orig_dlen
, csum
, dfin
)) {
1833 SBLASTRECORDCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1834 SBLASTMBUFCHK(&so
->so_rcv
, "mptcp_subflow_soreceive 2");
1837 DTRACE_MPTCP3(subflow__receive
, struct socket
*, so
,
1838 struct sockbuf
*, &so
->so_rcv
, struct sockbuf
*, &so
->so_snd
);
1844 sbunlock(&so
->so_rcv
, TRUE
);
1854 * MPTCP subflow socket send routine, derived from sosend().
1857 mptcp_subflow_sosend(struct socket
*so
, struct sockaddr
*addr
, struct uio
*uio
,
1858 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1860 struct socket
*mp_so
= mptetoso(tptomptp(sototcpcb(so
))->mpt_mpte
);
1861 struct proc
*p
= current_proc();
1862 boolean_t en_tracing
= FALSE
, proc_held
= FALSE
;
1864 int sblocked
= 1; /* Pretend as if it is already locked, so we won't relock it */
1867 VERIFY(control
== NULL
);
1868 VERIFY(addr
== NULL
);
1869 VERIFY(uio
== NULL
);
1871 VERIFY((so
->so_flags
& SOF_CONTENT_FILTER
) == 0);
1873 VERIFY(top
->m_pkthdr
.len
> 0 && top
->m_pkthdr
.len
<= UINT16_MAX
);
1874 VERIFY(top
->m_pkthdr
.pkt_flags
& PKTF_MPTCP
);
1877 * trace if tracing & network (vs. unix) sockets & and
1880 if (ENTR_SHOULDTRACE
&&
1881 (SOCK_CHECK_DOM(so
, AF_INET
) || SOCK_CHECK_DOM(so
, AF_INET6
))) {
1882 struct inpcb
*inp
= sotoinpcb(so
);
1883 if (inp
->inp_last_outifp
!= NULL
&&
1884 !(inp
->inp_last_outifp
->if_flags
& IFF_LOOPBACK
)) {
1886 en_tracing_val
= top
->m_pkthdr
.len
;
1887 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite
, DBG_FUNC_START
,
1888 VM_KERNEL_ADDRPERM(so
),
1889 ((so
->so_state
& SS_NBIO
) ? kEnTrFlagNonBlocking
: 0),
1890 (int64_t)en_tracing_val
);
1894 mptcp_update_last_owner(so
, mp_so
);
1896 if (mp_so
->last_pid
!= proc_pid(p
)) {
1897 p
= proc_find(mp_so
->last_pid
);
1898 if (p
== PROC_NULL
) {
1906 inp_update_necp_policy(sotoinpcb(so
), NULL
, NULL
, 0);
1909 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1911 error
= sosendcheck(so
, NULL
, top
->m_pkthdr
.len
, 0, 1, 0, &sblocked
, NULL
);
1915 error
= (*so
->so_proto
->pr_usrreqs
->pru_send
)(so
, 0, top
, NULL
, NULL
, p
);
1925 soclearfastopen(so
);
1928 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite
, DBG_FUNC_END
,
1929 VM_KERNEL_ADDRPERM(so
),
1930 ((error
== EWOULDBLOCK
) ? kEnTrFlagNoWork
: 0),
1931 (int64_t)en_tracing_val
);
1939 * Establish an initial MPTCP connection (if first subflow and not yet
1940 * connected), or add a subflow to an existing MPTCP connection.
1943 mptcp_subflow_add(struct mptses
*mpte
, struct sockaddr
*src
,
1944 struct sockaddr
*dst
, uint32_t ifscope
, sae_connid_t
*pcid
)
1946 struct socket
*mp_so
, *so
= NULL
;
1947 struct mptcb
*mp_tp
;
1948 struct mptsub
*mpts
= NULL
;
1951 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1952 mp_so
= mptetoso(mpte
);
1953 mp_tp
= mpte
->mpte_mptcb
;
1955 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
1956 /* If the remote end sends Data FIN, refuse subflow adds */
1957 mptcplog((LOG_ERR
, "%s state %u\n", __func__
, mp_tp
->mpt_state
),
1958 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1963 mpts
= mptcp_subflow_alloc();
1965 mptcplog((LOG_ERR
, "%s malloc subflow failed\n", __func__
),
1966 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1972 int len
= src
->sa_len
;
1974 MALLOC(mpts
->mpts_src
, struct sockaddr
*, len
, M_SONAME
,
1976 if (mpts
->mpts_src
== NULL
) {
1977 mptcplog((LOG_ERR
, "%s malloc mpts_src failed", __func__
),
1978 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1982 bcopy(src
, mpts
->mpts_src
, len
);
1985 memcpy(&mpts
->mpts_dst
, dst
, dst
->sa_len
);
1987 af
= mpts
->mpts_dst
.sa_family
;
1989 mpts
->mpts_ifscope
= ifscope
;
1991 /* create the subflow socket */
1992 if ((error
= mptcp_subflow_socreate(mpte
, mpts
, af
, &so
)) != 0)
1994 * Returning (error) and not cleaning up, because up to here
1995 * all we did is creating mpts.
1997 * And the contract is that the call to mptcp_subflow_socreate,
1998 * moves ownership of mpts to mptcp_subflow_socreate.
2003 * We may be called from within the kernel. Still need to account this
2004 * one to the real app.
2006 mptcp_update_last_owner(mpts
->mpts_socket
, mp_so
);
2009 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
2010 * -1 (SAE_CONNID_ALL).
2012 mpte
->mpte_connid_last
++;
2013 if (mpte
->mpte_connid_last
== SAE_CONNID_ALL
||
2014 mpte
->mpte_connid_last
== SAE_CONNID_ANY
)
2015 mpte
->mpte_connid_last
++;
2017 mpts
->mpts_connid
= mpte
->mpte_connid_last
;
2019 mpts
->mpts_rel_seq
= 1;
2021 /* Allocate a unique address id per subflow */
2022 mpte
->mpte_addrid_last
++;
2023 if (mpte
->mpte_addrid_last
== 0)
2024 mpte
->mpte_addrid_last
++;
2026 /* register for subflow socket read/write events */
2027 sock_setupcalls_locked(so
, mptcp_subflow_rupcall
, mpts
, mptcp_subflow_wupcall
, mpts
, 1);
2029 /* Register for subflow socket control events */
2030 sock_catchevents_locked(so
, mptcp_subflow_eupcall1
, mpts
,
2031 SO_FILT_HINT_CONNRESET
| SO_FILT_HINT_CANTRCVMORE
|
2032 SO_FILT_HINT_TIMEOUT
| SO_FILT_HINT_NOSRCADDR
|
2033 SO_FILT_HINT_IFDENIED
| SO_FILT_HINT_CONNECTED
|
2034 SO_FILT_HINT_DISCONNECTED
| SO_FILT_HINT_MPFAILOVER
|
2035 SO_FILT_HINT_MPSTATUS
| SO_FILT_HINT_MUSTRST
|
2036 SO_FILT_HINT_MPCANTRCVMORE
| SO_FILT_HINT_ADAPTIVE_RTIMO
|
2037 SO_FILT_HINT_ADAPTIVE_WTIMO
);
    /* sanity check */
    VERIFY(!(mpts->mpts_flags &
        (MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING)));

    /*
     * Indicate to the TCP subflow whether or not it should establish
     * the initial MPTCP connection, or join an existing one.  Fill
     * in the connection request structure with additional info needed
     * by the underlying TCP (to be used in the TCP options, etc.)
     */
    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
        mpts->mpts_flags |= MPTSF_INITIAL_SUB;

        if (mp_tp->mpt_state == MPTCPS_CLOSED) {
            mptcp_init_local_parms(mpte);
        }
        soisconnecting(mp_so);

        /* If fastopen is requested, set state in mpts */
        if (so->so_flags1 & SOF1_PRECONNECT_DATA)
            mpts->mpts_flags |= MPTSF_TFO_REQD;
    } else {
        if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
            mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
    }

    mpts->mpts_flags |= MPTSF_CONNECTING;

    if (af == AF_INET || af == AF_INET6) {
        char dbuf[MAX_IPv6_STR_LEN];

        mptcplog((LOG_DEBUG, "MPTCP Socket: %s "
            "mp_so 0x%llx dst %s[%d] cid %d "
            "[pending %s]\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            inet_ntop(af, ((af == AF_INET) ?
            (void *)&SIN(&mpts->mpts_dst)->sin_addr.s_addr :
            (void *)&SIN6(&mpts->mpts_dst)->sin6_addr),
            dbuf, sizeof (dbuf)), ((af == AF_INET) ?
            ntohs(SIN(&mpts->mpts_dst)->sin_port) :
            ntohs(SIN6(&mpts->mpts_dst)->sin6_port)),
            mpts->mpts_connid,
            ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
            "YES" : "NO")),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
    }

    /* connect right away if first attempt, or if join can be done now */
    if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
        error = mptcp_subflow_soconnectx(mpte, mpts);

    if (error)
        goto out_err_close;

    if (pcid)
        *pcid = mpts->mpts_connid;

    return (0);

out_err_close:
    mptcp_subflow_abort(mpts, error);

    return (error);

out_err:
    if (mpts)
        mptcp_subflow_free(mpts);

    return (error);
}
static void
mptcpstats_update(struct mptcp_itf_stats *stats, struct mptsub *mpts)
{
    int index = mptcp_get_statsindex(stats, mpts);

    if (index != -1) {
        struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

        stats[index].mpis_txbytes += inp->inp_stat->txbytes;
        stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;
    }
}
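
/*
 * Note: mptcp_get_statsindex() maps the subflow's last outbound interface to
 * a slot in the session's per-interface stats array; the byte counters are
 * folded in from the subflow's inpcb so they survive the subflow's deletion.
 */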
/*
 * Delete/remove a subflow from an MPTCP.  The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *mp_so = mptetoso(mpte);
    struct socket *so = mpts->mpts_socket;
    struct tcpcb *tp = sototcpcb(so);

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpts->mpts_mpte == mpte);
    VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
    VERIFY(mpte->mpte_numflows != 0);
    VERIFY(mp_so->so_usecount > 0);

    mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d %x error %d\n",
        __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
        mp_so->so_usecount, mp_so->so_retaincnt, mpts->mpts_connid,
        mpts->mpts_flags, mp_so->so_error),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    mptcpstats_update(mpte->mpte_itfstats, mpts);
    mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
    mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

    atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
    TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
    mpte->mpte_numflows--;
    if (mpte->mpte_active_sub == mpts)
        mpte->mpte_active_sub = NULL;

    /*
     * Drop references held by this subflow socket; there
     * will be no further upcalls made from this point.
     */
    sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
    sock_catchevents_locked(so, NULL, NULL, 0);

    mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

    mp_so->so_usecount--;       /* for subflow socket */
    mpts->mpts_mpte = NULL;
    mpts->mpts_socket = NULL;

    mptcp_subflow_remref(mpts); /* for MPTCP subflow list */
    mptcp_subflow_remref(mpts); /* for subflow socket */

    so->so_flags &= ~SOF_MP_SUBFLOW;
    tp->t_mptcb = NULL;
    tp->t_mpsub = NULL;
}
static void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *so = mpts->mpts_socket;
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    int send_dfin = 0;

    if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
        send_dfin = 1;

    if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
        (so->so_state & SS_ISCONNECTED)) {
        mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
            __func__, mpts->mpts_connid, send_dfin),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (send_dfin)
            mptcp_send_dfin(so);
        soshutdownlock(so, SHUT_WR);
    }
}
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
    struct socket *so = mpts->mpts_socket;
    struct tcpcb *tp = sototcpcb(so);

    if (mpts->mpts_flags & MPTSF_DISCONNECTED)
        return;

    mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    if (tp->t_state != TCPS_CLOSED)
        tcp_drop(tp, error);

    mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *so;
    struct mptcb *mp_tp;
    int send_dfin = 0;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    VERIFY(mpts->mpts_mpte == mpte);
    VERIFY(mpts->mpts_socket != NULL);

    if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
        return;

    mpts->mpts_flags |= MPTSF_DISCONNECTING;

    so = mpts->mpts_socket;
    mp_tp = mpte->mpte_mptcb;
    if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
        send_dfin = 1;

    if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
        (so->so_state & SS_ISCONNECTED)) {
        mptcplog((LOG_DEBUG, "MPTCP Socket %s: cid %d fin %d\n",
            __func__, mpts->mpts_connid, send_dfin),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (send_dfin)
            mptcp_send_dfin(so);
        (void) soshutdownlock(so, SHUT_RD);
        (void) soshutdownlock(so, SHUT_WR);
        (void) sodisconnectlocked(so);
    }
    /*
     * Generate a disconnect event for this subflow socket, in case
     * the lower layer doesn't do it; this is needed because the
     * subflow socket deletion relies on it.
     */
    mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Called when the associated subflow socket posted a read event.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
    struct mptsub *mpts = arg, *tmpts;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        if (!(mpte->mpte_mppcb->mpp_flags & MPP_RUPCALL))
            mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
        return;
    }

    mpte->mpte_mppcb->mpp_flags |= MPP_RUPCALL;
    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        if (mpts->mpts_socket->so_usecount == 0) {
            /* Will be removed soon by tcp_garbage_collect */
            continue;
        }

        mptcp_subflow_addref(mpts);
        mpts->mpts_socket->so_usecount++;

        mptcp_subflow_input(mpte, mpts);

        mptcp_subflow_remref(mpts);     /* ours */

        VERIFY(mpts->mpts_socket->so_usecount != 0);
        mpts->mpts_socket->so_usecount--;
    }

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_RUPCALL);
}
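
/*
 * The defer/handle pair above acts as a re-entrancy guard: while MPP_RUPCALL
 * is set, a nested read upcall only records MPP_SHOULD_RWAKEUP, and
 * mptcp_handle_deferred_upcalls() replays that wakeup once the outer call
 * unwinds.
 */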
/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
    struct socket *mp_so = mptetoso(mpte);
    struct mbuf *m = NULL;
    struct socket *so;
    int error, wakeup = 0;

    VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
    mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

    DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
        struct mptsub *, mpts);

    if (!(mpts->mpts_flags & MPTSF_CONNECTED))
        goto out;

    so = mpts->mpts_socket;

    error = sock_receive_internal(so, NULL, &m, 0, NULL);
    if (error != 0 && error != EWOULDBLOCK) {
        mptcplog((LOG_ERR, "%s: cid %d error %d\n",
            __func__, mpts->mpts_connid, error),
            MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
        if (error == ENODATA) {
            /*
             * Don't ignore ENODATA so as to discover
             * nasty middleboxes.
             */
            mp_so->so_error = ENODATA;

            wakeup = 1;
            goto out;
        }
    } else if (error == 0) {
        mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
            MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
    }

    /* In fallback, make sure to accept data on all but one subflow */
    if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        !(mpts->mpts_flags & MPTSF_ACTIVE)) {
        mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
            __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
        m_freem(m);
        goto out;
    }

    if (m != NULL) {
        if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
            mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

            mpte->mpte_used_cell = 1;
        } else {
            mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

            mpte->mpte_used_wifi = 1;
        }

        mptcp_input(mpte, m);
    }

    /* notify protocol that we drained all the data */
    if (error == 0 && m != NULL &&
        (so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
        (*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);

out:
    if (wakeup)
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
    struct mptsub *mpts = arg;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL))
            mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
        return;
    }

    mptcp_output(mpte);
}
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
static int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
    struct mptcb *mp_tp = mpte->mpte_mptcb;
    struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
    struct socket *mp_so, *so;
    struct tcpcb *tp;
    uint64_t mpt_dsn = 0, off = 0;
    int sb_cc = 0, error = 0, wakeup = 0;
    uint32_t dss_csum = 0;
    uint16_t tot_sent = 0;
    boolean_t reinjected = FALSE;

    mpte_lock_assert_held(mpte);

    mp_so = mptetoso(mpte);
    so = mpts->mpts_socket;
    tp = sototcpcb(so);

    VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
    mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

    VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
    VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
        (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
        (mpts->mpts_flags & MPTSF_TFO_REQD));
    VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

    mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
        __func__, mpts->mpts_flags, mpte->mpte_flags,
        mptcp_subflow_cwnd_space(so)),
        MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
    DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
        struct mptsub *, mpts);

    /* Remove Addr Option is not sent reliably as per I-D */
    if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
        tp->t_rem_aid = mpte->mpte_lost_aid;
        tp->t_mpflags |= TMPF_SND_REM_ADDR;
        mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
    }

    /*
     * The mbuf chains containing the metadata (as well as pointing to
     * the user data sitting at the MPTCP output queue) would then be
     * sent down to the subflow socket.
     *
     * Some notes on data sequencing:
     *
     *   a. Each mbuf must be a M_PKTHDR.
     *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
     *      in the mbuf pkthdr structure.
     *   c. Each mbuf containing the MPTCP metadata must have its
     *      pkt_flags marked with the PKTF_MPTCP flag.
     */
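
    /*
     * Illustration (values made up): a 1000-byte mapping that starts at
     * data-level sequence 5000 and subflow-relative sequence 1 travels
     * as pkthdr metadata on the mbufs built further below:
     *
     *      m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
     *      m->m_pkthdr.mp_dsn  = 5000;     (data sequence number)
     *      m->m_pkthdr.mp_rseq = 1;        (subflow-relative sequence)
     *      m->m_pkthdr.mp_rlen = 1000;     (data-level length)
     */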
    if (mpte->mpte_reinjectq)
        sb_mb = mpte->mpte_reinjectq;
    else
        sb_mb = mp_so->so_snd.sb_mb;

    if (sb_mb == NULL) {
        mptcplog((LOG_ERR, "%s: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u\n",
            __func__, (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        goto out;
    }

    VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

    if (sb_mb->m_pkthdr.mp_rlen == 0 &&
        !(so->so_state & SS_ISCONNECTED) &&
        (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
        tp->t_mpflags |= TMPF_TFO_REQUEST;
        goto zero_len_write;
    }

    mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

    /* First, drop acknowledged data */
    if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
        mptcplog((LOG_ERR, "%s: dropping data, should have been done earlier "
            "dsn %u suna %u reinject? %u\n",
            __func__, (uint32_t)mpt_dsn,
            (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        if (mpte->mpte_reinjectq) {
            mptcp_clean_reinjectq(mpte);
        } else {
            uint64_t len = 0;

            len = mp_tp->mpt_snduna - mpt_dsn;
            sbdrop(&mp_so->so_snd, (int)len);
            wakeup = 1;
        }
    }

    /* Check again because of above sbdrop */
    if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
        mptcplog((LOG_ERR, "%s send-buffer is empty\n", __func__),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        goto out;
    }

    /*
     * In degraded mode, we don't receive data acks, so force free
     * mbufs less than snd_nxt
     */
    if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
        mp_so->so_snd.sb_mb) {
        mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
        if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
            uint64_t len = 0;

            len = mp_tp->mpt_snduna - mpt_dsn;
            sbdrop(&mp_so->so_snd, (int)len);
            wakeup = 1;

            mptcplog((LOG_ERR, "%s: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
                __func__, (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        }
    }

    if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
        !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
        mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
        so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
    }

    /*
     * Adjust the top level notion of next byte used for retransmissions
     * and sending FINs.
     */
    if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna))
        mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

    /* Now determine the offset from which to start transmitting data */
    if (mpte->mpte_reinjectq)
        sb_mb = mpte->mpte_reinjectq;
    else
        sb_mb = mp_so->so_snd.sb_mb;
    if (sb_mb == NULL) {
        mptcplog((LOG_ERR, "%s send-buffer is still empty\n", __func__),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
        goto out;
    }

    if (mpte->mpte_reinjectq) {
        sb_cc = sb_mb->m_pkthdr.mp_rlen;
    } else if (flags & MPTCP_SUBOUT_PROBING) {
        sb_cc = sb_mb->m_pkthdr.mp_rlen;
    } else {
        sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

        /*
         * With TFO, there might be no data at all, thus still go into this
         * code-path here.
         */
        if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
            MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
            off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
            sb_cc -= off;
        } else {
            mptcplog((LOG_ERR, "%s this should not happen: sndnxt %u sndmax %u\n",
                __func__, (uint32_t)mp_tp->mpt_sndnxt,
                (uint32_t)mp_tp->mpt_sndmax),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);

            goto out;
        }
    }

    sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
    if (sb_cc <= 0) {
        mptcplog((LOG_ERR, "%s sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
            __func__, sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
            (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
            mptcp_subflow_cwnd_space(so)),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
    }

    sb_cc = min(sb_cc, UINT16_MAX);
    /*
     * Create a DSN mapping for the data we are about to send. It all
     * has the same mapping.
     */
    if (mpte->mpte_reinjectq)
        mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
    else
        mpt_dsn = mp_tp->mpt_snduna + off;

    mpt_mbuf = sb_mb;
    while (mpt_mbuf && mpte->mpte_reinjectq == NULL &&
        (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
        mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
        off -= mpt_mbuf->m_pkthdr.mp_rlen;
        mpt_mbuf = mpt_mbuf->m_next;
    }
    if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
        mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
            __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
            mpts->mpts_probecnt),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

    VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

    head = tail = NULL;

    while (tot_sent < sb_cc) {
        ssize_t mlen;

        mlen = mpt_mbuf->m_len;
        mlen -= off;
        mlen = min(mlen, sb_cc - tot_sent);

        if (mlen < 0) {
            mptcplog((LOG_ERR, "%s mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
                __func__, (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
                (uint32_t)off, sb_cc, tot_sent),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
            goto out;
        }

        if (mlen == 0)
            goto next;

        m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
            M_COPYM_MUST_COPY_HDR);
        if (m == NULL) {
            mptcplog((LOG_ERR, "%s m_copym_mode failed\n", __func__),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
            error = ENOBUFS;
            break;
        }

        /* Create a DSN mapping for the data (m_copym does it) */
        VERIFY(m->m_flags & M_PKTHDR);
        VERIFY(m->m_next == NULL);

        m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
        m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
        m->m_pkthdr.mp_dsn = mpt_dsn;
        m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
        m->m_pkthdr.len = mlen;

        if (head == NULL) {
            head = tail = m;
        } else {
            tail->m_next = m;
            tail = m;
        }

        tot_sent += mlen;
        off = 0;
next:
        mpt_mbuf = mpt_mbuf->m_next;
    }
    if (mpte->mpte_reinjectq) {
        reinjected = TRUE;

        if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
            struct mbuf *n = sb_mb;

            while (n) {
                n->m_pkthdr.mp_dsn += sb_cc;
                n->m_pkthdr.mp_rlen -= sb_cc;
                n = n->m_next;
            }
            m_adj(sb_mb, sb_cc);
        } else {
            mpte->mpte_reinjectq = sb_mb->m_nextpkt;
            m_freem(sb_mb);
        }
    }

    mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
        __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
        tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

    if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
        dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
            tot_sent);
    }

    /* Now, let's update rel-seq and the data-level length */
    mpts->mpts_rel_seq += tot_sent;
    m = head;
    while (m) {
        if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
            m->m_pkthdr.mp_csum = dss_csum;
        m->m_pkthdr.mp_rlen = tot_sent;
        m = m->m_next;
    }

    if (head != NULL) {
        if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
            (tp->t_tfo_stats == 0))
            tp->t_mpflags |= TMPF_TFO_REQUEST;

        error = sock_sendmbuf(so, NULL, head, 0, NULL);

        DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
            struct sockbuf *, &so->so_rcv,
            struct sockbuf *, &so->so_snd,
            struct mptses *, mpte, struct mptsub *, mpts,
            size_t, tot_sent);
    }

done_sending:
    if (error == 0 ||
        (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
        uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

        if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
            tcpstat.tcps_mp_num_probes++;
            if ((uint32_t)tot_sent < mpts->mpts_maxseg)
                mpts->mpts_probecnt += 1;
            else
                mpts->mpts_probecnt +=
                    tot_sent/mpts->mpts_maxseg;
        }

        if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
            if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
                MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
                mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
            mp_tp->mpt_sndnxt = new_sndnxt;
        }

        mptcp_cancel_timer(mp_tp, MPTT_REXMT);

        /* Must be here as mptcp_can_send_more() checks for this */
        soclearfastopen(mp_so);

        if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
            (mpts->mpts_probesoon != 0))
            mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
                __func__, mpts->mpts_connid,
                !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
                tot_sent, (int) sb_cc, mpts->mpts_probecnt,
                (tcp_now - mpts->mpts_probesoon)),
                MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

        if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
            mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

            mpte->mpte_used_cell = 1;
        } else {
            mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

            mpte->mpte_used_wifi = 1;
        }

        /*
         * Don't propagate EWOULDBLOCK - it's already taken care of
         * in mptcp_usr_send for TFO.
         */
        error = 0;
    } else {
        mptcplog((LOG_ERR, "%s: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
            __func__, mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
    }
out:

    if (wakeup)
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;

    mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
    return (error);

zero_len_write:
    /* Opting to call pru_send as no mbuf at subflow level */
    error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
        NULL, current_proc());
    goto done_sending;
}
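
/*
 * Note on zero_len_write: with TFO there may be nothing queued yet, so
 * pru_send is invoked with a NULL mbuf chain purely to kick the subflow's
 * output path (and thus the TFO request) without handing down any payload.
 */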
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
    struct mbuf *n, *prev = NULL;

    mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
        __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
        m->m_pkthdr.mp_rseq),
        MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

    n = mpte->mpte_reinjectq;

    /* First, look for an mbuf n, whose data-sequence-number is bigger or
     * equal than m's sequence number.
     */
    while (n) {
        if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn))
            break;

        prev = n;

        n = n->m_nextpkt;
    }

    if (n) {
        /* m is already fully covered by the next mbuf in the queue */
        if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
            n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
            mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
                __func__, n->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            goto dont_queue;
        }

        /* m is covering the next mbuf entirely, thus we remove this guy */
        if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
            struct mbuf *tmp = n->m_nextpkt;

            mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
                __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
                n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

            m->m_nextpkt = NULL;
            if (prev == NULL)
                mpte->mpte_reinjectq = tmp;
            else
                prev->m_nextpkt = tmp;

            m_freem(n);
            n = tmp;
        }
    }

    if (prev) {
        /* m is already fully covered by the previous mbuf in the queue */
        if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
            mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
                __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            goto dont_queue;
        }
    }

    if (prev == NULL)
        mpte->mpte_reinjectq = m;
    else
        prev->m_nextpkt = m;

    m->m_nextpkt = n;

    return;

dont_queue:
    m_freem(m);
    return;
}
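
/*
 * Invariant after insertion: mpte_reinjectq stays sorted by mp_dsn and free
 * of segments that are fully contained in a neighbor, so the output path can
 * always transmit the head of the queue as-is.
 */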
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
    struct socket *mp_so = mptetoso(mpte);
    struct mbuf *m;

    m = mp_so->so_snd.sb_mb;

    while (m) {
        /* If this segment covers what we are looking for, return it. */
        if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
            MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn))
            return (m);

        /* Segment is no more in the queue */
        if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn))
            return (NULL);

        m = m->m_next;
    }

    return (NULL);
}
static struct mbuf *
mptcp_copy_mbuf_list(struct mbuf *m, int len)
{
    struct mbuf *top = NULL, *tail = NULL;
    uint64_t dsn;
    uint32_t dlen, rseq;

    dsn = m->m_pkthdr.mp_dsn;
    dlen = m->m_pkthdr.mp_rlen;
    rseq = m->m_pkthdr.mp_rseq;

    while (len > 0) {
        struct mbuf *n;

        VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

        n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
        if (n == NULL) {
            mptcplog((LOG_ERR, "%s m_copym_mode returned NULL\n", __func__),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
            goto err;
        }

        VERIFY(n->m_flags & M_PKTHDR);
        VERIFY(n->m_next == NULL);
        VERIFY(n->m_pkthdr.mp_dsn == dsn);
        VERIFY(n->m_pkthdr.mp_rlen == dlen);
        VERIFY(n->m_pkthdr.mp_rseq == rseq);
        VERIFY(n->m_len == m->m_len);

        n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

        if (top == NULL)
            top = n;

        if (tail != NULL)
            tail->m_next = n;

        tail = n;

        len -= m->m_len;
        m = m->m_next;
    }

    return (top);

err:
    if (top)
        m_freem(top);

    return (NULL);
}
void
mptcp_reinject_mbufs(struct socket *so)
{
    struct tcpcb *tp = sototcpcb(so);
    struct mptsub *mpts = tp->t_mpsub;
    struct mptcb *mp_tp = tptomptp(tp);
    struct mptses *mpte = mp_tp->mpt_mpte;
    struct sockbuf *sb = &so->so_snd;
    struct mbuf *m;

    m = sb->sb_mb;
    while (m) {
        struct mbuf *n = m->m_next, *orig = m;

        mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
            __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
            m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
            MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

        VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

        if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ)
            goto next;

        /* Has it all already been acknowledged at the data-level? */
        if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen))
            goto next;

        /* Part of this has already been acknowledged - lookup in the
         * MPTCP-socket for the segment.
         */
        if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
            m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
            if (m == NULL)
                goto next;
        }

        /* Copy the mbuf with headers (aka, DSN-numbers) */
        m = mptcp_copy_mbuf_list(m, m->m_pkthdr.mp_rlen);
        if (m == NULL)
            break;

        VERIFY(m->m_nextpkt == NULL);

        /* Now, add to the reinject-queue, eliminating overlapping
         * segments
         */
        mptcp_add_reinjectq(mpte, m);

        orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
        /* mp_rlen can cover multiple mbufs, so advance to the end of it. */
        while (n) {
            VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

            if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn)
                break;

            n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
            n = n->m_next;
        }

        m = n;
    }
}
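
/*
 * Note that mptcp_reinject_mbufs() walks the subflow's send buffer, not the
 * MPTCP-level one: whatever TCP still holds unacked at the data level is
 * copied (with its DSN metadata) onto the session's reinject queue, while
 * PKTF_MPTCP_REINJ on the originals keeps a segment from being queued twice.
 */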
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
    struct mptcb *mp_tp = mpte->mpte_mptcb;

    mpte_lock_assert_held(mpte);

    while (mpte->mpte_reinjectq) {
        struct mbuf *m = mpte->mpte_reinjectq;

        if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
            MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna))
            break;

        mpte->mpte_reinjectq = m->m_nextpkt;
        m->m_nextpkt = NULL;
        m_freem(m);
    }
}
/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
    struct mptsub *mpts = arg;
    struct mptses *mpte = mpts->mpts_mpte;

    VERIFY(mpte != NULL);
    mpte_lock_assert_held(mpte);

    if ((mpts->mpts_evctl & events) == events)
        return;

    mpts->mpts_evctl |= events;

    if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
        mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
        return;
    }

    mptcp_subflow_workloop(mpte);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint)
{
    ev_ret_t ret = MPTS_EVRET_OK;
    int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
        sizeof(mpsub_ev_entry_tbl[0]);

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    /* bail if there's nothing to process */
    if (!mpts->mpts_evctl)
        return (ret);

    if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST|
        SO_FILT_HINT_CANTSENDMORE|SO_FILT_HINT_TIMEOUT|
        SO_FILT_HINT_NOSRCADDR|SO_FILT_HINT_IFDENIED|
        SO_FILT_HINT_DISCONNECTED)) {
        mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
    }

    DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
        struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

    mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
        mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

    /*
     * Process all the socket filter hints and reset the hint
     * once it is handled
     */
    for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
        /*
         * Always execute the DISCONNECTED event, because it will wakeup
         * the app.
         */
        if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
            (ret >= MPTS_EVRET_OK ||
            mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
            mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
            ev_ret_t error =
                mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
            ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
        }
    }

    /*
     * We should be getting only events specified via sock_catchevents(),
     * so loudly complain if we have any unprocessed one(s).
     */
    if (mpts->mpts_evctl || ret < MPTS_EVRET_OK)
        mptcplog((LOG_WARNING, "%s%s: cid %d evret %s (%d) unhandled events=%b\n", __func__,
            (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
            mpts->mpts_connid,
            mptcp_evret2str(ret), ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
    else
        mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
            mpts->mpts_evctl, SO_FILT_HINT_BITS),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

    return (ret);
}
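
/*
 * Dispatch note: mpsub_ev_entry_tbl pairs each SO_FILT_HINT_* mask with a
 * handler.  A hint is cleared before its handler runs, and the strongest
 * return value (e.g. MPTS_EVRET_DELETE) wins, so the workloop can decide
 * the subflow's fate after all pending events have been looked at.
 */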
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
        mpts->mpts_connid, event),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * We got an event for this subflow that might need to be propagated,
     * based on the state of the MPTCP connection.
     */
    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
        ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
        mp_so->so_error = so->so_error;
        *p_mpsofilt_hint |= event;
    }

    return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(p_mpsofilt_hint, event)
    struct socket *mp_so;
    struct tcpcb *tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */

    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

    /*
     * This overwrites any previous mpte_lost_aid to avoid storing
     * too much state when the typical case has only two subflows.
     */
    mpte->mpte_flags |= MPTE_SND_REM_ADDR;
    mpte->mpte_lost_aid = tp->t_local_aid;

    mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * The subflow connection has lost its source address.
     */
    mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

    if (mp_so->so_flags & SOF_NOADDRAVAIL)
        mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

    return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    mp_tp = mpte->mpte_mptcb;

    mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * We got a Data FIN for the MPTCP connection.
     * The FIN may arrive with data. The data is handed up to the
     * mptcp socket and the user is notified so that it may close
     * the socket if needed.
     */
    if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT)
        *p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;

    return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct mptsub *mpts_alt = NULL;
    struct socket *alt_so = NULL;
    struct socket *mp_so;
    int altpath_exists = 0;

    mpte_lock_assert_held(mpte);
    mp_so = mptetoso(mpte);
    mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
        (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    mptcp_reinject_mbufs(mpts->mpts_socket);

    mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
    /*
     * If there is no alternate eligible subflow, ignore the
     * failover hint.
     */
    if (mpts_alt == NULL) {
        mptcplog((LOG_WARNING, "%s: no alternate path\n", __func__),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

        goto done;
    }

    altpath_exists = 1;
    alt_so = mpts_alt->mpts_socket;
    if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
        /* All data acknowledged and no RTT spike */
        if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
            mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
        } else {
            /* no alternate path available */
            altpath_exists = 0;
        }
    }

    if (altpath_exists) {
        mpts_alt->mpts_flags |= MPTSF_ACTIVE;

        mpte->mpte_active_sub = mpts_alt;
        mpts->mpts_flags |= MPTSF_FAILINGOVER;
        mpts->mpts_flags &= ~MPTSF_ACTIVE;

        mptcplog((LOG_NOTICE, "%s: switched from %d to %d\n",
            __func__, mpts->mpts_connid, mpts_alt->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

        mptcpstats_inc_switch(mpte, mpts);
    } else {
        mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
            mpts->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
        mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
    }

    return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);

    mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
        mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    /*
     * The subflow connection cannot use the outgoing interface, let's
     * close this subflow.
     */
    mptcp_subflow_abort(mpts, EPERM);

    mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

    return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct inpcb *inp;
    struct tcpcb *tp;
    struct mptcb *mp_tp;
    int af;
    boolean_t mpok = FALSE;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);

    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;
    tp = sototcpcb(so);
    af = mpts->mpts_dst.sa_family;

    if (mpts->mpts_flags & MPTSF_CONNECTED)
        return (MPTS_EVRET_OK);

    if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
        (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
        if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
            (so->so_state & SS_ISCONNECTED)) {
            mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
                __func__, mpts->mpts_connid),
                MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
            (void) soshutdownlock(so, SHUT_RD);
            (void) soshutdownlock(so, SHUT_WR);
            (void) sodisconnectlocked(so);
        }
        return (MPTS_EVRET_OK);
    }

    /*
     * The subflow connection has been connected.  Find out whether it
     * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
     *
     *   a. If MPTCP connection is not yet established, then this must be
     *      the first subflow connection.  If MPTCP failed to negotiate,
     *      fallback to regular TCP by degrading this subflow.
     *
     *   b. If MPTCP connection has been established, then this must be
     *      one of the subsequent subflow connections.  If MPTCP failed
     *      to negotiate, disconnect the connection.
     *
     * Right now, we simply unblock any waiters at the MPTCP socket layer
     * if the MPTCP connection has not been established.
     */

    if (so->so_state & SS_ISDISCONNECTED) {
        /*
         * With MPTCP joins, a connection is connected at the subflow
         * level, but the 4th ACK from the server elevates the MPTCP
         * subflow to connected state. So there is a small window
         * where the subflow could get disconnected before the
         * connected event is processed.
         */
        return (MPTS_EVRET_OK);
    }

    if (mpts->mpts_flags & MPTSF_TFO_REQD)
        mptcp_drop_tfo_data(mpte, mpts);

    mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
    mpts->mpts_flags |= MPTSF_CONNECTED;

    if (tp->t_mpflags & TMPF_MPTCP_TRUE)
        mpts->mpts_flags |= MPTSF_MP_CAPABLE;

    tp->t_mpflags &= ~TMPF_TFO_REQUEST;

    /* get/verify the outbound interface */
    inp = sotoinpcb(so);

    mpts->mpts_maxseg = tp->t_maxseg;

    mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
        ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
        ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
        (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

    mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
        mp_tp->mpt_state = MPTCPS_ESTABLISHED;
        mpte->mpte_associd = mpts->mpts_connid;
        DTRACE_MPTCP2(state__change,
            struct mptcb *, mp_tp,
            uint32_t, 0 /* event */);

        if (SOCK_DOM(so) == AF_INET) {
            in_getsockaddr_s(so, &mpte->__mpte_src_v4);
        } else {
            in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
        }

        /* case (a) above */
        if (!mpok) {
            tcpstat.tcps_mpcap_fallback++;

            tp->t_mpflags |= TMPF_INFIN_SENT;
            mptcp_notify_mpfail(so);
        } else {
            if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
                mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
                tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
            } else {
                mpts->mpts_flags |= MPTSF_PREFERRED;
            }
            mpts->mpts_flags |= MPTSF_ACTIVE;

            mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
            mpte->mpte_nummpcapflows++;

            mptcp_check_subflows_and_add(mpte);

            if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
                mpte->mpte_initial_cell = 1;

            mpte->mpte_handshake_success = 1;
        }

        mp_tp->mpt_sndwnd = tp->snd_wnd;
        mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
        mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
        soisconnected(mp_so);

        mptcplog((LOG_DEBUG, "%s: MPTCPS_ESTABLISHED for mp_so 0x%llx mpok %u\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpok),
            MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
    } else if (mpok) {
        /*
         * case (b) above
         * In case of additional flows, the MPTCP socket is not
         * MPTSF_MP_CAPABLE until an ACK is received from server
         * for 3-way handshake.  TCP would have guaranteed that this
         * is an MPTCP subflow.
         */
        if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
            !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
            mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
            tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
            mpts->mpts_flags &= ~MPTSF_PREFERRED;
        } else {
            mpts->mpts_flags |= MPTSF_PREFERRED;
        }

        mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
        mpte->mpte_nummpcapflows++;

        mpts->mpts_rel_seq = 1;

        mptcp_check_subflows_and_remove(mpte);
    } else {
        unsigned int i;

        /* Mark this interface as non-MPTCP */
        for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
            struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

            if (inp->inp_last_outifp->if_index == info->ifindex) {
                info->no_mptcp_support = 1;
                break;
            }
        }

        tcpstat.tcps_join_fallback++;
        if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
            tcpstat.tcps_mptcp_cell_proxy++;
        else
            tcpstat.tcps_mptcp_wifi_proxy++;

        soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

        return (MPTS_EVRET_OK);
    }

    /* This call, just to "book" an entry in the stats-table for this ifindex */
    mptcp_get_statsindex(mpte->mpte_itfstats, mpts);

    mptcp_output(mpte);

    return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
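
/*
 * Note on the join-failure path above: a subsequent subflow that failed to
 * negotiate MPTCP is useless to the session (unlike the first subflow, which
 * can fall back to plain TCP), so it is reset via SO_FILT_HINT_MUSTRST and
 * its interface is marked no_mptcp_support.
 */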
/*
 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
        __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
        !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
        !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

    if (mpts->mpts_flags & MPTSF_DISCONNECTED)
        return (MPTS_EVRET_DELETE);

    mpts->mpts_flags |= MPTSF_DISCONNECTED;

    /* The subflow connection has been disconnected. */

    if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
        mpte->mpte_nummpcapflows--;
        if (mpte->mpte_active_sub == mpts) {
            mpte->mpte_active_sub = NULL;
            mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
                __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
        }
        mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
    }

    if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
        ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE)) ||
        (sototcpcb(so)->t_mpflags & TMPF_FASTCLOSERCV)) {
        mptcp_drop(mpte, mp_tp, so->so_error);
    }

    /*
     * Clear flags that are used by getconninfo to return state.
     * Retain flags like MPTSF_DELETEOK for internal purposes.
     */
    mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING|
        MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED|
        MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED|MPTSF_ACTIVE);

    return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;
    ev_ret_t ret = MPTS_EVRET_OK;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
        mpts->mpts_flags |= MPTSF_MP_CAPABLE;
    else
        mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;

    if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
        if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
            goto done;
        mpts->mpts_flags |= MPTSF_MP_DEGRADED;
    } else {
        mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
    }

    if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY)
        mpts->mpts_flags |= MPTSF_MP_READY;
    else
        mpts->mpts_flags &= ~MPTSF_MP_READY;

    if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
        mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
        mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
    }

    if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
        VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
        ret = MPTS_EVRET_DISCONNECT_FALLBACK;
    } else if (mpts->mpts_flags & MPTSF_MP_READY) {
        mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
        ret = MPTS_EVRET_CONNECT_PENDING;
    }

    mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d mptsf=%b\n",
        __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
        mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
        mpts->mpts_flags, MPTSF_BITS),
        MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

done:
    return (ret);
}
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    struct socket *mp_so, *so;
    struct mptcb *mp_tp;
    boolean_t is_fastclose;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    mp_tp = mpte->mpte_mptcb;
    so = mpts->mpts_socket;

    /* We got an invalid option or a fast close */
    struct tcptemp *t_template;
    struct inpcb *inp = sotoinpcb(so);
    struct tcpcb *tp = NULL;

    tp = intotcpcb(inp);
    so->so_error = ECONNABORTED;

    is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

    t_template = tcp_maketemplate(tp);
    if (t_template) {
        struct tcp_respond_args tra;

        bzero(&tra, sizeof(tra));
        if (inp->inp_flags & INP_BOUND_IF)
            tra.ifscope = inp->inp_boundifp->if_index;
        else
            tra.ifscope = IFSCOPE_NONE;
        tra.awdl_unrestricted = 1;

        tcp_respond(tp, t_template->tt_ipgen,
            &t_template->tt_t, (struct mbuf *)NULL,
            tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
        (void) m_free(dtom(t_template));
        mptcplog((LOG_DEBUG, "MPTCP Events: "
            "%s: mp_so 0x%llx cid %d \n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mpts->mpts_connid),
            MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
    }
    mptcp_subflow_abort(mpts, ECONNABORTED);

    if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
        *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

        if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
            mp_so->so_error = ECONNABORTED;
        else
            mp_so->so_error = ECONNRESET;

        /*
         * mptcp_drop is being called after processing the events, to fully
         * close the MPTCP connection
         */
    }

    if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS)
        mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;

    return (MPTS_EVRET_DELETE);
}
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_READ_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            TCPS_HAVERCVDFIN2(tp->t_state))
            continue;

        if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active)
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;

    return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
    uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
    bool found_active = false;

    mpts->mpts_flags |= MPTSF_WRITE_STALL;

    TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
        struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

        if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
            tp->t_state > TCPS_CLOSE_WAIT)
            continue;

        if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
            found_active = true;
            break;
        }
    }

    if (!found_active)
        *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;

    return (MPTS_EVRET_OK);
}
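
/*
 * Both adaptive-timeout handlers share the same shape: flag the stalled
 * subflow, then scan all subflows; only if no established subflow is still
 * making progress is the hint propagated up to the application.
 */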
static const char *
mptcp_evret2str(ev_ret_t ret)
{
    const char *c = "UNKNOWN";

    switch (ret) {
    case MPTS_EVRET_DELETE:
        c = "MPTS_EVRET_DELETE";
        break;
    case MPTS_EVRET_CONNECT_PENDING:
        c = "MPTS_EVRET_CONNECT_PENDING";
        break;
    case MPTS_EVRET_DISCONNECT_FALLBACK:
        c = "MPTS_EVRET_DISCONNECT_FALLBACK";
        break;
    case MPTS_EVRET_OK:
        c = "MPTS_EVRET_OK";
        break;
    default:
        break;
    }
    return (c);
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
    struct socket *mp_so, *so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
    mpte_lock_assert_held(mpte);

    mp_so = mptetoso(mpte);
    so = mpts->mpts_socket;

    if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
        mpo->mpo_level == SOL_SOCKET &&
        mpo->mpo_name == SO_MARK_CELLFALLBACK) {
        mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %u lastcell? %d boundcell? %d\n",
            __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable(),
            sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
            mpts->mpts_ifscope != IFSCOPE_NONE ? IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]) : -1),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        /*
         * When we open a new subflow, mark it as cell fallback, if
         * this subflow goes over cell.
         *
         * (except for first-party apps)
         */

        if (mpte->mpte_flags & MPTE_FIRSTPARTY)
            return (0);

        if (sotoinpcb(so)->inp_last_outifp &&
            !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp))
            return (0);

        /*
         * This here is an OR, because if the app is not binding to the
         * interface, then it definitely is not a cell-fallback
         * connection.
         */
        if (mpts->mpts_ifscope == IFSCOPE_NONE ||
            !IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]))
            return (0);
    }

    mpo->mpo_flags &= ~MPOF_INTERIM;

    bzero(&sopt, sizeof (sopt));
    sopt.sopt_dir = SOPT_SET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof (int);
    sopt.sopt_p = kernproc;

    error = sosetoptlock(so, &sopt, 0);    /* already locked */
    if (error == 0) {
        mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s "
            "val %d set successful\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
    } else {
        mptcplog((LOG_ERR, "%s:mp_so 0x%llx sopt %s "
            "val %d set error %d\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval, error),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
    }
    return (error);
}
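
/*
 * Note: options flagged MPOF_SUBFLOW_OK are presumably recorded on the MP
 * socket as struct mptopt and replayed onto each subflow through this
 * function, so a late-joining subflow ends up with the same options as the
 * first one.
 */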
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
    struct mptopt *mpo)
{
    struct socket *mp_so;
    struct sockopt sopt;
    int error;

    VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    mp_so = mptetoso(mpte);

    bzero(&sopt, sizeof (sopt));
    sopt.sopt_dir = SOPT_GET;
    sopt.sopt_level = mpo->mpo_level;
    sopt.sopt_name = mpo->mpo_name;
    sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
    sopt.sopt_valsize = sizeof (int);
    sopt.sopt_p = kernproc;

    error = sogetoptlock(so, &sopt, 0);    /* already locked */
    if (error == 0) {
        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx sopt %s "
            "val %d get successful\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
            mpo->mpo_intval),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
    } else {
        mptcplog((LOG_ERR, "MPTCP Socket: "
            "%s: mp_so 0x%llx sopt %s get error %d\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
    }
    return (error);
}
/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed.  The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
    struct mppcb *mpp, *tmpp;
    uint32_t active = 0;

    LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

    TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
        struct socket *mp_so;
        struct mptses *mpte;
        struct mptcb *mp_tp;

        VERIFY(mpp->mpp_flags & MPP_ATTACHED);
        mp_so = mpp->mpp_socket;
        VERIFY(mp_so != NULL);
        mpte = mptompte(mpp);
        VERIFY(mpte != NULL);
        mp_tp = mpte->mpte_mptcb;
        VERIFY(mp_tp != NULL);

        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx found "
            "(u=%d,r=%d,s=%d)\n", __func__,
            (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
            mp_so->so_retaincnt, mpp->mpp_state),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        if (!mpte_try_lock(mpte)) {
            mptcplog((LOG_DEBUG, "MPTCP Socket: "
                "%s: mp_so 0x%llx skipped lock "
                "(u=%d,r=%d)\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            active++;
            continue;
        }

        /* check again under the lock */
        if (mp_so->so_usecount > 0) {
            boolean_t wakeup = FALSE;
            struct mptsub *mpts, *tmpts;

            mptcplog((LOG_DEBUG, "MPTCP Socket: "
                "%s: mp_so 0x%llx skipped usecount "
                "[u=%d,r=%d] %d %d\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt,
                mp_tp->mpt_gc_ticks,
                mp_tp->mpt_state),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

            if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
                if (mp_tp->mpt_gc_ticks > 0)
                    mp_tp->mpt_gc_ticks--;
                if (mp_tp->mpt_gc_ticks == 0) {
                    wakeup = TRUE;
                }
            }
            if (wakeup) {
                TAILQ_FOREACH_SAFE(mpts,
                    &mpte->mpte_subflows, mpts_entry, tmpts) {
                    mptcp_subflow_eupcall1(mpts->mpts_socket,
                        mpts, SO_FILT_HINT_DISCONNECTED);
                }
            }
            mpte_unlock(mpte);
            active++;
            continue;
        }

        if (mpp->mpp_state != MPPCB_STATE_DEAD) {
            panic("MPTCP Socket: %s: mp_so 0x%llx skipped state "
                "[u=%d,r=%d,s=%d]\n", __func__,
                (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
                mp_so->so_usecount, mp_so->so_retaincnt,
                mpp->mpp_state);
        }

        if (mp_tp->mpt_state == MPTCPS_TIME_WAIT)
            mptcp_close(mpte, mp_tp);

        mptcp_session_destroy(mpte);

        mptcplog((LOG_DEBUG, "MPTCP Socket: "
            "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
            __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
            mp_so->so_usecount, mp_so->so_retaincnt),
            MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

        DTRACE_MPTCP4(dispose, struct socket *, mp_so,
            struct sockbuf *, &mp_so->so_rcv,
            struct sockbuf *, &mp_so->so_snd,
            struct mppcb *, mpp);

        mp_pcbdispose(mpp);
        sodealloc(mp_so);
    }

    return (active);
}
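
/*
 * The value returned above counts the sessions that could not be reclaimed
 * on this pass (lock contended or still referenced); per the contract in the
 * header comment, a non-zero count keeps the callout repeating until every
 * dead session has been disposed of.
 */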
/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
    struct socket *mp_so;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mptcb == mp_tp);
    mp_so = mptetoso(mpte);

    DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
        uint32_t, 0 /* event */);

    if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0)
        errno = mp_tp->mpt_softerror;
    mp_so->so_error = errno;

    return (mptcp_close(mpte, mp_tp));
}
/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
    struct socket *mp_so = NULL;
    struct mptsub *mpts = NULL, *tmpts = NULL;

    mpte_lock_assert_held(mpte);    /* same as MP socket lock */
    VERIFY(mpte->mpte_mptcb == mp_tp);
    mp_so = mptetoso(mpte);

    mp_tp->mpt_state = MPTCPS_TERMINATE;

    mptcp_freeq(mp_tp);

    soisdisconnected(mp_so);

    /* Clean up all subflows */
    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        mptcp_subflow_disconnect(mpte, mpts);
    }

    return (NULL);
}

void
mptcp_notify_close(struct socket *so)
{
    soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * MPTCP workloop.
 */
void
mptcp_subflow_workloop(struct mptses *mpte)
{
    struct socket *mp_so;
    struct mptsub *mpts, *tmpts;
    boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
    uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;

    mpte_lock_assert_held(mpte);
    VERIFY(mpte->mpte_mppcb != NULL);
    mp_so = mptetoso(mpte);
    VERIFY(mp_so != NULL);

    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        ev_ret_t ret;

        if (mpts->mpts_socket->so_usecount == 0) {
            /* Will be removed soon by tcp_garbage_collect */
            continue;
        }

        mptcp_subflow_addref(mpts);
        mpts->mpts_socket->so_usecount++;

        ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

        /*
         * If MPTCP socket is closed, disconnect all subflows.
         * This will generate a disconnect event which will
         * be handled during the next iteration, causing a
         * non-zero error to be returned above.
         */
        if (mp_so->so_flags & SOF_PCBCLEARING)
            mptcp_subflow_disconnect(mpte, mpts);

        switch (ret) {
        case MPTS_EVRET_OK:
            /* nothing to do */
            break;
        case MPTS_EVRET_DELETE:
            mptcp_subflow_soclose(mpts);
            break;
        case MPTS_EVRET_CONNECT_PENDING:
            connect_pending = TRUE;
            break;
        case MPTS_EVRET_DISCONNECT_FALLBACK:
            disconnect_fallback = TRUE;
            break;
        default:
            mptcplog((LOG_DEBUG,
                "MPTCP Socket: %s: mptcp_subflow_events "
                "returned invalid value: %d\n", __func__,
                ret),
                MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
            break;
        }
        mptcp_subflow_remref(mpts);     /* ours */

        VERIFY(mpts->mpts_socket->so_usecount != 0);
        mpts->mpts_socket->so_usecount--;
    }

    if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
        VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

        soevent(mp_so, mpsofilt_hint_mask);
    }

    if (!connect_pending && !disconnect_fallback)
        return;

    TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
        if (disconnect_fallback) {
            struct socket *so = NULL;
            struct inpcb *inp = NULL;
            struct tcpcb *tp = NULL;

            if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
                continue;

            mpts->mpts_flags |= MPTSF_MP_DEGRADED;

            if (mpts->mpts_flags & (MPTSF_DISCONNECTING|
                MPTSF_DISCONNECTED|MPTSF_CONNECT_PENDING))
                continue;

            so = mpts->mpts_socket;

            /*
             * The MPTCP connection has degraded to a fallback
             * mode, so there is no point in keeping this subflow
             * regardless of its MPTCP-readiness state, unless it
             * is the primary one which we use for fallback.  This
             * assumes that the subflow used for fallback is the
             * ACTIVE one.
             */

            inp = sotoinpcb(so);
            tp = intotcpcb(inp);
            tp->t_mpflags &=
                ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
            tp->t_mpflags |= TMPF_TCP_FALLBACK;

            if (mpts->mpts_flags & MPTSF_ACTIVE) {
                continue;
            }
            tp->t_mpflags |= TMPF_RESET;
            soevent(so, SO_FILT_HINT_MUSTRST);
        } else if (connect_pending) {
            /*
             * The MPTCP connection has progressed to a state
             * where it supports full multipath semantics; allow
             * additional joins to be attempted for all subflows
             * that are in the PENDING state.
             */
            if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
                int error = mptcp_subflow_soconnectx(mpte, mpts);

                if (error)
                    mptcp_subflow_abort(mpts, error);
            }
        }
    }
}
/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0)
		mp_so->so_usecount++;
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return (0);
}

/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock_assert_held(mpp);

	if (refcount != 0)
		mp_so->so_usecount--;

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	mpp_unlock(mpp);

	return (0);
}

/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return (mpp_getlock(mpp, flags));
}
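
/*
 * Note on the lock-history bookkeeping above: lock_lr/unlock_lr are small
 * ring buffers of caller return addresses, SO_LCKDBG_MAX entries deep.
 * Each acquire/release records its caller and advances the index modulo
 * SO_LCKDBG_MAX (with SO_LCKDBG_MAX of 4 the index cycles 0, 1, 2, 3, 0,
 * overwriting the oldest entry first), which is what lets
 * solockhistory_nr() dump the most recent lock transitions when one of
 * the panics above fires.
 */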
/*
 * MPTCP Join support
 */

void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0)
		goto try_again;
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}

static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = NULL;
	int found = 0;

	tp = sototcpcb(so);
	if (tp == NULL)
		return;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
}

void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand)
				*lrand = sauth_entry->msae_laddr_rand;
			if (rrand)
				*rrand = sauth_entry->msae_raddr_rand;
			break;
		}
	}
}

void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched"
				    " address ids %d %d \n", __func__, raddr_id,
				    sauth_entry->msae_raddr_id),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				mptcplog((LOG_ERR, "MPTCP Socket: "
				    "%s: dup SYN_ACK %d %d \n",
				    __func__, raddr_rand,
				    sauth_entry->msae_raddr_rand),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			return;
		}
	}
}
/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof (mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}

void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};	/* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};	/* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is 512 block length, so no need to compute hash */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof (key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof (data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof (key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
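
/*
 * A worked layout of the buffers above, for reference: key_ipad/key_opad
 * are 64 bytes each (8 x 64-bit words), matching SHA1's 512-bit block
 * size.  Bytes 0..15 hold key1|key2, bytes 16..63 stay zero, and the
 * whole block is XOR'd with the HMAC inner pad 0x36 / outer pad 0x5c
 * (RFC 2104).  Since the key already spans exactly one block, the usual
 * "hash the key first if it is longer than a block" step can be skipped,
 * which is what the "Key is 512 block length" comment refers to.  The
 * message is the two 32-bit randoms (8 bytes), so the inner hash covers
 * 72 bytes in total.
 */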
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t lrand, rrand;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
	    digest);
}

/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof (u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof (u_int32_t));
	return;
}

static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof (u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */

	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
	return;
}
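
/*
 * Byte layout, for reference: with a 20-byte SHA1 digest d[0..19], the
 * token is d[0..3] (copied as-is by the bcopy above) and the IDSN is
 * built from d[12..19] with the byte order swapped, so that on the
 * little-endian targets this code runs on, d[19] becomes the least
 * significant byte of the 64-bit IDSN.  This matches RFC 6824's use of
 * the most significant 32 bits of the hash as the token and the least
 * significant 64 bits as the initial data sequence number.
 */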
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum)
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}

static void
mptcp_init_local_parms(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[SHA1_RESULTLEN];

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

	mptcp_generate_token(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}

int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char remote_digest[SHA1_RESULTLEN];
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0)
		return (-1);

	/* Setup local and remote tokens and Initial DSNs */
	mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof (mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;

	return (0);
}
void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp)
		return;

	tp = intotcpcb(inp);
	if (!tp)
		return;

	if (!(tp->t_mpflags & TMPF_RESET))
		tp->t_mpflags |= TMPF_SEND_DFIN;
}

/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL)
		return;

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}
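
/*
 * Example of the stamping done above: if mpt_sndmax is 1000 and a chain
 * of three packet-header mbufs with packet lengths 100, 200 and 50 is
 * enqueued, the mbufs get mp_dsn 1000, 1100 and 1300 with mp_rlen 100,
 * 200 and 50, and mpt_sndmax ends up at 1350.  PKTF_MPSO tags the
 * mapping as having been generated at the MPTCP socket itself rather
 * than copied from a received DSS option.
 */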
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	if (!m || len == 0)
		return;

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_len;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means, we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * really acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	} else if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
}

void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0)
				m->m_pkthdr.mp_dsn += len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0)
					m->m_pkthdr.mp_rseq += len;
			}
			mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
			    __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
			    m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fallback to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}

void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;
	int off_orig = off;

	VERIFY(off >= 0);

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(m);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
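
/*
 * Walk-through of the lookup above: with a send buffer made of 500-byte
 * mbufs, a caller asking for off = 1200 walks past the first two mbufs
 * (off drops to 700, then 200) and stops in the third, returning the
 * dsn/rseq/len/csum stamped on that mbuf's packet header; off_orig is
 * kept only for the debug log.
 */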
/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc().
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data, tcp_input() calls m_adj() which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed after trim, because data can be in
 * the reassembly queue for a while and the DSN option info in tp will be
 * overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap().
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
{
	VERIFY(m->m_flags & M_PKTHDR);
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		if (tp->t_rcv_map.mpt_dfin)
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;

		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	} else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		if (th->th_flags & TH_FIN)
			m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
	}
}

int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
    uint32_t rseq, uint16_t dlen)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0)
		return (0);

	if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		if (off && (dsn != m->m_pkthdr.mp_dsn ||
		    rseq != m->m_pkthdr.mp_rseq ||
		    dlen != m->m_pkthdr.mp_rlen)) {
			mptcplog((LOG_ERR, "%s: Received incorrect second mapping: %llu - %llu , %u - %u, %u - %u\n",
			    __func__, dsn, m->m_pkthdr.mp_dsn,
			    rseq, m->m_pkthdr.mp_rseq,
			    dlen, m->m_pkthdr.mp_rlen),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
			return (-1);
		}
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else {
		if (!(mpts->mpts_flags & MPTSF_CONFIRMED)) {
			/* data arrived without a DSS option mapping */

			/* initial subflow can fallback right after SYN handshake */
			mptcp_notify_mpfail(so);
		}
	}

	mpts->mpts_flags |= MPTSF_CONFIRMED;

	return (0);
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL)
		return;

	tp = intotcpcb(inp);
	if (tp == NULL)
		return;

	if (so->so_flags & SOF_MP_TRYFAILOVER)
		return;

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}

/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	u_int64_t dsn;
	int off = 0;
	u_int32_t datalen;

	if (m == NULL)
		return (-1);

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = dsn_fail - dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
			    dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			return (0);
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */

	mptcplog((LOG_ERR, "MPTCP Sender: "
	    "%s: %llu not found \n", __func__, dsn_fail),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
	return (-1);
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * seq has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST)
		mdss_subflow_off--;

	if (off < mdss_subflow_off)
		printf("%s off %d mdss_subflow_off %d mdss_subflow_seq %u iss %u suna %u\n", __func__,
		    off, mdss_subflow_off, mdss_subflow_seq, mpts->mpts_iss, tp->snd_una);
	VERIFY(off >= mdss_subflow_off);

	mptcplog((LOG_DEBUG, "%s dlen %u off %d sub_off %d sub_seq %u iss %u suna %u\n",
	    __func__, mdss_data_len, off, mdss_subflow_off, mdss_subflow_seq,
	    mpts->mpts_iss, tp->snd_una), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	return (mdss_data_len - (off - mdss_subflow_off));
}
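
/*
 * Numeric example for the computation above: suppose the mapping returned
 * for off is (sub_seq 100, dlen 1000), the subflow's initial send sequence
 * mpts_iss is 4000 and snd_una is 4050.  Then mdss_subflow_off is
 * (100 + 4000) - 4050 = 50, i.e. the mapping starts 50 bytes into the
 * send buffer.  With off = 250 the caller is 200 bytes into the mapping,
 * so 1000 - (250 - 50) = 800 contiguous MPTCP bytes remain sendable.
 */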
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state))
			continue;

		if (tp->t_maxseg > maxseg)
			maxseg = tp->t_maxseg;
	}

	return (maxseg);
}

static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state))
			continue;

		if (tp->rcv_scale < rcvscale)
			rcvscale = tp->rcv_scale;
	}

	return (rcvscale);
}
/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/* Again check the limit set by the advertised
		 * window scale
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}

void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and receive side is being
	 * throttled
	 * - if there are segments in reassembly queue indicating loss,
	 * do not need to increase recv window during recovery as more
	 * data is not going to be sent. A duplicate ack sent during
	 * recovery should not change the receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Can not resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
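
/*
 * Illustration of the heuristic above: with two subflows whose receive
 * buffers have sb_hiwat of 128KB and 256KB, the MPTCP receive buffer is
 * offered 384KB (and the sum of the subflows' ideal sizes as its ideal),
 * after which mptcp_sbrcv_reserve() clamps the result to
 * tcp_autorcvbuf_max and to TCP_MAXWIN shifted by the smallest window
 * scale negotiated across the established subflows.
 */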
/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so))
		return;

	if (tcp_do_autorcvbuf == 1 &&
	    tcp_cansbgrow(sb) &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}

/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0)
		space = 0;

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space)
		space = 0;
	else
		space -= pending;

	return (space);
}
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE))
		return;

	if (tp->t_mpflags & TMPF_MPTCP_READY)
		return;

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
		return;

	tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}

/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return (ret);
}
/*
 * MPTCP t_maxseg adjustment function
 */
int
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define	MPTCP_COMPUTE_LEN {				\
	mss_lower = sizeof (struct mptcp_dss_ack_opt);	\
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)		\
		mss_lower += 2;				\
	else						\
		/* adjust to 32-bit boundary + EOL */	\
		mss_lower += 2;				\
}
	if (mp_tp == NULL)
		return (0);

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return (mss_lower);
}

/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	if (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}
	so_update_policy(so);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
#if INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else
#endif
	if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n/8) * sizeof(mptcp_flow_t);
		return (0);
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		mpp_lock(mpp);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mpte_lock_assert_held(mpte);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				mpp_unlock(mpp);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			mpp_unlock(mpp);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		mpp_unlock(mpp);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error)
				break;
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return (error);
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED)
		mp_tp = mpte->mpte_mptcb;

	if (mp_tp)
		mp_tp->mpt_notsent_lowat = optval;
	else
		error = EINVAL;

	return (error);
}

u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED)
		mp_tp = mpte->mpte_mptcb;

	if (mp_tp)
		return (mp_tp->mpt_notsent_lowat);
	else
		return (0);
}

int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return (0);
	}

	mpte = mptompte(mpp);
	mpte_lock_assert_held(mpte);
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %d \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		return (1);
	}

	/* When Nagle's algorithm is not disabled, it is better
	 * to wake up the client even before there is at least one
	 * maxseg of data to write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
			    " nodelay false \n",
			    mp_tp->mpt_notsent_lowat, notsent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			return (retval);
		}
	}
	return (0);
}
/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
symptoms_advisory_t mptcp_advisory;

static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0)
		mptcplog((LOG_ERR, "%s MPTCP kernel-control socket already open!", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	mptcp_kern_skt_unit = sac->sc_unit;

	return (0);
}

static void
mptcp_allow_uuid(uuid_t uuid)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid))
			goto next;
		else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid))
			goto next;

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~MPTE_ACCESS_GRANTED;

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		/* Only handover-mode is purely driven by Symptom's Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER)
			goto next;

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		mptcplog((LOG_ERR, "%s skt_unit is still 0\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED)
		pid = mp_so->e_pid;
	else
		pid = mp_so->last_pid;

	p = proc_find(pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s Couldn't find proc for pid %u\n", __func__,
		    pid), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	if (mp_so->so_flags & SOF_DELEGATED)
		uuid_copy(ask.uuid, mp_so->e_uuid);
	else
		uuid_copy(ask.uuid, mp_so->last_uuid);

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION)
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	else if (prio == TASK_FOREGROUND_APPLICATION)
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	else
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;

	mptcplog((LOG_DEBUG, "%s ask symptoms about pid %u, prio %u\n", __func__,
	    pid, ask.priority), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);
	if (err)
		mptcplog((LOG_ERR, "%s ctl_enqueuedata failed %d\n", __func__, err),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	proc_rele(p);
}

static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return (0);
}

static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit)
		mptcplog((LOG_ERR, "%s kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return (EINVAL);
	}

	if (mbuf_len(m) >= sizeof(*sa))
		sa = mbuf_data(m);
	else
		return (EINVAL);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT &&
	    sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		uint8_t old_wifi_status = mptcp_advisory.sa_wifi_status;

		mptcplog((LOG_DEBUG, "%s: wifi %d,%d\n",
		    __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

		if ((sa->sa_wifi_status &
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) !=
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK))
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;

		if (old_wifi_status != mptcp_advisory.sa_wifi_status)
			mptcp_wifi_status_changed();
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_NOCOMMENT) {
		mptcplog((LOG_DEBUG, "%s: NOCOMMENT wifi %d\n", __func__,
		    mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_USEAPP) {
		uuid_t uuid;

		mptcplog((LOG_DEBUG, "%s Got response about useApp\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		uuid_copy(uuid, (unsigned char *)(sa + 1));

		mptcp_allow_uuid(uuid);
	}

	mbuf_freem(m);
	return (0);
}
void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}

boolean_t
mptcp_is_wifi_unusable(void)
{
	/* a false return val indicates there is no info or wifi is ok */
	return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD);
}
/* If TFO data is successfully acked, it must be dropped from the mptcp so */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	/* If data was sent with SYN, rewind state */
	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* handle partial TCP ack */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			/* all data on SYN was acked */
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}
		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d TFO tcp len %d mptcp len %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid, tcp_droplen, mp_droplen),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
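
/*
 * Worked example for the rewind above: if 100 bytes were queued at MPTCP
 * level (mp_droplen = 100) but the TFO SYN only carried 60 bytes that got
 * acked (tcp_droplen = 60), SOF1_TFO_REWIND is set, mpt_sndnxt is pulled
 * back to mpt_snduna + 40 and only 60 bytes are dropped from the MPTCP
 * send buffer.  If the SYN carried all 100 bytes, the subflow's relative
 * sequence restarts at 1 and the full 100 bytes are dropped.
 */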
int
mptcp_freeq(struct mptcb *mp_tp)
{
	struct tseg_qent *q;
	int rv = 0;

	while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);
		rv = 1;
	}
	mp_tp->mpt_reassqlen = 0;
	return (rv);
}

static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	event_data.value = value;

	ev_msg.dv[0].data_ptr = &event_data;
	ev_msg.dv[0].data_length = sizeof(event_data);

	return kev_post_msg(&ev_msg);
}

void
mptcp_set_cellicon(struct mptses *mpte)
{
	int error;

	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY)
		return;

	/* Remember the last time we set the cellicon (see mptcp_unset_cellicon) */
	mptcp_last_cellicon_set = tcp_now;

	/* If cellicon is already set, get out of here! */
	if (OSTestAndSet(7, &mptcp_cellicon_is_set))
		return;

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

	if (error)
		mptcplog((LOG_ERR, "%s: Setting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	else
		mptcplog((LOG_DEBUG, "%s successfully set the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}

void
mptcp_unset_cellicon(void)
{
	int error;

	/* If cellicon is already unset, get out of here! */
	if (OSTestAndClear(7, &mptcp_cellicon_is_set))
		return;

	/*
	 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
	 * explicitly set the cellicon (see mptcp_set_cellicon()), then we unset
	 * it again.
	 */
	if (TSTMP_GT(mptcp_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE,
	    tcp_now)) {
		OSTestAndSet(7, &mptcp_cellicon_is_set);
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

	if (error)
		mptcplog((LOG_ERR, "%s: Unsetting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	else
		mptcplog((LOG_DEBUG, "%s successfully unset the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL)
		return;

	so = inp->inp_socket;
	if (so == NULL)
		return;

	if (!(so->so_flags & SOF_MP_SUBFLOW))
		return;

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}

void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}