/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#include <dev/random/randomdev.h>
/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain.  The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain.  It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP).  Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB.  All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones.  The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB.
 *
 * A functioning MPTCP Session consists of one or more subflow sockets.  Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure.  Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow.  This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP
 * socket's lock.  Incoming data on a subflow also ends up taking this single
 * lock.  To achieve the latter, tcp_lock/unlock has been changed to use the
 * lock of the MPTCP socket instead.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector.  This process will take place once all
 * of the subflows have been destroyed.
 */
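/*
 * For illustration only: the "same memory block" layout described above
 * amounts to a wrapper of the following shape (a sketch; the authoritative
 * definition of struct mpp_mtp lives with the MPTCP headers, not here):
 *
 *	struct mpp_mtp {
 *		struct mppcb mpp;		// Multipath PCB
 *		struct mptses mpp_ses;		// MPTCP Session
 *		struct mptcb mtcb;		// MPTCP PCB
 *	};
 *
 * A single zone allocation thus yields all three structures, and casts such
 * as &((struct mpp_mtp *)mpp)->mpp_ses (see mptcp_sescreate() below) recover
 * the adjacent ones.
 */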
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_rupcall(struct socket *, void *, int);
static void mptcp_subflow_input(struct mptses *, struct mptsub *);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
/*
 * Possible return values for subflow event handlers.  Note that success
 * values must be greater than or equal to MPTS_EVRET_OK.  Values less than
 * that indicate errors or actions which require immediate attention; they
 * will prevent the rest of the handlers from processing their respective
 * events until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE		= 1,	/* delete this subflow */
	MPTS_EVRET_OK			= 2,	/* OK */
	MPTS_EVRET_CONNECT_PENDING	= 3,	/* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK	= 4,	/* abort all but preferred */
} ev_ret_t;
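/*
 * Illustrative check (a sketch of how a caller is expected to treat these
 * values, not code from the dispatcher itself): since all success values
 * are >= MPTS_EVRET_OK, a handler's result can be tested with a single
 * comparison:
 *
 *	ev_ret_t ret = handler(mpte, mpts, p_mpsofilt_hint, event);
 *	if (ret < MPTS_EVRET_OK) {
 *		// error or urgent action; stop processing further
 *		// events until the next round
 *	}
 */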
static ev_ret_t mptcp_subflow_events(struct mptses *, struct mptsub *, uint64_t *);
static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);

static const char *mptcp_evret2str(ev_ret_t);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);
static unsigned int mptsub_zone_size;		/* size of mptsub */
static struct zone *mptsub_zone;		/* zone for mptsub */

static unsigned int mptopt_zone_size;		/* size of mptopt */
static struct zone *mptopt_zone;		/* zone for mptopt */

static unsigned int mpt_subauth_entry_size;	/* size of subf auth entry */
static struct zone *mpt_subauth_zone;		/* zone of subf auth entry */

struct mppcbinfo mtcbinfo;

#define	MPTCP_SUBFLOW_WRITELEN	(8 * 1024)	/* bytes to write each time */
#define	MPTCP_SUBFLOW_READLEN	(8 * 1024)	/* bytes to read each time */
SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;		/* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW|CTLFLAG_LOCKED,
	&mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
	&mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD|CTLFLAG_LOCKED,
	&mtcbinfo.mppi_count, 0, "Number of active PCBs");

static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
#if INET6
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;
#endif /* INET6 */
static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
	uint64_t	sofilt_hint_mask;
	ev_ret_t	(*sofilt_hint_ev_hdlr)(
			    struct mptses *mpte,
			    struct mptsub *mpts,
			    uint64_t *p_mpsofilt_hint,
			    uint64_t event);
} mptsub_ev_entry_t;

static uint8_t mptcp_cellicon_is_set;
static uint32_t mptcp_last_cellicon_set;
#define	MPTCP_CELLICON_TOGGLE_RATE	(5 * TCP_RETRANSHZ) /* Only toggle every 5 seconds */
/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};
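/*
 * Dispatch sketch (assumed shape of mptcp_subflow_events(), which is
 * declared above but not part of this excerpt): each set bit in the
 * subflow's pending event mask is matched against the table in order,
 * so earlier entries run before later ones - hence the warning above:
 *
 *	for (i = 0; i < sizeof (mpsub_ev_entry_tbl) /
 *	    sizeof (mpsub_ev_entry_tbl[0]); i++) {
 *		if (events & mpsub_ev_entry_tbl[i].sofilt_hint_mask) {
 *			ret = mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte,
 *			    mpts, p_mpsofilt_hint,
 *			    mpsub_ev_entry_tbl[i].sofilt_hint_mask);
 *		}
 *	}
 */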
/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
#if INET6
	struct ip6protosw *prp6;
#endif /* INET6 */

	VERIFY((pp->pr_flags & (PR_INITIALIZED|PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized)
		return;
	mptcp_initialized = 1;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof (*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof (mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

#if INET6
	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof (*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof (mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
#endif /* INET6 */

	bzero(&mtcbinfo, sizeof (mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof (struct mpp_mtp);
	if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptsub_zone_size = sizeof (struct mptsub);
	if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub")) == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	mptopt_zone_size = sizeof (struct mptopt);
	if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt")) == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	mpt_subauth_entry_size = sizeof (struct mptcp_subf_auth_entry);
	if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
		/* NOTREACHED */
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_last_cellicon_set = tcp_now;
}
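/*
 * Note on the protosw copies made above: subflow sockets keep the regular
 * TCP behavior except for pru_soreceive/pru_sosend (replaced with the
 * MPTCP-aware routines in this file) and pru_rcvoob (unsupported).  A
 * subflow is switched onto this interposed protosw in
 * mptcp_subflow_socreate() via (*so)->so_proto = &mptcp_subflow_protosw.
 */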
static int
mptcp_get_statsindex(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int i, index = -1;

	if (ifp == NULL) {
		mptcplog((LOG_ERR, "%s: no ifp on subflow\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return (-1);
	}

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0)
				index = i;
			continue;
		}

		if (stats[i].ifindex == ifp->if_index) {
			index = i;
			return (index);
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifp->if_index;
		if (stats[index].is_expensive == 0)
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
	}

	return (index);
}
void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcp_get_statsindex(mpte->mpte_itfstats, mpts);

	if (index != -1)
		mpte->mpte_itfstats[index].switches++;
}
/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}
/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
int
mptcp_sescreate(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof (*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof (*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return (0);
}
static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		mptcplog((LOG_ERR, "%s cellbytes is %lld\n", __func__, mycellbytes),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}
static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi)
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell)
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi)
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell)
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell)
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell)
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success)
				tcpstat.tcps_mptcp_fp_aggregate_success++;
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi)
		tcpstat.tcps_mptcp_back_to_wifi++;
}
/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	mp_tp = mpte->mpte_mptcb;
	VERIFY(mp_tp != NULL);

	mptcpstats_session_wrapup(mpte);

	mptcp_unset_cellicon();

	/*
	 * MPTCP Multipath PCB Extension section
	 */
	mptcp_flush_sopts(mpte);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE)
		_FREE(mpte->mpte_itfinfo, M_TEMP);

	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	/*
	 * MPTCP Protocol Control Block section
	 */
	DTRACE_MPTCP2(session__destroy, struct mptses *, mpte,
	    struct mptcb *, mp_tp);
}
static boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return (mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
		mp_tp->mpt_state < MPTCPS_TIME_WAIT &&
		!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP));
}
static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len, struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
					 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
					 0x00, 0x00, 0x00, 0x00},
	};
	char buf[MAX_IPv6_STR_LEN];
	char *ptrv4 = (char *)addrv4;
	char *ptr = (char *)addr;

	if (IN_ZERONET(addrv4->s_addr) || // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(addrv4->s_addr) || // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(addrv4->s_addr) || // 169.254.0.0/16 Link Local
	    IN_DS_LITE(addrv4->s_addr) || // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(addrv4->s_addr) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(addrv4->s_addr) || // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) { // 255.255.255.255/32 Limited Broadcast
		return (-1);
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(addrv4->s_addr) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(addrv4->s_addr)) // 100.64.0.0/10 Shared Address Space
			return (-1);
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	mptcplog((LOG_DEBUG, "%s: nat64prefix-len %u synthesized %s\n", __func__,
	    len, inet_ntop(AF_INET6, (void *)addr, buf, sizeof(buf))),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	return (0);
}
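/*
 * Worked example (cf. RFC 6052): with the well-known /96 prefix
 * 64:ff9b::/96, the IPv4 destination 192.0.2.33 (bytes c0 00 02 21) is
 * copied into bytes 12..15 of the IPv6 address, yielding 64:ff9b::c000:221.
 * For the shorter prefix lengths the four IPv4 bytes are split around byte 8
 * of the address, which RFC 6052 reserves as the zero "u" octet; that is
 * why none of the cases above ever writes to ptr + 8.
 */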
static void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp))
		return;

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		struct mpt_itf_info *info;
		struct mptsub *mpts;
		uint32_t ifindex;
		int found = 0;

		info = &mpte->mpte_itfinfo[i];

		if (info->no_mptcp_support)
			continue;

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE)
			continue;

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

			if (ifp == NULL)
				continue;

			if (ifp->if_index == ifindex &&
			    !(mpts->mpts_socket->so_state & SS_ISDISCONNECTED)) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = 1;
				break;
			}

			/*
			 * In Handover mode, only create cell subflow if
			 * 1. Wi-Fi Assist is active
			 * 2. Symptoms marked WiFi as weak
			 * 3. We are experiencing RTOs or we are not sending data.
			 *
			 * This covers the scenario, where:
			 * 1. We send and get retransmission timeouts (thus,
			 *    we confirmed that WiFi is indeed bad).
			 * 2. We are not sending and the server tries to send.
			 *    Establishing a cell-subflow gives the server a
			 *    chance to send us some data over cell if WiFi
			 *    is dead. We establish the subflow with the
			 *    backup-bit set, so the server is not allowed to
			 *    send on this subflow as long as WiFi is providing
			 *    good performance.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(ifp) &&
			    !(mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) &&
			    (!mptcp_is_wifi_unusable() ||
			    (sototcpcb(mpts->mpts_socket)->t_rxtshift < mptcp_fail_thresh &&
			    mptetoso(mpte)->so_snd.sb_cc))) {
				mptcplog((LOG_DEBUG, "%s handover, wifi state %u rxt %u ifindex %u this %u\n",
				    __func__, mptcp_is_wifi_unusable(), sototcpcb(mpts->mpts_socket)->t_rxtshift, ifindex,
				    ifp->if_index),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
				found = 1;
				break;
			}
		}

		if (!found && !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			return;
		}

		if (!found) {
			struct sockaddr *dst = &mpte->mpte_dst;
			struct sockaddr_in6 nat64pre;

			if (mpte->mpte_dst.sa_family == AF_INET &&
			    !info->has_v4_conn && info->has_v6_conn) {
				struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
				struct ifnet *ifp;
				int error, j;

				bzero(&nat64pre, sizeof(struct sockaddr_in6));

				ifnet_head_lock_shared();
				ifp = ifindex2ifnet[ifindex];
				ifnet_head_done();

				error = ifnet_get_nat64prefix(ifp, nat64prefixes);
				if (error) {
					mptcplog((LOG_ERR, "%s: no NAT64-prefix on itf %s, error %d\n",
					    __func__, ifp->if_name, error),
					    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
					continue;
				}

				for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
					if (nat64prefixes[j].prefix_len != 0)
						break;
				}

				VERIFY(j < NAT64_MAX_NUM_PREFIXES);

				error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
				    nat64prefixes[j].prefix_len,
				    &mpte->__mpte_dst_v4.sin_addr);
				if (error != 0) {
					mptcplog((LOG_INFO, "%s: cannot synthesize this addr\n", __func__),
					    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
					continue;
				}

				memcpy(&nat64pre.sin6_addr,
				    &nat64prefixes[j].ipv6_prefix,
				    sizeof(nat64pre.sin6_addr));
				nat64pre.sin6_len = sizeof(struct sockaddr_in6);
				nat64pre.sin6_family = AF_INET6;
				nat64pre.sin6_port = mpte->__mpte_dst_v6.sin6_port;
				nat64pre.sin6_flowinfo = 0;
				nat64pre.sin6_scope_id = 0;

				dst = (struct sockaddr *)&nat64pre;
			}

			mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
		}
	}
}
/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
static void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;
	int found_working_subflow = 0, removed_some = 0;
	int wifi_unusable = mptcp_is_wifi_unusable();

	if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER)
		return;

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp))
			continue;

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED)
			continue;

		/* Either this subflow is in good condition while we try to send */
		if (tp->t_rxtshift == 0 && mptetoso(mpte)->so_snd.sb_cc)
			found_working_subflow = 1;

		/* Or WiFi is fine */
		if (!wifi_unusable)
			found_working_subflow = 1;
	}

	/*
	 * Couldn't find a working subflow, let's not remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow)
		return;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		/* Only remove cellular subflows */
		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp))
			continue;

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

		removed_some = 1;
	}

	if (removed_some)
		mptcp_unset_cellicon();
}
static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}
static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/*
	 * Start with clearing, because we might be processing connections
	 * while a new event comes in.
	 */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled))
		mptcplog((LOG_ERR, "%s: bit was already cleared!\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS))
			continue;

		mpp_lock(mpp);

		mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		VERIFY(mp_so->so_usecount > 0);

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
/*
 * We need this because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect), so we would
 * deadlock there as we already hold the NECP-lock.
 *
 * Instead, schedule the subflow creation separately. That also gives NECP the
 * chance to make progress, without having to wait for MPTCP to finish its
 * subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		mptcplog((LOG_DEBUG, "%s: not a good time for subflows, state %u flags %#x",
		    __func__, mp_tp->mpt_state, mp_tp->mpt_flags),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled))
		return;

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz/10);
}
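/*
 * Example sequence (illustrative): an NECP callback marks a subflow with
 * MPTSF_CLOSE_REQD and calls mptcp_sched_create_subflows().  The first such
 * call sets MPP_CREATE_SUBFLOWS and takes a so_usecount reference; later
 * calls within the same window only flag their own PCB, because
 * OSTestAndSet() reports that the timeout is already armed.  When the
 * timeout fires, mptcp_create_subflows() walks all flagged PCBs once and
 * drops the references, coalescing a burst of NECP events into one pass.
 */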
/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
	    zalloc_noblock(mptopt_zone);
	if (mpo != NULL) {
		bzero(mpo, mptopt_zone_size);
	}

	return (mpo);
}
/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}
/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}
/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name)
			break;
	}
	VERIFY(mpo == NULL || sopt->sopt_valsize == sizeof (int));

	return (mpo);
}
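/*
 * Usage sketch (hypothetical caller, for illustration only): this is how an
 * eligible option would be recorded so that future subflows inherit it via
 * the replay loop in mptcp_subflow_socreate():
 *
 *	struct mptopt *mpo = mptcp_sopt_alloc(M_WAITOK);
 *	mpo->mpo_flags |= MPOF_SUBFLOW_OK;
 *	mpo->mpo_level = SOL_SOCKET;
 *	mpo->mpo_name = name;		// e.g. a traffic-class option
 *	mpo->mpo_intval = intval;
 *	mptcp_sopt_insert(mpte, mpo);
 *
 * mptcp_sopt_find() then lets the setsockopt path update the recorded value
 * in place instead of inserting a duplicate <level,name> entry.
 */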
/*
 * Allocate an MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts = zalloc(mptsub_zone);

	if (mpts == NULL)
		return (NULL);

	bzero(mpts, mptsub_zone_size);
	return (mpts);
}
/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released.  This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0)
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
		/* NOTREACHED */
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0)
		return;

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}
static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP connection.
	 * Locking, etc. now happens at the MPTCP layer.
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket.  From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;

	mptcp_subflow_addref(mpts);	/* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts);	/* for subflow socket */
}
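/*
 * Note on reference counting: the two mptcp_subflow_addref() calls above
 * are balanced by the two mptcp_subflow_remref() calls in
 * mptcp_subflow_del(), and the mp_so->so_usecount++ here is matched by the
 * so_usecount-- there.  Keeping attach/del strictly symmetric is what lets
 * mptcp_subflow_free() VERIFY() that every reference is gone before the
 * zone element is returned.
 */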
static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused struct necp_client_flow *flow)
{
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (action != NECP_CLIENT_CBACTION_NONVIABLE)
		return;

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (so->so_usecount == 0)
		return;

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (so->so_usecount == 0)
		goto out;

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	mptcplog((LOG_DEBUG, "%s: Subflow became non-viable", __func__),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER)
		flow->viable = 1;

out:
	socket_unlock(so, 1);
}
/*
 * Create an MPTCP subflow socket.
 */
static int
mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
    struct socket **so)
{
	lck_mtx_t *subflow_mtx;
	struct mptopt smpo, *mpo, *tmpo;
	struct proc *p;
	struct socket *mp_so;
	int error;

	*so = NULL;
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	mp_so = mptetoso(mpte);

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		return (ESRCH);
	}

	/*
	 * Create the subflow socket (multipath subflow, non-blocking.)
	 *
	 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
	 * socket; it will be cleared when the socket is peeled off or closed.
	 * It also indicates to the underlying TCP to handle MPTCP options.
	 * A multipath subflow socket implies SS_NOFDREF state.
	 */

	/*
	 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
	 * the ipi-lock. We cannot hold the socket-lock at that point.
	 */
	mpte_unlock(mpte);
	error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
	    SOCF_ASYNC, PROC_NULL);
	mpte_lock(mpte);
	if (error) {
		mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx unable to create subflow socket error %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), error),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		proc_rele(p);

		mptcp_subflow_free(mpts);
		return (error);
	}

	/*
	 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
	 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
	 * Which is why we also need to get the lock with pr_getlock, as after
	 * setting the flag, socket_unlock will work on the MPTCP-level lock.
	 */
	subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
	lck_mtx_lock(subflow_mtx);

	/*
	 * Must be the first thing we do, to make sure all pointers for this
	 * subflow are set.
	 */
	mptcp_subflow_attach(mpte, mpts, *so);

	/*
	 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file descriptor associated by
	 * default.
	 */
	(*so)->so_state |= SS_NOFDREF;

	lck_mtx_unlock(subflow_mtx);

	/* prevent the socket buffers from being compressed */
	(*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
	(*so)->so_snd.sb_flags |= SB_NOCOMPRESS;

	/* Inherit preconnect and TFO data flags */
	if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)
		(*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
	if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT)
		(*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;

	/* Inherit uuid and create the related flow. */
	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;

		/*
		 * A note on the unlock: With MPTCP, we do multiple times a
		 * necp_client_register_socket_flow. This is problematic,
		 * because now the lock-ordering guarantee (first necp-locks,
		 * then socket-locks) is no more respected. So, we need to
		 * unlock here.
		 */
		mpte_unlock(mpte);
		error = necp_client_register_socket_flow(mp_so->last_pid,
		    mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
		mpte_lock(mpte);

		if (error)
			goto out_err;

		/* Possible state-change during the unlock above */
		if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
		    (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP))
			goto out_err;

		uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
	} else {
		mptcplog((LOG_NOTICE, "%s: uuid is not set!\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
	}

	/* inherit the other socket options */
	bzero(&smpo, sizeof (smpo));
	smpo.mpo_flags |= MPOF_SUBFLOW_OK;
	smpo.mpo_level = SOL_SOCKET;
	smpo.mpo_intval = 1;

	/* disable SIGPIPE */
	smpo.mpo_name = SO_NOSIGPIPE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
		goto out_err;

	/* find out if the subflow's source address goes away */
	smpo.mpo_name = SO_NOADDRERR;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
		goto out_err;

	/* enable keepalive */
	smpo.mpo_name = SO_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
		goto out_err;

	smpo.mpo_level = IPPROTO_TCP;
	smpo.mpo_intval = mptcp_subflow_keeptime;
	smpo.mpo_name = TCP_KEEPALIVE;
	if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
		goto out_err;

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
		/*
		 * On secondary subflows we might need to set the cell-fallback
		 * flag (see conditions in mptcp_subflow_sosetopt).
		 */
		smpo.mpo_level = SOL_SOCKET;
		smpo.mpo_name = SO_MARK_CELLFALLBACK;
		smpo.mpo_intval = 1;
		if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0)
			goto out_err;
	}

	/* replay setsockopt(2) on the subflow sockets for eligible options */
	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		int interim;

		if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK))
			continue;

		/*
		 * Skip those that are handled internally; these options
		 * should not have been recorded and marked with the
		 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
		 */
		if (mpo->mpo_level == SOL_SOCKET &&
		    (mpo->mpo_name == SO_NOSIGPIPE ||
		    mpo->mpo_name == SO_NOADDRERR ||
		    mpo->mpo_name == SO_KEEPALIVE))
			continue;

		interim = (mpo->mpo_flags & MPOF_INTERIM);
		if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
			mptcplog((LOG_ERR, "%s: subflow socreate mp_so 0x%llx"
			    " sopt %s val %d interim record removed\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
			    mpo->mpo_intval),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
			mptcp_sopt_remove(mpte, mpo);
			mptcp_sopt_free(mpo);
		}
	}

	/*
	 * We need to receive everything that the subflow socket has,
	 * so use a customized socket receive function.  We will undo
	 * this when the socket is peeled off or closed.
	 */
	switch (dom) {
	case PF_INET:
		(*so)->so_proto = &mptcp_subflow_protosw;
		break;
#if INET6
	case PF_INET6:
		(*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
		break;
#endif /* INET6 */
	default:
		VERIFY(0);
		/* NOTREACHED */
	}

	proc_rele(p);

	DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
	    int, dom, int, error);

	return (0);

out_err:
	mptcp_subflow_abort(mpts, error);

	proc_rele(p);

	mptcplog((LOG_ERR, "%s: subflow socreate failed with error %d\n",
	    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	return (error);
}
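/*
 * The pr_getlock dance in mptcp_subflow_socreate() is subtle: before
 * SOF_MP_SUBFLOW is set, the subflow still locks like a plain TCP socket;
 * afterwards, pr_getlock resolves to the MPTCP socket's lock.  Taking the
 * mutex through pr_getlock before mptcp_subflow_attach() flips the flag
 * guarantees that no thread observes the subflow mid-switch while holding
 * the wrong lock.
 */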
/*
 * Close an MPTCP subflow socket.
 *
 * Note that this may be called on an embryonic subflow, and the only
 * thing that is guaranteed valid is the protocol-user request.
 */
static void
mptcp_subflow_soclose(struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;

	if (mpts->mpts_flags & MPTSF_CLOSED)
		return;

	VERIFY(so != NULL);
	VERIFY(so->so_flags & SOF_MP_SUBFLOW);
	VERIFY((so->so_state & (SS_NBIO|SS_NOFDREF)) == (SS_NBIO|SS_NOFDREF));

	DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
	    struct socket *, so,
	    struct sockbuf *, &so->so_rcv,
	    struct sockbuf *, &so->so_snd,
	    struct mptses *, mpts->mpts_mpte);

	mpts->mpts_flags |= MPTSF_CLOSED;

	if (so->so_retaincnt == 0) {
		soclose_locked(so);

		return;
	} else {
		VERIFY(so->so_usecount > 0);
		(void) soclose_locked(so);
	}
}
/*
 * Connect an MPTCP subflow socket.
 *
 * Note that in the pending connect case, the subflow socket may have been
 * bound to an interface and/or a source IP address which may no longer be
 * around by the time this routine is called; in that case the connect attempt
 * will most likely fail.
 */
static int
mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
{
	char dbuf[MAX_IPv6_STR_LEN];
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	struct sockaddr *dst;
	struct proc *p;
	int af, error;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	p = proc_find(mp_so->last_pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s: Couldn't find proc for pid %u\n", __func__, mp_so->last_pid),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

		return (ESRCH);
	}

	so = mpts->mpts_socket;
	af = mpts->mpts_dst.sa_family;

	VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING|MPTSF_CONNECTED)) == MPTSF_CONNECTING);
	VERIFY(mpts->mpts_socket != NULL);
	VERIFY(af == AF_INET || af == AF_INET6);

	dst = &mpts->mpts_dst;
	mptcplog((LOG_DEBUG, "%s: connectx mp_so 0x%llx dst %s[%d] cid %d [pended %s]\n",
	    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    inet_ntop(af, ((af == AF_INET) ? (void *)&SIN(dst)->sin_addr.s_addr :
	    (void *)&SIN6(dst)->sin6_addr),
	    dbuf, sizeof (dbuf)),
	    ((af == AF_INET) ? ntohs(SIN(dst)->sin_port) : ntohs(SIN6(dst)->sin6_port)),
	    mpts->mpts_connid,
	    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ? "YES" : "NO")),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;

	mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);

	/* connect the subflow socket */
	error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
	    p, mpts->mpts_ifscope,
	    mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);

	mpts->mpts_iss = sototcpcb(so)->iss;

	/* See tcp_connect_complete */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
	    (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
	}

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0)
		mpte->mpte_addrid_last++;

	proc_rele(p);

	DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
	    struct mptsub *, mpts, int, error);
	if (error)
		mptcplog((LOG_ERR, "%s: connectx failed with error %d ifscope %u\n",
		    __func__, error, mpts->mpts_ifscope),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	return (error);
}
/*
 * MPTCP subflow socket receive routine, derived from soreceive().
 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	int flags, error = 0;
	struct proc *p = current_proc();
	struct mbuf *m, **mp = mp0;
	boolean_t proc_held = FALSE;

	mpte_lock_assert_held(tptomptp(sototcpcb(so))->mpt_mpte);
	VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);

#ifdef MORE_LOCKING_DEBUG
	if (so->so_usecount == 1) {
		panic("%s: so=%x no other reference on socket\n", __func__, so);
		/* NOTREACHED */
	}
#endif
	/*
	 * We return all that is there in the subflow's socket receive buffer
	 * to the MPTCP layer, so we require that the caller passes in the
	 * expected parameters.
	 */
	if (mp == NULL || controlp != NULL)
		return (EINVAL);

	*mp = NULL;
	if (psa != NULL)
		*psa = NULL;
	if (flagsp != NULL)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;

	if (flags & (MSG_PEEK|MSG_OOB|MSG_NEEDSA|MSG_WAITALL|MSG_WAITSTREAM))
		return (EOPNOTSUPP);

	flags |= (MSG_DONTWAIT|MSG_NBIO);

	/*
	 * If a recv attempt is made on a previously-accepted socket
	 * that has been marked as inactive (disconnected), reject
	 * the request.
	 */
	if (so->so_flags & SOF_DEFUNCT) {
		struct sockbuf *sb = &so->so_rcv;

		error = ENOTCONN;
		/*
		 * This socket should have been disconnected and flushed
		 * prior to being returned from sodefunct(); there should
		 * be no data on its receive list, so panic otherwise.
		 */
		if (so->so_state & SS_DEFUNCT)
			sb_empty_assert(sb, __func__);
		return (error);
	}

	/*
	 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
	 * and if so just return to the caller.  This could happen when
	 * soreceive() is called by a socket upcall function during the
	 * time the socket is freed.  The socket buffer would have been
	 * locked across the upcall, therefore we cannot put this thread
	 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
	 * we may livelock), because the lock on the socket buffer will
	 * only be released when the upcall routine returns to its caller.
	 * Because the socket has been officially closed, there can be
	 * no further read on it.
	 *
	 * A multipath subflow socket would have its SS_NOFDREF set by
	 * default, so check for SOF_MP_SUBFLOW socket flag; when the
	 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
	 */
	if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
	    (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW))
		return (0);

	/*
	 * For consistency with soreceive() semantics, we need to obey
	 * SB_LOCK in case some other code path has locked the buffer.
	 */
	error = sblock(&so->so_rcv, 0);
	if (error != 0)
		return (error);

	m = so->so_rcv.sb_mb;
	if (m == NULL) {
		/*
		 * Panic if we notice inconsistencies in the socket's
		 * receive list; both sb_mb and sb_cc should correctly
		 * reflect the contents of the list, otherwise we may
		 * end up with false positives during select() or poll()
		 * which could put the application in a bad state.
		 */
		SB_MB_CHECK(&so->so_rcv);

		if (so->so_error != 0) {
			error = so->so_error;
			so->so_error = 0;
			goto release;
		}

		if (so->so_state & SS_CANTRCVMORE) {
			goto release;
		}

		if (!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING))) {
			error = ENOTCONN;
			goto release;
		}

		/*
		 * MSG_DONTWAIT is implicitly defined and this routine will
		 * never block, so return EWOULDBLOCK when there is nothing.
		 */
		error = EWOULDBLOCK;
		goto release;
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
	SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
	SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");

	while (m != NULL) {
		int dlen = 0;
		struct mbuf *start = m;
		uint64_t dsn;
		uint32_t sseq;
		uint16_t orig_dlen;
		uint16_t csum;

		VERIFY(m->m_nextpkt == NULL);

		if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			orig_dlen = dlen = m->m_pkthdr.mp_rlen;
			dsn = m->m_pkthdr.mp_dsn;
			sseq = m->m_pkthdr.mp_rseq;
			csum = m->m_pkthdr.mp_csum;
		} else {
			/* We did fallback */
			mptcp_adj_rmap(so, m, 0);

			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			if (m != NULL) {
				so->so_rcv.sb_lastrecord = m;
			} else {
				SB_EMPTY_FIXUP(&so->so_rcv);
			}

			continue;
		}

		/*
		 * Check if the full mapping is now present
		 */
		if ((int)so->so_rcv.sb_cc < dlen) {
			mptcplog((LOG_INFO, "%s not enough data (%u) need %u\n",
			    __func__, so->so_rcv.sb_cc, dlen),
			    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);

			error = EWOULDBLOCK;
			goto release;
		}

		/* Now, get the full mapping */
		while (dlen > 0) {
			mptcp_adj_rmap(so, m, orig_dlen - dlen);

			dlen -= m->m_len;
			sbfree(&so->so_rcv, m);

			if (mp != NULL) {
				*mp = m;
				mp = &m->m_next;
				so->so_rcv.sb_mb = m = m->m_next;
				*mp = NULL;
			}

			VERIFY(dlen <= 0 || m);
		}

		if (m != NULL) {
			so->so_rcv.sb_lastrecord = m;
		} else {
			SB_EMPTY_FIXUP(&so->so_rcv);
		}

		if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum)) {
			error = EIO;
			*mp0 = NULL;
			goto release;
		}

		SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
		SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
	}

	DTRACE_MPTCP3(subflow__receive, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);

	if (flagsp != NULL)
		*flagsp |= flags;

release:
	sbunlock(&so->so_rcv, TRUE);

	if (proc_held)
		proc_rele(p);

	return (error);
}
/*
 * MPTCP subflow socket send routine, derived from sosend().
 */
static int
mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags)
{
	struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
	struct proc *p = current_proc();
	boolean_t en_tracing = FALSE, proc_held = FALSE;
	int32_t en_tracing_val = 0;
	int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
	int error;

	VERIFY(control == NULL);
	VERIFY(addr == NULL);
	VERIFY(uio == NULL);
	VERIFY(flags == 0);
	VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);

	VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
	VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);

	/*
	 * trace if tracing & network (vs. unix) sockets & and
	 * non-loopback
	 */
	if (ENTR_SHOULDTRACE &&
	    (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
		struct inpcb *inp = sotoinpcb(so);
		if (inp->inp_last_outifp != NULL &&
		    !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
			en_tracing = TRUE;
			en_tracing_val = top->m_pkthdr.len;
			KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
			    VM_KERNEL_ADDRPERM(so),
			    ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
			    (int64_t)en_tracing_val);
		}
	}

	mptcp_update_last_owner(so, mp_so);

	if (mp_so->last_pid != proc_pid(p)) {
		p = proc_find(mp_so->last_pid);
		if (p == PROC_NULL) {
			p = current_proc();
		} else {
			proc_held = TRUE;
		}
	}

#if NECP
	inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
#endif /* NECP */

	OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);

	error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
	if (error)
		goto out;

	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
	top = NULL;

out:
	if (top != NULL)
		m_freem(top);

	if (proc_held)
		proc_rele(p);

	soclearfastopen(so);

	if (en_tracing) {
		KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
		    VM_KERNEL_ADDRPERM(so),
		    ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
		    (int64_t)en_tracing_val);
	}

	return (error);
}
1923 * Establish an initial MPTCP connection (if first subflow and not yet
1924 * connected), or add a subflow to an existing MPTCP connection.
1927 mptcp_subflow_add(struct mptses
*mpte
, struct sockaddr
*src
,
1928 struct sockaddr
*dst
, uint32_t ifscope
, sae_connid_t
*pcid
)
1930 struct socket
*mp_so
, *so
= NULL
;
1931 struct mptcb
*mp_tp
;
1932 struct mptsub
*mpts
= NULL
;
1935 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1936 mp_so
= mptetoso(mpte
);
1937 mp_tp
= mpte
->mpte_mptcb
;
1939 if (mp_tp
->mpt_state
>= MPTCPS_CLOSE_WAIT
) {
1940 /* If the remote end sends Data FIN, refuse subflow adds */
1941 mptcplog((LOG_ERR
, "%s state %u\n", __func__
, mp_tp
->mpt_state
),
1942 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1947 mpts
= mptcp_subflow_alloc();
1949 mptcplog((LOG_ERR
, "%s malloc subflow failed\n", __func__
),
1950 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1956 int len
		int len = src->sa_len;

		MALLOC(mpts->mpts_src, struct sockaddr *, len, M_SONAME,
		    M_WAITOK | M_ZERO);
		if (mpts->mpts_src == NULL) {
			mptcplog((LOG_ERR, "%s malloc mpts_src failed", __func__),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
			error = ENOMEM;
			goto out_err;
		}
		bcopy(src, mpts->mpts_src, len);
	}

	memcpy(&mpts->mpts_dst, dst, dst->sa_len);

	af = mpts->mpts_dst.sa_family;

	mpts->mpts_ifscope = ifscope;

	/* create the subflow socket */
	if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0)
		/*
		 * Returning (error) and not cleaning up, because up to here
		 * all we did is creating mpts.
		 *
		 * And the contract is that the call to mptcp_subflow_socreate
		 * moves ownership of mpts to mptcp_subflow_socreate.
		 */
		return (error);

	/*
	 * We may be called from within the kernel. Still need to account this
	 * one to the real app.
	 */
	mptcp_update_last_owner(mpts->mpts_socket, mp_so);

	/*
	 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
	 * -1 (SAE_CONNID_ALL).
	 */
	mpte->mpte_connid_last++;
	if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
	    mpte->mpte_connid_last == SAE_CONNID_ANY)
		mpte->mpte_connid_last++;

	mpts->mpts_connid = mpte->mpte_connid_last;

	mpts->mpts_rel_seq = 1;

	/* Allocate a unique address id per subflow */
	mpte->mpte_addrid_last++;
	if (mpte->mpte_addrid_last == 0)
		mpte->mpte_addrid_last++;

	/* register for subflow socket read/write events */
	sock_setupcalls_locked(so, mptcp_subflow_rupcall, mpts,
	    mptcp_subflow_wupcall, mpts, 1);

	/* Register for subflow socket control events */
	sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
	    SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
	    SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
	    SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
	    SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
	    SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
	    SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
	    SO_FILT_HINT_ADAPTIVE_WTIMO);

	/* sanity check */
	VERIFY(!(mpts->mpts_flags &
	    (MPTSF_CONNECTING|MPTSF_CONNECTED|MPTSF_CONNECT_PENDING)));

	/*
	 * Indicate to the TCP subflow whether or not it should establish
	 * the initial MPTCP connection, or join an existing one.  Fill
	 * in the connection request structure with additional info needed
	 * by the underlying TCP (to be used in the TCP options, etc.)
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
		mpts->mpts_flags |= MPTSF_INITIAL_SUB;

		if (mp_tp->mpt_state == MPTCPS_CLOSED) {
			mptcp_init_local_parms(mpte);
		}
		soisconnecting(mp_so);

		/* If fastopen is requested, set state in mpts */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA)
			mpts->mpts_flags |= MPTSF_TFO_REQD;
	} else {
		if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY))
			mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
	}

	mpts->mpts_flags |= MPTSF_CONNECTING;

	if (af == AF_INET || af == AF_INET6) {
		char dbuf[MAX_IPv6_STR_LEN];

		mptcplog((LOG_DEBUG, "MPTCP Socket: %s "
		    "mp_so 0x%llx dst %s[%d] cid %d "
		    "[pending %s]\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    inet_ntop(af, ((af == AF_INET) ?
		    (void *)&SIN(&mpts->mpts_dst)->sin_addr.s_addr :
		    (void *)&SIN6(&mpts->mpts_dst)->sin6_addr),
		    dbuf, sizeof (dbuf)), ((af == AF_INET) ?
		    ntohs(SIN(&mpts->mpts_dst)->sin_port) :
		    ntohs(SIN6(&mpts->mpts_dst)->sin6_port)),
		    mpts->mpts_connid,
		    ((mpts->mpts_flags & MPTSF_CONNECT_PENDING) ?
		    "YES" : "NO")),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	/* connect right away if first attempt, or if join can be done now */
	if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING))
		error = mptcp_subflow_soconnectx(mpte, mpts);

	if (error)
		goto out_err_close;

	if (pcid)
		*pcid = mpts->mpts_connid;

	return (0);

out_err_close:
	mptcp_subflow_abort(mpts, error);

	return (error);

out_err:
	if (mpts)
		mptcp_subflow_free(mpts);

	return (error);
}
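/*
 * Illustrative sketch (not part of this file; names follow the public
 * Darwin connectx(2) API, endpoint values are placeholders): userland
 * reaches the subflow-creation path above by issuing connectx() on a
 * PF_MULTIPATH socket, e.g.:
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	sa_endpoints_t sae = { 0 };
 *	sae.sae_dstaddr = (struct sockaddr *)&dst;
 *	sae.sae_dstaddrlen = dst.sin_len;
 *	sae_connid_t cid;
 *	connectx(fd, &sae, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, &cid);
 *
 * The connection ID returned in cid corresponds to the mpts_connid
 * assigned from mpte_connid_last above.
 */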
static void
mptcpstats_update(struct mptcp_itf_stats *stats, struct mptsub *mpts)
{
	int index = mptcp_get_statsindex(stats, mpts);

	if (index != -1) {
		struct inpcb *inp = sotoinpcb(mpts->mpts_socket);

		stats[index].mpis_txbytes += inp->inp_stat->txbytes;
		stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;
	}
}
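/*
 * Usage note: mptcpstats_update() is invoked with the session-wide
 * mpte_itfstats array (see mptcp_subflow_del() below), so the bytes a
 * dying subflow moved over its interface are folded into the
 * per-interface totals before the subflow's inpcb becomes unreachable.
 */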
/*
 * Delete/remove a subflow from an MPTCP.  The underlying subflow socket
 * will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
void
mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
	VERIFY(mpte->mpte_numflows != 0);
	VERIFY(mp_so->so_usecount > 0);

	mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx [u=%d,r=%d] cid %d %x error %d\n",
	    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    mp_so->so_usecount, mp_so->so_retaincnt, mpts->mpts_connid,
	    mpts->mpts_flags, mp_so->so_error),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	mptcpstats_update(mpte->mpte_itfstats, mpts);
	mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
	mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;

	atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows--;
	if (mpte->mpte_active_sub == mpts)
		mpte->mpte_active_sub = NULL;

	/*
	 * Drop references held by this subflow socket; there
	 * will be no further upcalls made from this point.
	 */
	sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
	sock_catchevents_locked(so, NULL, NULL, 0);

	mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);

	mp_so->so_usecount--;		/* for subflow socket */
	mpts->mpts_mpte = NULL;
	mpts->mpts_socket = NULL;

	mptcp_subflow_remref(mpts);	/* for MPTCP subflow list */
	mptcp_subflow_remref(mpts);	/* for subflow socket */

	so->so_flags &= ~SOF_MP_SUBFLOW;
	tp->t_mptcb = NULL;
	tp->t_mpsub = NULL;
}
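/*
 * Reference-accounting sketch for the deletion above: a live subflow
 * holds one mpts reference for its place on the mpte_subflows list and
 * one for the subflow socket itself, which is why this function calls
 * mptcp_subflow_remref() exactly twice, and it drops the single
 * MP-socket so_usecount that the subflow contributed.
 */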
static void
mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so = mpts->mpts_socket;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	int send_dfin = 0;

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
		send_dfin = 1;

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin)
			mptcp_send_dfin(so);
		soshutdownlock(so, SHUT_WR);
	}
}
static void
mptcp_subflow_abort(struct mptsub *mpts, int error)
{
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = sototcpcb(so);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED)
		return;

	mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	if (tp->t_state != TCPS_CLOSED)
		tcp_drop(tp, error);

	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Disconnect a subflow socket.
 */
void
mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *so;
	struct mptcb *mp_tp;
	int send_dfin = 0;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	VERIFY(mpts->mpts_mpte == mpte);
	VERIFY(mpts->mpts_socket != NULL);

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING|MPTSF_DISCONNECTED))
		return;

	mpts->mpts_flags |= MPTSF_DISCONNECTING;

	so = mpts->mpts_socket;
	mp_tp = mpte->mpte_mptcb;
	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT)
		send_dfin = 1;

	if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
	    (so->so_state & SS_ISCONNECTED)) {
		mptcplog((LOG_DEBUG, "MPTCP Socket %s: cid %d fin %d\n",
		    __func__, mpts->mpts_connid, send_dfin),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (send_dfin)
			mptcp_send_dfin(so);
		(void) soshutdownlock(so, SHUT_RD);
		(void) soshutdownlock(so, SHUT_WR);
		(void) sodisconnectlocked(so);
	}
	/*
	 * Generate a disconnect event for this subflow socket, in case
	 * the lower layer doesn't do it; this is needed because the
	 * subflow socket deletion relies on it.
	 */
	mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
}
/*
 * Called when the associated subflow socket posted a read event.
 */
static void
mptcp_subflow_rupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg, *tmpts;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_RUPCALL))
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
		return;
	}

	mpte->mpte_mppcb->mpp_flags |= MPP_RUPCALL;
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		mptcp_subflow_input(mpte, mpts);

		mptcp_subflow_remref(mpts);		/* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_RUPCALL);
}
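/*
 * Deferred-upcall pattern sketch: the read upcall above either runs the
 * input path immediately or, when mptcp_should_defer_upcall() says the
 * MP PCB is busy, merely records MPP_SHOULD_RWAKEUP and returns.  The
 * current holder of the PCB later replays the wakeup through
 * mptcp_handle_deferred_upcalls(), so subflow events never re-enter an
 * MP socket that is already being processed.
 */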
/*
 * Subflow socket input.
 */
static void
mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m = NULL;
	struct socket *so;
	int error, wakeup = 0;

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;

	DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
	    struct mptsub *, mpts);

	if (!(mpts->mpts_flags & MPTSF_CONNECTED))
		goto out;

	so = mpts->mpts_socket;

	error = sock_receive_internal(so, NULL, &m, 0, NULL);
	if (error != 0 && error != EWOULDBLOCK) {
		mptcplog((LOG_ERR, "%s: cid %d error %d\n",
		    __func__, mpts->mpts_connid, error),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_ERR);
		if (error == ENODATA) {
			/*
			 * Don't ignore ENODATA so as to discover
			 * nasty middleboxes.
			 */
			mp_so->so_error = ENODATA;

			wakeup = 1;
			goto out;
		}
	} else if (error == 0) {
		mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
		    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
	}

	/* In fallback, make sure to accept data on all but one subflow */
	if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mpts->mpts_flags & MPTSF_ACTIVE)) {
		mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
		    __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
		m_freem(m);
		goto out;
	}

	if (m != NULL) {
		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

			mpte->mpte_used_cell = 1;
		} else {
			mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

			mpte->mpte_used_wifi = 1;
		}

		mptcp_input(mpte, m);
	}

	/* notify protocol that we drained all the data */
	if (error == 0 && m != NULL &&
	    (so->so_proto->pr_flags & PR_WANTRCVD) && so->so_pcb != NULL)
		(*so->so_proto->pr_usrreqs->pru_rcvd)(so, 0);

out:
	if (wakeup)
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
}
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
static void
mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
{
#pragma unused(so, waitf)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL))
			mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
		return;
	}

	mptcp_output(mpte);
}
/*
 * Subflow socket output.
 *
 * Called for sending data from MPTCP to the underlying subflow socket.
 */
int
mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
	struct socket *mp_so, *so;
	struct tcpcb *tp;
	uint64_t mpt_dsn = 0, off = 0;
	int sb_cc = 0, error = 0, wakeup = 0;
	uint32_t dss_csum = 0;
	uint16_t tot_sent = 0;
	boolean_t reinjected = FALSE;

	mpte_lock_assert_held(mpte);

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;
	tp = sototcpcb(so);

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
	mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;

	VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
	VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
	    (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
	    (mpts->mpts_flags & MPTSF_TFO_REQD));
	VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);

	mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
	    __func__, mpts->mpts_flags, mpte->mpte_flags,
	    mptcp_subflow_cwnd_space(so)),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
	    struct mptsub *, mpts);

	/* Remove Addr Option is not sent reliably as per I-D */
	if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
		tp->t_rem_aid = mpte->mpte_lost_aid;
		tp->t_mpflags |= TMPF_SND_REM_ADDR;
		mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
	}

	/*
	 * The mbuf chains containing the metadata (as well as pointing to
	 * the user data sitting at the MPTCP output queue) would then be
	 * sent down to the subflow socket.
	 *
	 * Some notes on data sequencing:
	 *
	 *   a. Each mbuf must be a M_PKTHDR.
	 *   b. MPTCP metadata is stored in the mptcp_pktinfo structure
	 *	in the mbuf pkthdr structure.
	 *   c. Each mbuf containing the MPTCP metadata must have its
	 *	pkt_flags marked with the PKTF_MPTCP flag.
	 */

	if (mpte->mpte_reinjectq)
		sb_mb = mpte->mpte_reinjectq;
	else
		sb_mb = mp_so->so_snd.sb_mb;

	if (sb_mb == NULL) {
		mptcplog((LOG_ERR, "%s: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u\n",
		    __func__, (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
		goto out;
	}

	VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);

	if (sb_mb->m_pkthdr.mp_rlen == 0 &&
	    !(so->so_state & SS_ISCONNECTED) &&
	    (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
		tp->t_mpflags |= TMPF_TFO_REQUEST;
		goto zero_len_write;
	}

	mpt_dsn = sb_mb->m_pkthdr.mp_dsn;

	/* First, drop acknowledged data */
	if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
		mptcplog((LOG_ERR, "%s: dropping data, should have been done earlier "
		    "dsn %u suna %u reinject? %u\n",
		    __func__, (uint32_t)mpt_dsn,
		    (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
		if (mpte->mpte_reinjectq) {
			mptcp_clean_reinjectq(mpte);
		} else {
			uint64_t len = 0;

			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;
		}
	}

	/* Check again because of above sbdrop */
	if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
		mptcplog((LOG_ERR, "%s send-buffer is empty\n", __func__),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
		goto out;
	}

	/*
	 * In degraded mode, we don't receive data acks, so force free
	 * mbufs less than snd_nxt
	 */
	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
	    mp_so->so_snd.sb_mb) {
		mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
		if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
			uint64_t len = 0;

			len = mp_tp->mpt_snduna - mpt_dsn;
			sbdrop(&mp_so->so_snd, (int)len);
			wakeup = 1;

			mptcplog((LOG_ERR, "%s: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
			    __func__, (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
		}
	}

	if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
	    !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
		mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
		so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
	}

	/*
	 * Adjust the top level notion of next byte used for retransmissions
	 * and sending FINs.
	 */
	if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna))
		mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	/* Now determine the offset from which to start transmitting data */
	if (mpte->mpte_reinjectq)
		sb_mb = mpte->mpte_reinjectq;
	else
		sb_mb = mp_so->so_snd.sb_mb;
	if (sb_mb == NULL) {
		mptcplog((LOG_ERR, "%s send-buffer is still empty\n", __func__),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
		goto out;
	}

	if (mpte->mpte_reinjectq) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
	} else if (flags & MPTCP_SUBOUT_PROBING) {
		sb_cc = sb_mb->m_pkthdr.mp_rlen;
	} else {
		sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);

		/*
		 * With TFO, there might be no data at all, thus still go into this
		 * code-path here.
		 */
		if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
		    MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
			off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
			sb_cc -= off;
		} else {
			mptcplog((LOG_ERR, "%s this should not happen: sndnxt %u sndmax %u\n",
			    __func__, (uint32_t)mp_tp->mpt_sndnxt,
			    (uint32_t)mp_tp->mpt_sndmax),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);

			goto out;
		}
	}

	sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
	if (sb_cc <= 0)
		mptcplog((LOG_ERR, "%s sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
		    __func__, sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
		    (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
		    mptcp_subflow_cwnd_space(so)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);

	sb_cc = min(sb_cc, UINT16_MAX);

	/*
	 * Create a DSN mapping for the data we are about to send. It all
	 * has the same mapping.
	 */
	if (mpte->mpte_reinjectq)
		mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
	else
		mpt_dsn = mp_tp->mpt_snduna + off;

	mpt_mbuf = sb_mb;
	while (mpt_mbuf && mpte->mpte_reinjectq == NULL &&
	    (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
	    mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
		off -= mpt_mbuf->m_pkthdr.mp_rlen;
		mpt_mbuf = mpt_mbuf->m_next;
	}
	if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
		mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
		    __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
		    mpts->mpts_probecnt),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));

	head = tail = NULL;

	while (tot_sent < sb_cc) {
		ssize_t mlen;

		mlen = mpt_mbuf->m_len;
		mlen -= off;
		mlen = min(mlen, sb_cc - tot_sent);

		if (mlen < 0) {
			mptcplog((LOG_ERR, "%s mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
			    __func__, (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
			    (uint32_t)off, sb_cc, tot_sent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
			goto out;
		}

		if (mlen == 0)
			break;

		m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
		    M_COPYM_MUST_COPY_HDR);
		if (m == NULL) {
			mptcplog((LOG_ERR, "%s m_copym_mode failed\n", __func__),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
			error = ENOBUFS;
			break;
		}

		/* Create a DSN mapping for the data (m_copym does it) */
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_next == NULL);

		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
		m->m_pkthdr.mp_dsn = mpt_dsn;
		m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
		m->m_pkthdr.len = mlen;

		if (head == NULL) {
			head = tail = m;
		} else {
			tail->m_next = m;
			tail = m;
		}

		tot_sent += mlen;
		off = 0;
		mpt_mbuf = mpt_mbuf->m_next;
	}

	if (mpte->mpte_reinjectq) {
		reinjected = TRUE;

		if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
			struct mbuf *n = sb_mb;

			while (n) {
				n->m_pkthdr.mp_dsn += sb_cc;
				n->m_pkthdr.mp_rlen -= sb_cc;
				n = n->m_next;
			}
			m_adj(sb_mb, sb_cc);
		} else {
			mpte->mpte_reinjectq = sb_mb->m_nextpkt;
			m_freem(sb_mb);
		}
	}

	mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
	    __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
	    tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
		dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
		    tot_sent);
	}

	/* Now, let's update rel-seq and the data-level length */
	mpts->mpts_rel_seq += tot_sent;
	m = head;
	while (m) {
		if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)
			m->m_pkthdr.mp_csum = dss_csum;
		m->m_pkthdr.mp_rlen = tot_sent;
		m = m->m_next;
	}

	if (head != NULL) {
		if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
		    (tp->t_tfo_stats == 0))
			tp->t_mpflags |= TMPF_TFO_REQUEST;

		error = sock_sendmbuf(so, NULL, head, 0, NULL);

		DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
		    struct sockbuf *, &so->so_rcv,
		    struct sockbuf *, &so->so_snd,
		    struct mptses *, mpte, struct mptsub *, mpts,
		    size_t, tot_sent);
	}

done_sending:
	if (error == 0 ||
	    (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
		uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;

		if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
			tcpstat.tcps_mp_num_probes++;
			if ((uint32_t)tot_sent < mpts->mpts_maxseg)
				mpts->mpts_probecnt += 1;
			else
				mpts->mpts_probecnt +=
				    tot_sent/mpts->mpts_maxseg;
		}

		if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
			if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
			    MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt))
				mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
			mp_tp->mpt_sndnxt = new_sndnxt;
		}

		mptcp_cancel_timer(mp_tp, MPTT_REXMT);

		/* Must be here as mptcp_can_send_more() checks for this */
		soclearfastopen(mp_so);

		if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
		    (mpts->mpts_probesoon != 0))
			mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
			    __func__, mpts->mpts_connid,
			    !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
			    tot_sent, (int) sb_cc, mpts->mpts_probecnt,
			    (tcp_now - mpts->mpts_probesoon)),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

		if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
			mpte->mpte_mppcb->mpp_flags |= MPP_SET_CELLICON;

			mpte->mpte_used_cell = 1;
		} else {
			mpte->mpte_mppcb->mpp_flags |= MPP_UNSET_CELLICON;

			mpte->mpte_used_wifi = 1;
		}

		/*
		 * Don't propagate EWOULDBLOCK - it's already taken care of
		 * in mptcp_usr_send for TFO.
		 */
		error = 0;
	} else {
		mptcplog((LOG_ERR, "%s: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
		    __func__, mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_ERR);
	}
out:

	if (wakeup)
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
	return (error);

zero_len_write:
	/* Opting to call pru_send as no mbuf at subflow level */
	error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
	    NULL, current_proc());

	goto done_sending;
}
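/*
 * Worked example of the DSN mapping built above (values are invented
 * for illustration): with mpt_snduna = 1000 and mpt_sndnxt = 1500, the
 * transmit offset is off = 500 and the mapping starts at
 * mpt_dsn = 1000 + 500 = 1500.  If the window/cwnd clamps leave
 * sb_cc = 200, each copied mbuf carries mp_dsn = 1500 and
 * mp_rseq = mpts_rel_seq, and after the loop mp_rlen = 200 is stamped
 * on the chain; mpts_rel_seq then advances by 200 and, barring
 * reinjection or probing, mpt_sndnxt becomes 1700.
 */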
static void
mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
{
	struct mbuf *n, *prev = NULL;

	mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
	    __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
	    m->m_pkthdr.mp_rseq),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	n = mpte->mpte_reinjectq;

	/* First, look for an mbuf n, whose data-sequence-number is bigger or
	 * equal than m's sequence number.
	 */
	while (n) {
		if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn))
			break;

		prev = n;

		n = n->m_nextpkt;
	}

	if (n) {
		/* m is already fully covered by the next mbuf in the queue */
		if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
		    n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
			mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
			    __func__, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}

		/* m is covering the next mbuf entirely, thus we remove this guy */
		if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
			struct mbuf *tmp = n->m_nextpkt;

			mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
			    __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
			    n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

			m->m_nextpkt = NULL;
			if (prev == NULL)
				mpte->mpte_reinjectq = tmp;
			else
				prev->m_nextpkt = tmp;

			m_freem(n);
			n = tmp;
		}
	}

	if (prev) {
		/* m is already fully covered by the previous mbuf in the queue */
		if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
			mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
			    __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			goto dont_queue;
		}
	}

	if (prev == NULL)
		mpte->mpte_reinjectq = m;
	else
		prev->m_nextpkt = m;

	m->m_nextpkt = n;

	return;

dont_queue:
	m_freem(m);
	return;
}
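/*
 * Overlap-elimination example for the queue above (illustrative
 * numbers): with the reinject queue holding mappings [100,200) and
 * [300,400), inserting m = [150,250) matches neither coverage test, so
 * it is simply linked between the two entries.  Inserting
 * m = [150,450) instead covers [300,400) entirely, so that entry is
 * unlinked and freed before m is chained in.
 */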
static struct mbuf *
mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
{
	struct socket *mp_so = mptetoso(mpte);
	struct mbuf *m;

	m = mp_so->so_snd.sb_mb;

	while (m) {
		/* If this segment covers what we are looking for, return it. */
		if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
		    MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn))
			break;

		/* Segment is no more in the queue */
		if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn))
			return (NULL);

		m = m->m_next;
	}

	return (m);
}
static struct mbuf *
mptcp_copy_mbuf_list(struct mbuf *m, int len)
{
	struct mbuf *top = NULL, *tail = NULL;
	uint64_t dsn;
	uint32_t dlen, rseq;

	dsn = m->m_pkthdr.mp_dsn;
	dlen = m->m_pkthdr.mp_rlen;
	rseq = m->m_pkthdr.mp_rseq;

	while (len > 0) {
		struct mbuf *n;

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
		if (n == NULL) {
			mptcplog((LOG_ERR, "%s m_copym_mode returned NULL\n", __func__),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
			goto err;
		}

		VERIFY(n->m_flags & M_PKTHDR);
		VERIFY(n->m_next == NULL);
		VERIFY(n->m_pkthdr.mp_dsn == dsn);
		VERIFY(n->m_pkthdr.mp_rlen == dlen);
		VERIFY(n->m_pkthdr.mp_rseq == rseq);
		VERIFY(n->m_len == m->m_len);

		n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);

		if (top == NULL)
			top = n;

		if (tail != NULL)
			tail->m_next = n;

		tail = n;

		len -= m->m_len;
		m = m->m_next;
	}

	return (top);

err:
	if (top)
		m_freem(top);

	return (NULL);
}
static void
mptcp_reinject_mbufs(struct socket *so)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	struct mptcb *mp_tp = tptomptp(tp);
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct sockbuf *sb = &so->so_snd;
	struct mbuf *m;

	m = sb->sb_mb;
	while (m) {
		struct mbuf *n = m->m_next, *orig = m;

		mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
		    __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
		    m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

		VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));

		if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ)
			goto next;

		/* Has it all already been acknowledged at the data-level? */
		if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen))
			goto next;

		/* Part of this has already been acknowledged - lookup in the
		 * MPTCP-socket for the segment.
		 */
		if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
			m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
			if (m == NULL)
				goto next;
		}

		/* Copy the mbuf with headers (aka, DSN-numbers) */
		m = mptcp_copy_mbuf_list(m, m->m_pkthdr.mp_rlen);
		if (m == NULL)
			break;

		VERIFY(m->m_nextpkt == NULL);

		/* Now, add to the reinject-queue, eliminating overlapping
		 * segments
		 */
		mptcp_add_reinjectq(mpte, m);

		orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;

next:
		/* mp_rlen can cover multiple mbufs, so advance to the end of it. */
		while (n) {
			VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));

			if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn)
				break;

			n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
			n = n->m_next;
		}

		m = n;
	}
}
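/*
 * Failover-flow sketch: mptcp_subflow_failover_ev() below calls
 * mptcp_reinject_mbufs() on the failing subflow.  That walk skips data
 * already DATA_ACKed at the MPTCP level, re-copies the remaining
 * mappings via mptcp_copy_mbuf_list(), and parks them on
 * mpte_reinjectq; mptcp_subflow_output() prefers mpte_reinjectq over
 * the MP send buffer, so the data is retransmitted on the surviving
 * subflow.
 */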
void
mptcp_clean_reinjectq(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	mpte_lock_assert_held(mpte);

	while (mpte->mpte_reinjectq) {
		struct mbuf *m = mpte->mpte_reinjectq;

		if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
		    MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna))
			break;

		mpte->mpte_reinjectq = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m_freem(m);
	}
}
/*
 * Subflow socket control event upcall.
 */
static void
mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
{
#pragma unused(so)
	struct mptsub *mpts = arg;
	struct mptses *mpte = mpts->mpts_mpte;

	VERIFY(mpte != NULL);
	mpte_lock_assert_held(mpte);

	if ((mpts->mpts_evctl & events) == events)
		return;

	mpts->mpts_evctl |= events;

	if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
		mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
		return;
	}

	mptcp_subflow_workloop(mpte);
}
/*
 * Subflow socket control events.
 *
 * Called for handling events related to the underlying subflow socket.
 */
static ev_ret_t
mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint)
{
	ev_ret_t ret = MPTS_EVRET_OK;
	int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
	    sizeof(mpsub_ev_entry_tbl[0]);

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	/* bail if there's nothing to process */
	if (!mpts->mpts_evctl)
		return (ret);

	if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET|SO_FILT_HINT_MUSTRST|
	    SO_FILT_HINT_CANTSENDMORE|SO_FILT_HINT_TIMEOUT|
	    SO_FILT_HINT_NOSRCADDR|SO_FILT_HINT_IFDENIED|
	    SO_FILT_HINT_DISCONNECTED)) {
		mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
	}

	DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
	    struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);

	mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
	    mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

	/*
	 * Process all the socket filter hints and reset the hint
	 * once it is handled
	 */
	for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
		/*
		 * Always execute the DISCONNECTED event, because it will wakeup
		 * the app.
		 */
		if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
		    (ret >= MPTS_EVRET_OK ||
		    mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
			ev_ret_t error;

			mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
			error = mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
			ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
		}
	}

	/*
	 * We should be getting only events specified via sock_catchevents(),
	 * so loudly complain if we have any unprocessed one(s).
	 */
	if (mpts->mpts_evctl || ret < MPTS_EVRET_OK)
		mptcplog((LOG_WARNING, "%s%s: cid %d evret %s (%d) unhandled events=%b\n", __func__,
		    (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
		    mpts->mpts_connid,
		    mptcp_evret2str(ret), ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
	else
		mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
		    mpts->mpts_evctl, SO_FILT_HINT_BITS),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

	return (ret);
}
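/*
 * Dispatch-table note (rough shape, see the definition of
 * mpsub_ev_entry_tbl earlier in this file): each entry pairs a
 * SO_FILT_HINT_* mask with its handler, e.g. approximately
 *
 *	{ .sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
 *	  .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev },
 *
 * so the loop above is a plain table walk, with MAX() retaining the
 * most significant ev_ret_t produced by any handler.
 */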
static ev_ret_t
mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
	    mpts->mpts_connid, event),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got an event for this subflow that might need to be propagated,
	 * based on the state of the MPTCP connection.
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
		mp_so->so_error = so->so_error;
		*p_mpsofilt_hint |= event;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
 */
static ev_ret_t
mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(p_mpsofilt_hint, event)
	struct socket *mp_so;
	struct tcpcb *tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */

	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	tp = intotcpcb(sotoinpcb(mpts->mpts_socket));

	/*
	 * This overwrites any previous mpte_lost_aid to avoid storing
	 * too much state when the typical case has only two subflows.
	 */
	mpte->mpte_flags |= MPTE_SND_REM_ADDR;
	mpte->mpte_lost_aid = tp->t_local_aid;

	mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection has lost its source address.
	 */
	mptcp_subflow_abort(mpts, EADDRNOTAVAIL);

	if (mp_so->so_flags & SOF_NOADDRAVAIL)
		mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
 * indicates that the remote side sent a Data FIN
 */
static ev_ret_t
mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	mp_tp = mpte->mpte_mptcb;

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * We got a Data FIN for the MPTCP connection.
	 * The FIN may arrive with data. The data is handed up to the
	 * mptcp socket and the user is notified so that it may close
	 * the socket if needed.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT)
		*p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;

	return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
/*
 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
 */
static ev_ret_t
mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct mptsub *mpts_alt = NULL;
	struct socket *alt_so = NULL;
	struct socket *mp_so;
	int altpath_exists = 0;

	mpte_lock_assert_held(mpte);
	mp_so = mptetoso(mpte);
	mptcplog((LOG_NOTICE, "%s: mp_so 0x%llx\n", __func__,
	    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	mptcp_reinject_mbufs(mpts->mpts_socket);

	mpts_alt = mptcp_get_subflow(mpte, mpts, NULL);
	/*
	 * If there is no alternate eligible subflow, ignore the
	 * failover hint.
	 */
	if (mpts_alt == NULL) {
		mptcplog((LOG_WARNING, "%s: no alternate path\n", __func__),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

		goto done;
	}

	altpath_exists = 1;
	alt_so = mpts_alt->mpts_socket;
	if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
		/* All data acknowledged and no RTT spike */
		if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
			mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
		} else {
			/* no alternate path available */
			altpath_exists = 0;
		}
	}

	if (altpath_exists) {
		mpts_alt->mpts_flags |= MPTSF_ACTIVE;

		mpte->mpte_active_sub = mpts_alt;
		mpts->mpts_flags |= MPTSF_FAILINGOVER;
		mpts->mpts_flags &= ~MPTSF_ACTIVE;

		mptcplog((LOG_NOTICE, "%s: switched from %d to %d\n",
		    __func__, mpts->mpts_connid, mpts_alt->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

		mptcpstats_inc_switch(mpte, mpts);

		sowwakeup(alt_so);
	} else {
		mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
done:
		mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
	}

	return (MPTS_EVRET_OK);
}
/*
 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);

	mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
	    mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	/*
	 * The subflow connection cannot use the outgoing interface, let's
	 * close this subflow.
	 */
	mptcp_subflow_abort(mpts, EPERM);

	mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);

	return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mptcb *mp_tp;
	int af;
	boolean_t mpok = FALSE;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;
	tp = sototcpcb(so);
	af = mpts->mpts_dst.sa_family;

	if (mpts->mpts_flags & MPTSF_CONNECTED)
		return (MPTS_EVRET_OK);

	if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
	    (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
		if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
		    (so->so_state & SS_ISCONNECTED)) {
			mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
			    __func__, mpts->mpts_connid),
			    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
			(void) soshutdownlock(so, SHUT_RD);
			(void) soshutdownlock(so, SHUT_WR);
			(void) sodisconnectlocked(so);
		}
		return (MPTS_EVRET_OK);
	}

	/*
	 * The subflow connection has been connected.  Find out whether it
	 * is connected as a regular TCP or as a MPTCP subflow.  The idea is:
	 *
	 *   a. If MPTCP connection is not yet established, then this must be
	 *	the first subflow connection.  If MPTCP failed to negotiate,
	 *	fallback to regular TCP by degrading this subflow.
	 *
	 *   b. If MPTCP connection has been established, then this must be
	 *	one of the subsequent subflow connections.  If MPTCP failed
	 *	to negotiate, disconnect the connection.
	 *
	 * Right now, we simply unblock any waiters at the MPTCP socket layer
	 * if the MPTCP connection has not been established.
	 */

	if (so->so_state & SS_ISDISCONNECTED) {
		/*
		 * With MPTCP joins, a connection is connected at the subflow
		 * level, but the 4th ACK from the server elevates the MPTCP
		 * subflow to connected state.  So there is a small window
		 * where the subflow could get disconnected before the
		 * connected event is processed.
		 */
		return (MPTS_EVRET_OK);
	}

	if (mpts->mpts_flags & MPTSF_TFO_REQD)
		mptcp_drop_tfo_data(mpte, mpts);

	mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
	mpts->mpts_flags |= MPTSF_CONNECTED;

	if (tp->t_mpflags & TMPF_MPTCP_TRUE)
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;

	tp->t_mpflags &= ~TMPF_TFO_REQUEST;

	/* get/verify the outbound interface */
	inp = sotoinpcb(so);

	mpts->mpts_maxseg = tp->t_maxseg;

	mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
	    ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
	    ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
	    (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);

	mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		mp_tp->mpt_state = MPTCPS_ESTABLISHED;
		mpte->mpte_associd = mpts->mpts_connid;
		DTRACE_MPTCP2(state__change,
		    struct mptcb *, mp_tp,
		    uint32_t, 0 /* event */);

		if (SOCK_DOM(so) == AF_INET) {
			in_getsockaddr_s(so, &mpte->__mpte_src_v4);
		} else {
			in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
		}

		/* case (a) above */
		if (!mpok) {
			tcpstat.tcps_mpcap_fallback++;

			tp->t_mpflags |= TMPF_INFIN_SENT;
			mptcp_notify_mpfail(so);
		} else {
			if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
			    mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
				tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			} else {
				mpts->mpts_flags |= MPTSF_PREFERRED;
			}
			mpts->mpts_flags |= MPTSF_ACTIVE;

			mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
			mpte->mpte_nummpcapflows++;

			mptcp_check_subflows_and_add(mpte);

			if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
				mpte->mpte_initial_cell = 1;

			mpte->mpte_handshake_success = 1;
		}

		mp_tp->mpt_sndwnd = tp->snd_wnd;
		mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
		mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
		soisconnected(mp_so);

		mptcplog((LOG_DEBUG, "%s: MPTCPS_ESTABLISHED for mp_so 0x%llx mpok %u\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mpok),
		    MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
	} else if (mpok) {
		/*
		 * In case of additional flows, the MPTCP socket is not
		 * MPTSF_MP_CAPABLE until an ACK is received from server
		 * for 3-way handshake.  TCP would have guaranteed that this
		 * is an MPTCP subflow.
		 */
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
		    !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
		    mpte->mpte_svctype != MPTCP_SVCTYPE_AGGREGATE) {
			tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
			mpts->mpts_flags &= ~MPTSF_PREFERRED;
		} else {
			mpts->mpts_flags |= MPTSF_PREFERRED;
		}

		mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
		mpte->mpte_nummpcapflows++;

		mpts->mpts_rel_seq = 1;

		mptcp_check_subflows_and_remove(mpte);
	} else {
		unsigned int i;

		/* Mark this interface as non-MPTCP */
		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];

			if (inp->inp_last_outifp->if_index == info->ifindex) {
				info->no_mptcp_support = 1;
				break;
			}
		}

		tcpstat.tcps_join_fallback++;
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp))
			tcpstat.tcps_mptcp_cell_proxy++;
		else
			tcpstat.tcps_mptcp_wifi_proxy++;

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);

		return (MPTS_EVRET_OK);
	}

	/* This call, just to "book" an entry in the stats-table for this ifindex */
	mptcp_get_statsindex(mpte->mpte_itfstats, mpts);

	mptcp_output(mpte);

	return (MPTS_EVRET_OK); /* keep the subflow socket around */
}
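/*
 * Outcome summary for the handler above: a first subflow that
 * negotiated MP_CAPABLE establishes the session; a first subflow that
 * did not falls back to plain TCP via mptcp_notify_mpfail(); a later
 * join that did not negotiate MPTCP is reset with
 * SO_FILT_HINT_MUSTRST instead of degrading the whole session.
 */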
/*
 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
 */
static ev_ret_t
mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
	    __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
	    !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
	    !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

	if (mpts->mpts_flags & MPTSF_DISCONNECTED)
		return (MPTS_EVRET_DELETE);

	mpts->mpts_flags |= MPTSF_DISCONNECTED;

	/* The subflow connection has been disconnected. */

	if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
		mpte->mpte_nummpcapflows--;
		if (mpte->mpte_active_sub == mpts) {
			mpte->mpte_active_sub = NULL;
			mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
			    __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
		}
		mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
	}

	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
	    ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE)) ||
	    (sototcpcb(so)->t_mpflags & TMPF_FASTCLOSERCV)) {
		mptcp_drop(mpte, mp_tp, so->so_error);
	}

	/*
	 * Clear flags that are used by getconninfo to return state.
	 * Retain like MPTSF_DELETEOK for internal purposes.
	 */
	mpts->mpts_flags &= ~(MPTSF_CONNECTING|MPTSF_CONNECT_PENDING|
	    MPTSF_CONNECTED|MPTSF_DISCONNECTING|MPTSF_PREFERRED|
	    MPTSF_MP_CAPABLE|MPTSF_MP_READY|MPTSF_MP_DEGRADED|MPTSF_ACTIVE);

	return (MPTS_EVRET_DELETE);
}
/*
 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
 */
static ev_ret_t
mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event, p_mpsofilt_hint)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	ev_ret_t ret = MPTS_EVRET_OK;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE)
		mpts->mpts_flags |= MPTSF_MP_CAPABLE;
	else
		mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;

	if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
			goto done;
		mpts->mpts_flags |= MPTSF_MP_DEGRADED;
	} else {
		mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
	}

	if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY)
		mpts->mpts_flags |= MPTSF_MP_READY;
	else
		mpts->mpts_flags &= ~MPTSF_MP_READY;

	if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
		mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
		mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
	}

	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		VERIFY(!(mp_tp->mpt_flags & MPTCPF_JOIN_READY));
		ret = MPTS_EVRET_DISCONNECT_FALLBACK;
	} else if (mpts->mpts_flags & MPTSF_MP_READY) {
		mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
		ret = MPTS_EVRET_CONNECT_PENDING;
	}

	mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx mpt_flags=%b cid %d mptsf=%b\n",
	    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
	    mp_tp->mpt_flags, MPTCPF_BITS, mpts->mpts_connid,
	    mpts->mpts_flags, MPTSF_BITS),
	    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);

done:
	return (ret);
}
/*
 * Handle SO_FILT_HINT_MUSTRST subflow socket event
 */
static ev_ret_t
mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	struct socket *mp_so, *so;
	struct mptcb *mp_tp;
	boolean_t is_fastclose;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;
	so = mpts->mpts_socket;

	/* We got an invalid option or a fast close */
	struct tcptemp *t_template;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;

	tp = intotcpcb(inp);
	so->so_error = ECONNABORTED;

	is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);

	t_template = tcp_maketemplate(tp);
	if (t_template) {
		struct tcp_respond_args tra;

		bzero(&tra, sizeof(tra));
		if (inp->inp_flags & INP_BOUND_IF)
			tra.ifscope = inp->inp_boundifp->if_index;
		else
			tra.ifscope = IFSCOPE_NONE;
		tra.awdl_unrestricted = 1;

		tcp_respond(tp, t_template->tt_ipgen,
		    &t_template->tt_t, (struct mbuf *)NULL,
		    tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
		(void) m_free(dtom(t_template));
		mptcplog((LOG_DEBUG, "MPTCP Events: "
		    "%s: mp_so 0x%llx cid %d \n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
	}
	mptcp_subflow_abort(mpts, ECONNABORTED);

	if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
		*p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;

		if (mp_tp->mpt_state < MPTCPS_ESTABLISHED)
			mp_so->so_error = ECONNABORTED;
		else
			mp_so->so_error = ECONNRESET;

		/*
		 * mptcp_drop is being called after processing the events, to fully
		 * close the MPTCP connection
		 */
	}

	if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS)
		mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;

	return (MPTS_EVRET_DELETE);
}
static ev_ret_t
mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_READ_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state))
			continue;

		if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active)
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;

	return (MPTS_EVRET_OK);
}
static ev_ret_t
mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
	uint64_t *p_mpsofilt_hint, uint64_t event)
{
#pragma unused(event)
	bool found_active = false;

	mpts->mpts_flags |= MPTSF_WRITE_STALL;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    tp->t_state > TCPS_CLOSE_WAIT)
			continue;

		if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
			found_active = true;
			break;
		}
	}

	if (!found_active)
		*p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;

	return (MPTS_EVRET_OK);
}
static const char *
mptcp_evret2str(ev_ret_t ret)
{
	const char *c = "UNKNOWN";

	switch (ret) {
	case MPTS_EVRET_DELETE:
		c = "MPTS_EVRET_DELETE";
		break;
	case MPTS_EVRET_CONNECT_PENDING:
		c = "MPTS_EVRET_CONNECT_PENDING";
		break;
	case MPTS_EVRET_DISCONNECT_FALLBACK:
		c = "MPTS_EVRET_DISCONNECT_FALLBACK";
		break;
	case MPTS_EVRET_OK:
		c = "MPTS_EVRET_OK";
		break;
	default:
		break;
	}
	return (c);
}
/*
 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
{
	struct socket *mp_so, *so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mpte_lock_assert_held(mpte);

	mp_so = mptetoso(mpte);
	so = mpts->mpts_socket;

	if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
	    mpo->mpo_level == SOL_SOCKET &&
	    mpo->mpo_name == SO_MARK_CELLFALLBACK) {
		mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %u lastcell? %d boundcell? %d\n",
		    __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable(),
		    sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
		    mpts->mpts_ifscope != IFSCOPE_NONE ? IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]) : -1),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		/*
		 * When we open a new subflow, mark it as cell fallback, if
		 * this subflow goes over cell.
		 *
		 * (except for first-party apps)
		 */

		if (mpte->mpte_flags & MPTE_FIRSTPARTY)
			return (0);

		if (sotoinpcb(so)->inp_last_outifp &&
		    !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp))
			return (0);

		/*
		 * This here is an OR, because if the app is not binding to the
		 * interface, then it definitely is not a cell-fallback
		 * connection.
		 */
		if (mpts->mpts_ifscope == IFSCOPE_NONE ||
		    !IFNET_IS_CELLULAR(ifindex2ifnet[mpts->mpts_ifscope]))
			return (0);
	}

	mpo->mpo_flags &= ~MPOF_INTERIM;

	bzero(&sopt, sizeof (sopt));
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof (int);
	sopt.sopt_p = kernproc;

	error = sosetoptlock(so, &sopt, 0);
	if (error == 0) {
		mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s "
		    "val %d set successful\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
	} else {
		mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s "
		    "val %d set error %d\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval, error),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}
	return (error);
}
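/*
 * Usage sketch (hypothetical option values): options recorded against
 * the MP socket are replayed onto each subflow through this routine,
 * roughly as
 *
 *	struct mptopt smpo = {
 *		.mpo_flags = MPOF_SUBFLOW_OK,
 *		.mpo_level = SOL_SOCKET,
 *		.mpo_name = SO_KEEPALIVE,
 *		.mpo_intval = 1,
 *	};
 *	(void) mptcp_subflow_sosetopt(mpte, mpts, &smpo);
 *
 * with the subflow socket already locked, as the header comment above
 * requires.
 */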
/*
 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
 * caller must ensure that the option can be issued on subflow sockets, via
 * MPOF_SUBFLOW_OK flag.
 */
int
mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
	struct mptopt *mpo)
{
	struct socket *mp_so;
	struct sockopt sopt;
	int error;

	VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	mp_so = mptetoso(mpte);

	bzero(&sopt, sizeof (sopt));
	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = mpo->mpo_level;
	sopt.sopt_name = mpo->mpo_name;
	sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
	sopt.sopt_valsize = sizeof (int);
	sopt.sopt_p = kernproc;

	error = sogetoptlock(so, &sopt, 0);	/* already locked */
	if (error == 0) {
		mptcplog((LOG_DEBUG, "MPTCP Socket: "
		    "%s: mp_so 0x%llx sopt %s "
		    "val %d get successful\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
		    mpo->mpo_intval),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	} else {
		mptcplog((LOG_ERR, "MPTCP Socket: "
		    "%s: mp_so 0x%llx sopt %s get error %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	}
	return (error);
}
/*
 * MPTCP garbage collector.
 *
 * This routine is called by the MP domain on-demand, periodic callout,
 * which is triggered when a MPTCP socket is closed.  The callout will
 * repeat as long as this routine returns a non-zero value.
 */
static uint32_t
mptcp_gc(struct mppcbinfo *mppi)
{
	struct mppcb *mpp, *tmpp;
	uint32_t active = 0;

	LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);

	TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
		struct socket *mp_so;
		struct mptses *mpte;
		struct mptcb *mp_tp;

		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mp_so = mpp->mpp_socket;
		VERIFY(mp_so != NULL);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);

		mptcplog((LOG_DEBUG, "MPTCP Socket: "
		    "%s: mp_so 0x%llx found "
		    "(u=%d,r=%d,s=%d)\n", __func__,
		    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_usecount,
		    mp_so->so_retaincnt, mpp->mpp_state),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		if (!mpte_try_lock(mpte)) {
			mptcplog((LOG_DEBUG, "MPTCP Socket: "
			    "%s: mp_so 0x%llx skipped lock "
			    "(u=%d,r=%d)\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mp_so->so_usecount, mp_so->so_retaincnt),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			active++;
			continue;
		}

		/* check again under the lock */
		if (mp_so->so_usecount > 0) {
			boolean_t wakeup = FALSE;
			struct mptsub *mpts, *tmpts;

			mptcplog((LOG_DEBUG, "MPTCP Socket: "
			    "%s: mp_so 0x%llx skipped usecount "
			    "[u=%d,r=%d] %d %d\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mp_so->so_usecount, mp_so->so_retaincnt,
			    mp_tp->mpt_gc_ticks,
			    mp_tp->mpt_state),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

			if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
				if (mp_tp->mpt_gc_ticks > 0)
					mp_tp->mpt_gc_ticks--;
				if (mp_tp->mpt_gc_ticks == 0) {
					wakeup = TRUE;
				}
			}
			if (wakeup) {
				TAILQ_FOREACH_SAFE(mpts,
				    &mpte->mpte_subflows, mpts_entry, tmpts) {
					mptcp_subflow_eupcall1(mpts->mpts_socket,
					    mpts, SO_FILT_HINT_DISCONNECTED);
				}
			}
			mpte_unlock(mpte);
			active++;
			continue;
		}

		if (mpp->mpp_state != MPPCB_STATE_DEAD) {
			panic("MPTCP Socket: %s: mp_so 0x%llx skipped state "
			    "[u=%d,r=%d,s=%d]\n", __func__,
			    (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
			    mp_so->so_usecount, mp_so->so_retaincnt,
			    mpp->mpp_state);
		}

		if (mp_tp->mpt_state == MPTCPS_TIME_WAIT)
			mptcp_close(mpte, mp_tp);

		mptcp_session_destroy(mpte);

		mptcplog((LOG_DEBUG, "MPTCP Socket: "
		    "%s: mp_so 0x%llx destroyed [u=%d,r=%d]\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mp_so->so_usecount, mp_so->so_retaincnt),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		DTRACE_MPTCP4(dispose, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mppcb *, mpp);

		mp_pcbdispose(mpp);
		sodealloc(mp_so);
	}

	return (active);
}
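/*
 * Callout-behaviour note: the value returned above counts the PCBs
 * that could not be reaped on this pass (lock contention or remaining
 * references).  Per the header comment, the MP domain re-arms the
 * callout while this stays non-zero, so a socket at or past
 * MPTCPS_FIN_WAIT_1 counts mpt_gc_ticks down across passes before its
 * subflows are force-disconnected.
 */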
/*
 * Drop a MPTCP connection, reporting the specified error.
 */
struct mptses *
mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
{
	struct socket *mp_so;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, 0 /* event */);

	if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0)
		errno = mp_tp->mpt_softerror;
	mp_so->so_error = errno;

	return (mptcp_close(mpte, mp_tp));
}
/*
 * Close a MPTCP control block.
 */
struct mptses *
mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
{
	struct socket *mp_so = NULL;
	struct mptsub *mpts = NULL, *tmpts = NULL;

	mpte_lock_assert_held(mpte);	/* same as MP socket lock */
	VERIFY(mpte->mpte_mptcb == mp_tp);
	mp_so = mptetoso(mpte);

	mp_tp->mpt_state = MPTCPS_TERMINATE;

	mptcp_freeq(mp_tp);

	soisdisconnected(mp_so);

	/* Clean up all subflows */
	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		mptcp_subflow_disconnect(mpte, mpts);
	}

	return (NULL);
}

void
mptcp_notify_close(struct socket *so)
{
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
}
/*
 * MPTCP workloop.
 */
void
mptcp_subflow_workloop(struct mptses *mpte)
{
	struct socket *mp_so;
	struct mptsub *mpts, *tmpts;
	boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
	uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;

	mpte_lock_assert_held(mpte);
	VERIFY(mpte->mpte_mppcb != NULL);
	mp_so = mptetoso(mpte);
	VERIFY(mp_so != NULL);

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		ev_ret_t ret;

		if (mpts->mpts_socket->so_usecount == 0) {
			/* Will be removed soon by tcp_garbage_collect */
			continue;
		}

		mptcp_subflow_addref(mpts);
		mpts->mpts_socket->so_usecount++;

		ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);

		/*
		 * If MPTCP socket is closed, disconnect all subflows.
		 * This will generate a disconnect event which will
		 * be handled during the next iteration, causing a
		 * non-zero error to be returned above.
		 */
		if (mp_so->so_flags & SOF_PCBCLEARING)
			mptcp_subflow_disconnect(mpte, mpts);

		switch (ret) {
		case MPTS_EVRET_OK:
			/* nothing to do */
			break;
		case MPTS_EVRET_DELETE:
			mptcp_subflow_soclose(mpts);
			break;
		case MPTS_EVRET_CONNECT_PENDING:
			connect_pending = TRUE;
			break;
		case MPTS_EVRET_DISCONNECT_FALLBACK:
			disconnect_fallback = TRUE;
			break;
		default:
			mptcplog((LOG_DEBUG,
			    "MPTCP Socket: %s: mptcp_subflow_events "
			    "returned invalid value: %d\n", __func__,
			    ret),
			    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
			break;
		}
		mptcp_subflow_remref(mpts);	/* ours */

		VERIFY(mpts->mpts_socket->so_usecount != 0);
		mpts->mpts_socket->so_usecount--;
	}

	if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
		struct mptcb *mp_tp = mpte->mpte_mptcb;

		VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);

		if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
			mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
			socantrcvmore(mp_so);
			mpsofilt_hint_mask &= ~SO_FILT_HINT_CANTRCVMORE;
		}

		soevent(mp_so, mpsofilt_hint_mask);
	}

	if (!connect_pending && !disconnect_fallback)
		return;

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		if (disconnect_fallback) {
			struct socket *so = NULL;
			struct inpcb *inp = NULL;
			struct tcpcb *tp = NULL;

			if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
				continue;

			mpts->mpts_flags |= MPTSF_MP_DEGRADED;

			if (mpts->mpts_flags & (MPTSF_DISCONNECTING|
			    MPTSF_DISCONNECTED|MPTSF_CONNECT_PENDING))
				continue;

			so = mpts->mpts_socket;

			/*
			 * The MPTCP connection has degraded to a fallback
			 * mode, so there is no point in keeping this subflow
			 * regardless of its MPTCP-readiness state, unless it
			 * is the primary one which we use for fallback.  This
			 * assumes that the subflow used for fallback is the
			 * ACTIVE one.
			 */

			inp = sotoinpcb(so);
			tp = intotcpcb(inp);
			tp->t_mpflags &=
			    ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
			tp->t_mpflags |= TMPF_TCP_FALLBACK;

			if (mpts->mpts_flags & MPTSF_ACTIVE)
				continue;

			tp->t_mpflags |= TMPF_RESET;
			soevent(so, SO_FILT_HINT_MUSTRST);
		} else if (connect_pending) {
			/*
			 * The MPTCP connection has progressed to a state
			 * where it supports full multipath semantics; allow
			 * additional joins to be attempted for all subflows
			 * that are in the PENDING state.
			 */
			if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
				int error = mptcp_subflow_soconnectx(mpte,
				    mpts);

				if (error)
					mptcp_subflow_abort(mpts, error);
			}
		}
	}
}
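/*
 * Illustrative summary (added here, not original source text) of the
 * event-return contract between mptcp_subflow_events() and the workloop
 * above, as it can be read off the switch statement:
 *
 *	MPTS_EVRET_OK			subflow stays as-is
 *	MPTS_EVRET_DELETE		workloop closes the subflow socket
 *	MPTS_EVRET_CONNECT_PENDING	second pass attempts the pending join
 *	MPTS_EVRET_DISCONNECT_FALLBACK	second pass degrades the remaining
 *					subflows and resets the non-ACTIVE ones
 */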
/*
 * Protocol pr_lock callback.
 */
int
mptcp_lock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
		    mp_so, lr_saved, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock(mpp);

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (refcount != 0)
		mp_so->so_usecount++;
	mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
	mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;

	return (0);
}
/*
 * Protocol pr_unlock callback.
 */
int
mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);
	void *lr_saved;

	if (lr == NULL)
		lr_saved = __builtin_return_address(0);
	else
		lr_saved = lr;

	if (mpp == NULL) {
		panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, lr_saved,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mpp_lock_assert_held(mpp);

	if (refcount != 0)
		mp_so->so_usecount--;

	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
	mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
	mpp_unlock(mpp);

	return (0);
}
/*
 * Protocol pr_getlock callback.
 */
lck_mtx_t *
mptcp_getlock(struct socket *mp_so, int flags)
{
	struct mppcb *mpp = mpsotomppcb(mp_so);

	if (mpp == NULL) {
		panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
		    solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	if (mp_so->so_usecount < 0) {
		panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
		    mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
		/* NOTREACHED */
	}
	return (mpp_getlock(mpp, flags));
}
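/*
 * Usage sketch (hypothetical caller, not part of this file): the pr_lock/
 * pr_unlock callbacks are invoked in pairs with matching refcount arguments;
 * lr may be NULL, in which case the caller's return address is recorded in
 * the lock history used by solockhistory_nr():
 *
 *	mptcp_lock(mp_so, 1, NULL);	// so_usecount++, lock_lr[] updated
 *	...critical section under the mpp lock...
 *	mptcp_unlock(mp_so, 1, NULL);	// so_usecount--, unlock_lr[] updated
 */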
/*
 * MPTCP Join support
 */

static void
mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp,
    uint8_t addr_id)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * The address ID of the first flow is implicitly 0.
	 */
	if (mp_tp->mpt_state == MPTCPS_CLOSED) {
		tp->t_local_aid = 0;
	} else {
		tp->t_local_aid = addr_id;
		tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
		so->so_flags |= SOF_MP_SEC_SUBFLOW;
	}
	sauth_entry = zalloc(mpt_subauth_zone);
	sauth_entry->msae_laddr_id = tp->t_local_aid;
	sauth_entry->msae_raddr_id = 0;
	sauth_entry->msae_raddr_rand = 0;
try_again:
	sauth_entry->msae_laddr_rand = RandomULong();
	if (sauth_entry->msae_laddr_rand == 0)
		goto try_again;
	LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
}
static void
mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	struct tcpcb *tp = NULL;
	int found = 0;

	tp = sototcpcb(so);
	if (tp == NULL)
		return;

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
			found = 1;
			break;
		}
	}
	if (found) {
		LIST_REMOVE(sauth_entry, msae_next);
		zfree(mpt_subauth_zone, sauth_entry);
	}
}
static void
mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
    u_int32_t *rrand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == addr_id) {
			if (lrand)
				*lrand = sauth_entry->msae_laddr_rand;
			if (rrand)
				*rrand = sauth_entry->msae_raddr_rand;
			break;
		}
	}
}
static void
mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
    mptcp_addr_id raddr_id, u_int32_t raddr_rand)
{
	struct mptcp_subf_auth_entry *sauth_entry;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
		if (sauth_entry->msae_laddr_id == laddr_id) {
			if ((sauth_entry->msae_raddr_id != 0) &&
			    (sauth_entry->msae_raddr_id != raddr_id)) {
				mptcplog((LOG_ERR, "MPTCP Socket: %s mismatched"
				    " address ids %d %d \n", __func__, raddr_id,
				    sauth_entry->msae_raddr_id),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_id = raddr_id;
			if ((sauth_entry->msae_raddr_rand != 0) &&
			    (sauth_entry->msae_raddr_rand != raddr_rand)) {
				mptcplog((LOG_ERR, "MPTCP Socket: "
				    "%s: dup SYN_ACK %d %d \n",
				    __func__, raddr_rand,
				    sauth_entry->msae_raddr_rand),
				    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
				return;
			}
			sauth_entry->msae_raddr_rand = raddr_rand;
			return;
		}
	}
}
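/*
 * Sketch (derived from the routines above, not original text): during an
 * MP_JOIN exchange each side contributes a 32-bit random nonce.  The local
 * one is created in mptcp_attach_to_subf() (msae_laddr_rand), the remote one
 * arrives with the SYN/ACK and is recorded via mptcp_set_raddr_rand().  Both
 * are later fetched with mptcp_get_rands() and fed, together with the two
 * 64-bit keys, into mptcp_hmac_sha1() to authenticate the join.
 */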
/*
 * SHA1 support for MPTCP
 */
static void
mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
{
	SHA1_CTX sha1ctxt;
	const unsigned char *sha1_base;
	int sha1_size;

	sha1_base = (const unsigned char *) key;
	sha1_size = sizeof (mptcp_key_t);
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, sha1_base, sha1_size);
	SHA1Final(sha_digest, &sha1ctxt);
}
void
mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
    u_int32_t rand1, u_int32_t rand2, u_char *digest)
{
	SHA1_CTX sha1ctxt;
	mptcp_key_t key_ipad[8] = {0};	/* key XOR'd with inner pad */
	mptcp_key_t key_opad[8] = {0};	/* key XOR'd with outer pad */
	u_int32_t data[2];
	int i;

	bzero(digest, SHA1_RESULTLEN);

	/* Set up the Key for HMAC */
	key_ipad[0] = key1;
	key_ipad[1] = key2;

	key_opad[0] = key1;
	key_opad[1] = key2;

	/* Set up the message for HMAC */
	data[0] = rand1;
	data[1] = rand2;

	/* Key is 512 block length, so no need to compute hash */

	/* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */

	for (i = 0; i < 8; i++) {
		key_ipad[i] ^= 0x3636363636363636;
		key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
	}

	/* Perform inner SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof (key_ipad));
	SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof (data));
	SHA1Final(digest, &sha1ctxt);

	/* Perform outer SHA1 */
	SHA1Init(&sha1ctxt);
	SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof (key_opad));
	SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
	SHA1Final(digest, &sha1ctxt);
}
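/*
 * Worked layout for the computation above (an illustrative sketch, byte
 * counts only): the two 64-bit keys occupy key_ipad[0..1]/key_opad[0..1] and
 * the remaining six zeroed slots pad the key to one full 512-bit SHA1 block,
 * which is why no key hashing is needed.  The message is the 64-bit
 * concatenation rand1||rand2:
 *
 *	inner  = SHA1((K || 0^48) ^ 0x36...36, rand1||rand2)	(64-byte key)
 *	digest = SHA1((K || 0^48) ^ 0x5c...5c, inner)		(20 bytes)
 */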
/*
 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
 */
void
mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
{
	uint32_t lrand, rrand;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	lrand = rrand = 0;
	mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
	mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
	    digest);
}
/*
 * Authentication data generation
 */
static void
mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
    int token_len)
{
	VERIFY(token_len == sizeof (u_int32_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/* Most significant 32 bits of the SHA1 hash */
	bcopy(sha_digest, token, sizeof (u_int32_t));
}
static void
mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
    int idsn_len)
{
	VERIFY(idsn_len == sizeof (u_int64_t));
	VERIFY(sha_digest_len == SHA1_RESULTLEN);

	/*
	 * Least significant 64 bits of the SHA1 hash
	 */

	idsn[7] = sha_digest[12];
	idsn[6] = sha_digest[13];
	idsn[5] = sha_digest[14];
	idsn[4] = sha_digest[15];
	idsn[3] = sha_digest[16];
	idsn[2] = sha_digest[17];
	idsn[1] = sha_digest[18];
	idsn[0] = sha_digest[19];
}
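/*
 * Worked example (hypothetical digest bytes, not from the source): with
 * sha_digest[0..19] = d0..d19 computed over the 64-bit key,
 *
 *	token = d0 d1 d2 d3		(most significant 32 bits)
 *	idsn  = d19 d18 ... d12		(least significant 64 bits,
 *					 byte-swapped as assigned above)
 */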
static void
mptcp_conn_properties(struct mptcb *mp_tp)
{
	/* There is only Version 0 at this time */
	mp_tp->mpt_version = MPTCP_STD_VERSION_0;

	/* Set DSS checksum flag */
	if (mptcp_dss_csum)
		mp_tp->mpt_flags |= MPTCPF_CHECKSUM;

	/* Set up receive window */
	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/* Set up gc ticks */
	mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
}
void
mptcp_init_local_parms(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	char key_digest[SHA1_RESULTLEN];

	read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
	mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);

	mptcp_generate_token(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_localtoken, sizeof (mp_tp->mpt_localtoken));
	mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_local_idsn, sizeof (u_int64_t));

	/* The subflow SYN is also first MPTCP byte */
	mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
	mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;

	mptcp_conn_properties(mp_tp);
}
int
mptcp_init_remote_parms(struct mptcb *mp_tp)
{
	char remote_digest[SHA1_RESULTLEN];
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/* Only Version 0 is supported for auth purposes */
	if (mp_tp->mpt_version != MPTCP_STD_VERSION_0)
		return (-1);

	/* Setup local and remote tokens and Initial DSNs */
	mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
	mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remotetoken, sizeof (mp_tp->mpt_remotetoken));
	mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
	    (caddr_t)&mp_tp->mpt_remote_idsn, sizeof (u_int64_t));
	mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;

	return (0);
}
void
mptcp_send_dfin(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = NULL;

	inp = sotoinpcb(so);
	if (!inp)
		return;

	tp = intotcpcb(inp);
	if (!tp)
		return;

	if (!(tp->t_mpflags & TMPF_RESET))
		tp->t_mpflags |= TMPF_SEND_DFIN;
}
/*
 * Data Sequence Mapping routines
 */
void
mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
{
	struct mptcb *mp_tp;

	if (m == NULL)
		return;

	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
		m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
		m->m_pkthdr.mp_rlen = m_pktlen(m);
		mp_tp->mpt_sndmax += m_pktlen(m);
		m = m->m_next;
	}
}
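/*
 * Example (a sketch with made-up numbers): if mpt_sndmax is 1000 and a chain
 * of three packets of lengths 100, 200 and 50 is passed in, the loop above
 * stamps them with mp_dsn 1000, 1100 and 1300 respectively (mp_rlen equal to
 * each packet length) and leaves mpt_sndmax at 1350.
 */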
void
mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
{
	struct mptcb *mp_tp = tptomptp(sototcpcb(so));
	uint64_t data_ack;
	uint64_t dsn;

	if (!m || len == 0)
		return;

	while (m && len > 0) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
		dsn = m->m_pkthdr.mp_dsn;

		len -= m->m_len;
		m = m->m_next;
	}

	if (m && len == 0) {
		/*
		 * If there is one more mbuf in the chain, it automatically means
		 * that up to m->mp_dsn has been ack'ed.
		 *
		 * This means, we actually correct data_ack back down (compared
		 * to what we set inside the loop - dsn + data_len). Because in
		 * the loop we are "optimistic" and assume that the full mapping
		 * will be acked. If that's not the case and we get out of the
		 * loop with m != NULL, it means only up to m->mp_dsn has been
		 * acked.
		 */
		data_ack = m->m_pkthdr.mp_dsn;
	}

	if (len < 0) {
		/*
		 * If len is negative, meaning we acked in the middle of an mbuf,
		 * only up to this mbuf's data-sequence number has been acked
		 * at the MPTCP-level.
		 */
		data_ack = dsn;
	}

	mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
	    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
	mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
}
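/*
 * Worked example for the inference above (made-up numbers): assume two
 * mappings [dsn 100, rlen 10] and [dsn 110, rlen 10] and len = 15 bytes
 * dropped.  The loop optimistically sets data_ack to 120 (dsn + rlen of the
 * second mapping), but exits with len == -5 inside that mbuf, so the
 * "len < 0" branch corrects data_ack back down to that mapping's dsn, 110.
 */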
void
mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
{
	int rewinding = 0;

	/* TFO makes things complicated. */
	if (so->so_flags1 & SOF1_TFO_REWIND) {
		rewinding = 1;
		so->so_flags1 &= ~SOF1_TFO_REWIND;
	}

	while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
		u_int32_t sub_len;
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		sub_len = m->m_pkthdr.mp_rlen;

		if (sub_len < len) {
			m->m_pkthdr.mp_dsn += sub_len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				m->m_pkthdr.mp_rseq += sub_len;
			}
			m->m_pkthdr.mp_rlen = 0;
			len -= sub_len;
		} else {
			/* sub_len >= len */
			if (rewinding == 0)
				m->m_pkthdr.mp_dsn += len;
			if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
				if (rewinding == 0)
					m->m_pkthdr.mp_rseq += len;
			}
			mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
			    __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
			    m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			m->m_pkthdr.mp_rlen -= len;
			break;
		}
		m = m->m_next;
	}

	if (so->so_flags & SOF_MP_SUBFLOW &&
	    !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
	    !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
		/*
		 * Received an ack without receiving a DATA_ACK.
		 * Need to fallback to regular TCP (or destroy this subflow).
		 */
		sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
		mptcp_notify_mpfail(so);
	}
}
/* Obtain the DSN mapping stored in the mbuf */
void
mptcp_output_getm_dsnmap32(struct socket *so, int off,
    uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
{
	u_int64_t dsn64;

	mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
	*dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
}
void
mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
    uint32_t *relseq, uint16_t *data_len,
    uint16_t *dss_csum)
{
	struct mbuf *m = so->so_snd.sb_mb;
	int off_orig = off;

	VERIFY(off >= 0);

	/*
	 * In the subflow socket, the DSN sequencing can be discontiguous,
	 * but the subflow sequence mapping is contiguous. Use the subflow
	 * sequence property to find the right mbuf and corresponding dsn
	 * mapping.
	 */

	while (m) {
		VERIFY(m->m_flags & M_PKTHDR);
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);

		if (off >= m->m_len) {
			off -= m->m_len;
			m = m->m_next;
		} else {
			break;
		}
	}

	VERIFY(m);
	VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);

	*dsn = m->m_pkthdr.mp_dsn;
	*relseq = m->m_pkthdr.mp_rseq;
	*data_len = m->m_pkthdr.mp_rlen;
	*dss_csum = m->m_pkthdr.mp_csum;

	mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
	    __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
}
/*
 * Note that this is called only from tcp_input() via mptcp_input_preproc().
 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 * When it trims data, tcp_input() calls m_adj() which does not remove the
 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
 * The dsn map insertion cannot be delayed after trim, because data can be in
 * the reassembly queue for a while and the DSN option info in tp will be
 * overwritten for every new packet received.
 * The dsn map will be adjusted just prior to appending to subflow sockbuf
 * with mptcp_adj_rmap().
 */
void
mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m)
{
	VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));

	if (tp->t_mpflags & TMPF_EMBED_DSN) {
		VERIFY(m->m_flags & M_PKTHDR);
		m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
		m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
		m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
		m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
		m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
		tp->t_mpflags &= ~TMPF_EMBED_DSN;
		tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
	}
}
int
mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off)
{
	struct mptsub *mpts = sototcpcb(so)->t_mpsub;

	if (m_pktlen(m) == 0)
		return (0);

	if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		m->m_pkthdr.mp_dsn += off;
		m->m_pkthdr.mp_rseq += off;
		m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
	} else {
		if (!(mpts->mpts_flags & MPTSF_CONFIRMED)) {
			/* data arrived without a DSS option mapping */

			/* initial subflow can fallback right after SYN handshake */
			mptcp_notify_mpfail(so);
		}
	}

	mpts->mpts_flags |= MPTSF_CONFIRMED;

	return (0);
}
/*
 * Following routines help with failure detection and failover of data
 * transfer from one subflow to another.
 */
void
mptcp_act_on_txfail(struct socket *so)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);

	if (inp == NULL)
		return;

	tp = intotcpcb(inp);
	if (tp == NULL)
		return;

	if (so->so_flags & SOF_MP_TRYFAILOVER)
		return;

	so->so_flags |= SOF_MP_TRYFAILOVER;
	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
}
/*
 * Support for MP_FAIL option
 */
int
mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
{
	struct mbuf *m = so->so_snd.sb_mb;
	u_int64_t dsn;
	int off = 0;
	u_int32_t datalen;

	if (m == NULL)
		return (-1);

	while (m != NULL) {
		VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
		VERIFY(m->m_flags & M_PKTHDR);
		dsn = m->m_pkthdr.mp_dsn;
		datalen = m->m_pkthdr.mp_rlen;
		if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
		    (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
			off = dsn_fail - dsn;
			*tcp_seq = m->m_pkthdr.mp_rseq + off;
			mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
			    dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			return (0);
		}

		m = m->m_next;
	}

	/*
	 * If there was no mbuf data and a fallback to TCP occurred, there's
	 * not much else to do.
	 */
	mptcplog((LOG_ERR, "MPTCP Sender: "
	    "%s: %llu not found \n", __func__, dsn_fail),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
	return (-1);
}
/*
 * Support for sending contiguous MPTCP bytes in subflow
 * Also for preventing sending data with ACK in 3-way handshake
 */
int32_t
mptcp_adj_sendlen(struct socket *so, int32_t off)
{
	struct tcpcb *tp = sototcpcb(so);
	struct mptsub *mpts = tp->t_mpsub;
	uint64_t mdss_dsn;
	uint32_t mdss_subflow_seq;
	int mdss_subflow_off;
	uint16_t mdss_data_len;
	uint16_t dss_csum;

	mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
	    &mdss_data_len, &dss_csum);

	/*
	 * We need to compute how much of the mapping still remains.
	 * So, we compute the offset in the send-buffer of the dss-sub-seq.
	 */
	mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;

	/*
	 * When TFO is used, we are sending the mpts->mpts_iss although the relative
	 * seq has been set to 1 (while it should be 0).
	 */
	if (tp->t_mpflags & TMPF_TFO_REQUEST)
		mdss_subflow_off--;

	if (off < mdss_subflow_off)
		printf("%s off %d mdss_subflow_off %d mdss_subflow_seq %u iss %u suna %u\n", __func__,
		    off, mdss_subflow_off, mdss_subflow_seq, mpts->mpts_iss, tp->snd_una);
	VERIFY(off >= mdss_subflow_off);

	mptcplog((LOG_DEBUG, "%s dlen %u off %d sub_off %d sub_seq %u iss %u suna %u\n",
	    __func__, mdss_data_len, off, mdss_subflow_off, mdss_subflow_seq,
	    mpts->mpts_iss, tp->snd_una), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	return (mdss_data_len - (off - mdss_subflow_off));
}
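/*
 * Worked example (made-up numbers): with a mapping of mdss_data_len 1000
 * whose offset in the send buffer, mdss_subflow_off, is 200, a caller asking
 * at off 500 gets 1000 - (500 - 200) = 700 contiguous MPTCP bytes still
 * covered by the current mapping.
 */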
static uint32_t
mptcp_get_maxseg(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint32_t maxseg = 0;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state))
			continue;

		if (tp->t_maxseg > maxseg)
			maxseg = tp->t_maxseg;
	}

	return (maxseg);
}
static uint8_t
mptcp_get_rcvscale(struct mptses *mpte)
{
	struct mptsub *mpts;
	uint8_t rcvscale = UINT8_MAX;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

		if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
		    TCPS_HAVERCVDFIN2(tp->t_state))
			continue;

		if (tp->rcv_scale < rcvscale)
			rcvscale = tp->rcv_scale;
	}

	return (rcvscale);
}
/* Similar to tcp_sbrcv_reserve */
static void
mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
    u_int32_t newsize, u_int32_t idealsize)
{
	uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);

	/* newsize should not exceed max */
	newsize = min(newsize, tcp_autorcvbuf_max);

	/* The receive window scale negotiated at the
	 * beginning of the connection will also set a
	 * limit on the socket buffer size
	 */
	newsize = min(newsize, TCP_MAXWIN << rcvscale);

	/* Set new socket buffer size */
	if (newsize > sbrcv->sb_hiwat &&
	    (sbreserve(sbrcv, newsize) == 1)) {
		sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
		    (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);

		/* Again check the limit set by the advertised
		 * window scale
		 */
		sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
		    TCP_MAXWIN << rcvscale);
	}
}
void
mptcp_sbrcv_grow(struct mptcb *mp_tp)
{
	struct mptses *mpte = mp_tp->mpt_mpte;
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct sockbuf *sbrcv = &mp_so->so_rcv;
	uint32_t hiwat_sum = 0;
	uint32_t ideal_sum = 0;
	struct mptsub *mpts;

	/*
	 * Do not grow the receive socket buffer if
	 * - auto resizing is disabled, globally or on this socket
	 * - the high water mark already reached the maximum
	 * - the stream is in background and receive side is being
	 *   throttled
	 * - if there are segments in reassembly queue indicating loss,
	 *   do not need to increase recv window during recovery as more
	 *   data is not going to be sent. A duplicate ack sent during
	 *   recovery should not change the receive window
	 */
	if (tcp_do_autorcvbuf == 0 ||
	    (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
	    tcp_cansbgrow(sbrcv) == 0 ||
	    sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
	    !LIST_EMPTY(&mp_tp->mpt_segq)) {
		/* Can not resize the socket buffer, just return */
		return;
	}

	/*
	 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
	 *
	 * But, for this we first need accurate receiver-RTT estimations, which
	 * we currently don't have.
	 *
	 * Let's use a dummy algorithm for now, just taking the sum of all
	 * subflow's receive-buffers. It's too low, but that's all we can get
	 * for now.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
		ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
	}

	mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
}
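/*
 * Example (sketch): with two subflows whose receive buffers currently have
 * sb_hiwat 128 KB and 256 KB, the loop above proposes newsize = 384 KB for
 * the MPTCP-level receive buffer; mptcp_sbrcv_reserve() then clips that to
 * tcp_autorcvbuf_max and to TCP_MAXWIN << rcvscale before applying it.
 */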
/*
 * Determine if we can grow the receive socket buffer to avoid sending
 * a zero window update to the peer. We allow even socket buffers that
 * have fixed size (set by the application) to grow if the resource
 * constraints are met. They will also be trimmed after the application
 * reads data.
 *
 * Similar to tcp_sbrcv_grow_rwin
 */
static void
mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
{
	struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
	u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
	u_int32_t rcvbuf = sb->sb_hiwat;

	if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so))
		return;

	if (tcp_do_autorcvbuf == 1 &&
	    tcp_cansbgrow(sb) &&
	    /* Diff to tcp_sbrcv_grow_rwin */
	    (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
	    (rcvbuf - sb->sb_cc) < rcvbufinc &&
	    rcvbuf < tcp_autorcvbuf_max &&
	    (sb->sb_idealsize > 0 &&
	    sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
		sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
	}
}
/* Similar to tcp_sbspace */
int32_t
mptcp_sbspace(struct mptcb *mp_tp)
{
	struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
	uint32_t rcvbuf;
	int32_t space;
	int32_t pending = 0;

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	mptcp_sbrcv_grow_rwin(mp_tp, sb);

	/* hiwat might have changed */
	rcvbuf = sb->sb_hiwat;

	space = ((int32_t) imin((rcvbuf - sb->sb_cc),
	    (sb->sb_mbmax - sb->sb_mbcnt)));
	if (space < 0)
		space = 0;

#if CONTENT_FILTER
	/* Compensate for data being processed by content filters */
	pending = cfil_sock_data_space(sb);
#endif /* CONTENT_FILTER */
	if (pending > space)
		space = 0;
	else
		space -= pending;

	return (space);
}
/*
 * Support Fallback to Regular TCP
 */
void
mptcp_notify_mpready(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__ready, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (!(tp->t_mpflags & TMPF_MPTCP_TRUE))
		return;

	if (tp->t_mpflags & TMPF_MPTCP_READY)
		return;

	tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
	tp->t_mpflags |= TMPF_MPTCP_READY;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
void
mptcp_notify_mpfail(struct socket *so)
{
	struct tcpcb *tp = NULL;

	if (so == NULL)
		return;

	tp = intotcpcb(sotoinpcb(so));

	if (tp == NULL)
		return;

	DTRACE_MPTCP4(multipath__failed, struct socket *, so,
	    struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
	    struct tcpcb *, tp);

	if (tp->t_mpflags & TMPF_TCP_FALLBACK)
		return;

	tp->t_mpflags &= ~(TMPF_MPTCP_READY|TMPF_MPTCP_TRUE);
	tp->t_mpflags |= TMPF_TCP_FALLBACK;

	soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
}
/*
 * Keepalive helper function
 */
boolean_t
mptcp_ok_to_keepalive(struct mptcb *mp_tp)
{
	boolean_t ret = 1;
	mpte_lock_assert_held(mp_tp->mpt_mpte);

	if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
		ret = 0;
	}
	return (ret);
}
/*
 * MPTCP t_maxseg adjustment function
 */
int32_t
mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
{
	int32_t mss_lower = 0;
	struct mptcb *mp_tp = tptomptp(tp);

#define	MPTCP_COMPUTE_LEN {				\
	mss_lower = sizeof (struct mptcp_dss_ack_opt);	\
	if (mp_tp->mpt_flags & MPTCPF_CHECKSUM)		\
		mss_lower += 2;				\
	else						\
		/* adjust to 32-bit boundary + EOL */	\
		mss_lower += 2;				\
}
	if (mp_tp == NULL)
		return (0);

	mpte_lock_assert_held(mp_tp->mpt_mpte);

	/*
	 * For the first subflow and subsequent subflows, adjust mss for
	 * most common MPTCP option size, for case where tcp_mss is called
	 * during option processing and MTU discovery.
	 */
	if (!mtudisc) {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
		    !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
			MPTCP_COMPUTE_LEN;
		}

		if (tp->t_mpflags & TMPF_PREESTABLISHED &&
		    tp->t_mpflags & TMPF_SENT_JOIN) {
			MPTCP_COMPUTE_LEN;
		}
	} else {
		if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
			MPTCP_COMPUTE_LEN;
		}
	}

	return (mss_lower);
}
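/*
 * Worked example (sketch): a DSS-ACK option (struct mptcp_dss_ack_opt) plus
 * either the 2-byte DSS checksum or 2 bytes of padding to a 32-bit boundary
 * with EOL is the most common per-segment MPTCP overhead, so t_maxseg ends
 * up lowered by sizeof (struct mptcp_dss_ack_opt) + 2 on MPTCP subflows.
 */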
/*
 * Update the pid, upid, uuid of the subflow so, based on parent so
 */
void
mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
{
	if (so->last_pid != mp_so->last_pid ||
	    so->last_upid != mp_so->last_upid) {
		so->last_upid = mp_so->last_upid;
		so->last_pid = mp_so->last_pid;
		uuid_copy(so->last_uuid, mp_so->last_uuid);
	}
	so_update_policy(so);
}
static void
fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
{
	struct inpcb *inp;

	tcp_getconninfo(so, &flow->flow_ci);
	inp = sotoinpcb(so);
#if INET6
	if ((inp->inp_vflag & INP_IPV6) != 0) {
		flow->flow_src.ss_family = AF_INET6;
		flow->flow_dst.ss_family = AF_INET6;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
		SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
		SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
		SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
		SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
	} else
#endif
	if ((inp->inp_vflag & INP_IPV4) != 0) {
		flow->flow_src.ss_family = AF_INET;
		flow->flow_dst.ss_family = AF_INET;
		flow->flow_src.ss_len = sizeof(struct sockaddr_in);
		flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
		SIN(&flow->flow_src)->sin_port = inp->inp_lport;
		SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
		SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
		SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
	}
	flow->flow_len = sizeof(*flow);
	flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
	flow->flow_flags = mpts->mpts_flags;
	flow->flow_cid = mpts->mpts_connid;
	flow->flow_relseq = mpts->mpts_rel_seq;
	flow->flow_soerror = mpts->mpts_socket->so_error;
	flow->flow_probecnt = mpts->mpts_probecnt;
}
static int
mptcp_pcblist SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	int error = 0, f;
	size_t len;
	struct mppcb *mpp;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct socket *so;
	conninfo_mptcp_t mptcpci;
	mptcp_flow_t *flows = NULL;

	if (req->newptr != USER_ADDR_NULL)
		return (EPERM);

	lck_mtx_lock(&mtcbinfo.mppi_lock);
	if (req->oldptr == USER_ADDR_NULL) {
		size_t n = mtcbinfo.mppi_count;
		lck_mtx_unlock(&mtcbinfo.mppi_lock);
		req->oldidx = (n + n/8) * sizeof(conninfo_mptcp_t) +
		    4 * (n + n/8) * sizeof(mptcp_flow_t);
		return (0);
	}
	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		flows = NULL;
		mpte_lock(mpp->mpp_pcbe);
		VERIFY(mpp->mpp_flags & MPP_ATTACHED);
		mpte = mptompte(mpp);
		VERIFY(mpte != NULL);
		mpte_lock_assert_held(mpte);
		mp_tp = mpte->mpte_mptcb;
		VERIFY(mp_tp != NULL);

		bzero(&mptcpci, sizeof(mptcpci));
		mptcpci.mptcpci_state = mp_tp->mpt_state;
		mptcpci.mptcpci_flags = mp_tp->mpt_flags;
		mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
		mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
		mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
		mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
		mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
		mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
		mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
		mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
		mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
		mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
		mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;

		mptcpci.mptcpci_nflows = mpte->mpte_numflows;
		mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
		mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
		mptcpci.mptcpci_flow_offset =
		    offsetof(conninfo_mptcp_t, mptcpci_flows);

		len = sizeof(*flows) * mpte->mpte_numflows;
		if (mpte->mpte_numflows != 0) {
			flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
			if (flows == NULL) {
				mpte_unlock(mpte);
				break;
			}
			mptcpci.mptcpci_len = sizeof(mptcpci) +
			    sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
			error = SYSCTL_OUT(req, &mptcpci,
			    sizeof(mptcpci) - sizeof(mptcp_flow_t));
		} else {
			mptcpci.mptcpci_len = sizeof(mptcpci);
			error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
		}
		if (error) {
			mpte_unlock(mpte);
			FREE(flows, M_TEMP);
			break;
		}
		f = 0;
		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			so = mpts->mpts_socket;
			fill_mptcp_subflow(so, &flows[f], mpts);
			f++;
		}
		mpte_unlock(mpte);
		if (flows) {
			error = SYSCTL_OUT(req, flows, len);
			FREE(flows, M_TEMP);
			if (error)
				break;
		}
	}
	lck_mtx_unlock(&mtcbinfo.mppi_lock);

	return (error);
}

SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
    "List of active MPTCP connections");
/*
 * Set notsent lowat mark on the MPTCB
 */
int
mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
{
	struct mptcb *mp_tp = NULL;
	int error = 0;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED)
		mp_tp = mpte->mpte_mptcb;

	if (mp_tp)
		mp_tp->mpt_notsent_lowat = optval;
	else
		error = EINVAL;

	return (error);
}
u_int32_t
mptcp_get_notsent_lowat(struct mptses *mpte)
{
	struct mptcb *mp_tp = NULL;

	if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED)
		mp_tp = mpte->mpte_mptcb;

	if (mp_tp)
		return (mp_tp->mpt_notsent_lowat);
	else
		return (0);
}
int
mptcp_notsent_lowat_check(struct socket *so)
{
	struct mptses *mpte;
	struct mppcb *mpp;
	struct mptcb *mp_tp;
	struct mptsub *mpts;

	int notsent = 0;

	mpp = mpsotomppcb(so);
	if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
		return (0);
	}

	mpte = mptompte(mpp);
	mpte_lock_assert_held(mpte);
	mp_tp = mpte->mpte_mptcb;

	notsent = so->so_snd.sb_cc;

	if ((notsent == 0) ||
	    ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
	    mp_tp->mpt_notsent_lowat)) {
		mptcplog((LOG_DEBUG, "MPTCP Sender: "
		    "lowat %d notsent %d actual %d \n",
		    mp_tp->mpt_notsent_lowat, notsent,
		    notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
		return (1);
	}

	/* When Nagle's algorithm is not disabled, it is better
	 * to wake up the client even before there is at least one
	 * maxseg of data to write.
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		int retval = 0;
		if (mpts->mpts_flags & MPTSF_ACTIVE) {
			struct socket *subf_so = mpts->mpts_socket;
			struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));

			notsent = so->so_snd.sb_cc -
			    (tp->snd_nxt - tp->snd_una);

			if ((tp->t_flags & TF_NODELAY) == 0 &&
			    notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
				retval = 1;
			}
			mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
			    " nodelay false \n",
			    mp_tp->mpt_notsent_lowat, notsent),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
			return (retval);
		}
	}
	return (0);
}
/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
symptoms_advisory_t mptcp_advisory;

static errno_t
mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
    void **unitinfo)
{
#pragma unused(kctlref, sac, unitinfo)

	if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0)
		mptcplog((LOG_ERR, "%s MPTCP kernel-control socket already open!", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	mptcp_kern_skt_unit = sac->sc_unit;

	return (0);
}
static void
mptcp_allow_uuid(uuid_t uuid)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		if (mp_so->so_flags & SOF_DELEGATED &&
		    uuid_compare(uuid, mp_so->e_uuid))
			goto next;
		else if (!(mp_so->so_flags & SOF_DELEGATED) &&
		    uuid_compare(uuid, mp_so->last_uuid))
			goto next;

		mpte->mpte_flags |= MPTE_ACCESS_GRANTED;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mpte->mpte_flags &= ~MPTE_ACCESS_GRANTED;

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
static void
mptcp_wifi_status_changed(void)
{
	struct mppcb *mpp;

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct mptses *mpte;
		struct socket *mp_so;

		mpp_lock(mpp);

		mpte = mpp->mpp_pcbe;
		mp_so = mpp->mpp_socket;

		/* Only handover-mode is purely driven by Symptom's Wi-Fi status */
		if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER)
			goto next;

		mptcp_check_subflows_and_add(mpte);
		mptcp_check_subflows_and_remove(mpte);

next:
		mpp_unlock(mpp);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}
void
mptcp_ask_symptoms(struct mptses *mpte)
{
	struct mptcp_symptoms_ask_uuid ask;
	struct socket *mp_so;
	struct proc *p;
	int pid, prio, err;

	if (mptcp_kern_skt_unit == 0) {
		mptcplog((LOG_ERR, "%s skt_unit is still 0\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	mp_so = mptetoso(mpte);

	if (mp_so->so_flags & SOF_DELEGATED)
		pid = mp_so->e_pid;
	else
		pid = mp_so->last_pid;

	p = proc_find(pid);
	if (p == PROC_NULL) {
		mptcplog((LOG_ERR, "%s Couldn't find proc for pid %u\n", __func__,
		    pid), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
		return;
	}

	ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;

	if (mp_so->so_flags & SOF_DELEGATED)
		uuid_copy(ask.uuid, mp_so->e_uuid);
	else
		uuid_copy(ask.uuid, mp_so->last_uuid);

	prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);

	if (prio == TASK_BACKGROUND_APPLICATION)
		ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
	else if (prio == TASK_FOREGROUND_APPLICATION)
		ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
	else
		ask.priority = MPTCP_SYMPTOMS_UNKNOWN;

	mptcplog((LOG_DEBUG, "%s ask symptoms about pid %u, prio %u\n", __func__,
	    pid, ask.priority), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

	err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
	    &ask, sizeof(ask), CTL_DATA_EOR);
	if (err)
		mptcplog((LOG_ERR, "%s ctl_enqueuedata failed %d\n", __func__, err),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	proc_rele(p);
}
static errno_t
mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
    void *unitinfo)
{
#pragma unused(kctlref, kcunit, unitinfo)

	OSDecrementAtomic(&mptcp_kern_skt_inuse);

	return (0);
}
static errno_t
mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
    mbuf_t m, int flags)
{
#pragma unused(kctlref, unitinfo, flags)
	symptoms_advisory_t *sa = NULL;

	if (kcunit != mptcp_kern_skt_unit)
		mptcplog((LOG_ERR, "%s kcunit %u is different from expected one %u\n",
		    __func__, kcunit, mptcp_kern_skt_unit),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);

	if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
		mbuf_freem(m);
		return (EINVAL);
	}

	if (mbuf_len(m) >= sizeof(*sa))
		sa = mbuf_data(m);
	else
		return (EINVAL);

	if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_NOCOMMENT &&
	    sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
		uint8_t old_wifi_status = mptcp_advisory.sa_wifi_status;

		mptcplog((LOG_DEBUG, "%s: wifi %d,%d\n",
		    __func__, sa->sa_wifi_status, mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);

		if ((sa->sa_wifi_status &
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK)) !=
		    (SYMPTOMS_ADVISORY_WIFI_BAD | SYMPTOMS_ADVISORY_WIFI_OK))
			mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;

		if (old_wifi_status != mptcp_advisory.sa_wifi_status)
			mptcp_wifi_status_changed();
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_NOCOMMENT) {
		mptcplog((LOG_DEBUG, "%s: NOCOMMENT wifi %d\n", __func__,
		    mptcp_advisory.sa_wifi_status),
		    MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
	} else if (sa->sa_nwk_status == SYMPTOMS_ADVISORY_USEAPP) {
		uuid_t uuid;

		mptcplog((LOG_DEBUG, "%s Got response about useApp\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		uuid_copy(uuid, (unsigned char *)(sa + 1));

		mptcp_allow_uuid(uuid);
	}

	return (0);
}
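/*
 * Message layout sketch (inferred from the parsing above, not original
 * text): the Symptoms daemon sends a symptoms_advisory_t; for
 * SYMPTOMS_ADVISORY_USEAPP replies, the uuid of the allowed application
 * immediately follows the struct in the same mbuf:
 *
 *	+------------------------+---------------------+
 *	| symptoms_advisory_t sa | uuid_t (USEAPP only)|
 *	+------------------------+---------------------+
 *	read via: uuid_copy(uuid, (unsigned char *)(sa + 1));
 */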
void
mptcp_control_register(void)
{
	/* Set up the advisory control socket */
	struct kern_ctl_reg mptcp_kern_ctl;

	bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
	strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
	    sizeof(mptcp_kern_ctl.ctl_name));
	mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
	mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
	mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
	mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;

	(void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
}

boolean_t
mptcp_is_wifi_unusable(void)
{
	/* a false return val indicates there is no info or wifi is ok */
	return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD);
}
/* If TFO data is successfully acked, it must be dropped from the mptcp so */
static void
mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
{
	struct socket *mp_so = mptetoso(mpte);
	struct socket *so = mpts->mpts_socket;
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	/* If data was sent with SYN, rewind state */
	if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
		u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
		unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;

		VERIFY(mp_droplen <= (UINT_MAX));
		VERIFY(mp_droplen >= tcp_droplen);

		mpts->mpts_flags &= ~MPTSF_TFO_REQD;
		mpts->mpts_iss += tcp_droplen;
		tp->t_mpflags &= ~TMPF_TFO_REQUEST;

		if (mp_droplen > tcp_droplen) {
			/* handle partial TCP ack */
			mp_so->so_flags1 |= SOF1_TFO_REWIND;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
			mp_droplen = tcp_droplen;
		} else {
			/* all data on SYN was acked */
			mpts->mpts_rel_seq = 1;
			mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
		}
		mp_tp->mpt_sndmax -= tcp_droplen;

		if (mp_droplen != 0) {
			VERIFY(mp_so->so_snd.sb_mb != NULL);
			sbdrop(&mp_so->so_snd, (int)mp_droplen);
		}
		mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx cid %d TFO tcp len %d mptcp len %d\n",
		    __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
		    mpts->mpts_connid, tcp_droplen, mp_droplen),
		    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
	}
}
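/*
 * Worked example (made-up numbers): if 100 bytes were queued at the MPTCP
 * level (mp_droplen 100) but the TFO SYN only carried and got acked 60 bytes
 * (tcp_droplen 60), the partial-ack branch above sets SOF1_TFO_REWIND,
 * rewinds mpt_sndnxt by the 40 unacked bytes and drops only 60 bytes from
 * the MP socket's send buffer, so the remainder gets retransmitted.
 */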
static int
mptcp_freeq(struct mptcb *mp_tp)
{
	struct tseg_qent *q;
	int rv = 0;

	while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);
		rv = 1;
	}
	mp_tp->mpt_reassqlen = 0;
	return (rv);
}
static int
mptcp_post_event(u_int32_t event_code, int value)
{
	struct kev_mptcp_data event_data;
	struct kev_msg ev_msg;

	memset(&ev_msg, 0, sizeof(ev_msg));

	ev_msg.vendor_code = KEV_VENDOR_APPLE;
	ev_msg.kev_class = KEV_NETWORK_CLASS;
	ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
	ev_msg.event_code = event_code;

	event_data.value = value;

	ev_msg.dv[0].data_ptr = &event_data;
	ev_msg.dv[0].data_length = sizeof(event_data);

	return kev_post_msg(&ev_msg);
}
void
mptcp_set_cellicon(struct mptses *mpte)
{
	int error;

	/* First-party apps (Siri) don't flip the cellicon */
	if (mpte->mpte_flags & MPTE_FIRSTPARTY)
		return;

	/* Remember the last time we set the cellicon (see mptcp_unset_cellicon) */
	mptcp_last_cellicon_set = tcp_now;

	/* If cellicon is already set, get out of here! */
	if (OSTestAndSet(7, &mptcp_cellicon_is_set))
		return;

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);

	if (error)
		mptcplog((LOG_ERR, "%s: Setting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	else
		mptcplog((LOG_DEBUG, "%s successfully set the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
void
mptcp_unset_cellicon(void)
{
	int error;

	/* If cellicon is already unset, get out of here! */
	if (OSTestAndClear(7, &mptcp_cellicon_is_set))
		return;

	/*
	 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
	 * explicitly set the cellicon (see mptcp_set_cellicon()), then we unset
	 * it again.
	 */
	if (TSTMP_GT(mptcp_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE,
	    tcp_now)) {
		OSTestAndSet(7, &mptcp_cellicon_is_set);
		return;
	}

	error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);

	if (error)
		mptcplog((LOG_ERR, "%s: Unsetting cellicon failed with %d\n",
		    __func__, error), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
	else
		mptcplog((LOG_DEBUG, "%s successfully unset the cellicon\n", __func__),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
}
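/*
 * Timeline sketch (illustrative, not original text): if a connection sets
 * the cellicon at time t and another path tries to unset it before
 * t + MPTCP_CELLICON_TOGGLE_RATE, the TSTMP_GT() check above re-sets the bit
 * and suppresses the KEV_MPTCP_CELLUSE event, rate-limiting icon flapping.
 */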
void
mptcp_reset_rexmit_state(struct tcpcb *tp)
{
	struct mptsub *mpts;
	struct inpcb *inp;
	struct socket *so;

	inp = tp->t_inpcb;
	if (inp == NULL)
		return;

	so = inp->inp_socket;
	if (so == NULL)
		return;

	if (!(so->so_flags & SOF_MP_SUBFLOW))
		return;

	mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
	so->so_flags &= ~SOF_MP_TRYFAILOVER;
}
void
mptcp_reset_keepalive(struct tcpcb *tp)
{
	struct mptsub *mpts = tp->t_mpsub;

	mpts->mpts_flags &= ~MPTSF_READ_STALL;
}