/*
 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <mach/sdt.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#include <dev/random/randomdev.h>

/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain. The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain. It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
 *
 * A functioning MPTCP Session consists of one or more subflow sockets. Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure. Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow. This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock. Incoming data on a subflow also ends up taking this single lock. To
 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
 * of the MPTCP-socket.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector. This process will take place once all
 * of the subflows have been destroyed.
 */

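/*
 * Illustrative sketch (added for clarity; not part of the kernel code below,
 * userspace error handling omitted): an application reaches this stack
 * roughly as
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	connectx(fd, ...);	// establishes the MPTCP connection / first subflow
 *
 * The connectx(2) arguments are elided here; only the socket() triple is
 * taken verbatim from the description in the comment above.
 */
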
static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *so, void *arg, long events);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
static int mptcp_freeq(struct mptcb *mp_tp);

/*
 * Possible return values for subflow event handlers. Note that success values
 * must be greater than or equal to MPTS_EVRET_OK. Values less than that
 * indicate errors or actions which require immediate attention; they will
 * prevent the rest of the handlers from processing their respective events
 * until the next round of events processing.
 */
typedef enum {
    MPTS_EVRET_DELETE              = 1,    /* delete this subflow */
    MPTS_EVRET_OK                  = 2,    /* OK */
    MPTS_EVRET_CONNECT_PENDING     = 3,    /* resume pended connects */
    MPTS_EVRET_DISCONNECT_FALLBACK = 4,    /* abort all but preferred */
} ev_ret_t;

static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_mpsuberror_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, long *, long);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, long *, long);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);

static ZONE_DECLARE(mptsub_zone, "mptsub", sizeof(struct mptsub), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(mptopt_zone, "mptopt", sizeof(struct mptopt), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(mpt_subauth_zone, "mptauth",
    sizeof(struct mptcp_subf_auth_entry), ZC_NONE);

struct mppcbinfo mtcbinfo;

SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;           /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");


static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");

static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;

static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
    long sofilt_hint_mask;
    ev_ret_t (*sofilt_hint_ev_hdlr)(
        struct mptses *mpte,
        struct mptsub *mpts,
        long *p_mpsofilt_hint,
        long event);
} mptsub_ev_entry_t;

/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
static symptoms_advisory_t mptcp_advisory;

uint32_t mptcp_cellicon_refcount = 0;

/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
    {
        .sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR,
        .sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
        .sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
        .sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
        .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
        .sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
        .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
        .sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
        .sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
        .sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
        .sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
        .sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
        .sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
        .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
    },
    {
        .sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
        .sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
    },
};
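/*
 * Note added for clarity: the table above is scanned in order by the subflow
 * event processing code, so the more drastic hints (e.g.
 * SO_FILT_HINT_MP_SUB_ERROR, SO_FILT_HINT_MPCANTRCVMORE, SO_FILT_HINT_MPFAILOVER)
 * are looked at before plain state propagation such as SO_FILT_HINT_CONNECTED
 * or SO_FILT_HINT_DISCONNECTED; hence the warning above about reordering.
 */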

os_log_t mptcp_log_handle;

/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
    static int mptcp_initialized = 0;
    struct protosw *prp;
    struct ip6protosw *prp6;

    VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

    /* do this only once */
    if (mptcp_initialized) {
        return;
    }
    mptcp_initialized = 1;

    mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

    /*
     * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
     * we must be able to find IPPROTO_TCP entries for both.
     */
    prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
    VERIFY(prp != NULL);
    bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
    bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
        sizeof(mptcp_subflow_usrreqs));
    mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
    mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
    mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
    mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
    mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
    mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
    /*
     * Socket filters shouldn't attach/detach to/from this protosw
     * since pr_protosw is to be used instead, which points to the
     * real protocol; if they do, it is a bug and we should panic.
     */
    mptcp_subflow_protosw.pr_filter_head.tqh_first =
        (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
    mptcp_subflow_protosw.pr_filter_head.tqh_last =
        (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

    prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
        IPPROTO_TCP, SOCK_STREAM);
    VERIFY(prp6 != NULL);
    bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
    bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
        sizeof(mptcp_subflow_usrreqs6));
    mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
    mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
    mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
    mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
    mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
    mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
    /*
     * Socket filters shouldn't attach/detach to/from this protosw
     * since pr_protosw is to be used instead, which points to the
     * real protocol; if they do, it is a bug and we should panic.
     */
    mptcp_subflow_protosw6.pr_filter_head.tqh_first =
        (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
    mptcp_subflow_protosw6.pr_filter_head.tqh_last =
        (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

    bzero(&mtcbinfo, sizeof(mtcbinfo));
    TAILQ_INIT(&mtcbinfo.mppi_pcbs);
    mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
    mtcbinfo.mppi_zone = zone_create("mptc", mtcbinfo.mppi_size,
        ZC_NONE);

    mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
    mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
        mtcbinfo.mppi_lock_grp_attr);
    mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
    lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
        mtcbinfo.mppi_lock_attr);

    mtcbinfo.mppi_gc = mptcp_gc;
    mtcbinfo.mppi_timer = mptcp_timer;

    /* attach to MP domain for garbage collection to take place */
    mp_pcbinfo_attach(&mtcbinfo);

    mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}

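/*
 * Note added for clarity: the protosw/pr_usrreqs copies built above are what
 * subflow sockets are switched over to after creation (see
 * mptcp_subflow_socreate()), which is how pru_soreceive/pru_sosend on a
 * subflow end up in mptcp_subflow_soreceive()/mptcp_subflow_sosend() rather
 * than in the plain TCP entry points.
 */
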
365int
f427ee49 366mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, u_short ifindex, boolean_t create)
5ba3f43e 367{
5ba3f43e
A
368 int i, index = -1;
369
5ba3f43e 370 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
cb323159 371 if (create && stats[i].ifindex == IFSCOPE_NONE) {
0a7de745 372 if (index < 0) {
5ba3f43e 373 index = i;
0a7de745 374 }
5ba3f43e
A
375 continue;
376 }
377
cb323159 378 if (stats[i].ifindex == ifindex) {
5ba3f43e 379 index = i;
0a7de745 380 return index;
5ba3f43e
A
381 }
382 }
383
384 if (index != -1) {
cb323159
A
385 stats[index].ifindex = ifindex;
386 }
387
388 return index;
389}
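/*
 * Note added for clarity: with create == TRUE, the first free slot
 * (ifindex == IFSCOPE_NONE) is claimed for the given ifindex when no existing
 * entry matches; with create == FALSE this is a pure lookup and -1 is
 * returned if the interface has no stats entry yet.
 */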
390
391static int
392mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
393{
394 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
395 int index;
396
397 if (ifp == NULL) {
398 os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
399 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
400 sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags);
401 return -1;
402 }
403
404 index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);
405
406 if (index != -1) {
0a7de745 407 if (stats[index].is_expensive == 0) {
5ba3f43e 408 stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
0a7de745 409 }
5ba3f43e
A
410 }
411
0a7de745 412 return index;
5ba3f43e
A
413}
414
415void
416mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
417{
418 int index;
419
420 tcpstat.tcps_mp_switches++;
421 mpte->mpte_subflow_switches++;
422
cb323159 423 index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);
5ba3f43e 424
0a7de745 425 if (index != -1) {
5ba3f43e 426 mpte->mpte_itfstats[index].switches++;
0a7de745 427 }
5ba3f43e
A
428}
429
430/*
431 * Flushes all recorded socket options from an MP socket.
432 */
433static void
434mptcp_flush_sopts(struct mptses *mpte)
435{
436 struct mptopt *mpo, *tmpo;
437
438 TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
439 mptcp_sopt_remove(mpte, mpo);
440 mptcp_sopt_free(mpo);
441 }
442 VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
39236c6e
A
443}
444
445/*
446 * Create an MPTCP session, called as a result of opening a MPTCP socket.
447 */
5ba3f43e 448int
cb323159 449mptcp_session_create(struct mppcb *mpp)
39236c6e
A
450{
451 struct mppcbinfo *mppi;
452 struct mptses *mpte;
453 struct mptcb *mp_tp;
39236c6e
A
454
455 VERIFY(mpp != NULL);
456 mppi = mpp->mpp_pcbinfo;
457 VERIFY(mppi != NULL);
458
3e170ce0
A
459 __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
460 __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
39236c6e
A
461
462 /* MPTCP Multipath PCB Extension */
0a7de745 463 bzero(mpte, sizeof(*mpte));
39236c6e
A
464 VERIFY(mpp->mpp_pcbe == NULL);
465 mpp->mpp_pcbe = mpte;
466 mpte->mpte_mppcb = mpp;
467 mpte->mpte_mptcb = mp_tp;
468
469 TAILQ_INIT(&mpte->mpte_sopts);
470 TAILQ_INIT(&mpte->mpte_subflows);
3e170ce0
A
471 mpte->mpte_associd = SAE_ASSOCID_ANY;
472 mpte->mpte_connid_last = SAE_CONNID_ANY;
39236c6e 473
cb323159
A
474 mptcp_init_urgency_timer(mpte);
475
5ba3f43e
A
476 mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
477 mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;
39236c6e 478
f427ee49
A
479 if (mptcp_alternate_port > 0 && mptcp_alternate_port < UINT16_MAX) {
480 mpte->mpte_alternate_port = htons((uint16_t)mptcp_alternate_port);
0a7de745 481 }
a39ff7e2 482
cb323159
A
483 mpte->mpte_last_cellicon_set = tcp_now;
484
39236c6e 485 /* MPTCP Protocol Control Block */
0a7de745 486 bzero(mp_tp, sizeof(*mp_tp));
39236c6e 487 mp_tp->mpt_mpte = mpte;
3e170ce0 488 mp_tp->mpt_state = MPTCPS_CLOSED;
39236c6e 489
5ba3f43e
A
490 DTRACE_MPTCP1(session__create, struct mppcb *, mpp);
491
0a7de745 492 return 0;
5ba3f43e
A
493}
494
cb323159
A
495struct sockaddr *
496mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
497{
498 if (!(mpte->mpte_flags & MPTE_UNICAST_IP)) {
499 return &mpte->mpte_dst;
500 }
501
502 if (ipv6 && mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
503 return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
504 }
505
506 if (ipv4 && mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
507 return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
508 }
509
510 /* The interface has neither IPv4 nor IPv6 routes. Give our best guess,
511 * meaning we prefer IPv6 over IPv4.
512 */
513 if (mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
514 return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
515 }
516
517 if (mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
518 return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
519 }
520
521 /* We don't yet have a unicast IP */
522 return NULL;
523}
524
5ba3f43e
A
525static void
526mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
0a7de745 527 uint64_t *cellbytes, uint64_t *allbytes)
5ba3f43e
A
528{
529 int64_t mycellbytes = 0;
530 uint64_t myallbytes = 0;
531 int i;
532
533 for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
534 if (mpte->mpte_itfstats[i].is_expensive) {
535 mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
536 mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
537 }
538
539 myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
540 myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
541 }
542
543 if (initial_cell) {
544 mycellbytes -= mpte->mpte_init_txbytes;
cb323159 545 mycellbytes -= mpte->mpte_init_rxbytes;
5ba3f43e
A
546 }
547
548 if (mycellbytes < 0) {
cb323159
A
549 os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
550 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes);
5ba3f43e
A
551 *cellbytes = 0;
552 *allbytes = 0;
553 } else {
554 *cellbytes = mycellbytes;
555 *allbytes = myallbytes;
556 }
557}
558
559static void
560mptcpstats_session_wrapup(struct mptses *mpte)
561{
562 boolean_t cell = mpte->mpte_initial_cell;
563
564 switch (mpte->mpte_svctype) {
565 case MPTCP_SVCTYPE_HANDOVER:
566 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
567 tcpstat.tcps_mptcp_fp_handover_attempt++;
568
569 if (cell && mpte->mpte_handshake_success) {
570 tcpstat.tcps_mptcp_fp_handover_success_cell++;
571
0a7de745 572 if (mpte->mpte_used_wifi) {
5ba3f43e 573 tcpstat.tcps_mptcp_handover_wifi_from_cell++;
0a7de745 574 }
5ba3f43e
A
575 } else if (mpte->mpte_handshake_success) {
576 tcpstat.tcps_mptcp_fp_handover_success_wifi++;
577
0a7de745 578 if (mpte->mpte_used_cell) {
5ba3f43e 579 tcpstat.tcps_mptcp_handover_cell_from_wifi++;
0a7de745 580 }
5ba3f43e
A
581 }
582 } else {
583 tcpstat.tcps_mptcp_handover_attempt++;
584
585 if (cell && mpte->mpte_handshake_success) {
586 tcpstat.tcps_mptcp_handover_success_cell++;
587
0a7de745 588 if (mpte->mpte_used_wifi) {
5ba3f43e 589 tcpstat.tcps_mptcp_handover_wifi_from_cell++;
0a7de745 590 }
5ba3f43e
A
591 } else if (mpte->mpte_handshake_success) {
592 tcpstat.tcps_mptcp_handover_success_wifi++;
593
0a7de745 594 if (mpte->mpte_used_cell) {
5ba3f43e 595 tcpstat.tcps_mptcp_handover_cell_from_wifi++;
0a7de745 596 }
5ba3f43e
A
597 }
598 }
599
600 if (mpte->mpte_handshake_success) {
601 uint64_t cellbytes;
602 uint64_t allbytes;
603
604 mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
605
606 tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
607 tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
608 }
609 break;
610 case MPTCP_SVCTYPE_INTERACTIVE:
611 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
612 tcpstat.tcps_mptcp_fp_interactive_attempt++;
613
614 if (mpte->mpte_handshake_success) {
615 tcpstat.tcps_mptcp_fp_interactive_success++;
616
0a7de745 617 if (!cell && mpte->mpte_used_cell) {
5ba3f43e 618 tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
0a7de745 619 }
5ba3f43e
A
620 }
621 } else {
622 tcpstat.tcps_mptcp_interactive_attempt++;
623
624 if (mpte->mpte_handshake_success) {
625 tcpstat.tcps_mptcp_interactive_success++;
626
0a7de745 627 if (!cell && mpte->mpte_used_cell) {
5ba3f43e 628 tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
0a7de745 629 }
5ba3f43e
A
630 }
631 }
632
633 if (mpte->mpte_handshake_success) {
634 uint64_t cellbytes;
635 uint64_t allbytes;
636
637 mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
638
639 tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
640 tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
641 }
642 break;
643 case MPTCP_SVCTYPE_AGGREGATE:
644 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
645 tcpstat.tcps_mptcp_fp_aggregate_attempt++;
646
0a7de745 647 if (mpte->mpte_handshake_success) {
5ba3f43e 648 tcpstat.tcps_mptcp_fp_aggregate_success++;
0a7de745 649 }
5ba3f43e
A
650 } else {
651 tcpstat.tcps_mptcp_aggregate_attempt++;
652
653 if (mpte->mpte_handshake_success) {
654 tcpstat.tcps_mptcp_aggregate_success++;
655 }
656 }
657
658 if (mpte->mpte_handshake_success) {
659 uint64_t cellbytes;
660 uint64_t allbytes;
661
662 mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);
663
664 tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
665 tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
666 }
667 break;
668 }
669
0a7de745 670 if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
5ba3f43e 671 tcpstat.tcps_mptcp_back_to_wifi++;
0a7de745 672 }
d9a64523 673
0a7de745 674 if (mpte->mpte_triggered_cell) {
d9a64523 675 tcpstat.tcps_mptcp_triggered_cell++;
0a7de745 676 }
39236c6e
A
677}
678
679/*
680 * Destroy an MPTCP session.
681 */
682static void
5ba3f43e 683mptcp_session_destroy(struct mptses *mpte)
39236c6e 684{
cb323159 685 struct mptcb *mp_tp = mpte->mpte_mptcb;
39236c6e 686
39236c6e 687 VERIFY(mp_tp != NULL);
cb323159 688 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);
39236c6e 689
5ba3f43e 690 mptcpstats_session_wrapup(mpte);
cb323159 691 mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
39236c6e 692 mptcp_flush_sopts(mpte);
39236c6e 693
0a7de745 694 if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
5ba3f43e 695 _FREE(mpte->mpte_itfinfo, M_TEMP);
0a7de745 696 }
5ba3f43e
A
697 mpte->mpte_itfinfo = NULL;
698
699 m_freem_list(mpte->mpte_reinjectq);
39236c6e 700
cb323159
A
701 os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
702 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e
A
703}
704
boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
    return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
           mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
           !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}
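/*
 * Note added for clarity: this predicate gates the subflow creation and
 * removal paths below (mptcp_sched_create_subflows(),
 * mptcp_check_subflows_and_add(), mptcp_check_subflows_and_remove()); once
 * the connection has fallen back to plain TCP or has started closing, no new
 * subflows are added.
 */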
39236c6e 712
5ba3f43e 713static int
cb323159
A
714mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len,
715 const struct in_addr *addrv4)
5ba3f43e
A
716{
717 static const struct in6_addr well_known_prefix = {
718 .__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
0a7de745
A
719 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
720 0x00, 0x00, 0x00, 0x00},
5ba3f43e 721 };
cb323159 722 const char *ptrv4 = (const char *)addrv4;
5ba3f43e
A
723 char *ptr = (char *)addr;
724
d9a64523
A
725 if (IN_ZERONET(ntohl(addrv4->s_addr)) || // 0.0.0.0/8 Source hosts on local network
726 IN_LOOPBACK(ntohl(addrv4->s_addr)) || // 127.0.0.0/8 Loopback
727 IN_LINKLOCAL(ntohl(addrv4->s_addr)) || // 169.254.0.0/16 Link Local
728 IN_DS_LITE(ntohl(addrv4->s_addr)) || // 192.0.0.0/29 DS-Lite
729 IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
730 IN_MULTICAST(ntohl(addrv4->s_addr)) || // 224.0.0.0/4 Multicast
5ba3f43e 731 INADDR_BROADCAST == addrv4->s_addr) { // 255.255.255.255/32 Limited Broadcast
0a7de745 732 return -1;
39236c6e
A
733 }
734
5ba3f43e
A
735 /* Check for the well-known prefix */
736 if (len == NAT64_PREFIX_LEN_96 &&
737 IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
d9a64523 738 if (IN_PRIVATE(ntohl(addrv4->s_addr)) || // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
0a7de745
A
739 IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
740 return -1;
741 }
5ba3f43e 742 }
39236c6e 743
5ba3f43e 744 switch (len) {
0a7de745
A
745 case NAT64_PREFIX_LEN_96:
746 memcpy(ptr + 12, ptrv4, 4);
747 break;
748 case NAT64_PREFIX_LEN_64:
749 memcpy(ptr + 9, ptrv4, 4);
750 break;
751 case NAT64_PREFIX_LEN_56:
752 memcpy(ptr + 7, ptrv4, 1);
753 memcpy(ptr + 9, ptrv4 + 1, 3);
754 break;
755 case NAT64_PREFIX_LEN_48:
756 memcpy(ptr + 6, ptrv4, 2);
757 memcpy(ptr + 9, ptrv4 + 2, 2);
758 break;
759 case NAT64_PREFIX_LEN_40:
760 memcpy(ptr + 5, ptrv4, 3);
761 memcpy(ptr + 9, ptrv4 + 3, 1);
762 break;
763 case NAT64_PREFIX_LEN_32:
764 memcpy(ptr + 4, ptrv4, 4);
765 break;
766 default:
767 panic("NAT64-prefix len is wrong: %u\n", len);
5ba3f43e 768 }
39236c6e 769
0a7de745 770 return 0;
39236c6e
A
771}
772
d9a64523
A
773static void
774mptcp_trigger_cell_bringup(struct mptses *mpte)
775{
776 struct socket *mp_so = mptetoso(mpte);
777
778 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
779 uuid_string_t uuidstr;
780 int err;
781
cb323159 782 socket_unlock(mp_so, 0);
d9a64523 783 err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
0a7de745 784 TRUE);
cb323159 785 socket_lock(mp_so, 0);
d9a64523 786
0a7de745 787 if (err == 0) {
d9a64523 788 mpte->mpte_triggered_cell = 1;
0a7de745 789 }
d9a64523
A
790
791 uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
cb323159
A
792 os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
793 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
d9a64523 794 } else {
cb323159
A
795 os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
796 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
d9a64523
A
797 }
798}
799
cb323159
A
static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
    if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
        return true;
    }

    if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) {
        return true;
    }

    if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) {
        return true;
    }

    return false;
}
d9a64523 817
f427ee49
A
/*
 * In Handover mode, only create cell subflow if
 * - Symptoms marked WiFi as weak:
 *   Here, if we are sending data, then we can check the RTO-state. That is a
 *   stronger signal of WiFi quality than the Symptoms indicator.
 *   If however we are not sending any data, the only thing we can do is guess
 *   and thus bring up Cell.
 *
 * - Symptoms marked WiFi as unknown:
 *   In this state we don't know what the situation is and thus remain
 *   conservative, only bringing up cell if there are retransmissions going on.
 */
830static boolean_t
831mptcp_handover_use_cellular(struct mptses *mpte, struct tcpcb *tp)
832{
833 int unusable_state = mptcp_is_wifi_unusable_for_session(mpte);
834
835 if (unusable_state == 0) {
836 /* WiFi is good - don't use cell */
837 return false;
838 }
839
840 if (unusable_state == -1) {
841 /*
842 * We are in unknown state, only use Cell if we have confirmed
843 * that WiFi is bad.
844 */
845 if (mptetoso(mpte)->so_snd.sb_cc != 0 && tp->t_rxtshift >= mptcp_fail_thresh * 2) {
846 return true;
847 } else {
848 return false;
849 }
850 }
851
852 if (unusable_state == 1) {
853 /*
854 * WiFi is confirmed to be bad from Symptoms-Framework.
855 * If we are sending data, check the RTOs.
856 * Otherwise, be pessimistic and use Cell.
857 */
858 if (mptetoso(mpte)->so_snd.sb_cc != 0) {
859 if (tp->t_rxtshift >= mptcp_fail_thresh * 2) {
860 return true;
861 } else {
862 return false;
863 }
864 } else {
865 return true;
866 }
867 }
868
869 return false;
870}
871
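/*
 * Summary of mptcp_handover_use_cellular(), derived from the code above and
 * added for clarity:
 *
 *	WiFi state	sending data?	rxtshift >= 2 * mptcp_fail_thresh	use cell?
 *	good (0)	any		any					no
 *	unknown (-1)	no		any					no
 *	unknown (-1)	yes		no					no
 *	unknown (-1)	yes		yes					yes
 *	bad (1)		no		any					yes
 *	bad (1)		yes		no					no
 *	bad (1)		yes		yes					yes
 */
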
39236c6e 872void
5ba3f43e 873mptcp_check_subflows_and_add(struct mptses *mpte)
39236c6e 874{
5ba3f43e 875 struct mptcb *mp_tp = mpte->mpte_mptcb;
d9a64523
A
876 boolean_t cellular_viable = FALSE;
877 boolean_t want_cellular = TRUE;
5ba3f43e 878 uint32_t i;
39236c6e 879
0a7de745 880 if (!mptcp_ok_to_create_subflows(mp_tp)) {
cb323159
A
881 os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
882 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
883 return;
884 }
885
886 if (mptcp_get_session_dst(mpte, false, false) == NULL) {
5ba3f43e 887 return;
0a7de745 888 }
39236c6e 889
5ba3f43e 890 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
cb323159 891 boolean_t need_to_ask_symptoms = FALSE, found = FALSE;
5ba3f43e 892 struct mpt_itf_info *info;
cb323159
A
893 struct sockaddr_in6 nat64pre;
894 struct sockaddr *dst;
5ba3f43e 895 struct mptsub *mpts;
d9a64523 896 struct ifnet *ifp;
5ba3f43e 897 uint32_t ifindex;
39236c6e 898
5ba3f43e 899 info = &mpte->mpte_itfinfo[i];
39236c6e 900
cb323159
A
901 ifindex = info->ifindex;
902 if (ifindex == IFSCOPE_NONE) {
5ba3f43e 903 continue;
0a7de745 904 }
39236c6e 905
cb323159
A
906 os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u has v6 %u hasnat64 %u\n",
907 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support,
908 info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn);
909
910 if (info->no_mptcp_support) {
5ba3f43e 911 continue;
0a7de745 912 }
39236c6e 913
d9a64523
A
914 ifnet_head_lock_shared();
915 ifp = ifindex2ifnet[ifindex];
916 ifnet_head_done();
917
0a7de745 918 if (ifp == NULL) {
d9a64523 919 continue;
0a7de745 920 }
d9a64523 921
0a7de745 922 if (IFNET_IS_CELLULAR(ifp)) {
d9a64523 923 cellular_viable = TRUE;
0a7de745 924 }
d9a64523 925
5ba3f43e 926 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
d9a64523 927 const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
cb323159 928 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
39236c6e 929
0a7de745 930 if (subifp == NULL) {
5ba3f43e 931 continue;
0a7de745 932 }
39236c6e 933
cb323159
A
934 /*
935 * If there is at least one functioning subflow on WiFi
936 * and we are checking for the cell interface, then
937 * we always need to ask symptoms for permission as
938 * cell is triggered even if WiFi is available.
939 */
940 if (!IFNET_IS_CELLULAR(subifp) &&
941 !mptcp_subflow_disconnecting(mpts) &&
942 IFNET_IS_CELLULAR(ifp)) {
943 need_to_ask_symptoms = TRUE;
944 }
945
f427ee49
A
946 if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
947 os_log(mptcp_log_handle,
948 "%s - %lx: handover: cell %u wifi-state %d flags %#x rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
cb323159 949 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
f427ee49 950 IFNET_IS_CELLULAR(subifp),
cb323159 951 mptcp_is_wifi_unusable_for_session(mpte),
f427ee49 952 mpts->mpts_flags,
cb323159 953 tp->t_rxtshift,
0a7de745
A
954 !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
955 mptetoso(mpte)->so_snd.sb_cc,
cb323159
A
956 ifindex, subifp->if_index,
957 tp->t_srtt >> TCP_RTT_SHIFT,
958 tp->t_rttvar >> TCP_RTTVAR_SHIFT,
959 tp->t_rxtcur);
d9a64523 960
f427ee49
A
961 if (!IFNET_IS_CELLULAR(subifp) &&
962 !mptcp_subflow_disconnecting(mpts) &&
963 (mpts->mpts_flags & MPTSF_CONNECTED) &&
964 !mptcp_handover_use_cellular(mpte, tp)) {
965 found = TRUE;
966
967 /* We found a proper subflow on WiFi - no need for cell */
968 want_cellular = FALSE;
969 break;
970 }
cb323159
A
971 } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
972 uint64_t time_now = mach_continuous_time();
973
974 os_log(mptcp_log_handle,
975 "%s - %lx: target-based: %llu now %llu unusable? %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
976 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target,
977 time_now, mptcp_is_wifi_unusable_for_session(mpte),
978 IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state,
979 mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state);
980
981 if (!IFNET_IS_CELLULAR(subifp) &&
982 !mptcp_subflow_disconnecting(mpts) &&
983 (mpte->mpte_time_target == 0 ||
984 (int64_t)(mpte->mpte_time_target - time_now) > 0 ||
985 !mptcp_is_wifi_unusable_for_session(mpte))) {
986 found = TRUE;
987
988 want_cellular = FALSE;
989 break;
990 }
d9a64523
A
991 }
992
993 if (subifp->if_index == ifindex &&
cb323159 994 !mptcp_subflow_disconnecting(mpts)) {
d9a64523
A
995 /*
996 * We found a subflow on this interface.
997 * No need to create a new one.
998 */
cb323159 999 found = TRUE;
5ba3f43e
A
1000 break;
1001 }
1002 }
1003
cb323159
A
1004 if (found) {
1005 continue;
1006 }
1007
1008 if (need_to_ask_symptoms &&
1009 !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
5ba3f43e
A
1010 !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
1011 mptcp_developer_mode == 0) {
1012 mptcp_ask_symptoms(mpte);
1013 return;
1014 }
1015
cb323159 1016 dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);
5ba3f43e 1017
cb323159
A
1018 if (dst->sa_family == AF_INET &&
1019 !info->has_v4_conn && info->has_nat64_conn) {
1020 struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
1021 int error, j;
5ba3f43e 1022
cb323159 1023 bzero(&nat64pre, sizeof(struct sockaddr_in6));
5ba3f43e 1024
cb323159
A
1025 error = ifnet_get_nat64prefix(ifp, nat64prefixes);
1026 if (error) {
1027 os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
1028 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error);
1029 continue;
1030 }
5ba3f43e 1031
cb323159
A
1032 for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
1033 if (nat64prefixes[j].prefix_len != 0) {
1034 break;
5ba3f43e 1035 }
5ba3f43e
A
1036 }
1037
cb323159 1038 VERIFY(j < NAT64_MAX_NUM_PREFIXES);
a39ff7e2 1039
cb323159
A
1040 error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
1041 nat64prefixes[j].prefix_len,
1042 &((struct sockaddr_in *)(void *)dst)->sin_addr);
1043 if (error != 0) {
f427ee49 1044 os_log_error(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
cb323159 1045 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
a39ff7e2 1046 continue;
0a7de745 1047 }
a39ff7e2 1048
cb323159
A
1049 memcpy(&nat64pre.sin6_addr,
1050 &nat64prefixes[j].ipv6_prefix,
1051 sizeof(nat64pre.sin6_addr));
1052 nat64pre.sin6_len = sizeof(struct sockaddr_in6);
1053 nat64pre.sin6_family = AF_INET6;
1054 nat64pre.sin6_port = ((struct sockaddr_in *)(void *)dst)->sin_port;
1055 nat64pre.sin6_flowinfo = 0;
1056 nat64pre.sin6_scope_id = 0;
1057
1058 dst = (struct sockaddr *)&nat64pre;
5ba3f43e 1059 }
cb323159
A
1060
1061 /* Initial subflow started on a NAT64'd address? */
1062 if (!(mpte->mpte_flags & MPTE_UNICAST_IP) &&
1063 mpte->mpte_dst.sa_family == AF_INET6 &&
1064 mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
1065 dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
1066 }
1067
1068 if (dst->sa_family == AF_INET && !info->has_v4_conn) {
1069 continue;
1070 }
1071 if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
1072 continue;
1073 }
1074
1075 mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
5ba3f43e 1076 }
d9a64523
A
1077
1078 if (!cellular_viable && want_cellular) {
1079 /* Trigger Cell Bringup */
1080 mptcp_trigger_cell_bringup(mpte);
1081 }
5ba3f43e
A
1082}
1083
5ba3f43e 1084static void
cb323159 1085mptcp_remove_cell_subflows(struct mptses *mpte)
5ba3f43e
A
1086{
1087 struct mptsub *mpts, *tmpts;
cb323159
A
1088 boolean_t found = false;
1089
1090 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1091 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
5ba3f43e 1092
cb323159
A
1093 if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
1094 continue;
1095 }
1096
1097 /* We have a functioning subflow on WiFi. No need for cell! */
1098 if (mpts->mpts_flags & MPTSF_CONNECTED &&
1099 !mptcp_subflow_disconnecting(mpts)) {
1100 found = true;
1101 }
1102 }
1103
	/* Didn't find a functional subflow on WiFi - stay on cell */
1105 if (!found) {
5ba3f43e 1106 return;
0a7de745 1107 }
5ba3f43e 1108
cb323159
A
1109 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
1110 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
1111
1112 /* Only remove cellular subflows */
1113 if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
1114 continue;
1115 }
1116
1117 os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
1118 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
1119
1120 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1121 }
1122
1123 return;
1124}
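/*
 * Note added for clarity: mptcp_remove_cell_subflows() is deliberately
 * two-pass. The first loop only checks whether a connected, non-disconnecting
 * subflow exists on a non-cellular interface; only if one is found does the
 * second loop inject SO_FILT_HINT_MUSTRST on every cellular subflow. If no
 * working WiFi subflow exists, nothing is torn down.
 */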
1125
/* Remove cell subflows if there is a working subflow on WiFi (handover mode) */
1127static void
1128mptcp_handover_subflows_remove(struct mptses *mpte)
1129{
1130 int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
1131 boolean_t found_working_subflow = false;
1132 struct mptsub *mpts;
1133
5ba3f43e
A
1134 /*
1135 * Look for a subflow that is on a non-cellular interface
1136 * and actually works (aka, no retransmission timeout).
1137 */
1138 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1139 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
1140 struct socket *so;
1141 struct tcpcb *tp;
1142
0a7de745 1143 if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
5ba3f43e 1144 continue;
0a7de745 1145 }
5ba3f43e
A
1146
1147 so = mpts->mpts_socket;
1148 tp = sototcpcb(so);
1149
1150 if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
0a7de745 1151 tp->t_state != TCPS_ESTABLISHED) {
5ba3f43e 1152 continue;
0a7de745 1153 }
5ba3f43e 1154
cb323159
A
1155 os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
1156 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);
1157
f427ee49 1158 if (!mptcp_handover_use_cellular(mpte, tp)) {
cb323159 1159 found_working_subflow = true;
f427ee49 1160 break;
0a7de745 1161 }
39236c6e
A
1162 }
1163
5ba3f43e
A
1164 /*
1165 * Couldn't find a working subflow, let's not remove those on a cellular
1166 * interface.
1167 */
0a7de745 1168 if (!found_working_subflow) {
5ba3f43e 1169 return;
0a7de745 1170 }
5ba3f43e 1171
cb323159
A
1172 mptcp_remove_cell_subflows(mpte);
1173}
5ba3f43e 1174
cb323159
A
1175static void
1176mptcp_targetbased_subflows_remove(struct mptses *mpte)
1177{
1178 uint64_t time_now = mach_continuous_time();
5ba3f43e 1179
cb323159
A
1180 if (mpte->mpte_time_target != 0 &&
1181 (int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
1182 mptcp_is_wifi_unusable_for_session(mpte)) {
1183 /* WiFi is bad and we are below the target - don't remove any subflows */
1184 return;
5ba3f43e
A
1185 }
1186
cb323159
A
1187 mptcp_remove_cell_subflows(mpte);
1188}
1189
1190/*
1191 * Based on the MPTCP Service-type and the state of the subflows, we
1192 * will destroy subflows here.
1193 */
1194void
1195mptcp_check_subflows_and_remove(struct mptses *mpte)
1196{
1197 if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
1198 return;
1199 }
1200
1201 socket_lock_assert_owned(mptetoso(mpte));
1202
1203 if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
1204 mptcp_handover_subflows_remove(mpte);
1205 }
1206
1207 if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
1208 mptcp_targetbased_subflows_remove(mpte);
0a7de745 1209 }
5ba3f43e
A
1210}
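/*
 * Note added for clarity: only the HANDOVER and TARGET_BASED service types
 * ever tear subflows down from this path; for the other service types this
 * function is effectively a no-op and existing subflows are left in place.
 */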
1211
1212static void
1213mptcp_remove_subflows(struct mptses *mpte)
1214{
1215 struct mptsub *mpts, *tmpts;
1216
cb323159
A
1217 if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
1218 return;
1219 }
1220
5ba3f43e 1221 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
cb323159
A
1222 const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
1223 boolean_t found = false;
1224 uint32_t ifindex;
1225 uint32_t i;
1226
5ba3f43e
A
1227 if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
1228 mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;
1229
cb323159
A
1230 os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
1231 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
1232 ifp ? ifp->if_index : -1);
1233 soevent(mpts->mpts_socket,
1234 SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
1235
1236 continue;
1237 }
1238
1239 if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) {
1240 continue;
1241 }
1242
1243 if (ifp) {
1244 ifindex = ifp->if_index;
1245 } else {
1246 ifindex = mpts->mpts_ifscope;
1247 }
1248
1249 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
1250 if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
1251 continue;
1252 }
1253
1254 if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
1255 if (mpts->mpts_dst.sa_family == AF_INET6 &&
1256 (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) {
1257 found = true;
1258 break;
1259 }
1260
1261 if (mpts->mpts_dst.sa_family == AF_INET &&
1262 mpte->mpte_itfinfo[i].has_v4_conn) {
1263 found = true;
1264 break;
1265 }
1266 }
1267 }
1268
1269 if (!found) {
1270 os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
1271 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1272 ifindex, mpts->mpts_flags);
1273
5ba3f43e 1274 soevent(mpts->mpts_socket,
0a7de745 1275 SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
5ba3f43e
A
1276 }
1277 }
1278}
1279
1280static void
1281mptcp_create_subflows(__unused void *arg)
1282{
1283 struct mppcb *mpp;
1284
1285 /*
1286 * Start with clearing, because we might be processing connections
1287 * while a new event comes in.
1288 */
0a7de745 1289 if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
cb323159 1290 os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
0a7de745 1291 }
5ba3f43e
A
1292
1293 /* Iterate over all MPTCP connections */
1294
1295 lck_mtx_lock(&mtcbinfo.mppi_lock);
1296
1297 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
cb323159
A
1298 struct socket *mp_so = mpp->mpp_socket;
1299 struct mptses *mpte = mpp->mpp_pcbe;
5ba3f43e 1300
0a7de745 1301 if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
5ba3f43e 1302 continue;
0a7de745 1303 }
5ba3f43e 1304
cb323159
A
1305 socket_lock(mp_so, 1);
1306 VERIFY(mp_so->so_usecount > 0);
5ba3f43e
A
1307
1308 mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;
1309
5ba3f43e
A
1310 mptcp_check_subflows_and_add(mpte);
1311 mptcp_remove_subflows(mpte);
1312
1313 mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
cb323159 1314 socket_unlock(mp_so, 1);
5ba3f43e
A
1315 }
1316
1317 lck_mtx_unlock(&mtcbinfo.mppi_lock);
1318}
1319
1320/*
1321 * We need this because we are coming from an NECP-event. This event gets posted
1322 * while holding NECP-locks. The creation of the subflow however leads us back
1323 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
1324 * So, we would deadlock there as we already hold the NECP-lock.
1325 *
1326 * So, let's schedule this separately. It also gives NECP the chance to make
1327 * progress, without having to wait for MPTCP to finish its subflow creation.
1328 */
1329void
1330mptcp_sched_create_subflows(struct mptses *mpte)
1331{
1332 struct mppcb *mpp = mpte->mpte_mppcb;
1333 struct mptcb *mp_tp = mpte->mpte_mptcb;
1334 struct socket *mp_so = mpp->mpp_socket;
1335
1336 if (!mptcp_ok_to_create_subflows(mp_tp)) {
cb323159
A
1337 os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
1338 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
5ba3f43e
A
1339 return;
1340 }
1341
1342 if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
1343 mp_so->so_usecount++; /* To prevent it from being free'd in-between */
1344 mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
1345 }
1346
0a7de745 1347 if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
5ba3f43e 1348 return;
0a7de745 1349 }
5ba3f43e
A
1350
1351 /* Do the call in 100ms to allow NECP to schedule it on all sockets */
0a7de745 1352 timeout(mptcp_create_subflows, NULL, hz / 10);
5ba3f43e
A
1353}
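/*
 * Note added for clarity: the so_usecount reference taken above is what keeps
 * the MPTCP socket alive until the deferred mptcp_create_subflows() callout
 * (scheduled with a ~100ms delay) runs; the matching release is the
 * "mp_so->so_usecount--" in mptcp_create_subflows().
 */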
1354
1355/*
1356 * Allocate an MPTCP socket option structure.
1357 */
1358struct mptopt *
f427ee49 1359mptcp_sopt_alloc(zalloc_flags_t how)
5ba3f43e 1360{
f427ee49 1361 return zalloc_flags(mptopt_zone, how | Z_ZERO);
5ba3f43e
A
1362}
1363
1364/*
1365 * Free an MPTCP socket option structure.
1366 */
1367void
1368mptcp_sopt_free(struct mptopt *mpo)
1369{
1370 VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));
1371
1372 zfree(mptopt_zone, mpo);
1373}
1374
1375/*
1376 * Add a socket option to the MPTCP socket option list.
1377 */
1378void
1379mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
1380{
cb323159 1381 socket_lock_assert_owned(mptetoso(mpte));
5ba3f43e
A
1382 mpo->mpo_flags |= MPOF_ATTACHED;
1383 TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
1384}
1385
1386/*
1387 * Remove a socket option from the MPTCP socket option list.
1388 */
1389void
1390mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
1391{
cb323159 1392 socket_lock_assert_owned(mptetoso(mpte));
5ba3f43e
A
1393 VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
1394 mpo->mpo_flags &= ~MPOF_ATTACHED;
1395 TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
1396}
1397
1398/*
1399 * Search for an existing <sopt_level,sopt_name> socket option.
1400 */
1401struct mptopt *
1402mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
1403{
1404 struct mptopt *mpo;
1405
cb323159 1406 socket_lock_assert_owned(mptetoso(mpte));
5ba3f43e
A
1407
1408 TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
1409 if (mpo->mpo_level == sopt->sopt_level &&
0a7de745 1410 mpo->mpo_name == sopt->sopt_name) {
5ba3f43e 1411 break;
0a7de745 1412 }
5ba3f43e 1413 }
0a7de745 1414 return mpo;
5ba3f43e
A
1415}
1416
1417/*
1418 * Allocate a MPTCP subflow structure.
1419 */
1420static struct mptsub *
1421mptcp_subflow_alloc(void)
1422{
f427ee49 1423 return zalloc_flags(mptsub_zone, Z_WAITOK | Z_ZERO);
39236c6e
A
1424}
1425
1426/*
1427 * Deallocate a subflow structure, called when all of the references held
1428 * on it have been released. This implies that the subflow has been deleted.
1429 */
5ba3f43e 1430static void
39236c6e
A
1431mptcp_subflow_free(struct mptsub *mpts)
1432{
39236c6e
A
1433 VERIFY(mpts->mpts_refcnt == 0);
1434 VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
1435 VERIFY(mpts->mpts_mpte == NULL);
1436 VERIFY(mpts->mpts_socket == NULL);
1437
813fb2f6
A
1438 if (mpts->mpts_src != NULL) {
1439 FREE(mpts->mpts_src, M_SONAME);
1440 mpts->mpts_src = NULL;
39236c6e 1441 }
39236c6e
A
1442
1443 zfree(mptsub_zone, mpts);
1444}
1445
5ba3f43e
A
static void
mptcp_subflow_addref(struct mptsub *mpts)
{
    if (++mpts->mpts_refcnt == 0) {
        panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
        /* NOTREACHED */
    }
}
1454
1455static void
1456mptcp_subflow_remref(struct mptsub *mpts)
1457{
1458 if (mpts->mpts_refcnt == 0) {
1459 panic("%s: mpts %p negative refcnt\n", __func__, mpts);
1460 /* NOTREACHED */
1461 }
0a7de745 1462 if (--mpts->mpts_refcnt > 0) {
5ba3f43e 1463 return;
0a7de745 1464 }
5ba3f43e
A
1465
1466 /* callee will unlock and destroy lock */
1467 mptcp_subflow_free(mpts);
1468}
1469
1470static void
1471mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
1472{
1473 struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
1474 struct tcpcb *tp = sototcpcb(so);
1475
1476 /*
1477 * From this moment on, the subflow is linked to the MPTCP-connection.
1478 * Locking,... happens now at the MPTCP-layer
1479 */
1480 tp->t_mptcb = mpte->mpte_mptcb;
1481 so->so_flags |= SOF_MP_SUBFLOW;
1482 mp_so->so_usecount++;
1483
1484 /*
1485 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket. From this point on, removing
1487 * the subflow needs to be done via mptcp_subflow_del().
1488 */
1489 TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
1490 mpte->mpte_numflows++;
1491
1492 atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
1493 mpts->mpts_mpte = mpte;
1494 mpts->mpts_socket = so;
1495 tp->t_mpsub = mpts;
0a7de745
A
1496 mptcp_subflow_addref(mpts); /* for being in MPTCP subflow list */
1497 mptcp_subflow_addref(mpts); /* for subflow socket */
5ba3f43e
A
1498}
1499
1500static void
1501mptcp_subflow_necp_cb(void *handle, __unused int action,
0a7de745
A
1502 __unused uint32_t interface_index,
1503 uint32_t necp_flags, bool *viable)
5ba3f43e 1504{
d9a64523 1505 boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
5ba3f43e
A
1506 struct inpcb *inp = (struct inpcb *)handle;
1507 struct socket *so = inp->inp_socket;
1508 struct mptsub *mpts;
1509 struct mptses *mpte;
1510
0a7de745 1511 if (low_power) {
d9a64523 1512 action = NECP_CLIENT_CBACTION_NONVIABLE;
0a7de745 1513 }
d9a64523 1514
0a7de745 1515 if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
5ba3f43e 1516 return;
0a7de745 1517 }
5ba3f43e
A
1518
1519 /*
1520 * The socket is being garbage-collected. There is nothing to be done
1521 * here.
1522 */
cb323159 1523 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
5ba3f43e 1524 return;
0a7de745 1525 }
5ba3f43e
A
1526
1527 socket_lock(so, 1);
1528
1529 /* Check again after we acquired the lock. */
cb323159 1530 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
5ba3f43e 1531 goto out;
0a7de745 1532 }
5ba3f43e
A
1533
1534 mpte = tptomptp(sototcpcb(so))->mpt_mpte;
1535 mpts = sototcpcb(so)->t_mpsub;
1536
cb323159
A
1537 os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
1538 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power);
5ba3f43e
A
1539
1540 mpts->mpts_flags |= MPTSF_CLOSE_REQD;
1541
1542 mptcp_sched_create_subflows(mpte);
1543
cb323159
A
1544 if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
1545 mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
1546 viable != NULL) {
d9a64523 1547 *viable = 1;
0a7de745 1548 }
5ba3f43e
A
1549
1550out:
1551 socket_unlock(so, 1);
1552}
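/*
 * Note added for clarity: marking the subflow with MPTSF_CLOSE_REQD here does
 * not tear it down directly. The deferred mptcp_create_subflows() pass calls
 * mptcp_remove_subflows(), which checks MPTSF_CLOSE_REQD and injects a
 * SO_FILT_HINT_NOSRCADDR event, so the subflow is disposed of outside of the
 * NECP callback context.
 */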
1553
39236c6e
A
1554/*
1555 * Create an MPTCP subflow socket.
1556 */
1557static int
1558mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
5ba3f43e 1559 struct socket **so)
39236c6e 1560{
5ba3f43e 1561 lck_mtx_t *subflow_mtx;
39236c6e 1562 struct mptopt smpo, *mpo, *tmpo;
5ba3f43e 1563 struct proc *p;
39236c6e
A
1564 struct socket *mp_so;
1565 int error;
1566
1567 *so = NULL;
cb323159 1568
5ba3f43e
A
1569 mp_so = mptetoso(mpte);
1570
1571 p = proc_find(mp_so->last_pid);
1572 if (p == PROC_NULL) {
cb323159
A
1573 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1574 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);
5ba3f43e 1575
f427ee49 1576 mptcp_subflow_free(mpts);
0a7de745 1577 return ESRCH;
5ba3f43e 1578 }
39236c6e
A
1579
1580 /*
1581 * Create the subflow socket (multipath subflow, non-blocking.)
1582 *
1583 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
1584 * socket; it will be cleared when the socket is peeled off or closed.
1585 * It also indicates to the underlying TCP to handle MPTCP options.
1586 * A multipath subflow socket implies SS_NOFDREF state.
1587 */
5ba3f43e
A
1588
1589 /*
1590 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
1591 * the ipi-lock. We cannot hold the socket-lock at that point.
1592 */
cb323159 1593 socket_unlock(mp_so, 0);
5ba3f43e 1594 error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
cb323159
A
1595 SOCF_MPTCP, PROC_NULL);
1596 socket_lock(mp_so, 0);
5ba3f43e 1597 if (error) {
cb323159
A
1598 os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
1599 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
5ba3f43e
A
1600
1601 proc_rele(p);
1602
1603 mptcp_subflow_free(mpts);
0a7de745 1604 return error;
39236c6e
A
1605 }
1606
5ba3f43e
A
1607 /*
1608 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
1609 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
1610 * Which is why we also need to get the lock with pr_getlock, as after
1611 * setting the flag, socket_unlock will work on the MPTCP-level lock.
1612 */
1613 subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
1614 lck_mtx_lock(subflow_mtx);
1615
1616 /*
1617 * Must be the first thing we do, to make sure all pointers for this
1618 * subflow are set.
1619 */
1620 mptcp_subflow_attach(mpte, mpts, *so);
1621
1622 /*
1623 * A multipath subflow socket is used internally in the kernel,
	 * therefore it does not have a file descriptor associated by
1625 * default.
1626 */
1627 (*so)->so_state |= SS_NOFDREF;
1628
1629 lck_mtx_unlock(subflow_mtx);
39236c6e
A
1630
1631 /* prevent the socket buffers from being compressed */
1632 (*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
1633 (*so)->so_snd.sb_flags |= SB_NOCOMPRESS;
1634
490019cf 1635 /* Inherit preconnect and TFO data flags */
0a7de745 1636 if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
490019cf 1637 (*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
0a7de745
A
1638 }
1639 if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
490019cf 1640 (*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
0a7de745 1641 }
490019cf 1642
5ba3f43e
A
1643 /* Inherit uuid and create the related flow. */
1644 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1645 struct mptcb *mp_tp = mpte->mpte_mptcb;
1646
1647 sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;
1648
1649 /*
		 * A note on the unlock: with MPTCP, necp_client_register_socket_flow
		 * is called multiple times. This is problematic because the
		 * lock-ordering guarantee (necp locks before socket locks) is
		 * no longer respected, so we need to unlock here.
1655 */
cb323159 1656 socket_unlock(mp_so, 0);
5ba3f43e
A
1657 error = necp_client_register_socket_flow(mp_so->last_pid,
1658 mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
cb323159 1659 socket_lock(mp_so, 0);
5ba3f43e 1660
0a7de745 1661 if (error) {
cb323159
A
1662 os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
1663 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1664
5ba3f43e 1665 goto out_err;
0a7de745 1666 }
5ba3f43e
A
1667
1668 /* Possible state-change during the unlock above */
1669 if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
0a7de745 1670 (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
cb323159
A
1671 os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
1672 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1673 mp_tp->mpt_state, mp_tp->mpt_flags);
1674
1675 error = EINVAL;
5ba3f43e 1676 goto out_err;
0a7de745 1677 }
5ba3f43e
A
1678
1679 uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
cb323159
A
1680 }
1681
1682 /* Needs to happen prior to the delegation! */
1683 (*so)->last_pid = mp_so->last_pid;
1684
1685 if (mp_so->so_flags & SOF_DELEGATED) {
1686 if (mpte->mpte_epid) {
1687 error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
1688 if (error) {
1689 os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
1690 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1691 goto out_err;
1692 }
1693 }
1694 if (!uuid_is_null(mpte->mpte_euuid)) {
1695 error = so_set_effective_uuid(*so, mpte->mpte_euuid, p, false);
1696 if (error) {
1697 os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n",
1698 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1699 goto out_err;
1700 }
1701 }
5ba3f43e
A
1702 }
1703
1704 /* inherit the other socket options */
0a7de745 1705 bzero(&smpo, sizeof(smpo));
39236c6e
A
1706 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1707 smpo.mpo_level = SOL_SOCKET;
1708 smpo.mpo_intval = 1;
1709
1710 /* disable SIGPIPE */
1711 smpo.mpo_name = SO_NOSIGPIPE;
0a7de745 1712 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1713 goto out_err;
0a7de745 1714 }
39236c6e
A
1715
1716 /* find out if the subflow's source address goes away */
1717 smpo.mpo_name = SO_NOADDRERR;
0a7de745 1718 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1719 goto out_err;
0a7de745 1720 }
39236c6e 1721
5ba3f43e
A
1722 if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
1723 /*
1724 * On secondary subflows we might need to set the cell-fallback
1725 * flag (see conditions in mptcp_subflow_sosetopt).
1726 */
1727 smpo.mpo_level = SOL_SOCKET;
1728 smpo.mpo_name = SO_MARK_CELLFALLBACK;
1729 smpo.mpo_intval = 1;
0a7de745 1730 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1731 goto out_err;
0a7de745 1732 }
5ba3f43e 1733 }
39236c6e
A
1734
1735 /* replay setsockopt(2) on the subflow sockets for eligible options */
1736 TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
1737 int interim;
1738
0a7de745 1739 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
39236c6e 1740 continue;
0a7de745 1741 }
39236c6e
A
1742
1743 /*
		 * Skip options that are handled internally; mptcp_setopt()
		 * should not have recorded them or marked them with
		 * MPOF_SUBFLOW_OK, but check just in case.
1747 */
1748 if (mpo->mpo_level == SOL_SOCKET &&
1749 (mpo->mpo_name == SO_NOSIGPIPE ||
1750 mpo->mpo_name == SO_NOADDRERR ||
0a7de745 1751 mpo->mpo_name == SO_KEEPALIVE)) {
39236c6e 1752 continue;
0a7de745 1753 }
39236c6e
A
1754
1755 interim = (mpo->mpo_flags & MPOF_INTERIM);
5ba3f43e 1756 if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
cb323159
A
1757 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
1758 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
5ba3f43e 1759 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
cb323159 1760 mpo->mpo_intval);
39236c6e
A
1761 mptcp_sopt_remove(mpte, mpo);
1762 mptcp_sopt_free(mpo);
1763 continue;
1764 }
1765 }
1766
1767 /*
1768 * We need to receive everything that the subflow socket has,
1769 * so use a customized socket receive function. We will undo
1770 * this when the socket is peeled off or closed.
1771 */
39236c6e
A
1772 switch (dom) {
1773 case PF_INET:
1774 (*so)->so_proto = &mptcp_subflow_protosw;
1775 break;
39236c6e
A
1776 case PF_INET6:
1777 (*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
1778 break;
39236c6e
A
1779 default:
1780 VERIFY(0);
1781 /* NOTREACHED */
1782 }
1783
5ba3f43e
A
1784 proc_rele(p);
1785
1786 DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
1787 int, dom, int, error);
1788
0a7de745 1789 return 0;
39236c6e 1790
5ba3f43e
A
1791out_err:
1792 mptcp_subflow_abort(mpts, error);
1793
1794 proc_rele(p);
1795
0a7de745 1796 return error;
39236c6e
A
1797}
1798
1799/*
1800 * Close an MPTCP subflow socket.
1801 *
1802 * Note that this may be called on an embryonic subflow, and the only
1803 * thing that is guaranteed valid is the protocol-user request.
1804 */
5ba3f43e
A
1805static void
1806mptcp_subflow_soclose(struct mptsub *mpts)
39236c6e 1807{
5ba3f43e
A
1808 struct socket *so = mpts->mpts_socket;
1809
0a7de745 1810 if (mpts->mpts_flags & MPTSF_CLOSED) {
5ba3f43e 1811 return;
0a7de745 1812 }
39236c6e 1813
5ba3f43e 1814 VERIFY(so != NULL);
39236c6e 1815 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
0a7de745 1816 VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));
39236c6e 1817
39236c6e
A
1818 DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
1819 struct socket *, so,
1820 struct sockbuf *, &so->so_rcv,
1821 struct sockbuf *, &so->so_snd,
1822 struct mptses *, mpts->mpts_mpte);
1823
5ba3f43e
A
1824 mpts->mpts_flags |= MPTSF_CLOSED;
1825
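	/*
	 * If nobody else holds a retain count on the subflow socket, close
	 * it now; otherwise just drop our use count and let the last holder
	 * perform the actual close.
	 */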
1826 if (so->so_retaincnt == 0) {
1827 soclose_locked(so);
1828
1829 return;
1830 } else {
1831 VERIFY(so->so_usecount > 0);
1832 so->so_usecount--;
1833 }
1834
1835 return;
39236c6e
A
1836}
1837
1838/*
1839 * Connect an MPTCP subflow socket.
1840 *
5ba3f43e
A
1841 * Note that in the pending connect case, the subflow socket may have been
1842 * bound to an interface and/or a source IP address which may no longer be
1843 * around by the time this routine is called; in that case the connect attempt
1844 * will most likely fail.
39236c6e
A
1845 */
1846static int
1847mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
1848{
5ba3f43e
A
1849 char dbuf[MAX_IPv6_STR_LEN];
1850 struct socket *mp_so, *so;
1851 struct mptcb *mp_tp;
1852 struct sockaddr *dst;
1853 struct proc *p;
a39ff7e2 1854 int af, error, dport;
39236c6e 1855
5ba3f43e
A
1856 mp_so = mptetoso(mpte);
1857 mp_tp = mpte->mpte_mptcb;
a39ff7e2
A
1858 so = mpts->mpts_socket;
1859 af = mpts->mpts_dst.sa_family;
1860 dst = &mpts->mpts_dst;
1861
0a7de745 1862 VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
a39ff7e2
A
1863 VERIFY(mpts->mpts_socket != NULL);
1864 VERIFY(af == AF_INET || af == AF_INET6);
1865
1866 if (af == AF_INET) {
0a7de745 1867 inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
a39ff7e2
A
1868 dport = ntohs(SIN(dst)->sin_port);
1869 } else {
0a7de745 1870 inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
a39ff7e2
A
1871 dport = ntohs(SIN6(dst)->sin6_port);
1872 }
1873
f427ee49 1874 os_log(mptcp_log_handle,
1875 "%s - %lx: ifindex %u dst %s:%d pended %u\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1876 mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));
39236c6e 1877
5ba3f43e
A
1878 p = proc_find(mp_so->last_pid);
1879 if (p == PROC_NULL) {
cb323159
A
1880 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1881 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);
39236c6e 1882
0a7de745 1883 return ESRCH;
39236c6e
A
1884 }
1885
1886 mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;
1887
fe8ab488 1888 mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);
39037602 1889
39236c6e 1890 /* connect the subflow socket */
5ba3f43e
A
1891 error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
1892 p, mpts->mpts_ifscope,
1893 mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);
1894
1895 mpts->mpts_iss = sototcpcb(so)->iss;
1896
1897 /* See tcp_connect_complete */
1898 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
1899 (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1900 mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
1901 }
39236c6e 1902
fe8ab488
A
1903 /* Allocate a unique address id per subflow */
1904 mpte->mpte_addrid_last++;
0a7de745 1905 if (mpte->mpte_addrid_last == 0) {
fe8ab488 1906 mpte->mpte_addrid_last++;
0a7de745 1907 }
fe8ab488 1908
5ba3f43e
A
1909 proc_rele(p);
1910
39236c6e
A
1911 DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
1912 struct mptsub *, mpts, int, error);
0a7de745 1913 if (error) {
cb323159
A
1914 os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
1915 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope);
0a7de745 1916 }
39236c6e 1917
0a7de745 1918 return error;
39236c6e
A
1919}
1920
cb323159
A
1921static int
1922mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
1923 uint32_t rseq, uint16_t dlen)
1924{
1925 struct mptsub *mpts = sototcpcb(so)->t_mpsub;
1926
1927 if (m_pktlen(m) == 0) {
1928 return 0;
1929 }
1930
2a1bd2d3
A
1931 if (!(m->m_flags & M_PKTHDR)) {
1932 return 0;
1933 }
1934
1935 if (m->m_pkthdr.pkt_flags & PKTF_MPTCP) {
cb323159
A
1936 if (off && (dsn != m->m_pkthdr.mp_dsn ||
1937 rseq != m->m_pkthdr.mp_rseq ||
1938 dlen != m->m_pkthdr.mp_rlen)) {
1939 os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: %u - %u , %u - %u, %u - %u\n",
1940 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
1941 (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
1942 rseq, m->m_pkthdr.mp_rseq,
1943 dlen, m->m_pkthdr.mp_rlen);
1944
1945 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1946 return -1;
1947 }
2a1bd2d3 1948 }
f427ee49 1949
2a1bd2d3
A
1950 /* If mbuf is beyond right edge of the mapping, we need to split */
1951 if (m_pktlen(m) > dlen - off) {
1952 struct mbuf *new = m_split(m, dlen - off, M_DONTWAIT);
1953 if (new == NULL) {
1954 os_log_error(mptcp_log_handle, "%s - %lx: m_split failed dlen %u off %d pktlen %d, killing subflow %d",
1955 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
1956 dlen, off, m_pktlen(m),
1957 mpts->mpts_connid);
cb323159 1958
2a1bd2d3
A
1959 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1960 return -1;
1961 }
cb323159 1962
2a1bd2d3
A
1963 m->m_next = new;
1964 sballoc(&so->so_rcv, new);
1965 /* Undo, as sballoc will add to it as well */
1966 so->so_rcv.sb_cc -= new->m_len;
1967
1968 if (so->so_rcv.sb_mbtail == m) {
1969 so->so_rcv.sb_mbtail = new;
1970 }
1971 }
1972
2a1bd2d3
A
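	/*
	 * Stamp the mapping on this mbuf: it now describes only the portion
	 * of the original DSS mapping that starts at offset 'off'.
	 */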
1973 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
1974 m->m_pkthdr.mp_dsn = dsn + off;
1975 m->m_pkthdr.mp_rseq = rseq + off;
1976
1977 VERIFY(m_pktlen(m) < UINT16_MAX);
1978 m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);
1979
cb323159
A
1980 mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;
1981
1982 return 0;
1983}
1984
39236c6e
A
1985/*
1986 * MPTCP subflow socket receive routine, derived from soreceive().
1987 */
static int
mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
#pragma unused(uio)
	struct socket *mp_so;
	struct mptses *mpte;
	struct mptcb *mp_tp;
	int flags, error = 0;
	struct mbuf *m, **mp = mp0;

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

2003 VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);
2004
2005#ifdef MORE_LOCKING_DEBUG
2006 if (so->so_usecount == 1) {
2007 panic("%s: so=%x no other reference on socket\n", __func__, so);
2008 /* NOTREACHED */
2009 }
2010#endif
2011 /*
2012 * We return all that is there in the subflow's socket receive buffer
2013 * to the MPTCP layer, so we require that the caller passes in the
2014 * expected parameters.
2015 */
0a7de745
A
2016 if (mp == NULL || controlp != NULL) {
2017 return EINVAL;
2018 }
5ba3f43e 2019
39236c6e 2020 *mp = NULL;
0a7de745 2021 if (psa != NULL) {
39236c6e 2022 *psa = NULL;
2023 }
2024 if (flagsp != NULL) {
2025 flags = *flagsp & ~MSG_EOR;
2026 } else {
39236c6e 2027 flags = 0;
0a7de745 2028 }
39236c6e 2029
0a7de745
A
2030 if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
2031 return EOPNOTSUPP;
2032 }
5ba3f43e 2033
0a7de745 2034 flags |= (MSG_DONTWAIT | MSG_NBIO);
39236c6e
A
2035
2036 /*
2037 * If a recv attempt is made on a previously-accepted socket
2038 * that has been marked as inactive (disconnected), reject
2039 * the request.
2040 */
2041 if (so->so_flags & SOF_DEFUNCT) {
2042 struct sockbuf *sb = &so->so_rcv;
2043
2044 error = ENOTCONN;
39236c6e
A
2045 /*
2046 * This socket should have been disconnected and flushed
2047 * prior to being returned from sodefunct(); there should
2048 * be no data on its receive list, so panic otherwise.
2049 */
0a7de745 2050 if (so->so_state & SS_DEFUNCT) {
39236c6e 2051 sb_empty_assert(sb, __func__);
2052 }
2053 return error;
39236c6e
A
2054 }
2055
2056 /*
2057 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
2058 * and if so just return to the caller. This could happen when
2059 * soreceive() is called by a socket upcall function during the
2060 * time the socket is freed. The socket buffer would have been
2061 * locked across the upcall, therefore we cannot put this thread
2062 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
2063 * we may livelock), because the lock on the socket buffer will
2064 * only be released when the upcall routine returns to its caller.
2065 * Because the socket has been officially closed, there can be
2066 * no further read on it.
2067 *
2068 * A multipath subflow socket would have its SS_NOFDREF set by
2069 * default, so check for SOF_MP_SUBFLOW socket flag; when the
2070 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
2071 */
2072 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
2073 (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
2074 return 0;
2075 }
39236c6e
A
2076
2077 /*
2078 * For consistency with soreceive() semantics, we need to obey
2079 * SB_LOCK in case some other code path has locked the buffer.
2080 */
2081 error = sblock(&so->so_rcv, 0);
0a7de745
A
2082 if (error != 0) {
2083 return error;
2084 }
39236c6e
A
2085
2086 m = so->so_rcv.sb_mb;
2087 if (m == NULL) {
2088 /*
2089 * Panic if we notice inconsistencies in the socket's
2090 * receive list; both sb_mb and sb_cc should correctly
2091 * reflect the contents of the list, otherwise we may
2092 * end up with false positives during select() or poll()
2093 * which could put the application in a bad state.
2094 */
2095 SB_MB_CHECK(&so->so_rcv);
2096
2097 if (so->so_error != 0) {
2098 error = so->so_error;
2099 so->so_error = 0;
2100 goto release;
2101 }
2102
5ba3f43e
A
2103 if (so->so_state & SS_CANTRCVMORE) {
2104 goto release;
2105 }
2106
0a7de745 2107 if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
5ba3f43e
A
2108 error = ENOTCONN;
2109 goto release;
2110 }
2111
2112 /*
		 * MSG_DONTWAIT is implicitly set and this routine will never
		 * block, so return EWOULDBLOCK when there is nothing to read.
2115 */
2116 error = EWOULDBLOCK;
2117 goto release;
2118 }
2119
2120 mptcp_update_last_owner(so, mp_so);
2121
5ba3f43e
A
2122 SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
2123 SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
2124
2125 while (m != NULL) {
5c9f4661 2126 int dlen = 0, dfin = 0, error_out = 0;
5ba3f43e
A
2127 struct mbuf *start = m;
2128 uint64_t dsn;
2129 uint32_t sseq;
2130 uint16_t orig_dlen;
2131 uint16_t csum;
2132
2133 VERIFY(m->m_nextpkt == NULL);
2134
2a1bd2d3
A
2135 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
2136fallback:
2137 /* Just move mbuf to MPTCP-level */
5ba3f43e
A
2138
2139 sbfree(&so->so_rcv, m);
2140
2141 if (mp != NULL) {
2142 *mp = m;
2143 mp = &m->m_next;
2144 so->so_rcv.sb_mb = m = m->m_next;
2145 *mp = NULL;
2146 }
2147
2148 if (m != NULL) {
2149 so->so_rcv.sb_lastrecord = m;
2150 } else {
2151 SB_EMPTY_FIXUP(&so->so_rcv);
2152 }
2153
2154 continue;
2a1bd2d3
A
2155 } else if (!(m->m_flags & M_PKTHDR) || !(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
2156 struct mptsub *mpts = sototcpcb(so)->t_mpsub;
2157 boolean_t found_mapping = false;
2158 int parsed_length = 0;
2159 struct mbuf *m_iter;
2160
2161 /*
2162 * No MPTCP-option in the header. Either fallback or
2163 * wait for additional mappings.
2164 */
2165 if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) {
2166 /* data arrived without a DSS option mapping */
2167
2168 /* initial subflow can fallback right after SYN handshake */
2169 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
2170 mptcp_notify_mpfail(so);
2171
2172 goto fallback;
2173 } else {
2174 os_log_error(mptcp_log_handle, "%s - %lx: No DSS on secondary subflow. Killing %d\n",
2175 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
2176 mpts->mpts_connid);
2177 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
2178
2179 error = EIO;
2180 *mp0 = NULL;
2181 goto release;
2182 }
2183 }
2184
2185 /* Thus, let's look for an mbuf with the mapping */
2186 m_iter = m->m_next;
2187 parsed_length = m->m_len;
2188 while (m_iter != NULL && parsed_length < UINT16_MAX) {
2189 if (!(m_iter->m_flags & M_PKTHDR) || !(m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
2190 parsed_length += m_iter->m_len;
2191 m_iter = m_iter->m_next;
2192 continue;
2193 }
2194
2195 found_mapping = true;
2196
2197 /* Found an mbuf with a DSS-mapping */
2198 orig_dlen = dlen = m_iter->m_pkthdr.mp_rlen;
2199 dsn = m_iter->m_pkthdr.mp_dsn;
2200 sseq = m_iter->m_pkthdr.mp_rseq;
2201 csum = m_iter->m_pkthdr.mp_csum;
2202
2203 if (m_iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
2204 dfin = 1;
2205 }
2206
2207 break;
2208 }
2209
2210 if (!found_mapping && parsed_length < UINT16_MAX) {
2211 /* Mapping not yet present, we can wait! */
2212 if (*mp0 == NULL) {
2213 error = EWOULDBLOCK;
2214 }
2215 goto release;
2216 } else if (!found_mapping && parsed_length >= UINT16_MAX) {
2217 os_log_error(mptcp_log_handle, "%s - %lx: Received more than 64KB without DSS mapping. Killing %d\n",
2218 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
2219 mpts->mpts_connid);
2220 /* Received 64KB without DSS-mapping. We should kill the subflow */
2221 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
2222
2223 error = EIO;
2224 *mp0 = NULL;
2225 goto release;
2226 }
2227 } else {
2228 orig_dlen = dlen = m->m_pkthdr.mp_rlen;
2229 dsn = m->m_pkthdr.mp_dsn;
2230 sseq = m->m_pkthdr.mp_rseq;
2231 csum = m->m_pkthdr.mp_csum;
39236c6e 2232
2a1bd2d3
A
2233 if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
2234 dfin = 1;
2235 }
0a7de745 2236 }
5c9f4661 2237
5ba3f43e
A
2238 /*
2239 * Check if the full mapping is now present
2240 */
5c9f4661 2241 if ((int)so->so_rcv.sb_cc < dlen - dfin) {
0a7de745 2242 if (*mp0 == NULL) {
5ba3f43e 2243 error = EWOULDBLOCK;
0a7de745 2244 }
39236c6e
A
2245 goto release;
2246 }
2247
5ba3f43e
A
2248 /* Now, get the full mapping */
2249 while (dlen > 0) {
5c9f4661
A
2250 if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
2251 error_out = 1;
2252 error = EIO;
2253 dlen = 0;
0a7de745 2254 *mp0 = NULL;
2255 break;
2256 }
39236c6e 2257
5ba3f43e
A
2258 dlen -= m->m_len;
2259 sbfree(&so->so_rcv, m);
39236c6e 2260
5ba3f43e
A
2261 if (mp != NULL) {
2262 *mp = m;
2263 mp = &m->m_next;
2264 so->so_rcv.sb_mb = m = m->m_next;
2265 *mp = NULL;
2266 }
2267
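			/*
			 * A DATA_FIN consumes one byte of data-level sequence
			 * space but carries no payload; once only that byte
			 * remains, the mapping has been fully consumed.
			 */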
0a7de745 2268 if (dlen - dfin == 0) {
5c9f4661 2269 dlen = 0;
0a7de745 2270 }
5c9f4661 2271
5ba3f43e 2272 VERIFY(dlen <= 0 || m);
39236c6e
A
2273 }
2274
5ba3f43e
A
2275 VERIFY(dlen == 0);
2276
39236c6e 2277 if (m != NULL) {
5ba3f43e 2278 so->so_rcv.sb_lastrecord = m;
39236c6e 2279 } else {
39236c6e
A
2280 SB_EMPTY_FIXUP(&so->so_rcv);
2281 }
5ba3f43e 2282
0a7de745 2283 if (error_out) {
5c9f4661 2284 goto release;
0a7de745 2285 }
5c9f4661
A
2286
2287 if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
5ba3f43e
A
2288 error = EIO;
2289 *mp0 = NULL;
2290 goto release;
2291 }
2292
39236c6e
A
2293 SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
2294 SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
2295 }
2296
2297 DTRACE_MPTCP3(subflow__receive, struct socket *, so,
2298 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
39236c6e 2299
0a7de745 2300 if (flagsp != NULL) {
39236c6e 2301 *flagsp |= flags;
0a7de745 2302 }
39236c6e
A
2303
2304release:
5ba3f43e
A
2305 sbunlock(&so->so_rcv, TRUE);
2306
0a7de745 2307 return error;
39236c6e
A
2308}
2309
39236c6e 2310/*
5ba3f43e 2311 * MPTCP subflow socket send routine, derived from sosend().
39236c6e 2312 */
5ba3f43e
A
2313static int
2314mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2315 struct mbuf *top, struct mbuf *control, int flags)
39236c6e 2316{
5ba3f43e 2317 struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
5ba3f43e 2318 boolean_t en_tracing = FALSE, proc_held = FALSE;
2a1bd2d3 2319 struct proc *p = current_proc();
5ba3f43e
A
2320 int en_tracing_val;
2321 int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
2322 int error;
39236c6e 2323
5ba3f43e
A
2324 VERIFY(control == NULL);
2325 VERIFY(addr == NULL);
2326 VERIFY(uio == NULL);
2327 VERIFY(flags == 0);
2328 VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);
39236c6e 2329
5ba3f43e
A
2330 VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
2331 VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);
39236c6e
A
2332
	/*
	 * Trace only if tracing is enabled, this is a network (vs. unix)
	 * socket, and it is non-loopback.
	 */
5ba3f43e
A
2337 if (ENTR_SHOULDTRACE &&
2338 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2339 struct inpcb *inp = sotoinpcb(so);
2340 if (inp->inp_last_outifp != NULL &&
2341 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2342 en_tracing = TRUE;
2343 en_tracing_val = top->m_pkthdr.len;
2344 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
cb323159 2345 (unsigned long)VM_KERNEL_ADDRPERM(so),
2346 ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2347 (int64_t)en_tracing_val);
2348 }
2349 }
39236c6e 2350
5ba3f43e 2351 mptcp_update_last_owner(so, mp_so);
39236c6e 2352
5ba3f43e
A
2353 if (mp_so->last_pid != proc_pid(p)) {
2354 p = proc_find(mp_so->last_pid);
2355 if (p == PROC_NULL) {
2356 p = current_proc();
2357 } else {
2358 proc_held = TRUE;
2359 }
2360 }
39236c6e 2361
5ba3f43e
A
2362#if NECP
2363 inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
2364#endif /* NECP */
39236c6e 2365
f427ee49 2366 error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked);
0a7de745 2367 if (error) {
5ba3f43e 2368 goto out;
0a7de745 2369 }
39236c6e 2370
5ba3f43e
A
2371 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
2372 top = NULL;
39236c6e 2373
5ba3f43e 2374out:
0a7de745 2375 if (top != NULL) {
5ba3f43e 2376 m_freem(top);
0a7de745 2377 }
39236c6e 2378
0a7de745 2379 if (proc_held) {
5ba3f43e 2380 proc_rele(p);
0a7de745 2381 }
5ba3f43e
A
2382
2383 soclearfastopen(so);
2384
2385 if (en_tracing) {
2386 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
cb323159 2387 (unsigned long)VM_KERNEL_ADDRPERM(so),
2388 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2389 (int64_t)en_tracing_val);
2390 }
2391
0a7de745 2392 return error;
39236c6e
A
2393}
2394
2395/*
2396 * Establish an initial MPTCP connection (if first subflow and not yet
2397 * connected), or add a subflow to an existing MPTCP connection.
2398 */
2399int
5ba3f43e
A
2400mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
2401 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
39236c6e 2402{
39236c6e 2403 struct socket *mp_so, *so = NULL;
39236c6e 2404 struct mptcb *mp_tp;
5ba3f43e 2405 struct mptsub *mpts = NULL;
39236c6e
A
2406 int af, error = 0;
2407
5ba3f43e 2408 mp_so = mptetoso(mpte);
39236c6e
A
2409 mp_tp = mpte->mpte_mptcb;
2410
cb323159
A
2411 socket_lock_assert_owned(mp_so);
2412
fe8ab488
A
2413 if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
2414 /* If the remote end sends Data FIN, refuse subflow adds */
cb323159
A
2415 os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
2416 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state);
fe8ab488 2417 error = ENOTCONN;
5ba3f43e 2418 goto out_err;
fe8ab488 2419 }
39236c6e 2420
bca245ac
A
2421 if (mpte->mpte_numflows > MPTCP_MAX_NUM_SUBFLOWS) {
2422 error = EOVERFLOW;
2423 goto out_err;
2424 }
2425
5ba3f43e
A
2426 mpts = mptcp_subflow_alloc();
2427 if (mpts == NULL) {
cb323159
A
2428 os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
2429 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e
A
2430 error = ENOMEM;
2431 goto out_err;
2432 }
39236c6e 2433
0a7de745
A
2434 if (src) {
2435 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
2436 error = EAFNOSUPPORT;
2437 goto out_err;
2438 }
813fb2f6 2439
0a7de745
A
2440 if (src->sa_family == AF_INET &&
2441 src->sa_len != sizeof(struct sockaddr_in)) {
2442 error = EINVAL;
2443 goto out_err;
2444 }
2445
2446 if (src->sa_family == AF_INET6 &&
2447 src->sa_len != sizeof(struct sockaddr_in6)) {
2448 error = EINVAL;
2449 goto out_err;
2450 }
2451
2452 MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
2453 M_WAITOK | M_ZERO);
2454 if (mpts->mpts_src == NULL) {
5ba3f43e
A
2455 error = ENOMEM;
2456 goto out_err;
39236c6e 2457 }
0a7de745
A
2458 bcopy(src, mpts->mpts_src, src->sa_len);
2459 }
2460
2461 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
2462 error = EAFNOSUPPORT;
2463 goto out_err;
2464 }
2465
2466 if (dst->sa_family == AF_INET &&
2467 dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
2468 error = EINVAL;
2469 goto out_err;
2470 }
2471
2472 if (dst->sa_family == AF_INET6 &&
2473 dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
2474 error = EINVAL;
2475 goto out_err;
39236c6e
A
2476 }
2477
cb323159 2478 memcpy(&mpts->mpts_u_dst, dst, dst->sa_len);
5ba3f43e
A
2479
2480 af = mpts->mpts_dst.sa_family;
2481
0a7de745
A
2482 ifnet_head_lock_shared();
2483 if ((ifscope > (unsigned)if_index)) {
2484 ifnet_head_done();
2485 error = ENXIO;
2486 goto out_err;
2487 }
2488 ifnet_head_done();
2489
5ba3f43e
A
2490 mpts->mpts_ifscope = ifscope;
2491
39236c6e 2492 /* create the subflow socket */
0a7de745 2493 if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
5ba3f43e
A
2494 /*
		 * Return (error) without cleaning up, because up to here all
		 * we did was create mpts, and the contract is that
		 * mptcp_subflow_socreate() takes ownership of mpts.
2500 */
0a7de745
A
2501 return error;
2502 }
5ba3f43e
A
2503
2504 /*
2505 * We may be called from within the kernel. Still need to account this
2506 * one to the real app.
2507 */
2508 mptcp_update_last_owner(mpts->mpts_socket, mp_so);
39236c6e
A
2509
2510 /*
3e170ce0
A
2511 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
2512 * -1 (SAE_CONNID_ALL).
39236c6e
A
2513 */
2514 mpte->mpte_connid_last++;
3e170ce0 2515 if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
0a7de745 2516 mpte->mpte_connid_last == SAE_CONNID_ANY) {
39236c6e 2517 mpte->mpte_connid_last++;
0a7de745 2518 }
39236c6e
A
2519
2520 mpts->mpts_connid = mpte->mpte_connid_last;
490019cf
A
2521
2522 mpts->mpts_rel_seq = 1;
2523
fe8ab488
A
2524 /* Allocate a unique address id per subflow */
2525 mpte->mpte_addrid_last++;
0a7de745 2526 if (mpte->mpte_addrid_last == 0) {
fe8ab488 2527 mpte->mpte_addrid_last++;
0a7de745 2528 }
39236c6e 2529
39236c6e 2530 /* register for subflow socket read/write events */
cb323159 2531 sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);
39236c6e 2532
5ba3f43e
A
2533 /* Register for subflow socket control events */
2534 sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
39236c6e 2535 SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
2536 SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
2537 SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
2538 SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
2539 SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
2540 SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
cb323159 2541 SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR);
39236c6e
A
2542
2543 /* sanity check */
2544 VERIFY(!(mpts->mpts_flags &
0a7de745 2545 (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));
39236c6e 2546
39236c6e
A
2547 /*
2548 * Indicate to the TCP subflow whether or not it should establish
2549 * the initial MPTCP connection, or join an existing one. Fill
2550 * in the connection request structure with additional info needed
2551 * by the underlying TCP (to be used in the TCP options, etc.)
2552 */
39236c6e 2553 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
5ba3f43e
A
2554 mpts->mpts_flags |= MPTSF_INITIAL_SUB;
2555
39236c6e 2556 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
5ba3f43e 2557 mptcp_init_local_parms(mpte);
39236c6e 2558 }
39236c6e 2559 soisconnecting(mp_so);
5ba3f43e
A
2560
2561 /* If fastopen is requested, set state in mpts */
0a7de745 2562 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
5ba3f43e 2563 mpts->mpts_flags |= MPTSF_TFO_REQD;
0a7de745 2564 }
39236c6e 2565 } else {
0a7de745 2566 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) {
39236c6e 2567 mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
0a7de745 2568 }
490019cf
A
2569 }
2570
39236c6e
A
2571 mpts->mpts_flags |= MPTSF_CONNECTING;
2572
39236c6e 2573 /* connect right away if first attempt, or if join can be done now */
0a7de745 2574 if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) {
39236c6e 2575 error = mptcp_subflow_soconnectx(mpte, mpts);
0a7de745 2576 }
39236c6e 2577
0a7de745 2578 if (error) {
5ba3f43e 2579 goto out_err_close;
0a7de745 2580 }
5ba3f43e 2581
0a7de745 2582 if (pcid) {
5ba3f43e 2583 *pcid = mpts->mpts_connid;
0a7de745 2584 }
5ba3f43e 2585
0a7de745 2586 return 0;
5ba3f43e
A
2587
2588out_err_close:
2589 mptcp_subflow_abort(mpts, error);
2590
0a7de745 2591 return error;
5ba3f43e
A
2592
2593out_err:
0a7de745 2594 if (mpts) {
5ba3f43e 2595 mptcp_subflow_free(mpts);
0a7de745 2596 }
5ba3f43e 2597
0a7de745 2598 return error;
39236c6e
A
2599}
2600
5ba3f43e 2601void
cb323159 2602mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
5ba3f43e 2603{
cb323159 2604 int index = mptcpstats_get_index(stats, mpts);
5ba3f43e
A
2605
2606 if (index != -1) {
2607 struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
2608
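		/*
		 * Fold the subflow's per-inpcb byte counters into the MPTCP
		 * per-interface stats: inp_stat is the overall count, followed
		 * by the wifi, wired and cellular buckets.
		 */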
2609 stats[index].mpis_txbytes += inp->inp_stat->txbytes;
2610 stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;
cb323159
A
2611
2612 stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes;
2613 stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes;
2614
2615 stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes;
2616 stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes;
2617
2618 stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes;
2619 stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes;
5ba3f43e
A
2620 }
2621}
2622
/*
 * Delete/remove a subflow from an MPTCP connection. The underlying subflow
 * socket will no longer be accessible after a subflow is deleted, thus this
 * should occur only after the subflow socket has been disconnected.
 */
2628void
5ba3f43e 2629mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
39236c6e 2630{
5ba3f43e
A
2631 struct socket *mp_so = mptetoso(mpte);
2632 struct socket *so = mpts->mpts_socket;
2633 struct tcpcb *tp = sototcpcb(so);
39037602 2634
cb323159 2635 socket_lock_assert_owned(mp_so);
5ba3f43e
A
2636 VERIFY(mpts->mpts_mpte == mpte);
2637 VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
2638 VERIFY(mpte->mpte_numflows != 0);
2639 VERIFY(mp_so->so_usecount > 0);
39236c6e 2640
5ba3f43e 2641 mptcpstats_update(mpte->mpte_itfstats, mpts);
cb323159
A
2642
2643 mptcp_unset_cellicon(mpte, mpts, 1);
2644
5ba3f43e
A
2645 mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
2646 mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;
39236c6e 2647
39236c6e
A
2648 atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
2649 TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
39236c6e 2650 mpte->mpte_numflows--;
0a7de745 2651 if (mpte->mpte_active_sub == mpts) {
fe8ab488 2652 mpte->mpte_active_sub = NULL;
0a7de745 2653 }
39236c6e
A
2654
2655 /*
2656 * Drop references held by this subflow socket; there
2657 * will be no further upcalls made from this point.
2658 */
5ba3f43e
A
2659 sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
2660 sock_catchevents_locked(so, NULL, NULL, 0);
fe8ab488 2661
39236c6e 2662 mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
39037602 2663
0a7de745 2664 mp_so->so_usecount--; /* for subflow socket */
39236c6e
A
2665 mpts->mpts_mpte = NULL;
2666 mpts->mpts_socket = NULL;
39236c6e 2667
0a7de745
A
2668 mptcp_subflow_remref(mpts); /* for MPTCP subflow list */
2669 mptcp_subflow_remref(mpts); /* for subflow socket */
5ba3f43e
A
2670
2671 so->so_flags &= ~SOF_MP_SUBFLOW;
2672 tp->t_mptcb = NULL;
2673 tp->t_mpsub = NULL;
2674}
2675
2676void
2677mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
2678{
2679 struct socket *so = mpts->mpts_socket;
2680 struct mptcb *mp_tp = mpte->mpte_mptcb;
2681 int send_dfin = 0;
2682
0a7de745 2683 if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
5ba3f43e 2684 send_dfin = 1;
0a7de745 2685 }
5ba3f43e
A
2686
2687 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
2688 (so->so_state & SS_ISCONNECTED)) {
2689 mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
2690 __func__, mpts->mpts_connid, send_dfin),
2691 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
2692
0a7de745 2693 if (send_dfin) {
5ba3f43e 2694 mptcp_send_dfin(so);
0a7de745 2695 }
5ba3f43e
A
2696 soshutdownlock(so, SHUT_WR);
2697 }
5ba3f43e
A
2698}
2699
2700static void
2701mptcp_subflow_abort(struct mptsub *mpts, int error)
2702{
2703 struct socket *so = mpts->mpts_socket;
2704 struct tcpcb *tp = sototcpcb(so);
2705
0a7de745 2706 if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
5ba3f43e 2707 return;
0a7de745 2708 }
5ba3f43e
A
2709
2710 mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
0a7de745 2711 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2712
0a7de745 2713 if (tp->t_state != TCPS_CLOSED) {
5ba3f43e 2714 tcp_drop(tp, error);
0a7de745 2715 }
5ba3f43e
A
2716
2717 mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
2718}
2719
2720/*
2721 * Disconnect a subflow socket.
2722 */
2723void
5ba3f43e 2724mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
39236c6e 2725{
94ff46dc 2726 struct socket *so, *mp_so;
39236c6e
A
2727 struct mptcb *mp_tp;
2728 int send_dfin = 0;
2729
94ff46dc
A
2730 so = mpts->mpts_socket;
2731 mp_tp = mpte->mpte_mptcb;
2732 mp_so = mptetoso(mpte);
2733
2734 socket_lock_assert_owned(mp_so);
39236c6e 2735
0a7de745 2736 if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
39236c6e 2737 return;
0a7de745 2738 }
39236c6e 2739
cb323159
A
2740 mptcp_unset_cellicon(mpte, mpts, 1);
2741
39236c6e
A
2742 mpts->mpts_flags |= MPTSF_DISCONNECTING;
2743
0a7de745 2744 if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
39236c6e 2745 send_dfin = 1;
0a7de745 2746 }
39236c6e 2747
39236c6e
A
2748 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
2749 (so->so_state & SS_ISCONNECTED)) {
a39ff7e2 2750 mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
2751 __func__, mpts->mpts_connid, send_dfin),
2752 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2753
0a7de745 2754 if (send_dfin) {
39236c6e 2755 mptcp_send_dfin(so);
0a7de745 2756 }
94ff46dc
A
2757
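		/*
		 * If the MPTCP socket itself has been defuncted, defunct the
		 * subflow as well instead of doing a regular shutdown and
		 * disconnect.
		 */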
2758 if (mp_so->so_flags & SOF_DEFUNCT) {
2759 errno_t ret;
2760
2761 ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
2762 if (ret == 0) {
2763 ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
2764
2765 if (ret != 0) {
2766 os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
2767 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
2768 }
2769 } else {
2770 os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
2771 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
2772 }
2773 } else {
2774 (void) soshutdownlock(so, SHUT_RD);
2775 (void) soshutdownlock(so, SHUT_WR);
2776 (void) sodisconnectlocked(so);
2777 }
39236c6e 2778 }
94ff46dc 2779
39236c6e
A
2780 /*
2781 * Generate a disconnect event for this subflow socket, in case
2782 * the lower layer doesn't do it; this is needed because the
5ba3f43e 2783 * subflow socket deletion relies on it.
39236c6e 2784 */
5ba3f43e 2785 mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
2786}
2787
39236c6e
A
2788/*
2789 * Subflow socket input.
39236c6e
A
2790 */
2791static void
2792mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
2793{
5ba3f43e 2794 struct socket *mp_so = mptetoso(mpte);
39236c6e
A
2795 struct mbuf *m = NULL;
2796 struct socket *so;
5ba3f43e 2797 int error, wakeup = 0;
39236c6e 2798
5ba3f43e
A
2799 VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
2800 mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;
39236c6e 2801
39037602 2802 DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
39236c6e
A
2803 struct mptsub *, mpts);
2804
0a7de745 2805 if (!(mpts->mpts_flags & MPTSF_CONNECTED)) {
5ba3f43e 2806 goto out;
0a7de745 2807 }
39236c6e
A
2808
2809 so = mpts->mpts_socket;
2810
2811 error = sock_receive_internal(so, NULL, &m, 0, NULL);
2812 if (error != 0 && error != EWOULDBLOCK) {
cb323159
A
2813 os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
2814 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error);
5ba3f43e
A
2815 if (error == ENODATA) {
2816 /*
			 * Don't ignore ENODATA; surfacing it helps to
			 * discover nasty middleboxes.
2819 */
2820 mp_so->so_error = ENODATA;
2821
2822 wakeup = 1;
2823 goto out;
39236c6e 2824 }
39236c6e 2825 } else if (error == 0) {
5ba3f43e 2826 mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
3e170ce0 2827 MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e
A
2828 }
2829
2830 /* In fallback, make sure to accept data on all but one subflow */
5ba3f43e
A
2831 if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
2832 !(mpts->mpts_flags & MPTSF_ACTIVE)) {
2833 mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
2834 __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2835 m_freem(m);
5ba3f43e 2836 goto out;
39236c6e
A
2837 }
2838
2839 if (m != NULL) {
5ba3f43e 2840 if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
cb323159 2841 mptcp_set_cellicon(mpte, mpts);
3e170ce0 2842
5ba3f43e
A
2843 mpte->mpte_used_cell = 1;
2844 } else {
cb323159
A
2845 /*
2846 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
2847 * explicitly set the cellicon, then we unset it again.
2848 */
2849 if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
2850 mptcp_unset_cellicon(mpte, NULL, 1);
2851 }
5ba3f43e
A
2852
2853 mpte->mpte_used_wifi = 1;
2854 }
3e170ce0 2855
39236c6e 2856 mptcp_input(mpte, m);
39236c6e 2857 }
5ba3f43e 2858
5ba3f43e 2859out:
0a7de745 2860 if (wakeup) {
5ba3f43e 2861 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
0a7de745 2862 }
5ba3f43e 2863
cb323159
A
2864 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
2865}
2866
2867void
2868mptcp_handle_input(struct socket *so)
2869{
2870 struct mptsub *mpts, *tmpts;
2871 struct mptses *mpte;
2872
2873 if (!(so->so_flags & SOF_MP_SUBFLOW)) {
2874 return;
2875 }
2876
2877 mpts = sototcpcb(so)->t_mpsub;
2878 mpte = mpts->mpts_mpte;
2879
2880 socket_lock_assert_owned(mptetoso(mpte));
2881
2882 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
2883 if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) {
2884 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
2885 }
2886 return;
2887 }
2888
2889 mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE;
2890 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
2891 if (mpts->mpts_socket->so_usecount == 0) {
2892 /* Will be removed soon by tcp_garbage_collect */
2893 continue;
2894 }
2895
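		/*
		 * Hold both the subflow and its socket across the input call
		 * below so that neither can go away while we are using them.
		 */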
2896 mptcp_subflow_addref(mpts);
2897 mpts->mpts_socket->so_usecount++;
2898
2899 mptcp_subflow_input(mpte, mpts);
2900
2901 mptcp_subflow_remref(mpts); /* ours */
2902
2903 VERIFY(mpts->mpts_socket->so_usecount != 0);
2904 mpts->mpts_socket->so_usecount--;
2905 }
2906
2907 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
39236c6e
A
2908}
2909
/*
 * Subflow socket write upcall.
 *
 * Called when the associated subflow socket posted a write event.
 */
2915static void
2916mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
2917{
2918#pragma unused(so, waitf)
2919 struct mptsub *mpts = arg;
2920 struct mptses *mpte = mpts->mpts_mpte;
2921
5ba3f43e
A
2922 VERIFY(mpte != NULL);
2923
2924 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
0a7de745 2925 if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) {
5ba3f43e 2926 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
0a7de745 2927 }
fe8ab488 2928 return;
5ba3f43e 2929 }
39236c6e 2930
5ba3f43e 2931 mptcp_output(mpte);
39236c6e
A
2932}
2933
a39ff7e2
A
2934static boolean_t
2935mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
2936{
2937 struct mbuf *so_m = so->so_snd.sb_mb;
2938 uint64_t dsn = m->m_pkthdr.mp_dsn;
2939
2940 while (so_m) {
2941 VERIFY(so_m->m_flags & M_PKTHDR);
2942 VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);
2943
2944 /* Part of the segment is covered, don't reinject here */
2945 if (so_m->m_pkthdr.mp_dsn <= dsn &&
0a7de745 2946 so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) {
a39ff7e2 2947 return TRUE;
0a7de745 2948 }
a39ff7e2
A
2949
2950 so_m = so_m->m_next;
2951 }
2952
2953 return FALSE;
2954}
2955
39236c6e
A
2956/*
2957 * Subflow socket output.
2958 *
2959 * Called for sending data from MPTCP to the underlying subflow socket.
2960 */
2961int
5ba3f43e 2962mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
39236c6e 2963{
39236c6e 2964 struct mptcb *mp_tp = mpte->mpte_mptcb;
5ba3f43e
A
2965 struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
2966 struct socket *mp_so, *so;
2967 struct tcpcb *tp;
2968 uint64_t mpt_dsn = 0, off = 0;
2969 int sb_cc = 0, error = 0, wakeup = 0;
f427ee49 2970 uint16_t dss_csum;
5ba3f43e
A
2971 uint16_t tot_sent = 0;
2972 boolean_t reinjected = FALSE;
2973
5ba3f43e 2974 mp_so = mptetoso(mpte);
39236c6e 2975 so = mpts->mpts_socket;
5ba3f43e 2976 tp = sototcpcb(so);
39236c6e 2977
cb323159
A
2978 socket_lock_assert_owned(mp_so);
2979
5ba3f43e
A
2980 VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
2981 mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;
39236c6e 2982
5ba3f43e
A
2983 VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
2984 VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
0a7de745
A
2985 (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
2986 (mpts->mpts_flags & MPTSF_TFO_REQD));
5ba3f43e 2987 VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);
39236c6e 2988
5ba3f43e 2989 mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
2990 __func__, mpts->mpts_flags, mpte->mpte_flags,
2991 mptcp_subflow_cwnd_space(so)),
2992 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
2993 DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
2994 struct mptsub *, mpts);
39236c6e
A
2995
2996 /* Remove Addr Option is not sent reliably as per I-D */
2997 if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
39236c6e 2998 tp->t_rem_aid = mpte->mpte_lost_aid;
5ba3f43e 2999 tp->t_mpflags |= TMPF_SND_REM_ADDR;
39236c6e
A
3000 mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
3001 }
3002
3003 /*
3004 * The mbuf chains containing the metadata (as well as pointing to
3005 * the user data sitting at the MPTCP output queue) would then be
3006 * sent down to the subflow socket.
3007 *
3008 * Some notes on data sequencing:
3009 *
3010 * a. Each mbuf must be a M_PKTHDR.
3011 * b. MPTCP metadata is stored in the mptcp_pktinfo structure
3012 * in the mbuf pkthdr structure.
3013 * c. Each mbuf containing the MPTCP metadata must have its
3014 * pkt_flags marked with the PKTF_MPTCP flag.
3015 */
3016
0a7de745 3017 if (mpte->mpte_reinjectq) {
5ba3f43e 3018 sb_mb = mpte->mpte_reinjectq;
0a7de745 3019 } else {
5ba3f43e 3020 sb_mb = mp_so->so_snd.sb_mb;
0a7de745 3021 }
5ba3f43e 3022
39236c6e 3023 if (sb_mb == NULL) {
cb323159
A
3024 os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
3025 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
3026 (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
3027 (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1);
a39ff7e2
A
3028
3029 /* Fix it to prevent looping */
0a7de745 3030 if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
a39ff7e2 3031 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
0a7de745 3032 }
39236c6e
A
3033 goto out;
3034 }
3035
3036 VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);
3037
5ba3f43e
A
3038 if (sb_mb->m_pkthdr.mp_rlen == 0 &&
3039 !(so->so_state & SS_ISCONNECTED) &&
3040 (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
3041 tp->t_mpflags |= TMPF_TFO_REQUEST;
3042 goto zero_len_write;
39236c6e
A
3043 }
3044
5ba3f43e
A
3045 mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
3046
3047 /* First, drop acknowledged data */
39236c6e 3048 if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
cb323159 3049 os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier "
0a7de745 3050 "dsn %u suna %u reinject? %u\n",
3051 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn,
3052 (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq);
5ba3f43e
A
3053 if (mpte->mpte_reinjectq) {
3054 mptcp_clean_reinjectq(mpte);
3055 } else {
3056 uint64_t len = 0;
3057 len = mp_tp->mpt_snduna - mpt_dsn;
3058 sbdrop(&mp_so->so_snd, (int)len);
3059 wakeup = 1;
3060 }
3061 }
3062
3063 /* Check again because of above sbdrop */
3064 if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
cb323159
A
3065 os_log_error(mptcp_log_handle, "%s - $%lx: send-buffer is empty\n",
3066 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3067 goto out;
39236c6e
A
3068 }
3069
3070 /*
3071 * In degraded mode, we don't receive data acks, so force free
3072 * mbufs less than snd_nxt
3073 */
39236c6e 3074 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
fe8ab488 3075 (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
5ba3f43e
A
3076 mp_so->so_snd.sb_mb) {
3077 mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
3078 if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
3079 uint64_t len = 0;
3080 len = mp_tp->mpt_snduna - mpt_dsn;
3081 sbdrop(&mp_so->so_snd, (int)len);
3082 wakeup = 1;
3083
cb323159
A
3084 os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
3085 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
3086 (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna);
5ba3f43e 3087 }
39236c6e
A
3088 }
3089
fe8ab488
A
3090 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
3091 !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
3092 mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
3093 so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
39236c6e
A
3094 }
3095
3096 /*
3097 * Adjust the top level notion of next byte used for retransmissions
3098 * and sending FINs.
3099 */
0a7de745 3100 if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
39236c6e 3101 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
0a7de745 3102 }
39236c6e
A
3103
3104 /* Now determine the offset from which to start transmitting data */
0a7de745 3105 if (mpte->mpte_reinjectq) {
5ba3f43e 3106 sb_mb = mpte->mpte_reinjectq;
0a7de745 3107 } else {
a39ff7e2 3108dont_reinject:
5ba3f43e 3109 sb_mb = mp_so->so_snd.sb_mb;
0a7de745 3110 }
39236c6e 3111 if (sb_mb == NULL) {
cb323159
A
3112 os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
3113 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e
A
3114 goto out;
3115 }
5ba3f43e 3116
a39ff7e2 3117 if (sb_mb == mpte->mpte_reinjectq) {
5ba3f43e 3118 sb_cc = sb_mb->m_pkthdr.mp_rlen;
a39ff7e2
A
3119 off = 0;
3120
3121 if (mptcp_search_seq_in_sub(sb_mb, so)) {
3122 if (mptcp_can_send_more(mp_tp, TRUE)) {
3123 goto dont_reinject;
3124 }
3125
3126 error = ECANCELED;
3127 goto out;
3128 }
3129
3130 reinjected = TRUE;
5ba3f43e
A
3131 } else if (flags & MPTCP_SUBOUT_PROBING) {
3132 sb_cc = sb_mb->m_pkthdr.mp_rlen;
3133 off = 0;
39236c6e 3134 } else {
5ba3f43e
A
3135 sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);
3136
3137 /*
3138 * With TFO, there might be no data at all, thus still go into this
3139 * code-path here.
3140 */
3141 if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
3142 MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
3143 off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
3144 sb_cc -= off;
3145 } else {
cb323159
A
3146 os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
3147 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt,
3148 (uint32_t)mp_tp->mpt_sndmax);
5ba3f43e
A
3149
3150 goto out;
3151 }
39236c6e 3152 }
39236c6e 3153
5ba3f43e
A
3154 sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
3155 if (sb_cc <= 0) {
cb323159
A
3156 os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u,sndnxt %u sndmax %u cwnd %u\n",
3157 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
0a7de745 3158 (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
cb323159 3159 mptcp_subflow_cwnd_space(so));
5ba3f43e
A
3160 }
3161
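	/*
	 * The DSS mapping's data-level length field is 16 bits wide, so a
	 * single mapping can cover at most 64KB.
	 */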
3162 sb_cc = min(sb_cc, UINT16_MAX);
3163
3164 /*
3165 * Create a DSN mapping for the data we are about to send. It all
3166 * has the same mapping.
3167 */
0a7de745 3168 if (reinjected) {
5ba3f43e 3169 mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
0a7de745 3170 } else {
5ba3f43e 3171 mpt_dsn = mp_tp->mpt_snduna + off;
0a7de745 3172 }
39236c6e 3173
5ba3f43e 3174 mpt_mbuf = sb_mb;
a39ff7e2 3175 while (mpt_mbuf && reinjected == FALSE &&
3176 (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
3177 mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
39236c6e
A
3178 off -= mpt_mbuf->m_pkthdr.mp_rlen;
3179 mpt_mbuf = mpt_mbuf->m_next;
39236c6e 3180 }
0a7de745 3181 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
5ba3f43e
A
3182 mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
3183 __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
3e170ce0 3184 mpts->mpts_probecnt),
5ba3f43e 3185 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
0a7de745 3186 }
39236c6e 3187
ecc0ceb4 3188 VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
39236c6e 3189
fe8ab488
A
3190 head = tail = NULL;
3191
39236c6e 3192 while (tot_sent < sb_cc) {
f427ee49 3193 int32_t mlen;
39236c6e 3194
5ba3f43e 3195 mlen = mpt_mbuf->m_len;
39236c6e 3196 mlen -= off;
f427ee49 3197 mlen = MIN(mlen, sb_cc - tot_sent);
39236c6e 3198
5ba3f43e 3199 if (mlen < 0) {
cb323159 3200 os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
f427ee49 3201 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mlen, mpt_mbuf->m_pkthdr.mp_rlen,
cb323159 3202 (uint32_t)off, sb_cc, tot_sent);
5ba3f43e 3203 goto out;
3204 }
3205
0a7de745 3206 if (mlen == 0) {
5ba3f43e 3207 goto next;
0a7de745 3208 }
5ba3f43e 3209
fe8ab488
A
3210 m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
3211 M_COPYM_MUST_COPY_HDR);
39236c6e 3212 if (m == NULL) {
cb323159
A
3213 os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
3214 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e
A
3215 error = ENOBUFS;
3216 break;
3217 }
3218
3219 /* Create a DSN mapping for the data (m_copym does it) */
fe8ab488 3220 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e
A
3221 VERIFY(m->m_next == NULL);
3222
39236c6e
A
3223 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
3224 m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
5ba3f43e 3225 m->m_pkthdr.mp_dsn = mpt_dsn;
39236c6e 3226 m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
39236c6e
A
3227 m->m_pkthdr.len = mlen;
3228
fe8ab488 3229 if (head == NULL) {
0a7de745 3230 head = tail = m;
fe8ab488
A
3231 } else {
3232 tail->m_next = m;
3233 tail = m;
3234 }
3235
fe8ab488
A
3236 tot_sent += mlen;
3237 off = 0;
5ba3f43e 3238next:
fe8ab488
A
3239 mpt_mbuf = mpt_mbuf->m_next;
3240 }
3241
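	/*
	 * If only part of a reinjected mapping was sent, trim the sent bytes
	 * off its front and advance its DSN and length accordingly; otherwise
	 * the whole mapping went out and can be dropped from the reinject
	 * queue.
	 */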
a39ff7e2 3242 if (reinjected) {
5ba3f43e
A
3243 if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
3244 struct mbuf *n = sb_mb;
3245
3246 while (n) {
3247 n->m_pkthdr.mp_dsn += sb_cc;
3248 n->m_pkthdr.mp_rlen -= sb_cc;
3249 n = n->m_next;
3250 }
3251 m_adj(sb_mb, sb_cc);
3252 } else {
3253 mpte->mpte_reinjectq = sb_mb->m_nextpkt;
3254 m_freem(sb_mb);
3255 }
3256 }
3257
3258 mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
3259 __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
3260 tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3261
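	/*
	 * If DSS checksums are in use, compute the checksum once over the
	 * whole mapping; it is stamped on every mbuf of the chain below.
	 */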
3262 if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
3263 dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
0a7de745 3264 tot_sent);
5ba3f43e
A
3265 }
3266
3267 /* Now, let's update rel-seq and the data-level length */
3268 mpts->mpts_rel_seq += tot_sent;
3269 m = head;
3270 while (m) {
0a7de745 3271 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
5ba3f43e 3272 m->m_pkthdr.mp_csum = dss_csum;
0a7de745 3273 }
5ba3f43e
A
3274 m->m_pkthdr.mp_rlen = tot_sent;
3275 m = m->m_next;
3276 }
3277
3278 if (head != NULL) {
490019cf 3279 if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
0a7de745 3280 (tp->t_tfo_stats == 0)) {
39037602 3281 tp->t_mpflags |= TMPF_TFO_REQUEST;
0a7de745 3282 }
fe8ab488
A
3283
3284 error = sock_sendmbuf(so, NULL, head, 0, NULL);
3285
5ba3f43e 3286 DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
39236c6e
A
3287 struct sockbuf *, &so->so_rcv,
3288 struct sockbuf *, &so->so_snd,
3289 struct mptses *, mpte, struct mptsub *, mpts,
fe8ab488
A
3290 size_t, tot_sent);
3291 }
3292
5ba3f43e
A
3293done_sending:
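	/*
	 * With a pending TFO request, pru_send may return EWOULDBLOCK even
	 * though the request (and any data) has been queued on the subflow;
	 * treat that case like a successful send.
	 */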
3294 if (error == 0 ||
3295 (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
3296 uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;
3e170ce0
A
3297
3298 if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
3299 tcpstat.tcps_mp_num_probes++;
0a7de745 3300 if ((uint32_t)tot_sent < mpts->mpts_maxseg) {
3e170ce0 3301 mpts->mpts_probecnt += 1;
0a7de745 3302 } else {
3e170ce0 3303 mpts->mpts_probecnt +=
3304 tot_sent / mpts->mpts_maxseg;
3305 }
3e170ce0
A
3306 }
3307
5ba3f43e
A
3308 if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
3309 if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
0a7de745 3310 MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) {
39236c6e 3311 mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
0a7de745 3312 }
5ba3f43e 3313 mp_tp->mpt_sndnxt = new_sndnxt;
39236c6e 3314 }
fe8ab488 3315
5ba3f43e 3316 mptcp_cancel_timer(mp_tp, MPTT_REXMT);
490019cf 3317
5ba3f43e
A
3318 /* Must be here as mptcp_can_send_more() checks for this */
3319 soclearfastopen(mp_so);
39236c6e 3320
3e170ce0 3321 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
0a7de745 3322 (mpts->mpts_probesoon != 0)) {
5ba3f43e
A
3323 mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
3324 __func__, mpts->mpts_connid,
3325 !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
3326 tot_sent, (int) sb_cc, mpts->mpts_probecnt,
3e170ce0 3327 (tcp_now - mpts->mpts_probesoon)),
5ba3f43e 3328 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
0a7de745 3329 }
5ba3f43e
A
3330
3331 if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
cb323159 3332 mptcp_set_cellicon(mpte, mpts);
5ba3f43e
A
3333
3334 mpte->mpte_used_cell = 1;
3335 } else {
cb323159
A
3336 /*
3337 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
3338 * explicitly set the cellicon, then we unset it again.
3339 */
3340 if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
3341 mptcp_unset_cellicon(mpte, NULL, 1);
3342 }
5ba3f43e
A
3343
3344 mpte->mpte_used_wifi = 1;
3345 }
3346
3347 /*
3348 * Don't propagate EWOULDBLOCK - it's already taken care of
3349 * in mptcp_usr_send for TFO.
3350 */
3351 error = 0;
fe8ab488 3352 } else {
cb323159
A
3353 os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
3354 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
39236c6e
A
3355 }
3356out:
5ba3f43e 3357
0a7de745 3358 if (wakeup) {
5ba3f43e 3359 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
0a7de745 3360 }
39037602 3361
5ba3f43e 3362 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
0a7de745 3363 return error;
5ba3f43e
A
3364
3365zero_len_write:
 3366 /* Zero-length write: call pru_send directly, since there is no mbuf to hand to the subflow */
3367 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
0a7de745 3368 NULL, current_proc());
5ba3f43e
A
3369
3370 goto done_sending;
39236c6e
A
3371}
3372
39236c6e 3373static void
5ba3f43e 3374mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
39236c6e 3375{
5ba3f43e 3376 struct mbuf *n, *prev = NULL;
39236c6e 3377
5ba3f43e 3378 mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
0a7de745
A
3379 __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
3380 m->m_pkthdr.mp_rseq),
3381 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3382
3383 n = mpte->mpte_reinjectq;
3384
 3385 /* First, look for an mbuf n whose data-sequence number is greater
 3386 * than or equal to m's sequence number.
 3387 */
3388 while (n) {
0a7de745 3389 if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) {
5ba3f43e 3390 break;
0a7de745 3391 }
5ba3f43e
A
3392
3393 prev = n;
3394
3395 n = n->m_nextpkt;
3396 }
3397
3398 if (n) {
3399 /* m is already fully covered by the next mbuf in the queue */
3400 if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
3401 n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
3402 mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
0a7de745
A
3403 __func__, n->m_pkthdr.mp_rlen),
3404 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3405 goto dont_queue;
3406 }
3407
3408 /* m is covering the next mbuf entirely, thus we remove this guy */
3409 if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
3410 struct mbuf *tmp = n->m_nextpkt;
3411
3412 mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
f427ee49
A
3413 __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
3414 (uint32_t)n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
0a7de745 3415 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3416
3417 m->m_nextpkt = NULL;
0a7de745 3418 if (prev == NULL) {
5ba3f43e 3419 mpte->mpte_reinjectq = tmp;
0a7de745 3420 } else {
5ba3f43e 3421 prev->m_nextpkt = tmp;
0a7de745 3422 }
5ba3f43e
A
3423
3424 m_freem(n);
3425 n = tmp;
3426 }
5ba3f43e
A
3427 }
3428
3429 if (prev) {
3430 /* m is already fully covered by the previous mbuf in the queue */
 3431 if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen) {
3432 mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
f427ee49 3433 __func__, (uint32_t)prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
0a7de745 3434 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3435 goto dont_queue;
3436 }
3437 }
3438
0a7de745 3439 if (prev == NULL) {
5ba3f43e 3440 mpte->mpte_reinjectq = m;
0a7de745 3441 } else {
5ba3f43e 3442 prev->m_nextpkt = m;
0a7de745 3443 }
39236c6e 3444
5ba3f43e
A
3445 m->m_nextpkt = n;
3446
3447 return;
3448
3449dont_queue:
3450 m_freem(m);
3451 return;
39236c6e
A
3452}
3453
5ba3f43e
A
3454static struct mbuf *
3455mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
39236c6e 3456{
5ba3f43e
A
3457 struct socket *mp_so = mptetoso(mpte);
3458 struct mbuf *m;
39236c6e 3459
5ba3f43e 3460 m = mp_so->so_snd.sb_mb;
39236c6e 3461
5ba3f43e
A
3462 while (m) {
3463 /* If this segment covers what we are looking for, return it. */
3464 if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
0a7de745 3465 MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) {
5ba3f43e 3466 break;
0a7de745 3467 }
5ba3f43e
A
3468
3469
 3470 /* The segment is no longer in the queue */
0a7de745 3471 if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) {
5ba3f43e 3472 return NULL;
0a7de745 3473 }
5ba3f43e
A
3474
3475 m = m->m_next;
39236c6e
A
3476 }
3477
5ba3f43e
A
3478 return m;
3479}
fe8ab488 3480
5ba3f43e 3481static struct mbuf *
cb323159 3482mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len)
5ba3f43e
A
3483{
3484 struct mbuf *top = NULL, *tail = NULL;
3485 uint64_t dsn;
3486 uint32_t dlen, rseq;
39236c6e 3487
5ba3f43e
A
3488 dsn = m->m_pkthdr.mp_dsn;
3489 dlen = m->m_pkthdr.mp_rlen;
3490 rseq = m->m_pkthdr.mp_rseq;
3e170ce0 3491
5ba3f43e
A
3492 while (len > 0) {
3493 struct mbuf *n;
3494
3495 VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));
3496
3497 n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
3498 if (n == NULL) {
cb323159
A
3499 os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n",
3500 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3501 goto err;
3e170ce0 3502 }
fe8ab488 3503
5ba3f43e
A
3504 VERIFY(n->m_flags & M_PKTHDR);
3505 VERIFY(n->m_next == NULL);
3506 VERIFY(n->m_pkthdr.mp_dsn == dsn);
3507 VERIFY(n->m_pkthdr.mp_rlen == dlen);
3508 VERIFY(n->m_pkthdr.mp_rseq == rseq);
3509 VERIFY(n->m_len == m->m_len);
3510
3511 n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);
3512
0a7de745 3513 if (top == NULL) {
5ba3f43e 3514 top = n;
0a7de745 3515 }
5ba3f43e 3516
0a7de745 3517 if (tail != NULL) {
5ba3f43e 3518 tail->m_next = n;
0a7de745 3519 }
5ba3f43e
A
3520
3521 tail = n;
3522
3523 len -= m->m_len;
3524 m = m->m_next;
39236c6e
A
3525 }
3526
5ba3f43e
A
3527 return top;
3528
3529err:
0a7de745 3530 if (top) {
5ba3f43e 3531 m_freem(top);
0a7de745 3532 }
5ba3f43e
A
3533
3534 return NULL;
39236c6e
A
3535}
3536
5ba3f43e
A
3537static void
3538mptcp_reinject_mbufs(struct socket *so)
39236c6e 3539{
5ba3f43e
A
3540 struct tcpcb *tp = sototcpcb(so);
3541 struct mptsub *mpts = tp->t_mpsub;
3542 struct mptcb *mp_tp = tptomptp(tp);
 3543 struct mptses *mpte = mp_tp->mpt_mpte;
3544 struct sockbuf *sb = &so->so_snd;
3545 struct mbuf *m;
39236c6e 3546
5ba3f43e
A
3547 m = sb->sb_mb;
3548 while (m) {
3549 struct mbuf *n = m->m_next, *orig = m;
39236c6e 3550
5ba3f43e 3551 mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
0a7de745
A
3552 __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
3553 m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
3554 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 3555
5ba3f43e 3556 VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));
39236c6e 3557
0a7de745 3558 if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) {
5ba3f43e 3559 goto next;
0a7de745 3560 }
39236c6e 3561
5ba3f43e 3562 /* Has it all already been acknowledged at the data-level? */
0a7de745 3563 if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) {
5ba3f43e 3564 goto next;
0a7de745 3565 }
5ba3f43e
A
3566
 3567 /* Part of this has already been acknowledged at the subflow level -
 3568 * look the segment up in the MPTCP socket's send buffer.
 3569 */
3570 if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
3571 m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
0a7de745 3572 if (m == NULL) {
5ba3f43e 3573 goto next;
0a7de745 3574 }
5ba3f43e
A
3575 }
3576
3577 /* Copy the mbuf with headers (aka, DSN-numbers) */
cb323159 3578 m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
0a7de745 3579 if (m == NULL) {
5ba3f43e 3580 break;
0a7de745 3581 }
5ba3f43e
A
3582
3583 VERIFY(m->m_nextpkt == NULL);
3584
3585 /* Now, add to the reinject-queue, eliminating overlapping
3586 * segments
3587 */
3588 mptcp_add_reinjectq(mpte, m);
3589
3590 orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
3591
3592next:
3593 /* mp_rlen can cover multiple mbufs, so advance to the end of it. */
3594 while (n) {
3595 VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));
3596
0a7de745 3597 if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) {
5ba3f43e 3598 break;
0a7de745 3599 }
5ba3f43e
A
3600
3601 n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
3602 n = n->m_next;
3603 }
3604
3605 m = n;
39236c6e 3606 }
5ba3f43e 3607}
39236c6e 3608
5ba3f43e
A
3609void
3610mptcp_clean_reinjectq(struct mptses *mpte)
3611{
3612 struct mptcb *mp_tp = mpte->mpte_mptcb;
3613
cb323159 3614 socket_lock_assert_owned(mptetoso(mpte));
5ba3f43e
A
3615
3616 while (mpte->mpte_reinjectq) {
3617 struct mbuf *m = mpte->mpte_reinjectq;
3618
3619 if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
0a7de745 3620 MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) {
5ba3f43e 3621 break;
0a7de745 3622 }
5ba3f43e
A
3623
3624 mpte->mpte_reinjectq = m->m_nextpkt;
3625 m->m_nextpkt = NULL;
3626 m_freem(m);
3627 }
39236c6e
A
3628}
3629
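
mptcp_add_reinjectq() and mptcp_clean_reinjectq() above keep the reinject queue ordered by data sequence number, dropping a new segment that is already fully covered by a queued one and pruning queued segments once they have been acknowledged at the data level. The following userspace sketch is an editor's illustration, not part of xnu: the seg structure and the reinjectq_* helpers are made-up names, and the 64-bit wraparound that MPTCP_SEQ_* handles in the kernel is ignored. It shows the same ordered-insert and prune logic on a plain singly linked list.

#include <stdint.h>
#include <stdlib.h>

/* Hypothetical stand-in for an mbuf carrying mp_dsn/mp_rlen. */
struct seg {
	uint64_t dsn;      /* data sequence number of the first byte */
	uint32_t len;      /* data-level length */
	struct seg *next;
};

/* Insert 's' keeping the queue sorted by dsn; free whatever is fully covered. */
static void
reinjectq_add(struct seg **q, struct seg *s)
{
	struct seg *n = *q, *prev = NULL;

	while (n && n->dsn < s->dsn) {  /* find the first n with n->dsn >= s->dsn */
		prev = n;
		n = n->next;
	}

	/* s is fully covered by an already-queued segment with the same dsn */
	if (n && n->dsn == s->dsn && n->len >= s->len) {
		free(s);
		return;
	}

	/* s fully covers the next queued segment: remove that one */
	if (n && s->dsn + s->len >= n->dsn + n->len) {
		struct seg *tmp = n->next;

		free(n);
		n = tmp;
		if (prev == NULL)
			*q = n;
		else
			prev->next = n;
	}

	/* s is fully covered by the previous segment in the queue */
	if (prev && prev->dsn + prev->len >= s->dsn + s->len) {
		free(s);
		return;
	}

	if (prev == NULL)
		*q = s;
	else
		prev->next = s;
	s->next = n;
}

/* Drop every segment that has been fully acknowledged at the data level. */
static void
reinjectq_clean(struct seg **q, uint64_t snduna)
{
	while (*q && (*q)->dsn + (*q)->len <= snduna) {
		struct seg *m = *q;

		*q = m->next;
		free(m);
	}
}

Requiring an exact dsn match before treating the new segment as covered by its successor mirrors the kernel check: if the successor started at a higher dsn, the new segment would still contribute bytes in front of it.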
3630/*
5ba3f43e 3631 * Subflow socket control event upcall.
39236c6e 3632 */
5ba3f43e 3633static void
f427ee49 3634mptcp_subflow_eupcall1(struct socket *so, void *arg, long events)
39236c6e 3635{
5ba3f43e
A
3636#pragma unused(so)
3637 struct mptsub *mpts = arg;
3638 struct mptses *mpte = mpts->mpts_mpte;
39236c6e 3639
cb323159 3640 socket_lock_assert_owned(mptetoso(mpte));
39236c6e 3641
0a7de745 3642 if ((mpts->mpts_evctl & events) == events) {
5ba3f43e 3643 return;
0a7de745 3644 }
39236c6e 3645
5ba3f43e
A
3646 mpts->mpts_evctl |= events;
3647
3648 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
3649 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
3650 return;
39037602 3651 }
39236c6e 3652
5ba3f43e 3653 mptcp_subflow_workloop(mpte);
39236c6e
A
3654}
3655
3656/*
5ba3f43e
A
3657 * Subflow socket control events.
3658 *
3659 * Called for handling events related to the underlying subflow socket.
39236c6e
A
3660 */
3661static ev_ret_t
5ba3f43e 3662mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3663 long *p_mpsofilt_hint)
39236c6e 3664{
5ba3f43e
A
3665 ev_ret_t ret = MPTS_EVRET_OK;
3666 int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
0a7de745 3667 sizeof(mpsub_ev_entry_tbl[0]);
39236c6e 3668
5ba3f43e 3669 /* bail if there's nothing to process */
0a7de745
A
3670 if (!mpts->mpts_evctl) {
3671 return ret;
3672 }
39236c6e 3673
0a7de745
A
3674 if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
3675 SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
3676 SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
5ba3f43e
A
3677 SO_FILT_HINT_DISCONNECTED)) {
3678 mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
3679 }
3e170ce0 3680
5ba3f43e
A
3681 DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
3682 struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);
3683
5ba3f43e
A
3684 /*
3685 * Process all the socket filter hints and reset the hint
3686 * once it is handled
3687 */
3688 for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
3689 /*
 3690 * Always execute the DISCONNECTED event, because it will wake up
 3691 * the app.
3692 */
3693 if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
3694 (ret >= MPTS_EVRET_OK ||
0a7de745 3695 mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
5ba3f43e
A
3696 mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
3697 ev_ret_t error =
0a7de745 3698 mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
5ba3f43e
A
3699 ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
3700 }
3701 }
3702
0a7de745 3703 return ret;
39236c6e
A
3704}
3705
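
mptcp_subflow_events() above walks a table of {hint mask, handler} entries, clears each pending hint bit before invoking its handler, and folds the handlers' return values into a single verdict (a negative value such as MPTS_EVRET_DELETE wins over the positive ones). The dispatch pattern, reduced to a standalone userspace sketch (editor's illustration; the event bits, table, and handler names below are hypothetical, not the kernel's mpsub_ev_entry_tbl):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef enum { EVRET_DELETE = -1, EVRET_OK = 0, EVRET_CONNECT_PENDING = 1 } ev_ret_t;

struct ev_entry {
	uint32_t mask;                       /* one SO_FILT_HINT_* style bit */
	ev_ret_t (*handler)(uint32_t mask);  /* handler for that event */
};

static ev_ret_t
handle_connected(uint32_t mask)
{
	printf("connected event (%#x)\n", (unsigned)mask);
	return EVRET_OK;
}

static ev_ret_t
handle_reset(uint32_t mask)
{
	printf("reset event (%#x)\n", (unsigned)mask);
	return EVRET_DELETE;
}

static const struct ev_entry ev_tbl[] = {
	{ 0x1, handle_connected },
	{ 0x2, handle_reset },
};

/* Dispatch pending event bits, clearing each one as it is handled. */
static ev_ret_t
dispatch_events(uint32_t *pending)
{
	ev_ret_t ret = EVRET_OK;
	size_t i;

	for (i = 0; i < sizeof(ev_tbl) / sizeof(ev_tbl[0]) && *pending; i++) {
		if (!(*pending & ev_tbl[i].mask))
			continue;
		*pending &= ~ev_tbl[i].mask;            /* reset the hint first */
		ev_ret_t error = ev_tbl[i].handler(ev_tbl[i].mask);
		/* negative verdicts override, otherwise keep the maximum */
		ret = (error >= EVRET_OK) ? (error > ret ? error : ret) : error;
	}
	return ret;
}

int
main(void)
{
	uint32_t pending = 0x3;

	printf("verdict %d\n", dispatch_events(&pending));
	return 0;
}

Clearing the bit before running the handler means an event re-raised from inside a handler is not lost; it simply stays pending for the next pass of the workloop.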
39236c6e 3706static ev_ret_t
5ba3f43e 3707mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3708 long *p_mpsofilt_hint, long event)
39236c6e
A
3709{
3710 struct socket *mp_so, *so;
3711 struct mptcb *mp_tp;
39236c6e 3712
5ba3f43e 3713 mp_so = mptetoso(mpte);
39236c6e
A
3714 mp_tp = mpte->mpte_mptcb;
3715 so = mpts->mpts_socket;
3716
39236c6e 3717 /*
5ba3f43e
A
3718 * We got an event for this subflow that might need to be propagated,
3719 * based on the state of the MPTCP connection.
39236c6e 3720 */
5ba3f43e 3721 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
cb323159 3722 (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) ||
5ba3f43e
A
3723 ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
3724 mp_so->so_error = so->so_error;
3725 *p_mpsofilt_hint |= event;
39236c6e 3726 }
39236c6e 3727
0a7de745 3728 return MPTS_EVRET_OK;
39236c6e
A
3729}
3730
3731/*
3732 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
3733 */
3734static ev_ret_t
3e170ce0 3735mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3736 long *p_mpsofilt_hint, long event)
39236c6e 3737{
5ba3f43e
A
3738#pragma unused(p_mpsofilt_hint, event)
3739 struct socket *mp_so;
3740 struct tcpcb *tp;
39236c6e 3741
5ba3f43e
A
3742 mp_so = mptetoso(mpte);
3743 tp = intotcpcb(sotoinpcb(mpts->mpts_socket));
39236c6e 3744
39236c6e
A
3745 /*
3746 * This overwrites any previous mpte_lost_aid to avoid storing
3747 * too much state when the typical case has only two subflows.
3748 */
3749 mpte->mpte_flags |= MPTE_SND_REM_ADDR;
3750 mpte->mpte_lost_aid = tp->t_local_aid;
3751
5ba3f43e 3752 mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
0a7de745 3753 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
3754
3755 /*
3756 * The subflow connection has lost its source address.
39236c6e 3757 */
5ba3f43e 3758 mptcp_subflow_abort(mpts, EADDRNOTAVAIL);
39236c6e 3759
0a7de745 3760 if (mp_so->so_flags & SOF_NOADDRAVAIL) {
5ba3f43e 3761 mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
0a7de745 3762 }
39236c6e 3763
0a7de745 3764 return MPTS_EVRET_DELETE;
39236c6e
A
3765}
3766
cb323159
A
3767static ev_ret_t
3768mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3769 long *p_mpsofilt_hint, long event)
cb323159
A
3770{
3771#pragma unused(event, p_mpsofilt_hint)
3772 struct socket *so, *mp_so;
3773
3774 so = mpts->mpts_socket;
3775
3776 if (so->so_error != ENODATA) {
3777 return MPTS_EVRET_OK;
3778 }
3779
3780
3781 mp_so = mptetoso(mpte);
3782
3783 mp_so->so_error = ENODATA;
3784
3785 sorwakeup(mp_so);
3786 sowwakeup(mp_so);
3787
3788 return MPTS_EVRET_OK;
3789}
3790
3791
fe8ab488
A
3792/*
3793 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
3794 * indicates that the remote side sent a Data FIN
3795 */
3796static ev_ret_t
3e170ce0 3797mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3798 long *p_mpsofilt_hint, long event)
fe8ab488 3799{
5ba3f43e 3800#pragma unused(event)
cb323159 3801 struct mptcb *mp_tp = mpte->mpte_mptcb;
fe8ab488 3802
5ba3f43e 3803 mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
3e170ce0 3804 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39037602 3805
fe8ab488 3806 /*
0a7de745
A
3807 * We got a Data FIN for the MPTCP connection.
3808 * The FIN may arrive with data. The data is handed up to the
3809 * mptcp socket and the user is notified so that it may close
3810 * the socket if needed.
3811 */
3812 if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
5ba3f43e 3813 *p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
0a7de745 3814 }
39037602 3815
0a7de745 3816 return MPTS_EVRET_OK; /* keep the subflow socket around */
fe8ab488
A
3817}
3818
39236c6e
A
3819/*
3820 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
3821 */
3822static ev_ret_t
3e170ce0 3823mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3824 long *p_mpsofilt_hint, long event)
39236c6e 3825{
5ba3f43e 3826#pragma unused(event, p_mpsofilt_hint)
39236c6e 3827 struct mptsub *mpts_alt = NULL;
5ba3f43e 3828 struct socket *alt_so = NULL;
39236c6e
A
3829 struct socket *mp_so;
3830 int altpath_exists = 0;
3831
5ba3f43e 3832 mp_so = mptetoso(mpte);
cb323159 3833 os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e 3834
5ba3f43e 3835 mptcp_reinject_mbufs(mpts->mpts_socket);
39236c6e 3836
cb323159
A
3837 mpts_alt = mptcp_get_subflow(mpte, NULL);
3838
3839 /* If there is no alternate eligible subflow, ignore the failover hint. */
3840 if (mpts_alt == NULL || mpts_alt == mpts) {
3841 os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
3842 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3843
39236c6e
A
3844 goto done;
3845 }
5ba3f43e 3846
39236c6e 3847 altpath_exists = 1;
5ba3f43e 3848 alt_so = mpts_alt->mpts_socket;
39236c6e 3849 if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
fe8ab488 3850 /* All data acknowledged and no RTT spike */
5ba3f43e 3851 if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
39236c6e
A
3852 mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
3853 } else {
3854 /* no alternate path available */
3855 altpath_exists = 0;
3856 }
39236c6e 3857 }
39236c6e
A
3858
3859 if (altpath_exists) {
5ba3f43e 3860 mpts_alt->mpts_flags |= MPTSF_ACTIVE;
39236c6e 3861
5ba3f43e 3862 mpte->mpte_active_sub = mpts_alt;
39236c6e
A
3863 mpts->mpts_flags |= MPTSF_FAILINGOVER;
3864 mpts->mpts_flags &= ~MPTSF_ACTIVE;
5ba3f43e 3865
cb323159
A
3866 os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
3867 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid);
5ba3f43e
A
3868
3869 mptcpstats_inc_switch(mpte, mpts);
3870
3871 sowwakeup(alt_so);
39236c6e 3872 } else {
5ba3f43e 3873 mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
0a7de745
A
3874 mpts->mpts_connid),
3875 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 3876done:
5ba3f43e 3877 mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
39236c6e 3878 }
5ba3f43e 3879
0a7de745 3880 return MPTS_EVRET_OK;
39236c6e
A
3881}
3882
3883/*
3884 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
3885 */
3886static ev_ret_t
3e170ce0 3887mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 3888 long *p_mpsofilt_hint, long event)
39236c6e 3889{
5ba3f43e
A
3890 mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
3891 mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e 3892
39236c6e 3893 /*
5ba3f43e
A
3894 * The subflow connection cannot use the outgoing interface, let's
3895 * close this subflow.
39236c6e 3896 */
5ba3f43e 3897 mptcp_subflow_abort(mpts, EPERM);
39236c6e 3898
5ba3f43e 3899 mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
39236c6e 3900
0a7de745 3901 return MPTS_EVRET_DELETE;
39236c6e
A
3902}
3903
a39ff7e2
A
3904/*
3905 * https://tools.ietf.org/html/rfc6052#section-2
3906 * https://tools.ietf.org/html/rfc6147#section-5.2
3907 */
3908static boolean_t
3909mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
0a7de745
A
3910 const struct ipv6_prefix *prefix,
3911 struct in_addr *addrv4)
a39ff7e2
A
3912{
3913 char buf[MAX_IPv4_STR_LEN];
3914 char *ptrv4 = (char *)addrv4;
3915 const char *ptr = (const char *)addr;
3916
0a7de745 3917 if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
a39ff7e2 3918 return false;
0a7de745 3919 }
a39ff7e2
A
3920
3921 switch (prefix->prefix_len) {
0a7de745
A
3922 case NAT64_PREFIX_LEN_96:
3923 memcpy(ptrv4, ptr + 12, 4);
3924 break;
3925 case NAT64_PREFIX_LEN_64:
3926 memcpy(ptrv4, ptr + 9, 4);
3927 break;
3928 case NAT64_PREFIX_LEN_56:
3929 memcpy(ptrv4, ptr + 7, 1);
3930 memcpy(ptrv4 + 1, ptr + 9, 3);
3931 break;
3932 case NAT64_PREFIX_LEN_48:
3933 memcpy(ptrv4, ptr + 6, 2);
3934 memcpy(ptrv4 + 2, ptr + 9, 2);
3935 break;
3936 case NAT64_PREFIX_LEN_40:
3937 memcpy(ptrv4, ptr + 5, 3);
3938 memcpy(ptrv4 + 3, ptr + 9, 1);
3939 break;
3940 case NAT64_PREFIX_LEN_32:
3941 memcpy(ptrv4, ptr + 4, 4);
3942 break;
3943 default:
3944 panic("NAT64-prefix len is wrong: %u\n",
3945 prefix->prefix_len);
a39ff7e2
A
3946 }
3947
3948 os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
0a7de745 3949 inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));
a39ff7e2
A
3950
3951 return true;
3952}
3953
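
mptcp_desynthesize_ipv6_addr() above reverses RFC 6052 address synthesis: depending on the NAT64 prefix length, the embedded IPv4 bytes sit at different offsets of the IPv6 address (skipping the reserved byte at offset 8, which is why several cases copy around it). For the 96-bit well-known prefix 64:ff9b::/96 the IPv4 address is simply the last four bytes, as in this standalone sketch (editor's illustration using only standard socket headers; the address is the documentation example from RFC 6052):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	struct in6_addr six;
	struct in_addr four;
	char buf[INET_ADDRSTRLEN];

	/* 192.0.2.33 synthesized behind the well-known 64:ff9b::/96 prefix */
	if (inet_pton(AF_INET6, "64:ff9b::c000:221", &six) != 1)
		return 1;

	/* For a /96 prefix the IPv4 address occupies bytes 12..15
	 * (cf. the NAT64_PREFIX_LEN_96 case above). */
	memcpy(&four, (const unsigned char *)&six + 12, 4);

	printf("desynthesized %s\n", inet_ntop(AF_INET, &four, buf, sizeof(buf)));
	return 0;
}

Running the sketch prints 192.0.2.33, recovering the IPv4 address that the DNS64/NAT64 pair embedded in the synthesized AAAA record.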
3954static void
3955mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
3956{
3957 struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
3958 struct socket *so = mpts->mpts_socket;
3959 struct ifnet *ifp;
3960 int j;
3961
cb323159
A
3962 /* Subflow IPs will be steered directly by the server - no need to
3963 * desynthesize.
3964 */
3965 if (mpte->mpte_flags & MPTE_UNICAST_IP) {
3966 return;
3967 }
3968
a39ff7e2
A
3969 ifp = sotoinpcb(so)->inp_last_outifp;
3970
3971 if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
3972 mptcp_ask_for_nat64(ifp);
3973 return;
3974 }
3975
3976
3977 for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
3978 int success;
3979
0a7de745 3980 if (nat64prefixes[j].prefix_len == 0) {
a39ff7e2 3981 continue;
0a7de745 3982 }
a39ff7e2
A
3983
3984 success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
0a7de745
A
3985 &nat64prefixes[j],
3986 &mpte->mpte_dst_v4_nat64.sin_addr);
a39ff7e2
A
3987 if (success) {
3988 mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
3989 mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
3990 mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
3991 break;
3992 }
3993 }
3994}
3995
f427ee49
A
3996static void
3997mptcp_try_alternate_port(struct mptses *mpte, struct mptsub *mpts)
3998{
3999 struct inpcb *inp;
4000
4001 if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
4002 return;
4003 }
4004
4005 inp = sotoinpcb(mpts->mpts_socket);
4006 if (inp == NULL) {
4007 return;
4008 }
4009
4010 /* Should we try the alternate port? */
4011 if (mpte->mpte_alternate_port &&
4012 inp->inp_fport != mpte->mpte_alternate_port) {
4013 union sockaddr_in_4_6 dst;
4014 struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;
4015
4016 memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);
4017
4018 dst_in->sin_port = mpte->mpte_alternate_port;
4019
4020 mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
4021 mpts->mpts_ifscope, NULL);
4022 } else { /* Else, we tried all we could, mark this interface as non-MPTCP */
4023 unsigned int i;
4024
4025 if (inp->inp_last_outifp == NULL) {
4026 return;
4027 }
4028
4029 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
4030 struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
4031
4032 if (inp->inp_last_outifp->if_index == info->ifindex) {
4033 info->no_mptcp_support = 1;
4034 break;
4035 }
4036 }
4037 }
4038}
4039
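
mptcp_try_alternate_port() above retries the same destination on mpte_alternate_port by copying the stored destination sockaddr and overwriting only the port field before handing it to mptcp_subflow_add(). In userspace terms the copy-and-patch step looks like this sketch (editor's illustration; the address and the 8443 alternate port are placeholders):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	struct sockaddr_in dst, alt;

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(443);
	inet_pton(AF_INET, "198.51.100.7", &dst.sin_addr);

	/* Same host, different port: copy the whole sockaddr, patch sin_port. */
	memcpy(&alt, &dst, sizeof(dst));
	alt.sin_port = htons(8443);   /* hypothetical alternate port */

	printf("retrying %s on port %u\n",
	    inet_ntoa(alt.sin_addr), ntohs(alt.sin_port));
	return 0;
}

As in the kernel code, the port is kept in network byte order throughout, so the copy only ever touches sin_port.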
39236c6e
A
4040/*
4041 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
4042 */
4043static ev_ret_t
3e170ce0 4044mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4045 long *p_mpsofilt_hint, long event)
39236c6e 4046{
5ba3f43e 4047#pragma unused(event, p_mpsofilt_hint)
39236c6e 4048 struct socket *mp_so, *so;
5ba3f43e
A
4049 struct inpcb *inp;
4050 struct tcpcb *tp;
39236c6e 4051 struct mptcb *mp_tp;
5ba3f43e 4052 int af;
39236c6e
A
4053 boolean_t mpok = FALSE;
4054
5ba3f43e
A
4055 mp_so = mptetoso(mpte);
4056 mp_tp = mpte->mpte_mptcb;
39236c6e 4057 so = mpts->mpts_socket;
5ba3f43e
A
4058 tp = sototcpcb(so);
4059 af = mpts->mpts_dst.sa_family;
39236c6e 4060
0a7de745
A
4061 if (mpts->mpts_flags & MPTSF_CONNECTED) {
4062 return MPTS_EVRET_OK;
4063 }
39236c6e
A
4064
4065 if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
4066 (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
fe8ab488
A
4067 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
4068 (so->so_state & SS_ISCONNECTED)) {
0a7de745
A
4069 mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
4070 __func__, mpts->mpts_connid),
4071 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488
A
4072 (void) soshutdownlock(so, SHUT_RD);
4073 (void) soshutdownlock(so, SHUT_WR);
4074 (void) sodisconnectlocked(so);
4075 }
0a7de745 4076 return MPTS_EVRET_OK;
39236c6e
A
4077 }
4078
4079 /*
4080 * The subflow connection has been connected. Find out whether it
4081 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
4082 *
4083 * a. If MPTCP connection is not yet established, then this must be
4084 * the first subflow connection. If MPTCP failed to negotiate,
5ba3f43e 4085 * fallback to regular TCP by degrading this subflow.
39236c6e
A
4086 *
4087 * b. If MPTCP connection has been established, then this must be
4088 * one of the subsequent subflow connections. If MPTCP failed
5ba3f43e 4089 * to negotiate, disconnect the connection.
39236c6e
A
4090 *
4091 * Right now, we simply unblock any waiters at the MPTCP socket layer
4092 * if the MPTCP connection has not been established.
4093 */
39236c6e
A
4094
4095 if (so->so_state & SS_ISDISCONNECTED) {
4096 /*
4097 * With MPTCP joins, a connection is connected at the subflow
4098 * level, but the 4th ACK from the server elevates the MPTCP
490019cf
A
4099 * subflow to connected state. So there is a small window
4100 * where the subflow could get disconnected before the
39236c6e
A
4101 * connected event is processed.
4102 */
0a7de745 4103 return MPTS_EVRET_OK;
39236c6e
A
4104 }
4105
0a7de745 4106 if (mpts->mpts_flags & MPTSF_TFO_REQD) {
5ba3f43e 4107 mptcp_drop_tfo_data(mpte, mpts);
0a7de745 4108 }
490019cf 4109
5ba3f43e
A
4110 mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
4111 mpts->mpts_flags |= MPTSF_CONNECTED;
490019cf 4112
0a7de745 4113 if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
39236c6e 4114 mpts->mpts_flags |= MPTSF_MP_CAPABLE;
0a7de745 4115 }
39236c6e 4116
490019cf
A
4117 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
4118
39236c6e 4119 /* get/verify the outbound interface */
5ba3f43e 4120 inp = sotoinpcb(so);
3e170ce0 4121
5ba3f43e 4122 mpts->mpts_maxseg = tp->t_maxseg;
3e170ce0 4123
5ba3f43e
A
4124 mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
4125 ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
4126 ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
3e170ce0 4127 (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);
39236c6e
A
4128
4129 mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
39236c6e 4130
39236c6e 4131 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
5ba3f43e
A
4132 mp_tp->mpt_state = MPTCPS_ESTABLISHED;
4133 mpte->mpte_associd = mpts->mpts_connid;
4134 DTRACE_MPTCP2(state__change,
4135 struct mptcb *, mp_tp,
4136 uint32_t, 0 /* event */);
4137
4138 if (SOCK_DOM(so) == AF_INET) {
4139 in_getsockaddr_s(so, &mpte->__mpte_src_v4);
4140 } else {
4141 in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
4142 }
4143
a39ff7e2
A
4144 mpts->mpts_flags |= MPTSF_ACTIVE;
4145
39236c6e
A
4146 /* case (a) above */
4147 if (!mpok) {
5ba3f43e
A
4148 tcpstat.tcps_mpcap_fallback++;
4149
4150 tp->t_mpflags |= TMPF_INFIN_SENT;
4151 mptcp_notify_mpfail(so);
39236c6e 4152 } else {
5ba3f43e 4153 if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
cb323159 4154 mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
5ba3f43e 4155 tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
39037602
A
4156 } else {
4157 mpts->mpts_flags |= MPTSF_PREFERRED;
4158 }
39236c6e
A
4159 mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
4160 mpte->mpte_nummpcapflows++;
5ba3f43e 4161
0a7de745 4162 if (SOCK_DOM(so) == AF_INET6) {
a39ff7e2 4163 mptcp_handle_ipv6_connection(mpte, mpts);
0a7de745 4164 }
a39ff7e2 4165
5ba3f43e
A
4166 mptcp_check_subflows_and_add(mpte);
4167
0a7de745 4168 if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
5ba3f43e 4169 mpte->mpte_initial_cell = 1;
0a7de745 4170 }
5ba3f43e
A
4171
4172 mpte->mpte_handshake_success = 1;
39236c6e 4173 }
5ba3f43e
A
4174
4175 mp_tp->mpt_sndwnd = tp->snd_wnd;
4176 mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
4177 mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
4178 soisconnected(mp_so);
39236c6e 4179 } else if (mpok) {
39236c6e
A
4180 /*
4181 * case (b) above
4182 * In case of additional flows, the MPTCP socket is not
 4183 * MPTSF_MP_CAPABLE until an ACK is received from the server
 4184 * completing the 3-way handshake. TCP has already guaranteed that this
4185 * is an MPTCP subflow.
4186 */
5ba3f43e
A
4187 if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
4188 !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
cb323159 4189 mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
5ba3f43e
A
4190 tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
4191 mpts->mpts_flags &= ~MPTSF_PREFERRED;
4192 } else {
4193 mpts->mpts_flags |= MPTSF_PREFERRED;
4194 }
4195
39236c6e
A
4196 mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
4197 mpte->mpte_nummpcapflows++;
5ba3f43e
A
4198
4199 mpts->mpts_rel_seq = 1;
4200
4201 mptcp_check_subflows_and_remove(mpte);
fe8ab488 4202 } else {
f427ee49 4203 mptcp_try_alternate_port(mpte, mpts);
5ba3f43e
A
4204
4205 tcpstat.tcps_join_fallback++;
0a7de745 4206 if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
5ba3f43e 4207 tcpstat.tcps_mptcp_cell_proxy++;
0a7de745 4208 } else {
5ba3f43e 4209 tcpstat.tcps_mptcp_wifi_proxy++;
0a7de745 4210 }
5ba3f43e
A
4211
4212 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
4213
0a7de745 4214 return MPTS_EVRET_OK;
39236c6e 4215 }
fe8ab488 4216
 5ba3f43e 4217 /* This call just reserves ("books") an entry in the stats table for this ifindex */
cb323159 4218 mptcpstats_get_index(mpte->mpte_itfstats, mpts);
5ba3f43e
A
4219
4220 mptcp_output(mpte);
39236c6e 4221
0a7de745 4222 return MPTS_EVRET_OK; /* keep the subflow socket around */
39236c6e
A
4223}
4224
4225/*
4226 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
4227 */
4228static ev_ret_t
3e170ce0 4229mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4230 long *p_mpsofilt_hint, long event)
39236c6e 4231{
5ba3f43e 4232#pragma unused(event, p_mpsofilt_hint)
39236c6e
A
4233 struct socket *mp_so, *so;
4234 struct mptcb *mp_tp;
39236c6e 4235
5ba3f43e 4236 mp_so = mptetoso(mpte);
39236c6e
A
4237 mp_tp = mpte->mpte_mptcb;
4238 so = mpts->mpts_socket;
4239
5ba3f43e
A
4240 mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
4241 __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
4242 !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
4243 !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
3e170ce0 4244 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e 4245
0a7de745
A
4246 if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
4247 return MPTS_EVRET_DELETE;
4248 }
39236c6e 4249
39236c6e
A
4250 mpts->mpts_flags |= MPTSF_DISCONNECTED;
4251
5ba3f43e 4252 /* The subflow connection has been disconnected. */
39236c6e
A
4253
4254 if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
4255 mpte->mpte_nummpcapflows--;
fe8ab488
A
4256 if (mpte->mpte_active_sub == mpts) {
4257 mpte->mpte_active_sub = NULL;
5ba3f43e 4258 mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
3e170ce0 4259 __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 4260 }
39236c6e 4261 mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
f427ee49
A
4262 } else {
4263 if (so->so_flags & SOF_MP_SEC_SUBFLOW &&
4264 !(mpts->mpts_flags & MPTSF_CONNECTED)) {
4265 mptcp_try_alternate_port(mpte, mpts);
4266 }
39236c6e
A
4267 }
4268
5ba3f43e 4269 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
0a7de745 4270 ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
5ba3f43e 4271 mptcp_drop(mpte, mp_tp, so->so_error);
39236c6e
A
4272 }
4273
39236c6e 4274 /*
5ba3f43e
A
4275 * Clear flags that are used by getconninfo to return state.
 4276 * Retain flags like MPTSF_DELETEOK for internal purposes.
39236c6e 4277 */
0a7de745
A
4278 mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_CONNECT_PENDING |
4279 MPTSF_CONNECTED | MPTSF_DISCONNECTING | MPTSF_PREFERRED |
4280 MPTSF_MP_CAPABLE | MPTSF_MP_READY | MPTSF_MP_DEGRADED | MPTSF_ACTIVE);
5ba3f43e 4281
0a7de745 4282 return MPTS_EVRET_DELETE;
39236c6e
A
4283}
4284
4285/*
4286 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
4287 */
4288static ev_ret_t
3e170ce0 4289mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4290 long *p_mpsofilt_hint, long event)
39236c6e 4291{
5ba3f43e 4292#pragma unused(event, p_mpsofilt_hint)
cb323159 4293 ev_ret_t ret = MPTS_EVRET_OK;
39236c6e
A
4294 struct socket *mp_so, *so;
4295 struct mptcb *mp_tp;
39236c6e 4296
5ba3f43e 4297 mp_so = mptetoso(mpte);
39236c6e 4298 mp_tp = mpte->mpte_mptcb;
39236c6e
A
4299 so = mpts->mpts_socket;
4300
0a7de745 4301 if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
39236c6e 4302 mpts->mpts_flags |= MPTSF_MP_CAPABLE;
0a7de745 4303 } else {
39236c6e 4304 mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
0a7de745 4305 }
39236c6e
A
4306
4307 if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
0a7de745 4308 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
39236c6e 4309 goto done;
0a7de745 4310 }
39236c6e 4311 mpts->mpts_flags |= MPTSF_MP_DEGRADED;
d9a64523 4312 } else {
39236c6e 4313 mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
d9a64523 4314 }
39236c6e 4315
0a7de745 4316 if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
39236c6e 4317 mpts->mpts_flags |= MPTSF_MP_READY;
0a7de745 4318 } else {
39236c6e 4319 mpts->mpts_flags &= ~MPTSF_MP_READY;
0a7de745 4320 }
39236c6e
A
4321
4322 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
4323 mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
4324 mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
4325 }
4326
4327 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
39236c6e 4328 ret = MPTS_EVRET_DISCONNECT_FALLBACK;
d9a64523
A
4329
4330 m_freem_list(mpte->mpte_reinjectq);
4331 mpte->mpte_reinjectq = NULL;
39236c6e
A
4332 } else if (mpts->mpts_flags & MPTSF_MP_READY) {
4333 mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
4334 ret = MPTS_EVRET_CONNECT_PENDING;
4335 }
4336
39236c6e 4337done:
0a7de745 4338 return ret;
39236c6e
A
4339}
4340
4341/*
4342 * Handle SO_FILT_HINT_MUSTRST subflow socket event
4343 */
4344static ev_ret_t
3e170ce0 4345mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4346 long *p_mpsofilt_hint, long event)
39236c6e 4347{
5ba3f43e 4348#pragma unused(event)
39236c6e
A
4349 struct socket *mp_so, *so;
4350 struct mptcb *mp_tp;
5ba3f43e 4351 boolean_t is_fastclose;
39236c6e 4352
5ba3f43e 4353 mp_so = mptetoso(mpte);
39236c6e
A
4354 mp_tp = mpte->mpte_mptcb;
4355 so = mpts->mpts_socket;
4356
39236c6e 4357 /* We got an invalid option or a fast close */
39236c6e
A
4358 struct inpcb *inp = sotoinpcb(so);
4359 struct tcpcb *tp = NULL;
4360
4361 tp = intotcpcb(inp);
fe8ab488 4362 so->so_error = ECONNABORTED;
39236c6e 4363
39037602
A
4364 is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);
4365
cb323159
A
4366 tp->t_mpflags |= TMPF_RESET;
4367
f427ee49
A
4368 if (tp->t_state != TCPS_CLOSED) {
4369 struct tcptemp *t_template = tcp_maketemplate(tp);
39236c6e 4370
f427ee49
A
4371 if (t_template) {
4372 struct tcp_respond_args tra;
39236c6e 4373
f427ee49
A
4374 bzero(&tra, sizeof(tra));
4375 if (inp->inp_flags & INP_BOUND_IF) {
4376 tra.ifscope = inp->inp_boundifp->if_index;
4377 } else {
4378 tra.ifscope = IFSCOPE_NONE;
4379 }
4380 tra.awdl_unrestricted = 1;
4381
4382 tcp_respond(tp, t_template->tt_ipgen,
4383 &t_template->tt_t, (struct mbuf *)NULL,
4384 tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
4385 (void) m_free(dtom(t_template));
4386 }
39236c6e 4387 }
39037602
A
4388
4389 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
cb323159
A
4390 struct mptsub *iter, *tmp;
4391
3e170ce0 4392 *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;
39236c6e 4393
cb323159
A
4394 mp_so->so_error = ECONNRESET;
4395
4396 TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) {
4397 if (iter == mpts) {
4398 continue;
4399 }
4400 mptcp_subflow_abort(iter, ECONNABORTED);
0a7de745 4401 }
39037602
A
4402
4403 /*
4404 * mptcp_drop is being called after processing the events, to fully
4405 * close the MPTCP connection
4406 */
cb323159 4407 mptcp_drop(mpte, mp_tp, mp_so->so_error);
39236c6e 4408 }
39037602 4409
cb323159
A
4410 mptcp_subflow_abort(mpts, ECONNABORTED);
4411
0a7de745 4412 if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) {
3e170ce0 4413 mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
0a7de745 4414 }
39236c6e 4415
0a7de745 4416 return MPTS_EVRET_DELETE;
39236c6e
A
4417}
4418
fe8ab488 4419static ev_ret_t
5ba3f43e 4420mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4421 long *p_mpsofilt_hint, long event)
fe8ab488 4422{
5ba3f43e
A
4423#pragma unused(event)
4424 bool found_active = false;
4425
4426 mpts->mpts_flags |= MPTSF_READ_STALL;
39037602 4427
5ba3f43e
A
4428 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
4429 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
3e170ce0 4430
5ba3f43e 4431 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 4432 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 4433 continue;
0a7de745 4434 }
5ba3f43e
A
4435
4436 if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
4437 found_active = true;
4438 break;
fe8ab488 4439 }
fe8ab488
A
4440 }
4441
0a7de745 4442 if (!found_active) {
5ba3f43e 4443 *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
0a7de745 4444 }
5ba3f43e 4445
0a7de745 4446 return MPTS_EVRET_OK;
fe8ab488
A
4447}
4448
4449static ev_ret_t
5ba3f43e 4450mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
f427ee49 4451 long *p_mpsofilt_hint, long event)
fe8ab488 4452{
5ba3f43e
A
4453#pragma unused(event)
4454 bool found_active = false;
3e170ce0 4455
5ba3f43e 4456 mpts->mpts_flags |= MPTSF_WRITE_STALL;
fe8ab488 4457
5ba3f43e
A
4458 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
4459 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
4460
4461 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 4462 tp->t_state > TCPS_CLOSE_WAIT) {
5ba3f43e 4463 continue;
0a7de745 4464 }
5ba3f43e
A
4465
4466 if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
4467 found_active = true;
4468 break;
4469 }
4470 }
4471
0a7de745 4472 if (!found_active) {
5ba3f43e 4473 *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
0a7de745 4474 }
5ba3f43e 4475
0a7de745 4476 return MPTS_EVRET_OK;
fe8ab488
A
4477}
4478
39236c6e
A
4479/*
4480 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
4481 * caller must ensure that the option can be issued on subflow sockets, via
4482 * MPOF_SUBFLOW_OK flag.
4483 */
4484int
5ba3f43e 4485mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
39236c6e 4486{
5ba3f43e 4487 struct socket *mp_so, *so;
39236c6e 4488 struct sockopt sopt;
39236c6e
A
4489 int error;
4490
4491 VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
5ba3f43e
A
4492
4493 mp_so = mptetoso(mpte);
4494 so = mpts->mpts_socket;
4495
cb323159
A
4496 socket_lock_assert_owned(mp_so);
4497
5ba3f43e
A
4498 if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
4499 mpo->mpo_level == SOL_SOCKET &&
4500 mpo->mpo_name == SO_MARK_CELLFALLBACK) {
d9a64523
A
4501 struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];
4502
4503 mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
cb323159 4504 __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable_for_session(mpte),
0a7de745
A
4505 sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
4506 mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
4507 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
4508
4509 /*
 4510 * When we open a new subflow, mark it as a cell fallback if
 4511 * the subflow goes over cellular.
4512 *
4513 * (except for first-party apps)
4514 */
4515
0a7de745
A
4516 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
4517 return 0;
4518 }
39236c6e 4519
5ba3f43e 4520 if (sotoinpcb(so)->inp_last_outifp &&
0a7de745
A
4521 !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
4522 return 0;
4523 }
5ba3f43e
A
4524
4525 /*
 4526 * This check is an OR: if the app is not binding to the
4527 * interface, then it definitely is not a cell-fallback
4528 * connection.
4529 */
d9a64523 4530 if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
0a7de745
A
4531 !IFNET_IS_CELLULAR(ifp)) {
4532 return 0;
4533 }
5ba3f43e
A
4534 }
4535
4536 mpo->mpo_flags &= ~MPOF_INTERIM;
39236c6e 4537
0a7de745 4538 bzero(&sopt, sizeof(sopt));
39236c6e
A
4539 sopt.sopt_dir = SOPT_SET;
4540 sopt.sopt_level = mpo->mpo_level;
4541 sopt.sopt_name = mpo->mpo_name;
4542 sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
0a7de745 4543 sopt.sopt_valsize = sizeof(int);
39236c6e
A
4544 sopt.sopt_p = kernproc;
4545
5ba3f43e 4546 error = sosetoptlock(so, &sopt, 0);
cb323159
A
4547 if (error) {
4548 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
39236c6e 4549 "val %d set error %d\n", __func__,
cb323159 4550 (unsigned long)VM_KERNEL_ADDRPERM(mpte),
5ba3f43e 4551 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
cb323159 4552 mpo->mpo_intval, error);
39236c6e 4553 }
0a7de745 4554 return error;
39236c6e
A
4555}
4556
4557/*
4558 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
4559 * caller must ensure that the option can be issued on subflow sockets, via
4560 * MPOF_SUBFLOW_OK flag.
4561 */
4562int
4563mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
4564 struct mptopt *mpo)
4565{
4566 struct socket *mp_so;
4567 struct sockopt sopt;
39236c6e
A
4568 int error;
4569
4570 VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
5ba3f43e 4571 mp_so = mptetoso(mpte);
39236c6e 4572
cb323159
A
4573 socket_lock_assert_owned(mp_so);
4574
0a7de745 4575 bzero(&sopt, sizeof(sopt));
39236c6e
A
4576 sopt.sopt_dir = SOPT_GET;
4577 sopt.sopt_level = mpo->mpo_level;
4578 sopt.sopt_name = mpo->mpo_name;
4579 sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
0a7de745 4580 sopt.sopt_valsize = sizeof(int);
39236c6e
A
4581 sopt.sopt_p = kernproc;
4582
0a7de745 4583 error = sogetoptlock(so, &sopt, 0); /* already locked */
cb323159
A
4584 if (error) {
4585 os_log_error(mptcp_log_handle,
4586 "%s - %lx: sopt %s get error %d\n",
4587 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
4588 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error);
39236c6e 4589 }
0a7de745 4590 return error;
39236c6e
A
4591}
4592
4593
4594/*
4595 * MPTCP garbage collector.
4596 *
4597 * This routine is called by the MP domain on-demand, periodic callout,
4598 * which is triggered when a MPTCP socket is closed. The callout will
4599 * repeat as long as this routine returns a non-zero value.
4600 */
4601static uint32_t
4602mptcp_gc(struct mppcbinfo *mppi)
4603{
4604 struct mppcb *mpp, *tmpp;
4605 uint32_t active = 0;
4606
5ba3f43e 4607 LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
39236c6e 4608
39236c6e
A
4609 TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
4610 struct socket *mp_so;
4611 struct mptses *mpte;
4612 struct mptcb *mp_tp;
4613
39236c6e 4614 mp_so = mpp->mpp_socket;
39236c6e 4615 mpte = mptompte(mpp);
39236c6e 4616 mp_tp = mpte->mpte_mptcb;
39236c6e 4617
cb323159 4618 if (!mpp_try_lock(mpp)) {
39236c6e
A
4619 active++;
4620 continue;
4621 }
4622
cb323159
A
4623 VERIFY(mpp->mpp_flags & MPP_ATTACHED);
4624
39236c6e 4625 /* check again under the lock */
5ba3f43e 4626 if (mp_so->so_usecount > 0) {
39236c6e
A
4627 boolean_t wakeup = FALSE;
4628 struct mptsub *mpts, *tmpts;
4629
39236c6e 4630 if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
0a7de745 4631 if (mp_tp->mpt_gc_ticks > 0) {
39236c6e 4632 mp_tp->mpt_gc_ticks--;
0a7de745 4633 }
39236c6e
A
4634 if (mp_tp->mpt_gc_ticks == 0) {
4635 wakeup = TRUE;
39236c6e
A
4636 }
4637 }
39236c6e
A
4638 if (wakeup) {
4639 TAILQ_FOREACH_SAFE(mpts,
4640 &mpte->mpte_subflows, mpts_entry, tmpts) {
5ba3f43e 4641 mptcp_subflow_eupcall1(mpts->mpts_socket,
39236c6e 4642 mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
4643 }
4644 }
cb323159 4645 socket_unlock(mp_so, 0);
39236c6e
A
4646 active++;
4647 continue;
4648 }
4649
4650 if (mpp->mpp_state != MPPCB_STATE_DEAD) {
cb323159 4651 panic("%s - %lx: skipped state "
0a7de745 4652 "[u=%d,r=%d,s=%d]\n", __func__,
cb323159 4653 (unsigned long)VM_KERNEL_ADDRPERM(mpte),
0a7de745
A
4654 mp_so->so_usecount, mp_so->so_retaincnt,
4655 mpp->mpp_state);
39236c6e
A
4656 }
4657
0a7de745 4658 if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) {
5ba3f43e 4659 mptcp_close(mpte, mp_tp);
0a7de745 4660 }
3e170ce0 4661
5ba3f43e 4662 mptcp_session_destroy(mpte);
39236c6e 4663
39037602 4664 DTRACE_MPTCP4(dispose, struct socket *, mp_so,
39236c6e
A
4665 struct sockbuf *, &mp_so->so_rcv,
4666 struct sockbuf *, &mp_so->so_snd,
4667 struct mppcb *, mpp);
4668
4669 mp_pcbdispose(mpp);
39037602 4670 sodealloc(mp_so);
39236c6e
A
4671 }
4672
0a7de745 4673 return active;
39236c6e
A
4674}
4675
4676/*
4677 * Drop a MPTCP connection, reporting the specified error.
4678 */
4679struct mptses *
f427ee49 4680mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, u_short errno)
39236c6e 4681{
cb323159 4682 struct socket *mp_so = mptetoso(mpte);
39236c6e 4683
39236c6e 4684 VERIFY(mpte->mpte_mptcb == mp_tp);
cb323159
A
4685
4686 socket_lock_assert_owned(mp_so);
39236c6e 4687
39037602 4688 DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
39236c6e
A
4689 uint32_t, 0 /* event */);
4690
0a7de745 4691 if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
39236c6e 4692 errno = mp_tp->mpt_softerror;
0a7de745 4693 }
39236c6e
A
4694 mp_so->so_error = errno;
4695
0a7de745 4696 return mptcp_close(mpte, mp_tp);
39236c6e
A
4697}
4698
4699/*
4700 * Close a MPTCP control block.
4701 */
4702struct mptses *
4703mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
4704{
3e170ce0 4705 struct mptsub *mpts = NULL, *tmpts = NULL;
cb323159 4706 struct socket *mp_so = mptetoso(mpte);
39236c6e 4707
cb323159 4708 socket_lock_assert_owned(mp_so);
39236c6e 4709 VERIFY(mpte->mpte_mptcb == mp_tp);
39236c6e 4710
5ba3f43e 4711 mp_tp->mpt_state = MPTCPS_TERMINATE;
39236c6e 4712
5ba3f43e
A
4713 mptcp_freeq(mp_tp);
4714
4715 soisdisconnected(mp_so);
39236c6e
A
4716
4717 /* Clean up all subflows */
4718 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
5ba3f43e 4719 mptcp_subflow_disconnect(mpte, mpts);
39236c6e 4720 }
39236c6e 4721
0a7de745 4722 return NULL;
39236c6e
A
4723}
4724
4725void
4726mptcp_notify_close(struct socket *so)
4727{
4728 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
4729}
4730
4731/*
5ba3f43e 4732 * MPTCP workloop.
39236c6e
A
4733 */
4734void
5ba3f43e 4735mptcp_subflow_workloop(struct mptses *mpte)
39236c6e 4736{
39236c6e 4737 boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
f427ee49 4738 long mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
0a7de745
A
4739 struct mptsub *mpts, *tmpts;
4740 struct socket *mp_so;
39236c6e 4741
cb323159
A
4742 mp_so = mptetoso(mpte);
4743
4744 socket_lock_assert_owned(mp_so);
0a7de745
A
4745
4746 if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
4747 mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
4748 return;
4749 }
4750 mpte->mpte_flags |= MPTE_IN_WORKLOOP;
4751
0a7de745 4752relaunch:
0a7de745 4753 mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;
39236c6e
A
4754
4755 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
4756 ev_ret_t ret;
4757
5ba3f43e
A
4758 if (mpts->mpts_socket->so_usecount == 0) {
4759 /* Will be removed soon by tcp_garbage_collect */
4760 continue;
4761 }
3e170ce0 4762
5ba3f43e
A
4763 mptcp_subflow_addref(mpts);
4764 mpts->mpts_socket->so_usecount++;
3e170ce0
A
4765
4766 ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);
39236c6e 4767
39236c6e
A
4768 /*
4769 * If MPTCP socket is closed, disconnect all subflows.
4770 * This will generate a disconnect event which will
4771 * be handled during the next iteration, causing a
4772 * non-zero error to be returned above.
4773 */
0a7de745 4774 if (mp_so->so_flags & SOF_PCBCLEARING) {
5ba3f43e 4775 mptcp_subflow_disconnect(mpte, mpts);
0a7de745 4776 }
39236c6e
A
4777
4778 switch (ret) {
39236c6e
A
4779 case MPTS_EVRET_OK:
4780 /* nothing to do */
4781 break;
4782 case MPTS_EVRET_DELETE:
5ba3f43e 4783 mptcp_subflow_soclose(mpts);
39236c6e
A
4784 break;
4785 case MPTS_EVRET_CONNECT_PENDING:
4786 connect_pending = TRUE;
4787 break;
4788 case MPTS_EVRET_DISCONNECT_FALLBACK:
4789 disconnect_fallback = TRUE;
4790 break;
3e170ce0
A
4791 default:
4792 mptcplog((LOG_DEBUG,
4793 "MPTCP Socket: %s: mptcp_subflow_events "
0a7de745 4794 "returned invalid value: %d\n", __func__,
3e170ce0
A
4795 ret),
4796 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
4797 break;
39236c6e 4798 }
0a7de745 4799 mptcp_subflow_remref(mpts); /* ours */
5ba3f43e
A
4800
4801 VERIFY(mpts->mpts_socket->so_usecount != 0);
4802 mpts->mpts_socket->so_usecount--;
39236c6e
A
4803 }
4804
5ba3f43e 4805 if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
5ba3f43e
A
4806 VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);
4807
cb323159
A
4808 if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
4809 mp_so->so_state |= SS_CANTRCVMORE;
4810 sorwakeup(mp_so);
4811 }
4812
3e170ce0 4813 soevent(mp_so, mpsofilt_hint_mask);
39236c6e
A
4814 }
4815
0a7de745
A
4816 if (!connect_pending && !disconnect_fallback) {
4817 goto exit;
4818 }
39236c6e
A
4819
4820 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
39236c6e
A
4821 if (disconnect_fallback) {
4822 struct socket *so = NULL;
4823 struct inpcb *inp = NULL;
4824 struct tcpcb *tp = NULL;
4825
0a7de745 4826 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
39236c6e 4827 continue;
0a7de745 4828 }
39236c6e
A
4829
4830 mpts->mpts_flags |= MPTSF_MP_DEGRADED;
4831
0a7de745 4832 if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
f427ee49 4833 MPTSF_DISCONNECTED)) {
39236c6e 4834 continue;
0a7de745 4835 }
490019cf 4836
39236c6e
A
4837 so = mpts->mpts_socket;
4838
4839 /*
4840 * The MPTCP connection has degraded to a fallback
4841 * mode, so there is no point in keeping this subflow
4842 * regardless of its MPTCP-readiness state, unless it
4843 * is the primary one which we use for fallback. This
4844 * assumes that the subflow used for fallback is the
4845 * ACTIVE one.
4846 */
4847
39236c6e
A
4848 inp = sotoinpcb(so);
4849 tp = intotcpcb(inp);
4850 tp->t_mpflags &=
0a7de745 4851 ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
39236c6e 4852 tp->t_mpflags |= TMPF_TCP_FALLBACK;
490019cf 4853
5ba3f43e 4854 soevent(so, SO_FILT_HINT_MUSTRST);
39236c6e
A
4855 } else if (connect_pending) {
4856 /*
4857 * The MPTCP connection has progressed to a state
4858 * where it supports full multipath semantics; allow
4859 * additional joins to be attempted for all subflows
4860 * that are in the PENDING state.
4861 */
4862 if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
5ba3f43e 4863 int error = mptcp_subflow_soconnectx(mpte, mpts);
39236c6e 4864
0a7de745 4865 if (error) {
5ba3f43e 4866 mptcp_subflow_abort(mpts, error);
0a7de745 4867 }
5ba3f43e 4868 }
39236c6e 4869 }
39236c6e 4870 }
0a7de745
A
4871
4872exit:
4873 if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
4874 goto relaunch;
4875 }
4876
4877 mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
39236c6e
A
4878}
4879
39236c6e
A
4880/*
4881 * Protocol pr_lock callback.
4882 */
4883int
4884mptcp_lock(struct socket *mp_so, int refcount, void *lr)
4885{
5ba3f43e 4886 struct mppcb *mpp = mpsotomppcb(mp_so);
39236c6e
A
4887 void *lr_saved;
4888
0a7de745 4889 if (lr == NULL) {
39236c6e 4890 lr_saved = __builtin_return_address(0);
0a7de745 4891 } else {
39236c6e 4892 lr_saved = lr;
0a7de745 4893 }
39236c6e
A
4894
4895 if (mpp == NULL) {
4896 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
4897 mp_so, lr_saved, solockhistory_nr(mp_so));
4898 /* NOTREACHED */
4899 }
5ba3f43e 4900 mpp_lock(mpp);
39236c6e
A
4901
4902 if (mp_so->so_usecount < 0) {
4903 panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
4904 mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
4905 solockhistory_nr(mp_so));
4906 /* NOTREACHED */
4907 }
0a7de745 4908 if (refcount != 0) {
39236c6e 4909 mp_so->so_usecount++;
cb323159 4910 mpp->mpp_inside++;
0a7de745 4911 }
39236c6e
A
4912 mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
4913 mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;
4914
0a7de745 4915 return 0;
39236c6e
A
4916}
4917
4918/*
4919 * Protocol pr_unlock callback.
4920 */
4921int
5ba3f43e 4922mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
39236c6e 4923{
5ba3f43e
A
4924 struct mppcb *mpp = mpsotomppcb(mp_so);
4925 void *lr_saved;
39236c6e 4926
0a7de745 4927 if (lr == NULL) {
5ba3f43e 4928 lr_saved = __builtin_return_address(0);
0a7de745 4929 } else {
5ba3f43e 4930 lr_saved = lr;
0a7de745 4931 }
39236c6e 4932
5ba3f43e
A
4933 if (mpp == NULL) {
4934 panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
4935 mp_so, mp_so->so_usecount, lr_saved,
4936 solockhistory_nr(mp_so));
4937 /* NOTREACHED */
4938 }
cb323159 4939 socket_lock_assert_owned(mp_so);
39236c6e 4940
0a7de745 4941 if (refcount != 0) {
5ba3f43e 4942 mp_so->so_usecount--;
cb323159 4943 mpp->mpp_inside--;
0a7de745 4944 }
39236c6e 4945
5ba3f43e
A
4946 if (mp_so->so_usecount < 0) {
4947 panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
4948 mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
4949 /* NOTREACHED */
39236c6e 4950 }
cb323159
A
4951 if (mpp->mpp_inside < 0) {
4952 panic("%s: mpp=%p inside=%x lrh= %s\n", __func__,
4953 mpp, mpp->mpp_inside, solockhistory_nr(mp_so));
4954 /* NOTREACHED */
4955 }
5ba3f43e
A
4956 mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
4957 mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
4958 mpp_unlock(mpp);
4959
0a7de745 4960 return 0;
39236c6e
A
4961}
4962
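
mptcp_lock() and mptcp_unlock() above stash the caller's return address in the socket's lock_lr/unlock_lr arrays, a small ring buffer of SO_LCKDBG_MAX entries, so a later panic can show the most recent lock and unlock call sites. The bookkeeping reduces to the sketch below (editor's illustration; LCKDBG_MAX and the helper names are stand-ins, while __builtin_return_address() is the same GCC/Clang builtin the kernel uses for lr_saved):

#include <stdio.h>

#define LCKDBG_MAX 4   /* stand-in for SO_LCKDBG_MAX */

struct lock_history {
	void *lock_lr[LCKDBG_MAX];   /* last callers that took the lock */
	unsigned next_lock_lr;       /* ring-buffer cursor */
};

/* Remember who locked us; __builtin_return_address(0) is the caller's PC.
 * noinline keeps "the caller" meaningful even at high optimization levels. */
static void __attribute__((noinline))
note_lock(struct lock_history *h)
{
	h->lock_lr[h->next_lock_lr] = __builtin_return_address(0);
	h->next_lock_lr = (h->next_lock_lr + 1) % LCKDBG_MAX;
}

static void
dump_history(const struct lock_history *h)
{
	for (unsigned i = 0; i < LCKDBG_MAX; i++)
		printf("lock_lr[%u] = %p\n", i, h->lock_lr[i]);
}

int
main(void)
{
	struct lock_history h = { { 0 }, 0 };

	note_lock(&h);
	note_lock(&h);
	dump_history(&h);
	return 0;
}

Because the cursor wraps, the arrays always hold the last LCKDBG_MAX call sites, which is exactly what solockhistory_nr() prints from in the panic paths above.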
5ba3f43e
A
4963/*
4964 * Protocol pr_getlock callback.
4965 */
4966lck_mtx_t *
4967mptcp_getlock(struct socket *mp_so, int flags)
39236c6e 4968{
5ba3f43e
A
4969 struct mppcb *mpp = mpsotomppcb(mp_so);
4970
4971 if (mpp == NULL) {
4972 panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
4973 solockhistory_nr(mp_so));
39236c6e
A
4974 /* NOTREACHED */
4975 }
5ba3f43e
A
4976 if (mp_so->so_usecount < 0) {
4977 panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
4978 mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
4979 /* NOTREACHED */
39236c6e 4980 }
0a7de745 4981 return mpp_getlock(mpp, flags);
39236c6e
A
4982}
4983
4984/*
4985 * MPTCP Join support
4986 */
4987
4988static void
cb323159 4989mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id)
39236c6e
A
4990{
4991 struct tcpcb *tp = sototcpcb(so);
4992 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 4993
39236c6e 4994 /*
39236c6e
A
4995 * The address ID of the first flow is implicitly 0.
4996 */
4997 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
4998 tp->t_local_aid = 0;
4999 } else {
fe8ab488 5000 tp->t_local_aid = addr_id;
39236c6e
A
5001 tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
5002 so->so_flags |= SOF_MP_SEC_SUBFLOW;
5003 }
5004 sauth_entry = zalloc(mpt_subauth_zone);
5005 sauth_entry->msae_laddr_id = tp->t_local_aid;
5006 sauth_entry->msae_raddr_id = 0;
5007 sauth_entry->msae_raddr_rand = 0;
5008try_again:
5009 sauth_entry->msae_laddr_rand = RandomULong();
0a7de745 5010 if (sauth_entry->msae_laddr_rand == 0) {
39236c6e 5011 goto try_again;
0a7de745 5012 }
39236c6e
A
5013 LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
5014}
5015
5016static void
5017mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
5018{
5019 struct mptcp_subf_auth_entry *sauth_entry;
fe8ab488 5020 struct tcpcb *tp = NULL;
39236c6e
A
5021 int found = 0;
5022
fe8ab488 5023 tp = sototcpcb(so);
0a7de745 5024 if (tp == NULL) {
39236c6e 5025 return;
0a7de745 5026 }
39236c6e 5027
39236c6e
A
5028 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
5029 if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
5030 found = 1;
5031 break;
5032 }
5033 }
5034 if (found) {
5035 LIST_REMOVE(sauth_entry, msae_next);
39236c6e 5036 }
fe8ab488 5037
0a7de745 5038 if (found) {
3e170ce0 5039 zfree(mpt_subauth_zone, sauth_entry);
0a7de745 5040 }
39236c6e
A
5041}
5042
5043void
5044mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
5045 u_int32_t *rrand)
5046{
5047 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 5048
39236c6e
A
5049 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
5050 if (sauth_entry->msae_laddr_id == addr_id) {
0a7de745 5051 if (lrand) {
39236c6e 5052 *lrand = sauth_entry->msae_laddr_rand;
0a7de745
A
5053 }
5054 if (rrand) {
39236c6e 5055 *rrand = sauth_entry->msae_raddr_rand;
0a7de745 5056 }
39236c6e
A
5057 break;
5058 }
5059 }
39236c6e
A
5060}
5061
5062void
5063mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
5064 mptcp_addr_id raddr_id, u_int32_t raddr_rand)
5065{
5066 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 5067
39236c6e
A
5068 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
5069 if (sauth_entry->msae_laddr_id == laddr_id) {
5070 if ((sauth_entry->msae_raddr_id != 0) &&
5071 (sauth_entry->msae_raddr_id != raddr_id)) {
cb323159
A
5072 os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
5073 " address ids %d %d \n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5074 raddr_id, sauth_entry->msae_raddr_id);
39236c6e
A
5075 return;
5076 }
5077 sauth_entry->msae_raddr_id = raddr_id;
5078 if ((sauth_entry->msae_raddr_rand != 0) &&
5079 (sauth_entry->msae_raddr_rand != raddr_rand)) {
cb323159
A
5080 os_log_error(mptcp_log_handle, "%s - %lx: "
5081 "dup SYN_ACK %d %d \n",
5082 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5083 raddr_rand, sauth_entry->msae_raddr_rand);
39236c6e
A
5084 return;
5085 }
5086 sauth_entry->msae_raddr_rand = raddr_rand;
39236c6e
A
5087 return;
5088 }
5089 }
39236c6e
A
5090}
5091
5092/*
5093 * SHA1 support for MPTCP
5094 */
5ba3f43e
A
5095static void
5096mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
39236c6e
A
5097{
5098 SHA1_CTX sha1ctxt;
5099 const unsigned char *sha1_base;
5100 int sha1_size;
5101
39236c6e 5102 sha1_base = (const unsigned char *) key;
0a7de745 5103 sha1_size = sizeof(mptcp_key_t);
39236c6e
A
5104 SHA1Init(&sha1ctxt);
5105 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
5106 SHA1Final(sha_digest, &sha1ctxt);
39236c6e
A
5107}
5108
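/*
 * A rough sketch of what mptcp_hmac_sha1() below computes: it is HMAC-SHA1
 * (RFC 2104) over a 512-bit block, with the two 64-bit MPTCP keys
 * concatenated as the HMAC key (zero-padded to the block size by the
 * zero-initialized key_ipad/key_opad arrays) and the two 32-bit randoms as
 * the message:
 *
 *	digest = SHA1((K ^ opad) || SHA1((K ^ ipad) || R1 || R2)),  K = key1 || key2
 *
 * This is the MP_JOIN HMAC described in RFC 6824.
 */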
5109void
5110mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
0a7de745 5111 u_int32_t rand1, u_int32_t rand2, u_char *digest)
39236c6e
A
5112{
5113 SHA1_CTX sha1ctxt;
5114 mptcp_key_t key_ipad[8] = {0}; /* key XOR'd with inner pad */
5115 mptcp_key_t key_opad[8] = {0}; /* key XOR'd with outer pad */
5116 u_int32_t data[2];
5117 int i;
5118
5ba3f43e 5119 bzero(digest, SHA1_RESULTLEN);
39236c6e
A
5120
5121 /* Set up the Key for HMAC */
5122 key_ipad[0] = key1;
5123 key_ipad[1] = key2;
5124
5125 key_opad[0] = key1;
5126 key_opad[1] = key2;
5127
5128 /* Set up the message for HMAC */
5129 data[0] = rand1;
5130 data[1] = rand2;
5131
 5132	/* Key is shorter than the 512-bit block length, so no need to hash it first */
5133
5134 /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
5135
5136 for (i = 0; i < 8; i++) {
5137 key_ipad[i] ^= 0x3636363636363636;
5138 key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
5139 }
5140
5141 /* Perform inner SHA1 */
5142 SHA1Init(&sha1ctxt);
0a7de745
A
5143 SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
5144 SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
39236c6e
A
5145 SHA1Final(digest, &sha1ctxt);
5146
5147 /* Perform outer SHA1 */
5148 SHA1Init(&sha1ctxt);
0a7de745 5149 SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
39236c6e
A
5150 SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
5151 SHA1Final(digest, &sha1ctxt);
5152}
5153
5154/*
5155 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
5156 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
5157 */
5158void
5ba3f43e 5159mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
39236c6e
A
5160{
5161 uint32_t lrand, rrand;
39236c6e 5162
39236c6e
A
5163 lrand = rrand = 0;
5164 mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
5ba3f43e
A
5165 mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
5166 digest);
39236c6e
A
5167}
5168
5169/*
5170 * Authentication data generation
5171 */
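/*
 * Per RFC 6824, the connection token is the most significant 32 bits of
 * SHA1(key) and the initial data sequence number is the least significant
 * 64 bits.  The two helpers below carve those pieces out of a digest that
 * the caller has already computed with mptcp_do_sha1().
 */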
5ba3f43e 5172static void
39236c6e
A
5173mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
5174 int token_len)
5175{
0a7de745 5176 VERIFY(token_len == sizeof(u_int32_t));
39236c6e
A
5177 VERIFY(sha_digest_len == SHA1_RESULTLEN);
5178
5179 /* Most significant 32 bits of the SHA1 hash */
0a7de745 5180 bcopy(sha_digest, token, sizeof(u_int32_t));
490019cf 5181 return;
39236c6e
A
5182}
5183
5ba3f43e 5184static void
39236c6e
A
5185mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
5186 int idsn_len)
5187{
0a7de745 5188 VERIFY(idsn_len == sizeof(u_int64_t));
39236c6e
A
5189 VERIFY(sha_digest_len == SHA1_RESULTLEN);
5190
5191 /*
5192 * Least significant 64 bits of the SHA1 hash
5193 */
5194
5195 idsn[7] = sha_digest[12];
5196 idsn[6] = sha_digest[13];
5197 idsn[5] = sha_digest[14];
5198 idsn[4] = sha_digest[15];
5199 idsn[3] = sha_digest[16];
5200 idsn[2] = sha_digest[17];
5201 idsn[1] = sha_digest[18];
5202 idsn[0] = sha_digest[19];
490019cf 5203 return;
39236c6e
A
5204}
5205
490019cf
A
5206static void
5207mptcp_conn_properties(struct mptcb *mp_tp)
5208{
5209 /* There is only Version 0 at this time */
5210 mp_tp->mpt_version = MPTCP_STD_VERSION_0;
5211
5212 /* Set DSS checksum flag */
0a7de745 5213 if (mptcp_dss_csum) {
490019cf 5214 mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
0a7de745 5215 }
490019cf
A
5216
5217 /* Set up receive window */
5218 mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
5219
5220 /* Set up gc ticks */
5221 mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
5222}
5223
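/*
 * Local-side handshake state: draw a random 64-bit local key, hash it, and
 * derive the local token and IDSN from the digest.  snduna/sndnxt/sndmax
 * start at IDSN + 1 because the subflow SYN consumes the first MPTCP-level
 * sequence number.
 */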
5224static void
5ba3f43e 5225mptcp_init_local_parms(struct mptses *mpte)
39236c6e 5226{
5ba3f43e
A
5227 struct mptcb *mp_tp = mpte->mpte_mptcb;
5228 char key_digest[SHA1_RESULTLEN];
490019cf 5229
5ba3f43e
A
5230 read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
5231 mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);
5232
5233 mptcp_generate_token(key_digest, SHA1_RESULTLEN,
0a7de745 5234 (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
5ba3f43e 5235 mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
0a7de745 5236 (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));
490019cf
A
5237
 5238	/* The subflow SYN is also the first MPTCP byte */
5239 mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
5240 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
5241
5242 mptcp_conn_properties(mp_tp);
5243}
5244
5245int
5246mptcp_init_remote_parms(struct mptcb *mp_tp)
5247{
5ba3f43e 5248 char remote_digest[SHA1_RESULTLEN];
39236c6e
A
5249
5250 /* Only Version 0 is supported for auth purposes */
0a7de745
A
5251 if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
5252 return -1;
5253 }
39236c6e
A
5254
5255 /* Setup local and remote tokens and Initial DSNs */
5ba3f43e 5256 mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
39236c6e 5257 mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
0a7de745 5258 (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
39236c6e 5259 mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
0a7de745 5260 (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
5ba3f43e 5261 mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
cb323159 5262 mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;
39236c6e 5263
0a7de745 5264 return 0;
39236c6e
A
5265}
5266
5ba3f43e 5267static void
39236c6e
A
5268mptcp_send_dfin(struct socket *so)
5269{
5270 struct tcpcb *tp = NULL;
5271 struct inpcb *inp = NULL;
5272
5273 inp = sotoinpcb(so);
0a7de745 5274 if (!inp) {
39236c6e 5275 return;
0a7de745 5276 }
39236c6e
A
5277
5278 tp = intotcpcb(inp);
0a7de745 5279 if (!tp) {
39236c6e 5280 return;
0a7de745 5281 }
39236c6e 5282
0a7de745 5283 if (!(tp->t_mpflags & TMPF_RESET)) {
39236c6e 5284 tp->t_mpflags |= TMPF_SEND_DFIN;
0a7de745 5285 }
39236c6e
A
5286}
5287
5288/*
5289 * Data Sequence Mapping routines
5290 */
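/*
 * mptcp_insert_dsn() stamps every mbuf of an MPTCP-level write with its data
 * sequence mapping: the DSN is taken from mpt_sndmax and advanced by the
 * packet length, so the mapping mirrors the order in which data was queued
 * on the MPTCP socket.
 */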
5291void
5292mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
5293{
5294 struct mptcb *mp_tp;
5295
0a7de745 5296 if (m == NULL) {
39236c6e 5297 return;
0a7de745 5298 }
39236c6e 5299
3e170ce0 5300 __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
5ba3f43e 5301
39236c6e
A
5302 while (m) {
5303 VERIFY(m->m_flags & M_PKTHDR);
5304 m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
5305 m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
f427ee49
A
5306 VERIFY(m_pktlen(m) >= 0 && m_pktlen(m) < UINT16_MAX);
5307 m->m_pkthdr.mp_rlen = (uint16_t)m_pktlen(m);
39236c6e
A
5308 mp_tp->mpt_sndmax += m_pktlen(m);
5309 m = m->m_next;
5310 }
5ba3f43e
A
5311}
5312
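/*
 * After a fallback to regular TCP there are no DATA_ACKs anymore, so infer
 * the MPTCP-level acknowledgement from the subflow-level one: walk the DSN
 * mappings of the mbufs being dropped from the subflow send buffer and
 * derive the highest data sequence number known to be acked.
 */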
5313void
5314mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
5315{
5316 struct mptcb *mp_tp = tptomptp(sototcpcb(so));
5317 uint64_t data_ack;
5318 uint64_t dsn;
5319
f427ee49
A
5320 VERIFY(len >= 0);
5321
0a7de745 5322 if (!m || len == 0) {
5ba3f43e 5323 return;
0a7de745 5324 }
5ba3f43e
A
5325
5326 while (m && len > 0) {
5327 VERIFY(m->m_flags & M_PKTHDR);
5328 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
5329
5330 data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
5331 dsn = m->m_pkthdr.mp_dsn;
5332
5333 len -= m->m_len;
5334 m = m->m_next;
5335 }
5336
5337 if (m && len == 0) {
5338 /*
5339 * If there is one more mbuf in the chain, it automatically means
5340 * that up to m->mp_dsn has been ack'ed.
5341 *
 5342		 * This means we actually correct data_ack back down (compared
 5343		 * to what we set inside the loop - dsn + data_len), because in
 5344		 * the loop we are "optimistic" and assume that the full mapping
 5345		 * will be acked. If that's not the case and we get out of the
 5346		 * loop with m != NULL, it means only up to m->mp_dsn has
 5347		 * really been acked.
5348 */
5349 data_ack = m->m_pkthdr.mp_dsn;
5350 }
5351
5352 if (len < 0) {
5353 /*
5354 * If len is negative, meaning we acked in the middle of an mbuf,
5355 * only up to this mbuf's data-sequence number has been acked
5356 * at the MPTCP-level.
5357 */
5358 data_ack = dsn;
5359 }
5360
5361 mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
0a7de745 5362 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
cb323159
A
5363
5364 /* We can have data in the subflow's send-queue that is being acked,
5365 * while the DATA_ACK has already advanced. Thus, we should check whether
5366 * or not the DATA_ACK is actually new here.
5367 */
5368 if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
5369 MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
5370 mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
5371 }
39236c6e
A
5372}
5373
5374void
490019cf 5375mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
39236c6e 5376{
490019cf
A
5377 int rewinding = 0;
5378
5ba3f43e
A
5379 /* TFO makes things complicated. */
5380 if (so->so_flags1 & SOF1_TFO_REWIND) {
5381 rewinding = 1;
5382 so->so_flags1 &= ~SOF1_TFO_REWIND;
490019cf 5383 }
39236c6e 5384
5ba3f43e
A
5385 while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
5386 u_int32_t sub_len;
39236c6e 5387 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e 5388 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
39236c6e 5389
5ba3f43e 5390 sub_len = m->m_pkthdr.mp_rlen;
39236c6e 5391
5ba3f43e
A
5392 if (sub_len < len) {
5393 m->m_pkthdr.mp_dsn += sub_len;
5394 if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
5395 m->m_pkthdr.mp_rseq += sub_len;
39236c6e 5396 }
5ba3f43e
A
5397 m->m_pkthdr.mp_rlen = 0;
5398 len -= sub_len;
39236c6e 5399 } else {
5ba3f43e 5400 /* sub_len >= len */
0a7de745 5401 if (rewinding == 0) {
5ba3f43e 5402 m->m_pkthdr.mp_dsn += len;
0a7de745 5403 }
5ba3f43e 5404 if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
0a7de745 5405 if (rewinding == 0) {
5ba3f43e 5406 m->m_pkthdr.mp_rseq += len;
0a7de745 5407 }
5ba3f43e
A
5408 }
5409 mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
5410 __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
5411 m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
5412 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5413 m->m_pkthdr.mp_rlen -= len;
5414 break;
39236c6e
A
5415 }
5416 m = m->m_next;
5417 }
39037602
A
5418
5419 if (so->so_flags & SOF_MP_SUBFLOW &&
5420 !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
5421 !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
5422 /*
5423 * Received an ack without receiving a DATA_ACK.
5424 * Need to fallback to regular TCP (or destroy this subflow).
5425 */
5ba3f43e 5426 sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
39037602
A
5427 mptcp_notify_mpfail(so);
5428 }
39236c6e
A
5429}
5430
5431/* Obtain the DSN mapping stored in the mbuf */
5432void
5ba3f43e
A
5433mptcp_output_getm_dsnmap32(struct socket *so, int off,
5434 uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
39236c6e
A
5435{
5436 u_int64_t dsn64;
5437
5ba3f43e 5438 mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
39236c6e 5439 *dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
39236c6e
A
5440}
5441
5442void
5ba3f43e 5443mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
0a7de745
A
5444 uint32_t *relseq, uint16_t *data_len,
5445 uint16_t *dss_csum)
39236c6e
A
5446{
5447 struct mbuf *m = so->so_snd.sb_mb;
5ba3f43e 5448 int off_orig = off;
39236c6e 5449
5ba3f43e 5450 VERIFY(off >= 0);
39236c6e 5451
4ba76501
A
5452 if (m == NULL && (so->so_flags & SOF_DEFUNCT)) {
5453 *dsn = 0;
5454 *relseq = 0;
5455 *data_len = 0;
5456 *dss_csum = 0;
5457 return;
5458 }
5459
39236c6e
A
5460 /*
5461 * In the subflow socket, the DSN sequencing can be discontiguous,
5462 * but the subflow sequence mapping is contiguous. Use the subflow
5463 * sequence property to find the right mbuf and corresponding dsn
5464 * mapping.
5465 */
5466
5467 while (m) {
39236c6e 5468 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e 5469 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
39236c6e 5470
5ba3f43e
A
5471 if (off >= m->m_len) {
5472 off -= m->m_len;
39236c6e
A
5473 m = m->m_next;
5474 } else {
5475 break;
5476 }
5477 }
5478
5ba3f43e
A
5479 VERIFY(off >= 0);
5480 VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);
39236c6e 5481
5ba3f43e
A
5482 *dsn = m->m_pkthdr.mp_dsn;
5483 *relseq = m->m_pkthdr.mp_rseq;
5484 *data_len = m->m_pkthdr.mp_rlen;
5485 *dss_csum = m->m_pkthdr.mp_csum;
39236c6e 5486
5ba3f43e 5487 mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
0a7de745
A
5488 __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
5489 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e
A
5490}
5491
5492/*
3e170ce0
A
 5493 * Note that this is called only from tcp_input() via mptcp_input_preproc().
 5494 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
 5495 * When it trims data, tcp_input() calls m_adj(), which does not remove the
 5496 * m_pkthdr even if m_len becomes 0 as a result of trimming the mbuf.
 5497 * The dsn map insertion cannot be delayed until after the trim, because data
 5498 * can sit in the reassembly queue for a while and the DSN option info in tp
 5499 * will be overwritten for every new packet received.
39236c6e
A
 5500 * The dsn map will be adjusted just prior to appending to the subflow sockbuf
 5501 * with mptcp_adj_rmap().
5502 */
5503void
5c9f4661 5504mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
39236c6e 5505{
5c9f4661 5506 VERIFY(m->m_flags & M_PKTHDR);
39236c6e
A
5507 VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));
5508
5509 if (tp->t_mpflags & TMPF_EMBED_DSN) {
39236c6e
A
5510 m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
5511 m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
5512 m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
5ba3f43e 5513 m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
0a7de745 5514 if (tp->t_rcv_map.mpt_dfin) {
5c9f4661 5515 m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
0a7de745 5516 }
5c9f4661 5517
39236c6e 5518 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
5c9f4661 5519
39236c6e
A
5520 tp->t_mpflags &= ~TMPF_EMBED_DSN;
5521 tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
5c9f4661 5522 } else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
0a7de745 5523 if (th->th_flags & TH_FIN) {
5c9f4661 5524 m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
0a7de745 5525 }
39236c6e
A
5526 }
5527}
5528
39236c6e
A
5529/*
5530 * Following routines help with failure detection and failover of data
5531 * transfer from one subflow to another.
5532 */
5533void
5534mptcp_act_on_txfail(struct socket *so)
5535{
5536 struct tcpcb *tp = NULL;
5537 struct inpcb *inp = sotoinpcb(so);
5538
0a7de745 5539 if (inp == NULL) {
39236c6e 5540 return;
0a7de745 5541 }
39236c6e
A
5542
5543 tp = intotcpcb(inp);
0a7de745 5544 if (tp == NULL) {
39236c6e 5545 return;
0a7de745 5546 }
39236c6e 5547
0a7de745 5548 if (so->so_flags & SOF_MP_TRYFAILOVER) {
39236c6e 5549 return;
0a7de745 5550 }
39236c6e
A
5551
5552 so->so_flags |= SOF_MP_TRYFAILOVER;
5553 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
5554}
5555
5556/*
5557 * Support for MP_FAIL option
5558 */
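/*
 * Given the data sequence number reported by an MP_FAIL, walk the subflow's
 * send-buffer mappings and translate it back into the corresponding
 * subflow-relative TCP sequence number.
 */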
5559int
f427ee49 5560mptcp_get_map_for_dsn(struct socket *so, uint64_t dsn_fail, uint32_t *tcp_seq)
39236c6e
A
5561{
5562 struct mbuf *m = so->so_snd.sb_mb;
f427ee49
A
5563 uint16_t datalen;
5564 uint64_t dsn;
39236c6e 5565 int off = 0;
39236c6e 5566
0a7de745
A
5567 if (m == NULL) {
5568 return -1;
5569 }
39236c6e
A
5570
5571 while (m != NULL) {
5572 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
5573 VERIFY(m->m_flags & M_PKTHDR);
5574 dsn = m->m_pkthdr.mp_dsn;
5575 datalen = m->m_pkthdr.mp_rlen;
5576 if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
5577 (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
f427ee49 5578 off = (int)(dsn_fail - dsn);
39236c6e 5579 *tcp_seq = m->m_pkthdr.mp_rseq + off;
0a7de745 5580 return 0;
39236c6e
A
5581 }
5582
5583 m = m->m_next;
5584 }
5585
5586 /*
5587 * If there was no mbuf data and a fallback to TCP occurred, there's
5588 * not much else to do.
5589 */
5590
cb323159 5591 os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
0a7de745 5592 return -1;
5ba3f43e
A
5593}
5594
5595/*
5596 * Support for sending contiguous MPTCP bytes in subflow
5597 * Also for preventing sending data with ACK in 3-way handshake
5598 */
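/*
 * The remaining length of the current mapping is computed as
 *
 *	mdss_data_len - (off - mdss_subflow_off)
 *
 * where mdss_subflow_off is the offset of the mapping's start within the
 * send buffer.  For example, with a 1000-byte mapping that starts 200 bytes
 * into the send buffer and off = 500, 1000 - (500 - 200) = 700 bytes of the
 * mapping remain to be sent.
 */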
5599int32_t
5600mptcp_adj_sendlen(struct socket *so, int32_t off)
5601{
5602 struct tcpcb *tp = sototcpcb(so);
5603 struct mptsub *mpts = tp->t_mpsub;
5604 uint64_t mdss_dsn;
5605 uint32_t mdss_subflow_seq;
5606 int mdss_subflow_off;
5607 uint16_t mdss_data_len;
5608 uint16_t dss_csum;
5609
4ba76501
A
5610 if (so->so_snd.sb_mb == NULL && (so->so_flags & SOF_DEFUNCT)) {
5611 return 0;
5612 }
5613
5ba3f43e 5614 mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
0a7de745 5615 &mdss_data_len, &dss_csum);
5ba3f43e
A
5616
5617 /*
5618 * We need to compute how much of the mapping still remains.
5619 * So, we compute the offset in the send-buffer of the dss-sub-seq.
5620 */
5621 mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;
5622
5623 /*
 5624	 * When TFO is used, we are sending at mpts->mpts_iss although the relative
 5625	 * seq has been set to 1 (while it should be 0).
5626 */
0a7de745 5627 if (tp->t_mpflags & TMPF_TFO_REQUEST) {
5ba3f43e 5628 mdss_subflow_off--;
0a7de745 5629 }
5ba3f43e 5630
5ba3f43e
A
5631 VERIFY(off >= mdss_subflow_off);
5632
0a7de745 5633 return mdss_data_len - (off - mdss_subflow_off);
5ba3f43e
A
5634}
5635
5636static uint32_t
5637mptcp_get_maxseg(struct mptses *mpte)
5638{
5639 struct mptsub *mpts;
5640 uint32_t maxseg = 0;
5641
5642 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5643 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5644
5645 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 5646 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 5647 continue;
0a7de745 5648 }
5ba3f43e 5649
0a7de745 5650 if (tp->t_maxseg > maxseg) {
5ba3f43e 5651 maxseg = tp->t_maxseg;
0a7de745 5652 }
5ba3f43e
A
5653 }
5654
0a7de745 5655 return maxseg;
5ba3f43e
A
5656}
5657
5658static uint8_t
5659mptcp_get_rcvscale(struct mptses *mpte)
5660{
5661 struct mptsub *mpts;
5662 uint8_t rcvscale = UINT8_MAX;
5663
5664 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5665 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5666
5667 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 5668 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 5669 continue;
0a7de745 5670 }
5ba3f43e 5671
0a7de745 5672 if (tp->rcv_scale < rcvscale) {
5ba3f43e 5673 rcvscale = tp->rcv_scale;
0a7de745 5674 }
5ba3f43e
A
5675 }
5676
0a7de745 5677 return rcvscale;
5ba3f43e
A
5678}
5679
5680/* Similar to tcp_sbrcv_reserve */
5681static void
5682mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
0a7de745 5683 u_int32_t newsize, u_int32_t idealsize)
5ba3f43e
A
5684{
5685 uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);
5686
5687 /* newsize should not exceed max */
5688 newsize = min(newsize, tcp_autorcvbuf_max);
5689
5690 /* The receive window scale negotiated at the
5691 * beginning of the connection will also set a
5692 * limit on the socket buffer size
5693 */
5694 newsize = min(newsize, TCP_MAXWIN << rcvscale);
5695
5696 /* Set new socket buffer size */
5697 if (newsize > sbrcv->sb_hiwat &&
0a7de745 5698 (sbreserve(sbrcv, newsize) == 1)) {
5ba3f43e
A
5699 sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
5700 (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);
5701
5702 /* Again check the limit set by the advertised
5703 * window scale
5704 */
5705 sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
0a7de745 5706 TCP_MAXWIN << rcvscale);
5ba3f43e
A
5707 }
5708}
5709
5710void
5711mptcp_sbrcv_grow(struct mptcb *mp_tp)
5712{
5713 struct mptses *mpte = mp_tp->mpt_mpte;
5714 struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
5715 struct sockbuf *sbrcv = &mp_so->so_rcv;
5716 uint32_t hiwat_sum = 0;
5717 uint32_t ideal_sum = 0;
5718 struct mptsub *mpts;
5719
5720 /*
5721 * Do not grow the receive socket buffer if
5722 * - auto resizing is disabled, globally or on this socket
5723 * - the high water mark already reached the maximum
5724 * - the stream is in background and receive side is being
5725 * throttled
 5726	 * - there are segments in the reassembly queue indicating loss;
 5727	 * there is no need to increase the recv window during recovery as more
5728 * data is not going to be sent. A duplicate ack sent during
5729 * recovery should not change the receive window
5730 */
5731 if (tcp_do_autorcvbuf == 0 ||
5732 (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
5733 tcp_cansbgrow(sbrcv) == 0 ||
5734 sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
5735 (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
5736 !LIST_EMPTY(&mp_tp->mpt_segq)) {
5737 /* Can not resize the socket buffer, just return */
5738 return;
5739 }
5740
5741 /*
5742 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
5743 *
5744 * But, for this we first need accurate receiver-RTT estimations, which
5745 * we currently don't have.
5746 *
5747 * Let's use a dummy algorithm for now, just taking the sum of all
5748 * subflow's receive-buffers. It's too low, but that's all we can get
5749 * for now.
5750 */
5751
5752 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5753 hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
5754 ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
5755 }
5756
5757 mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
39236c6e
A
5758}
5759
5760/*
5ba3f43e
A
 5761 * Determine if we can grow the receive socket buffer to avoid sending
5762 * a zero window update to the peer. We allow even socket buffers that
5763 * have fixed size (set by the application) to grow if the resource
5764 * constraints are met. They will also be trimmed after the application
5765 * reads data.
5766 *
5767 * Similar to tcp_sbrcv_grow_rwin
39236c6e 5768 */
5ba3f43e
A
5769static void
5770mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
39236c6e 5771{
5ba3f43e
A
5772 struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
5773 u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
5774 u_int32_t rcvbuf = sb->sb_hiwat;
39236c6e 5775
0a7de745 5776 if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
5ba3f43e 5777 return;
0a7de745 5778 }
39236c6e 5779
5ba3f43e
A
5780 if (tcp_do_autorcvbuf == 1 &&
5781 tcp_cansbgrow(sb) &&
5782 /* Diff to tcp_sbrcv_grow_rwin */
5783 (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
5784 (rcvbuf - sb->sb_cc) < rcvbufinc &&
5785 rcvbuf < tcp_autorcvbuf_max &&
5786 (sb->sb_idealsize > 0 &&
5787 sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
5788 sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
490019cf 5789 }
39236c6e
A
5790}
5791
5ba3f43e 5792/* Similar to tcp_sbspace */
39236c6e 5793int32_t
5ba3f43e 5794mptcp_sbspace(struct mptcb *mp_tp)
39236c6e 5795{
5ba3f43e 5796 struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
39236c6e
A
5797 uint32_t rcvbuf;
5798 int32_t space;
5ba3f43e
A
5799 int32_t pending = 0;
5800
cb323159 5801 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
39236c6e 5802
5ba3f43e 5803 mptcp_sbrcv_grow_rwin(mp_tp, sb);
39236c6e 5804
5ba3f43e 5805 /* hiwat might have changed */
39236c6e 5806 rcvbuf = sb->sb_hiwat;
5ba3f43e
A
5807
5808 space = ((int32_t) imin((rcvbuf - sb->sb_cc),
0a7de745
A
5809 (sb->sb_mbmax - sb->sb_mbcnt)));
5810 if (space < 0) {
39236c6e 5811 space = 0;
0a7de745 5812 }
5ba3f43e
A
5813
5814#if CONTENT_FILTER
5815 /* Compensate for data being processed by content filters */
5816 pending = cfil_sock_data_space(sb);
5817#endif /* CONTENT_FILTER */
0a7de745 5818 if (pending > space) {
5ba3f43e 5819 space = 0;
0a7de745 5820 } else {
5ba3f43e 5821 space -= pending;
0a7de745 5822 }
39236c6e 5823
0a7de745 5824 return space;
39236c6e
A
5825}
5826
5827/*
5828 * Support Fallback to Regular TCP
5829 */
5830void
5831mptcp_notify_mpready(struct socket *so)
5832{
5833 struct tcpcb *tp = NULL;
5834
0a7de745 5835 if (so == NULL) {
39236c6e 5836 return;
0a7de745 5837 }
39236c6e
A
5838
5839 tp = intotcpcb(sotoinpcb(so));
5840
0a7de745 5841 if (tp == NULL) {
39236c6e 5842 return;
0a7de745 5843 }
39236c6e
A
5844
5845 DTRACE_MPTCP4(multipath__ready, struct socket *, so,
5846 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
5847 struct tcpcb *, tp);
5848
0a7de745 5849 if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
39236c6e 5850 return;
0a7de745 5851 }
39236c6e 5852
0a7de745 5853 if (tp->t_mpflags & TMPF_MPTCP_READY) {
39236c6e 5854 return;
0a7de745 5855 }
39236c6e
A
5856
5857 tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
5858 tp->t_mpflags |= TMPF_MPTCP_READY;
5859
5860 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
5861}
5862
5863void
5864mptcp_notify_mpfail(struct socket *so)
5865{
5866 struct tcpcb *tp = NULL;
5867
0a7de745 5868 if (so == NULL) {
39236c6e 5869 return;
0a7de745 5870 }
39236c6e
A
5871
5872 tp = intotcpcb(sotoinpcb(so));
5873
0a7de745 5874 if (tp == NULL) {
39236c6e 5875 return;
0a7de745 5876 }
39236c6e
A
5877
5878 DTRACE_MPTCP4(multipath__failed, struct socket *, so,
5879 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
5880 struct tcpcb *, tp);
5881
0a7de745 5882 if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
39236c6e 5883 return;
0a7de745 5884 }
39236c6e 5885
0a7de745 5886 tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
39236c6e
A
5887 tp->t_mpflags |= TMPF_TCP_FALLBACK;
5888
5889 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
5890}
5891
5892/*
5893 * Keepalive helper function
5894 */
5895boolean_t
5896mptcp_ok_to_keepalive(struct mptcb *mp_tp)
5897{
5898 boolean_t ret = 1;
cb323159
A
5899
5900 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
5ba3f43e 5901
39236c6e
A
5902 if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
5903 ret = 0;
5904 }
0a7de745 5905 return ret;
39236c6e
A
5906}
5907
5908/*
5909 * MPTCP t_maxseg adjustment function
5910 */
5911int
5912mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
5913{
5914 int mss_lower = 0;
5915 struct mptcb *mp_tp = tptomptp(tp);
5916
0a7de745
A
5917#define MPTCP_COMPUTE_LEN { \
5918 mss_lower = sizeof (struct mptcp_dss_ack_opt); \
5919 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
5920 mss_lower += 2; \
5921 else \
5922 /* adjust to 32-bit boundary + EOL */ \
5923 mss_lower += 2; \
39236c6e 5924}
0a7de745
A
5925 if (mp_tp == NULL) {
5926 return 0;
5927 }
39236c6e 5928
cb323159 5929 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
5ba3f43e 5930
39236c6e
A
5931 /*
 5932 * For the first subflow and subsequent subflows, adjust mss for the
 5933 * most common MPTCP option size, for the case where tcp_mss is called
5934 * during option processing and MTU discovery.
5935 */
5ba3f43e
A
5936 if (!mtudisc) {
5937 if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
5938 !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
5939 MPTCP_COMPUTE_LEN;
5940 }
39236c6e 5941
5ba3f43e
A
5942 if (tp->t_mpflags & TMPF_PREESTABLISHED &&
5943 tp->t_mpflags & TMPF_SENT_JOIN) {
5944 MPTCP_COMPUTE_LEN;
5945 }
5946 } else {
5947 if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
5948 MPTCP_COMPUTE_LEN;
5949 }
39236c6e
A
5950 }
5951
0a7de745 5952 return mss_lower;
39236c6e
A
5953}
5954
5955/*
5956 * Update the pid, upid, uuid of the subflow so, based on parent so
5957 */
5958void
5ba3f43e 5959mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
39236c6e 5960{
5ba3f43e
A
5961 if (so->last_pid != mp_so->last_pid ||
5962 so->last_upid != mp_so->last_upid) {
5963 so->last_upid = mp_so->last_upid;
5964 so->last_pid = mp_so->last_pid;
5965 uuid_copy(so->last_uuid, mp_so->last_uuid);
39236c6e 5966 }
5ba3f43e 5967 so_update_policy(so);
39236c6e
A
5968}
5969
5970static void
5971fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
5972{
5973 struct inpcb *inp;
5974
5975 tcp_getconninfo(so, &flow->flow_ci);
5976 inp = sotoinpcb(so);
39236c6e
A
5977 if ((inp->inp_vflag & INP_IPV6) != 0) {
5978 flow->flow_src.ss_family = AF_INET6;
5979 flow->flow_dst.ss_family = AF_INET6;
5980 flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
5981 flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
5982 SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
5983 SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
5984 SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
5985 SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
f427ee49 5986 } else if ((inp->inp_vflag & INP_IPV4) != 0) {
39236c6e
A
5987 flow->flow_src.ss_family = AF_INET;
5988 flow->flow_dst.ss_family = AF_INET;
5989 flow->flow_src.ss_len = sizeof(struct sockaddr_in);
5990 flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
5991 SIN(&flow->flow_src)->sin_port = inp->inp_lport;
5992 SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
5993 SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
5994 SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
5995 }
3e170ce0
A
5996 flow->flow_len = sizeof(*flow);
5997 flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
39236c6e
A
5998 flow->flow_flags = mpts->mpts_flags;
5999 flow->flow_cid = mpts->mpts_connid;
3e170ce0 6000 flow->flow_relseq = mpts->mpts_rel_seq;
5ba3f43e 6001 flow->flow_soerror = mpts->mpts_socket->so_error;
3e170ce0 6002 flow->flow_probecnt = mpts->mpts_probecnt;
39236c6e
A
6003}
6004
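/*
 * sysctl handler behind net.inet.mptcp.pcblist: for every MPTCP connection
 * it emits a conninfo_mptcp_t followed by one mptcp_flow_t per subflow,
 * which lets user space walk the records using mptcpci_len and
 * mptcpci_flow_offset.
 */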
6005static int
6006mptcp_pcblist SYSCTL_HANDLER_ARGS
6007{
6008#pragma unused(oidp, arg1, arg2)
6009 int error = 0, f;
5ba3f43e 6010 size_t len;
39236c6e
A
6011 struct mppcb *mpp;
6012 struct mptses *mpte;
6013 struct mptcb *mp_tp;
6014 struct mptsub *mpts;
6015 struct socket *so;
6016 conninfo_mptcp_t mptcpci;
fe8ab488 6017 mptcp_flow_t *flows = NULL;
39236c6e 6018
0a7de745
A
6019 if (req->newptr != USER_ADDR_NULL) {
6020 return EPERM;
6021 }
39236c6e
A
6022
6023 lck_mtx_lock(&mtcbinfo.mppi_lock);
39236c6e 6024 if (req->oldptr == USER_ADDR_NULL) {
5ba3f43e 6025 size_t n = mtcbinfo.mppi_count;
39236c6e 6026 lck_mtx_unlock(&mtcbinfo.mppi_lock);
0a7de745
A
6027 req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
6028 4 * (n + n / 8) * sizeof(mptcp_flow_t);
6029 return 0;
39236c6e
A
6030 }
6031 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
fe8ab488 6032 flows = NULL;
cb323159 6033 socket_lock(mpp->mpp_socket, 1);
39236c6e
A
6034 VERIFY(mpp->mpp_flags & MPP_ATTACHED);
6035 mpte = mptompte(mpp);
cb323159
A
6036
6037 socket_lock_assert_owned(mptetoso(mpte));
39236c6e 6038 mp_tp = mpte->mpte_mptcb;
3e170ce0
A
6039
6040 bzero(&mptcpci, sizeof(mptcpci));
39236c6e 6041 mptcpci.mptcpci_state = mp_tp->mpt_state;
3e170ce0
A
6042 mptcpci.mptcpci_flags = mp_tp->mpt_flags;
6043 mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
6044 mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
6045 mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
6046 mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
6047 mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
6048 mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
6049 mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
6050 mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
6051 mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
5ba3f43e 6052 mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
3e170ce0
A
6053 mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
6054 mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;
3e170ce0 6055
39236c6e 6056 mptcpci.mptcpci_nflows = mpte->mpte_numflows;
3e170ce0
A
6057 mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
6058 mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
6059 mptcpci.mptcpci_flow_offset =
6060 offsetof(conninfo_mptcp_t, mptcpci_flows);
6061
fe8ab488
A
6062 len = sizeof(*flows) * mpte->mpte_numflows;
6063 if (mpte->mpte_numflows != 0) {
6064 flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
6065 if (flows == NULL) {
cb323159 6066 socket_unlock(mpp->mpp_socket, 1);
fe8ab488
A
6067 break;
6068 }
6069 mptcpci.mptcpci_len = sizeof(mptcpci) +
6070 sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
6071 error = SYSCTL_OUT(req, &mptcpci,
6072 sizeof(mptcpci) - sizeof(mptcp_flow_t));
6073 } else {
6074 mptcpci.mptcpci_len = sizeof(mptcpci);
3e170ce0 6075 error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
39037602 6076 }
39236c6e 6077 if (error) {
cb323159 6078 socket_unlock(mpp->mpp_socket, 1);
39236c6e
A
6079 FREE(flows, M_TEMP);
6080 break;
6081 }
6082 f = 0;
6083 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
39236c6e 6084 so = mpts->mpts_socket;
39236c6e 6085 fill_mptcp_subflow(so, &flows[f], mpts);
39236c6e
A
6086 f++;
6087 }
cb323159 6088 socket_unlock(mpp->mpp_socket, 1);
fe8ab488
A
6089 if (flows) {
6090 error = SYSCTL_OUT(req, flows, len);
6091 FREE(flows, M_TEMP);
0a7de745 6092 if (error) {
fe8ab488 6093 break;
0a7de745 6094 }
fe8ab488 6095 }
39236c6e
A
6096 }
6097 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6098
0a7de745 6099 return error;
39236c6e
A
6100}
6101
6102SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
39037602 6103 0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
39236c6e 6104 "List of active MPTCP connections");
fe8ab488 6105
fe8ab488
A
6106/*
6107 * Set notsent lowat mark on the MPTCB
6108 */
6109int
6110mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
6111{
6112 struct mptcb *mp_tp = NULL;
6113 int error = 0;
6114
0a7de745 6115 if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
fe8ab488 6116 mp_tp = mpte->mpte_mptcb;
0a7de745 6117 }
fe8ab488 6118
0a7de745 6119 if (mp_tp) {
fe8ab488 6120 mp_tp->mpt_notsent_lowat = optval;
0a7de745 6121 } else {
fe8ab488 6122 error = EINVAL;
0a7de745 6123 }
fe8ab488 6124
0a7de745 6125 return error;
fe8ab488
A
6126}
6127
6128u_int32_t
6129mptcp_get_notsent_lowat(struct mptses *mpte)
6130{
6131 struct mptcb *mp_tp = NULL;
6132
0a7de745 6133 if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
fe8ab488 6134 mp_tp = mpte->mpte_mptcb;
0a7de745 6135 }
fe8ab488 6136
0a7de745
A
6137 if (mp_tp) {
6138 return mp_tp->mpt_notsent_lowat;
6139 } else {
6140 return 0;
6141 }
fe8ab488
A
6142}
6143
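/*
 * Returns 1 when the amount of unsent data on the MPTCP socket has dropped
 * to (or below) the configured not-sent lowat mark, i.e. the socket can be
 * considered writable again.  With Nagle enabled on a subflow, a sub-maxseg
 * residue is also treated as low enough (see the comment in the loop).
 */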
39037602 6144int
5ba3f43e
A
6145mptcp_notsent_lowat_check(struct socket *so)
6146{
fe8ab488
A
6147 struct mptses *mpte;
6148 struct mppcb *mpp;
6149 struct mptcb *mp_tp;
6150 struct mptsub *mpts;
6151
6152 int notsent = 0;
6153
5ba3f43e 6154 mpp = mpsotomppcb(so);
fe8ab488 6155 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
0a7de745 6156 return 0;
fe8ab488
A
6157 }
6158
6159 mpte = mptompte(mpp);
cb323159 6160 socket_lock_assert_owned(mptetoso(mpte));
fe8ab488
A
6161 mp_tp = mpte->mpte_mptcb;
6162
fe8ab488
A
6163 notsent = so->so_snd.sb_cc;
6164
6165 if ((notsent == 0) ||
6166 ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
6167 mp_tp->mpt_notsent_lowat)) {
3e170ce0 6168 mptcplog((LOG_DEBUG, "MPTCP Sender: "
f427ee49 6169 "lowat %d notsent %d actual %llu \n",
3e170ce0
A
6170 mp_tp->mpt_notsent_lowat, notsent,
6171 notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
0a7de745
A
6172 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
6173 return 1;
fe8ab488 6174 }
fe8ab488
A
6175
 6176	/* When Nagle's algorithm is not disabled, it is better
 6177	 * to wake up the client even before there is at least one
 6178	 * maxseg of data to write.
6179 */
6180 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
6181 int retval = 0;
fe8ab488
A
6182 if (mpts->mpts_flags & MPTSF_ACTIVE) {
6183 struct socket *subf_so = mpts->mpts_socket;
fe8ab488 6184 struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));
39037602 6185
fe8ab488 6186 notsent = so->so_snd.sb_cc -
0a7de745 6187 (tp->snd_nxt - tp->snd_una);
39037602 6188
fe8ab488
A
6189 if ((tp->t_flags & TF_NODELAY) == 0 &&
6190 notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
6191 retval = 1;
6192 }
3e170ce0 6193 mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
fe8ab488 6194 " nodelay false \n",
3e170ce0 6195 mp_tp->mpt_notsent_lowat, notsent),
0a7de745
A
6196 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
6197 return retval;
fe8ab488 6198 }
fe8ab488 6199 }
0a7de745 6200 return 0;
fe8ab488
A
6201}
6202
3e170ce0
A
6203static errno_t
6204mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
0a7de745 6205 void **unitinfo)
3e170ce0
A
6206{
6207#pragma unused(kctlref, sac, unitinfo)
5ba3f43e 6208
0a7de745 6209 if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
cb323159 6210 os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
0a7de745 6211 }
5ba3f43e
A
6212
6213 mptcp_kern_skt_unit = sac->sc_unit;
6214
0a7de745 6215 return 0;
5ba3f43e
A
6216}
6217
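/*
 * Called when Symptoms grants cell usage to an application: walk all MPTCP
 * sessions and, for every session whose effective UUID matches, briefly set
 * MPTE_ACCESS_GRANTED (plus MPTE_CELL_PROHIBITED when the reported RSSI is
 * above the target-based threshold) while re-evaluating which subflows to
 * add or remove.
 */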
6218static void
cb323159 6219mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
5ba3f43e
A
6220{
6221 struct mppcb *mpp;
6222
6223 /* Iterate over all MPTCP connections */
6224
6225 lck_mtx_lock(&mtcbinfo.mppi_lock);
6226
6227 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
cb323159
A
6228 struct socket *mp_so = mpp->mpp_socket;
6229 struct mptses *mpte = mpp->mpp_pcbe;
5ba3f43e 6230
cb323159 6231 socket_lock(mp_so, 1);
5ba3f43e
A
6232
6233 if (mp_so->so_flags & SOF_DELEGATED &&
0a7de745 6234 uuid_compare(uuid, mp_so->e_uuid)) {
5ba3f43e 6235 goto next;
0a7de745
A
6236 } else if (!(mp_so->so_flags & SOF_DELEGATED) &&
6237 uuid_compare(uuid, mp_so->last_uuid)) {
5ba3f43e 6238 goto next;
0a7de745
A
6239 }
6240
cb323159
A
6241 os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
6242 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);
5ba3f43e
A
6243
6244 mpte->mpte_flags |= MPTE_ACCESS_GRANTED;
6245
cb323159
A
6246 if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
6247 mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
6248 }
6249
5ba3f43e
A
6250 mptcp_check_subflows_and_add(mpte);
6251 mptcp_remove_subflows(mpte);
6252
cb323159 6253 mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);
5ba3f43e
A
6254
6255next:
cb323159 6256 socket_unlock(mp_so, 1);
5ba3f43e
A
6257 }
6258
6259 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6260}
6261
6262static void
6263mptcp_wifi_status_changed(void)
6264{
6265 struct mppcb *mpp;
6266
6267 /* Iterate over all MPTCP connections */
6268
6269 lck_mtx_lock(&mtcbinfo.mppi_lock);
6270
6271 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
cb323159
A
6272 struct socket *mp_so = mpp->mpp_socket;
6273 struct mptses *mpte = mpp->mpp_pcbe;
5ba3f43e 6274
cb323159 6275 socket_lock(mp_so, 1);
5ba3f43e 6276
cb323159
A
 6277		/* Only handover- and urgency-mode are purely driven by Symptoms' Wi-Fi status */
6278 if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
6279 mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
5ba3f43e 6280 goto next;
0a7de745 6281 }
5ba3f43e
A
6282
6283 mptcp_check_subflows_and_add(mpte);
6284 mptcp_check_subflows_and_remove(mpte);
6285
6286next:
cb323159 6287 socket_unlock(mp_so, 1);
5ba3f43e
A
6288 }
6289
6290 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6291}
6292
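/*
 * Ask Symptoms whether this connection may use cell: enqueue an
 * MPTCP_SYMPTOMS_ASK_UUID request on the kernel-control socket, carrying the
 * app's UUID and a coarse priority derived from its task policy role.
 */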
6293void
6294mptcp_ask_symptoms(struct mptses *mpte)
6295{
6296 struct mptcp_symptoms_ask_uuid ask;
6297 struct socket *mp_so;
6298 struct proc *p;
6299 int pid, prio, err;
6300
6301 if (mptcp_kern_skt_unit == 0) {
cb323159
A
6302 os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
6303 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e
A
6304 return;
6305 }
6306
6307 mp_so = mptetoso(mpte);
6308
0a7de745 6309 if (mp_so->so_flags & SOF_DELEGATED) {
5ba3f43e 6310 pid = mp_so->e_pid;
0a7de745 6311 } else {
5ba3f43e 6312 pid = mp_so->last_pid;
0a7de745 6313 }
5ba3f43e
A
6314
6315 p = proc_find(pid);
6316 if (p == PROC_NULL) {
cb323159
A
6317 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
6318 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
5ba3f43e
A
6319 return;
6320 }
6321
6322 ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;
6323
0a7de745 6324 if (mp_so->so_flags & SOF_DELEGATED) {
5ba3f43e 6325 uuid_copy(ask.uuid, mp_so->e_uuid);
0a7de745 6326 } else {
5ba3f43e 6327 uuid_copy(ask.uuid, mp_so->last_uuid);
0a7de745 6328 }
5ba3f43e
A
6329
6330 prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);
6331
cb323159
A
6332 if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
6333 prio == TASK_DARWINBG_APPLICATION) {
5ba3f43e 6334 ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
0a7de745 6335 } else if (prio == TASK_FOREGROUND_APPLICATION) {
5ba3f43e 6336 ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
0a7de745 6337 } else {
5ba3f43e 6338 ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
0a7de745 6339 }
5ba3f43e 6340
5ba3f43e 6341 err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
0a7de745 6342 &ask, sizeof(ask), CTL_DATA_EOR);
d9a64523 6343
cb323159
A
6344 os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
6345 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);
d9a64523 6346
5ba3f43e
A
6347
6348 proc_rele(p);
3e170ce0
A
6349}
6350
6351static errno_t
6352mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
0a7de745 6353 void *unitinfo)
3e170ce0
A
6354{
6355#pragma unused(kctlref, kcunit, unitinfo)
5ba3f43e
A
6356
6357 OSDecrementAtomic(&mptcp_kern_skt_inuse);
6358
0a7de745 6359 return 0;
3e170ce0
A
6360}
6361
6362static errno_t
6363mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
0a7de745 6364 mbuf_t m, int flags)
3e170ce0 6365{
5ba3f43e 6366#pragma unused(kctlref, unitinfo, flags)
0a7de745 6367 symptoms_advisory_t *sa = NULL;
3e170ce0 6368
0a7de745 6369 if (kcunit != mptcp_kern_skt_unit) {
cb323159 6370 os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
0a7de745
A
6371 __func__, kcunit, mptcp_kern_skt_unit);
6372 }
5ba3f43e 6373
3e170ce0
A
6374 if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
6375 mbuf_freem(m);
0a7de745 6376 return EINVAL;
3e170ce0
A
6377 }
6378
d9a64523 6379 if (mbuf_len(m) < sizeof(*sa)) {
0a7de745
A
6380 os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
6381 __func__, mbuf_len(m), sizeof(*sa));
d9a64523 6382 mbuf_freem(m);
0a7de745 6383 return EINVAL;
d9a64523
A
6384 }
6385
6386 sa = mbuf_data(m);
3e170ce0 6387
cb323159
A
6388 if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
6389 os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new, old: %d,%d\n", __func__,
6390 sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
6391 sa->sa_cell_status, mptcp_advisory.sa_cell_status);
3e170ce0 6392
cb323159 6393 if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
3e170ce0 6394 mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
5ba3f43e 6395 mptcp_wifi_status_changed();
0a7de745 6396 }
cb323159
A
6397 } else {
6398 struct mptcp_symptoms_answer answer;
0a7de745 6399 errno_t err;
5ba3f43e 6400
cb323159
A
6401 /* We temporarily allow different sizes for ease of submission */
6402 if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
6403 mbuf_len(m) != sizeof(answer)) {
6404 os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
6405 __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
6406 sizeof(answer));
0a7de745
A
6407 mbuf_free(m);
6408 return EINVAL;
6409 }
5ba3f43e 6410
cb323159
A
6411 memset(&answer, 0, sizeof(answer));
6412
6413 err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
0a7de745
A
6414 if (err) {
6415 os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
6416 mbuf_free(m);
6417 return err;
6418 }
5ba3f43e 6419
cb323159 6420 mptcp_allow_uuid(answer.uuid, answer.rssi);
3e170ce0 6421 }
5ba3f43e 6422
d9a64523 6423 mbuf_freem(m);
0a7de745 6424 return 0;
3e170ce0
A
6425}
6426
6427void
6428mptcp_control_register(void)
6429{
6430 /* Set up the advisory control socket */
6431 struct kern_ctl_reg mptcp_kern_ctl;
6432
6433 bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
6434 strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
6435 sizeof(mptcp_kern_ctl.ctl_name));
6436 mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
6437 mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
6438 mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
6439 mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;
6440
6441 (void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
6442}
6443
d9a64523
A
6444/*
6445 * Three return-values:
6446 * 1 : WiFi is bad
6447 * 0 : WiFi is good
cb323159 6448 * -1 : WiFi-state is unknown
d9a64523 6449 */
3e170ce0 6450int
cb323159 6451mptcp_is_wifi_unusable_for_session(struct mptses *mpte)
3e170ce0 6452{
d9a64523 6453 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
f427ee49
A
6454 if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
6455 mptcp_advisory.sa_wifi_status) {
cb323159 6456 return symptoms_is_wifi_lossy() ? 1 : 0;
0a7de745 6457 }
d9a64523
A
6458
6459 /*
6460 * If it's a first-party app and we don't have any info
6461 * about the Wi-Fi state, let's be pessimistic.
6462 */
0a7de745 6463 return -1;
cb323159
A
6464 } else {
6465 if (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) {
6466 return 1;
6467 }
d9a64523 6468
cb323159
A
6469 /*
 6470		 * If we are target-based, we are allowed to be more lax about
 6471		 * the "unusable" target. We only *know* about the state once
 6472		 * we got the allowance from Symptoms (MPTE_ACCESS_GRANTED).
6473 *
6474 * If RSSI is not bad enough, MPTE_CELL_PROHIBITED will then
6475 * be set.
6476 *
6477 * In any other case (while in target-mode), consider WiFi bad
6478 * and we are going to ask for allowance from Symptoms anyway.
6479 */
6480 if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
6481 if (mpte->mpte_flags & MPTE_ACCESS_GRANTED &&
6482 mpte->mpte_flags & MPTE_CELL_PROHIBITED) {
6483 return 0;
6484 }
d9a64523 6485
cb323159
A
6486 return 1;
6487 }
d9a64523 6488
cb323159 6489 return 0;
0a7de745 6490 }
cb323159 6491}
d9a64523 6492
cb323159
A
6493boolean_t
6494symptoms_is_wifi_lossy(void)
6495{
6496 return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true;
3e170ce0
A
6497}
6498
490019cf
A
6499/* If TFO data is successfully acked, it must be dropped from the mptcp so */
6500static void
5ba3f43e 6501mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
490019cf 6502{
5ba3f43e 6503 struct socket *mp_so = mptetoso(mpte);
490019cf
A
6504 struct socket *so = mpts->mpts_socket;
6505 struct tcpcb *tp = intotcpcb(sotoinpcb(so));
6506 struct mptcb *mp_tp = mpte->mpte_mptcb;
6507
6508 /* If data was sent with SYN, rewind state */
6509 if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
5ba3f43e 6510 u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
490019cf 6511 unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;
5ba3f43e 6512
490019cf
A
6513 VERIFY(mp_droplen <= (UINT_MAX));
6514 VERIFY(mp_droplen >= tcp_droplen);
6515
5ba3f43e
A
6516 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
6517 mpts->mpts_iss += tcp_droplen;
6518 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
6519
490019cf
A
6520 if (mp_droplen > tcp_droplen) {
6521 /* handle partial TCP ack */
6522 mp_so->so_flags1 |= SOF1_TFO_REWIND;
6523 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
490019cf
A
6524 mp_droplen = tcp_droplen;
6525 } else {
6526 /* all data on SYN was acked */
6527 mpts->mpts_rel_seq = 1;
6528 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
490019cf
A
6529 }
6530 mp_tp->mpt_sndmax -= tcp_droplen;
6531
490019cf
A
6532 if (mp_droplen != 0) {
6533 VERIFY(mp_so->so_snd.sb_mb != NULL);
6534 sbdrop(&mp_so->so_snd, (int)mp_droplen);
6535 }
5ba3f43e
A
6536 }
6537}
6538
6539int
6540mptcp_freeq(struct mptcb *mp_tp)
6541{
6542 struct tseg_qent *q;
6543 int rv = 0;
6544
6545 while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
6546 LIST_REMOVE(q, tqe_q);
6547 m_freem(q->tqe_m);
6548 zfree(tcp_reass_zone, q);
6549 rv = 1;
6550 }
6551 mp_tp->mpt_reassqlen = 0;
0a7de745 6552 return rv;
5ba3f43e
A
6553}
6554
6555static int
6556mptcp_post_event(u_int32_t event_code, int value)
6557{
6558 struct kev_mptcp_data event_data;
6559 struct kev_msg ev_msg;
6560
6561 memset(&ev_msg, 0, sizeof(ev_msg));
6562
0a7de745
A
6563 ev_msg.vendor_code = KEV_VENDOR_APPLE;
6564 ev_msg.kev_class = KEV_NETWORK_CLASS;
6565 ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
6566 ev_msg.event_code = event_code;
5ba3f43e
A
6567
6568 event_data.value = value;
6569
0a7de745 6570 ev_msg.dv[0].data_ptr = &event_data;
5ba3f43e
A
6571 ev_msg.dv[0].data_length = sizeof(event_data);
6572
6573 return kev_post_msg(&ev_msg);
6574}
6575
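/*
 * Cell-icon accounting: mptcp_cellicon_refcount globally counts outstanding
 * cell-icon requests, mpte_cellicon_increments tracks how many of them were
 * made by this session, and MPTSF_CELLICON_SET marks a subflow that
 * contributed one.  The icon itself is flipped via KEV_MPTCP_CELLUSE events
 * when the global count moves away from or back to zero.
 */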
cb323159
A
6576static void
6577mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
5ba3f43e 6578{
94ff46dc 6579 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5ba3f43e
A
6580 int error;
6581
6582 /* First-party apps (Siri) don't flip the cellicon */
0a7de745 6583 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
5ba3f43e 6584 return;
0a7de745 6585 }
5ba3f43e 6586
cb323159
A
6587 /* Subflow is disappearing - don't set it on this one */
6588 if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
6589 return;
6590 }
6591
94ff46dc
A
 6592	/* Fallen-back connections do not trigger the cellicon */
6593 if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
6594 return;
6595 }
6596
cb323159
A
6597 /* Remember the last time we set the cellicon. Needed for debouncing */
6598 mpte->mpte_last_cellicon_set = tcp_now;
6599
94ff46dc
A
6600 tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
6601 tcp_sched_timers(tp);
6602
cb323159
A
6603 if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
6604 mpte->mpte_cellicon_increments != 0) {
6605 if (mptcp_cellicon_refcount == 0) {
6606 os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n",
6607 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
6608
6609 /* Continue, so that the icon gets set... */
6610 } else {
6611 /*
6612 * In this case, the cellicon is already set. No need to bump it
6613 * even higher
6614 */
6615
6616 return;
6617 }
6618 }
6619
6620 /* When tearing down this subflow, we need to decrement the
6621 * reference counter
6622 */
6623 mpts->mpts_flags |= MPTSF_CELLICON_SET;
6624
 6625	/* Bump this counter, so that when a session gets destroyed we decrement
 6626	 * the global reference counter by whatever is left
6627 */
6628 mpte->mpte_cellicon_increments++;
5ba3f43e 6629
cb323159
A
6630 if (OSIncrementAtomic(&mptcp_cellicon_refcount)) {
6631 /* If cellicon is already set, get out of here! */
5ba3f43e 6632 return;
0a7de745 6633 }
5ba3f43e
A
6634
6635 error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);
6636
0a7de745 6637 if (error) {
cb323159
A
6638 os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n",
6639 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
0a7de745 6640 } else {
cb323159
A
6641 os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n",
6642 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
0a7de745 6643 }
5ba3f43e
A
6644}
6645
6646void
cb323159 6647mptcp_clear_cellicon(void)
5ba3f43e 6648{
cb323159
A
6649 int error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);
6650
6651 if (error) {
6652 os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n",
6653 __func__, error);
6654 } else {
6655 os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n",
6656 __func__);
6657 }
6658}
6659
6660/*
6661 * Returns true if the icon has been flipped to WiFi.
6662 */
6663static boolean_t
f427ee49 6664__mptcp_unset_cellicon(uint32_t val)
cb323159 6665{
f427ee49
A
6666 VERIFY(val < INT32_MAX);
6667 if (OSAddAtomic((int32_t)-val, &mptcp_cellicon_refcount) != 1) {
cb323159
A
6668 return false;
6669 }
6670
6671 mptcp_clear_cellicon();
6672
6673 return true;
6674}
5ba3f43e 6675
94ff46dc
A
6676void
6677mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
cb323159
A
6678{
6679 /* First-party apps (Siri) don't flip the cellicon */
6680 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
5ba3f43e 6681 return;
0a7de745 6682 }
5ba3f43e 6683
cb323159
A
6684 if (mpte->mpte_cellicon_increments == 0) {
6685 /* This flow never used cell - get out of here! */
5ba3f43e 6686 return;
490019cf 6687 }
5ba3f43e 6688
cb323159
A
6689 if (mptcp_cellicon_refcount == 0) {
6690 os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
6691 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
5ba3f43e 6692
cb323159
A
6693 return;
6694 }
6695
6696 if (mpts) {
6697 if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
6698 return;
6699 }
6700
6701 mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
6702 }
6703
94ff46dc
A
6704 if (mpte->mpte_cellicon_increments < val) {
6705 os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
6706 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
6707 val = mpte->mpte_cellicon_increments;
6708 }
6709
6710 mpte->mpte_cellicon_increments -= val;
cb323159
A
6711
6712 if (__mptcp_unset_cellicon(val) == false) {
6713 return;
6714 }
6715
6716 /* All flows are gone - our counter should be at zero too! */
6717 if (mpte->mpte_cellicon_increments != 0) {
6718 os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
6719 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
0a7de745 6720 }
5ba3f43e
A
6721}
6722
6723void
6724mptcp_reset_rexmit_state(struct tcpcb *tp)
6725{
6726 struct mptsub *mpts;
6727 struct inpcb *inp;
6728 struct socket *so;
6729
6730 inp = tp->t_inpcb;
0a7de745 6731 if (inp == NULL) {
5ba3f43e 6732 return;
0a7de745 6733 }
5ba3f43e
A
6734
6735 so = inp->inp_socket;
0a7de745 6736 if (so == NULL) {
5ba3f43e 6737 return;
0a7de745 6738 }
5ba3f43e 6739
0a7de745 6740 if (!(so->so_flags & SOF_MP_SUBFLOW)) {
5ba3f43e 6741 return;
0a7de745 6742 }
5ba3f43e
A
6743
6744 mpts = tp->t_mpsub;
6745
6746 mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
6747 so->so_flags &= ~SOF_MP_TRYFAILOVER;
6748}
6749
6750void
6751mptcp_reset_keepalive(struct tcpcb *tp)
6752{
6753 struct mptsub *mpts = tp->t_mpsub;
6754
6755 mpts->mpts_flags &= ~MPTSF_READ_STALL;
490019cf 6756}