apple/xnu (xnu-6153.61.1) - bsd/netinet/mptcp_subr.c
/*
 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <kern/locks.h>
#include <kern/policy_internal.h>
#include <kern/zalloc.h>

#include <mach/sdt.h>

#include <sys/domain.h>
#include <sys/kdebug.h>
#include <sys/kern_control.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/resourcevar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>

#include <net/content_filter.h>
#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_timer.h>
#include <libkern/crypto/sha1.h>
#if INET6
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#endif /* INET6 */
#include <dev/random/randomdev.h>

/*
 * Notes on MPTCP implementation.
 *
 * MPTCP is implemented as <SOCK_STREAM,IPPROTO_TCP> protocol in PF_MULTIPATH
 * communication domain. The structure mtcbinfo describes the MPTCP instance
 * of a Multipath protocol in that domain. It is used to keep track of all
 * MPTCP PCB instances in the system, and is protected by the global lock
 * mppi_lock.
 *
 * An MPTCP socket is opened by calling socket(PF_MULTIPATH, SOCK_STREAM,
 * IPPROTO_TCP). Upon success, a Multipath PCB gets allocated and along with
 * it comes an MPTCP Session and an MPTCP PCB. All three structures are
 * allocated from the same memory block, and each structure has a pointer
 * to the adjacent ones. The layout is defined by the mpp_mtp structure.
 * The socket lock (mpp_lock) is used to protect accesses to the Multipath
 * PCB (mppcb) as well as the MPTCP Session (mptses).
 *
 * The MPTCP Session is an MPTCP-specific extension to the Multipath PCB;
 *
 * A functioning MPTCP Session consists of one or more subflow sockets. Each
 * subflow socket is essentially a regular PF_INET/PF_INET6 TCP socket, and is
 * represented by the mptsub structure. Because each subflow requires access
 * to the MPTCP Session, the MPTCP socket's so_usecount is bumped up for each
 * subflow. This gets decremented prior to the subflow's destruction.
 *
 * To handle events (read, write, control) from the subflows, we do direct
 * upcalls into the specific function.
 *
 * The whole MPTCP connection is protected by a single lock, the MPTCP socket's
 * lock. Incoming data on a subflow also ends up taking this single lock. To
 * achieve the latter, tcp_lock/unlock has been changed to rather use the lock
 * of the MPTCP-socket.
 *
 * An MPTCP socket will be destroyed when its so_usecount drops to zero; this
 * work is done by the MPTCP garbage collector which is invoked on demand by
 * the PF_MULTIPATH garbage collector. This process will take place once all
 * of the subflows have been destroyed.
 */
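
/*
 * Illustrative userspace sketch (not part of this file, and simplified):
 * based on the notes above, an application typically reaches this code by
 * opening an MPTCP socket and connecting it with connectx(2). The endpoint
 * setup below is an assumption for illustration only; dst/dstlen are
 * hypothetical; see the socket(2) and connectx(2) man pages for the
 * authoritative interface.
 *
 *	int fd = socket(PF_MULTIPATH, SOCK_STREAM, IPPROTO_TCP);
 *	sa_endpoints_t eps = {
 *		.sae_dstaddr = dst,         (hypothetical struct sockaddr *)
 *		.sae_dstaddrlen = dstlen,
 *	};
 *	sae_connid_t cid;
 *	connectx(fd, &eps, SAE_ASSOCID_ANY, 0, NULL, 0, NULL, &cid);
 *
 * The initial subflow is established by that call; additional subflows are
 * then created by the kernel itself (see mptcp_check_subflows_and_add()
 * further down in this file).
 */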

static void mptcp_attach_to_subf(struct socket *, struct mptcb *, uint8_t);
static void mptcp_detach_mptcb_from_subf(struct mptcb *, struct socket *);

static uint32_t mptcp_gc(struct mppcbinfo *);
static int mptcp_subflow_soreceive(struct socket *, struct sockaddr **,
    struct uio *, struct mbuf **, struct mbuf **, int *);
static int mptcp_subflow_sosend(struct socket *, struct sockaddr *,
    struct uio *, struct mbuf *, struct mbuf *, int);
static void mptcp_subflow_wupcall(struct socket *, void *, int);
static void mptcp_subflow_eupcall1(struct socket *, void *, uint32_t);
static void mptcp_update_last_owner(struct socket *so, struct socket *mp_so);
static void mptcp_drop_tfo_data(struct mptses *, struct mptsub *);

static void mptcp_subflow_abort(struct mptsub *, int);

static void mptcp_send_dfin(struct socket *so);
static void mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts);
static int mptcp_freeq(struct mptcb *mp_tp);

/*
 * Possible return values for subflow event handlers. Note that success
 * values must be greater than or equal to MPTS_EVRET_OK. Values less than that
 * indicate errors or actions which require immediate attention; they will
 * prevent the rest of the handlers from processing their respective events
 * until the next round of events processing.
 */
typedef enum {
	MPTS_EVRET_DELETE = 1,                  /* delete this subflow */
	MPTS_EVRET_OK = 2,                      /* OK */
	MPTS_EVRET_CONNECT_PENDING = 3,         /* resume pended connects */
	MPTS_EVRET_DISCONNECT_FALLBACK = 4,     /* abort all but preferred */
} ev_ret_t;

static ev_ret_t mptcp_subflow_propagate_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_nosrcaddr_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_failover_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_ifdenied_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_connected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_disconnected_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpstatus_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mustrst_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpcantrcvmore_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_mpsuberror_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_rtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);
static ev_ret_t mptcp_subflow_adaptive_wtimo_ev(struct mptses *, struct mptsub *, uint64_t *, uint64_t);

static void mptcp_do_sha1(mptcp_key_t *, char *);
static void mptcp_init_local_parms(struct mptses *);

static unsigned int mptsub_zone_size;           /* size of mptsub */
static struct zone *mptsub_zone;                /* zone for mptsub */

static unsigned int mptopt_zone_size;           /* size of mptopt */
static struct zone *mptopt_zone;                /* zone for mptopt */

static unsigned int mpt_subauth_entry_size;     /* size of subf auth entry */
static struct zone *mpt_subauth_zone;           /* zone of subf auth entry */

struct mppcbinfo mtcbinfo;

SYSCTL_DECL(_net_inet);

SYSCTL_NODE(_net_inet, OID_AUTO, mptcp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "MPTCP");

uint32_t mptcp_dbg_area = 31;           /* more noise if greater than 1 */
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, dbg_area, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_area, 0, "MPTCP debug area");

uint32_t mptcp_dbg_level = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dbg_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dbg_level, 0, "MPTCP debug level");

SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, pcbcount, CTLFLAG_RD | CTLFLAG_LOCKED,
    &mtcbinfo.mppi_count, 0, "Number of active PCBs");


static int mptcp_alternate_port = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, alternate_port, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_alternate_port, 0, "Set alternate port for MPTCP connections");

static struct protosw mptcp_subflow_protosw;
static struct pr_usrreqs mptcp_subflow_usrreqs;
#if INET6
static struct ip6protosw mptcp_subflow_protosw6;
static struct pr_usrreqs mptcp_subflow_usrreqs6;
#endif /* INET6 */

static uint8_t mptcp_create_subflows_scheduled;

typedef struct mptcp_subflow_event_entry {
	uint64_t        sofilt_hint_mask;
	ev_ret_t        (*sofilt_hint_ev_hdlr)(
		struct mptses *mpte,
		struct mptsub *mpts,
		uint64_t *p_mpsofilt_hint,
		uint64_t event);
} mptsub_ev_entry_t;

/* Using Symptoms Advisory to detect poor WiFi or poor Cell */
static kern_ctl_ref mptcp_kern_ctrl_ref = NULL;
static uint32_t mptcp_kern_skt_inuse = 0;
static uint32_t mptcp_kern_skt_unit;
static symptoms_advisory_t mptcp_advisory;

uint32_t mptcp_cellicon_refcount = 0;

/*
 * XXX The order of the event handlers below is really
 * really important. Think twice before changing it.
 */
static mptsub_ev_entry_t mpsub_ev_entry_tbl[] = {
	{
		.sofilt_hint_mask = SO_FILT_HINT_MP_SUB_ERROR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpsuberror_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPCANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpcantrcvmore_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPFAILOVER,
		.sofilt_hint_ev_hdlr = mptcp_subflow_failover_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNRESET,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MUSTRST,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mustrst_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CANTRCVMORE,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_TIMEOUT,
		.sofilt_hint_ev_hdlr = mptcp_subflow_propagate_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_NOSRCADDR,
		.sofilt_hint_ev_hdlr = mptcp_subflow_nosrcaddr_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_IFDENIED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_ifdenied_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_CONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_connected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_MPSTATUS,
		.sofilt_hint_ev_hdlr = mptcp_subflow_mpstatus_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_DISCONNECTED,
		.sofilt_hint_ev_hdlr = mptcp_subflow_disconnected_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_RTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_rtimo_ev,
	},
	{
		.sofilt_hint_mask = SO_FILT_HINT_ADAPTIVE_WTIMO,
		.sofilt_hint_ev_hdlr = mptcp_subflow_adaptive_wtimo_ev,
	},
};

os_log_t mptcp_log_handle;

/*
 * Protocol pr_init callback.
 */
void
mptcp_init(struct protosw *pp, struct domain *dp)
{
#pragma unused(dp)
	static int mptcp_initialized = 0;
	struct protosw *prp;
#if INET6
	struct ip6protosw *prp6;
#endif /* INET6 */

	VERIFY((pp->pr_flags & (PR_INITIALIZED | PR_ATTACHED)) == PR_ATTACHED);

	/* do this only once */
	if (mptcp_initialized) {
		return;
	}
	mptcp_initialized = 1;

	mptcp_advisory.sa_wifi_status = SYMPTOMS_ADVISORY_WIFI_OK;

	/*
	 * Since PF_MULTIPATH gets initialized after PF_INET/INET6,
	 * we must be able to find IPPROTO_TCP entries for both.
	 */
	prp = pffindproto_locked(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp != NULL);
	bcopy(prp, &mptcp_subflow_protosw, sizeof(*prp));
	bcopy(prp->pr_usrreqs, &mptcp_subflow_usrreqs,
	    sizeof(mptcp_subflow_usrreqs));
	mptcp_subflow_protosw.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw.pr_usrreqs = &mptcp_subflow_usrreqs;
	mptcp_subflow_usrreqs.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;

#if INET6
	prp6 = (struct ip6protosw *)pffindproto_locked(PF_INET6,
	    IPPROTO_TCP, SOCK_STREAM);
	VERIFY(prp6 != NULL);
	bcopy(prp6, &mptcp_subflow_protosw6, sizeof(*prp6));
	bcopy(prp6->pr_usrreqs, &mptcp_subflow_usrreqs6,
	    sizeof(mptcp_subflow_usrreqs6));
	mptcp_subflow_protosw6.pr_entry.tqe_next = NULL;
	mptcp_subflow_protosw6.pr_entry.tqe_prev = NULL;
	mptcp_subflow_protosw6.pr_usrreqs = &mptcp_subflow_usrreqs6;
	mptcp_subflow_usrreqs6.pru_soreceive = mptcp_subflow_soreceive;
	mptcp_subflow_usrreqs6.pru_sosend = mptcp_subflow_sosend;
	mptcp_subflow_usrreqs6.pru_rcvoob = pru_rcvoob_notsupp;
	/*
	 * Socket filters shouldn't attach/detach to/from this protosw
	 * since pr_protosw is to be used instead, which points to the
	 * real protocol; if they do, it is a bug and we should panic.
	 */
	mptcp_subflow_protosw6.pr_filter_head.tqh_first =
	    (struct socket_filter *)(uintptr_t)0xdeadbeefdeadbeef;
	mptcp_subflow_protosw6.pr_filter_head.tqh_last =
	    (struct socket_filter **)(uintptr_t)0xdeadbeefdeadbeef;
#endif /* INET6 */

	bzero(&mtcbinfo, sizeof(mtcbinfo));
	TAILQ_INIT(&mtcbinfo.mppi_pcbs);
	mtcbinfo.mppi_size = sizeof(struct mpp_mtp);
	if ((mtcbinfo.mppi_zone = zinit(mtcbinfo.mppi_size,
	    1024 * mtcbinfo.mppi_size, 8192, "mptcb")) == NULL) {
		panic("%s: unable to allocate MPTCP PCB zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mtcbinfo.mppi_zone, Z_CALLERACCT, FALSE);
	zone_change(mtcbinfo.mppi_zone, Z_EXPAND, TRUE);

	mtcbinfo.mppi_lock_grp_attr = lck_grp_attr_alloc_init();
	mtcbinfo.mppi_lock_grp = lck_grp_alloc_init("mppcb",
	    mtcbinfo.mppi_lock_grp_attr);
	mtcbinfo.mppi_lock_attr = lck_attr_alloc_init();
	lck_mtx_init(&mtcbinfo.mppi_lock, mtcbinfo.mppi_lock_grp,
	    mtcbinfo.mppi_lock_attr);

	mtcbinfo.mppi_gc = mptcp_gc;
	mtcbinfo.mppi_timer = mptcp_timer;

	/* attach to MP domain for garbage collection to take place */
	mp_pcbinfo_attach(&mtcbinfo);

	mptsub_zone_size = sizeof(struct mptsub);
	if ((mptsub_zone = zinit(mptsub_zone_size, 1024 * mptsub_zone_size,
	    8192, "mptsub")) == NULL) {
		panic("%s: unable to allocate MPTCP subflow zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptsub_zone, Z_CALLERACCT, FALSE);
	zone_change(mptsub_zone, Z_EXPAND, TRUE);

	mptopt_zone_size = sizeof(struct mptopt);
	if ((mptopt_zone = zinit(mptopt_zone_size, 128 * mptopt_zone_size,
	    1024, "mptopt")) == NULL) {
		panic("%s: unable to allocate MPTCP option zone\n", __func__);
		/* NOTREACHED */
	}
	zone_change(mptopt_zone, Z_CALLERACCT, FALSE);
	zone_change(mptopt_zone, Z_EXPAND, TRUE);

	mpt_subauth_entry_size = sizeof(struct mptcp_subf_auth_entry);
	if ((mpt_subauth_zone = zinit(mpt_subauth_entry_size,
	    1024 * mpt_subauth_entry_size, 8192, "mptauth")) == NULL) {
		panic("%s: unable to allocate MPTCP address auth zone \n",
		    __func__);
		/* NOTREACHED */
	}
	zone_change(mpt_subauth_zone, Z_CALLERACCT, FALSE);
	zone_change(mpt_subauth_zone, Z_EXPAND, TRUE);

	mptcp_log_handle = os_log_create("com.apple.xnu.net.mptcp", "mptcp");
}

int
mptcpstats_get_index_by_ifindex(struct mptcp_itf_stats *stats, int ifindex, boolean_t create)
{
	int i, index = -1;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (create && stats[i].ifindex == IFSCOPE_NONE) {
			if (index < 0) {
				index = i;
			}
			continue;
		}

		if (stats[i].ifindex == ifindex) {
			index = i;
			return index;
		}
	}

	if (index != -1) {
		stats[index].ifindex = ifindex;
	}

	return index;
}

static int
mptcpstats_get_index(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
{
	const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
	int index;

	if (ifp == NULL) {
		os_log_error(mptcp_log_handle, "%s - %lx: no ifp on subflow, state %u flags %#x\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
		    sototcpcb(mpts->mpts_socket)->t_state, mpts->mpts_flags);
		return -1;
	}

	index = mptcpstats_get_index_by_ifindex(stats, ifp->if_index, true);

	if (index != -1) {
		if (stats[index].is_expensive == 0) {
			stats[index].is_expensive = IFNET_IS_CELLULAR(ifp);
		}
	}

	return index;
}

void
mptcpstats_inc_switch(struct mptses *mpte, const struct mptsub *mpts)
{
	int index;

	tcpstat.tcps_mp_switches++;
	mpte->mpte_subflow_switches++;

	index = mptcpstats_get_index(mpte->mpte_itfstats, mpts);

	if (index != -1) {
		mpte->mpte_itfstats[index].switches++;
	}
}

/*
 * Flushes all recorded socket options from an MP socket.
 */
static void
mptcp_flush_sopts(struct mptses *mpte)
{
	struct mptopt *mpo, *tmpo;

	TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
		mptcp_sopt_remove(mpte, mpo);
		mptcp_sopt_free(mpo);
	}
	VERIFY(TAILQ_EMPTY(&mpte->mpte_sopts));
}

/*
 * Create an MPTCP session, called as a result of opening an MPTCP socket.
 */
int
mptcp_session_create(struct mppcb *mpp)
{
	struct mppcbinfo *mppi;
	struct mptses *mpte;
	struct mptcb *mp_tp;

	VERIFY(mpp != NULL);
	mppi = mpp->mpp_pcbinfo;
	VERIFY(mppi != NULL);

	__IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
	__IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);

	/* MPTCP Multipath PCB Extension */
	bzero(mpte, sizeof(*mpte));
	VERIFY(mpp->mpp_pcbe == NULL);
	mpp->mpp_pcbe = mpte;
	mpte->mpte_mppcb = mpp;
	mpte->mpte_mptcb = mp_tp;

	TAILQ_INIT(&mpte->mpte_sopts);
	TAILQ_INIT(&mpte->mpte_subflows);
	mpte->mpte_associd = SAE_ASSOCID_ANY;
	mpte->mpte_connid_last = SAE_CONNID_ANY;

	mptcp_init_urgency_timer(mpte);

	mpte->mpte_itfinfo = &mpte->_mpte_itfinfo[0];
	mpte->mpte_itfinfo_size = MPTE_ITFINFO_SIZE;

	if (mptcp_alternate_port) {
		mpte->mpte_alternate_port = htons(mptcp_alternate_port);
	}

	mpte->mpte_last_cellicon_set = tcp_now;

	/* MPTCP Protocol Control Block */
	bzero(mp_tp, sizeof(*mp_tp));
	mp_tp->mpt_mpte = mpte;
	mp_tp->mpt_state = MPTCPS_CLOSED;

	DTRACE_MPTCP1(session__create, struct mppcb *, mpp);

	return 0;
}

struct sockaddr *
mptcp_get_session_dst(struct mptses *mpte, boolean_t ipv6, boolean_t ipv4)
{
	if (!(mpte->mpte_flags & MPTE_UNICAST_IP)) {
		return &mpte->mpte_dst;
	}

	if (ipv6 && mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (ipv4 && mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/* The interface has neither IPv4 nor IPv6 routes. Give our best guess,
	 * meaning we prefer IPv6 over IPv4.
	 */
	if (mpte->mpte_dst_unicast_v6.sin6_family == AF_INET6) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v6;
	}

	if (mpte->mpte_dst_unicast_v4.sin_family == AF_INET) {
		return (struct sockaddr *)&mpte->mpte_dst_unicast_v4;
	}

	/* We don't yet have a unicast IP */
	return NULL;
}

static void
mptcpstats_get_bytes(struct mptses *mpte, boolean_t initial_cell,
    uint64_t *cellbytes, uint64_t *allbytes)
{
	int64_t mycellbytes = 0;
	uint64_t myallbytes = 0;
	int i;

	for (i = 0; i < MPTCP_ITFSTATS_SIZE; i++) {
		if (mpte->mpte_itfstats[i].is_expensive) {
			mycellbytes += mpte->mpte_itfstats[i].mpis_txbytes;
			mycellbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
		}

		myallbytes += mpte->mpte_itfstats[i].mpis_txbytes;
		myallbytes += mpte->mpte_itfstats[i].mpis_rxbytes;
	}

	if (initial_cell) {
		mycellbytes -= mpte->mpte_init_txbytes;
		mycellbytes -= mpte->mpte_init_rxbytes;
	}

	if (mycellbytes < 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: cellbytes is %lld\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mycellbytes);
		*cellbytes = 0;
		*allbytes = 0;
	} else {
		*cellbytes = mycellbytes;
		*allbytes = myallbytes;
	}
}

static void
mptcpstats_session_wrapup(struct mptses *mpte)
{
	boolean_t cell = mpte->mpte_initial_cell;

	switch (mpte->mpte_svctype) {
	case MPTCP_SVCTYPE_HANDOVER:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_handover_attempt++;

			if (cell && mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_cell++;

				if (mpte->mpte_used_wifi) {
					tcpstat.tcps_mptcp_handover_wifi_from_cell++;
				}
			} else if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_handover_success_wifi++;

				if (mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_handover_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_handover_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_handover_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_INTERACTIVE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		} else {
			tcpstat.tcps_mptcp_interactive_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_interactive_success++;

				if (!cell && mpte->mpte_used_cell) {
					tcpstat.tcps_mptcp_interactive_cell_from_wifi++;
				}
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_interactive_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_interactive_all_bytes += allbytes;
		}
		break;
	case MPTCP_SVCTYPE_AGGREGATE:
		if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
			tcpstat.tcps_mptcp_fp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_fp_aggregate_success++;
			}
		} else {
			tcpstat.tcps_mptcp_aggregate_attempt++;

			if (mpte->mpte_handshake_success) {
				tcpstat.tcps_mptcp_aggregate_success++;
			}
		}

		if (mpte->mpte_handshake_success) {
			uint64_t cellbytes;
			uint64_t allbytes;

			mptcpstats_get_bytes(mpte, cell, &cellbytes, &allbytes);

			tcpstat.tcps_mptcp_aggregate_cell_bytes += cellbytes;
			tcpstat.tcps_mptcp_aggregate_all_bytes += allbytes;
		}
		break;
	}

	if (cell && mpte->mpte_handshake_success && mpte->mpte_used_wifi) {
		tcpstat.tcps_mptcp_back_to_wifi++;
	}

	if (mpte->mpte_triggered_cell) {
		tcpstat.tcps_mptcp_triggered_cell++;
	}
}

/*
 * Destroy an MPTCP session.
 */
static void
mptcp_session_destroy(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;

	VERIFY(mp_tp != NULL);
	VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows) && mpte->mpte_numflows == 0);

	mptcpstats_session_wrapup(mpte);
	mptcp_unset_cellicon(mpte, NULL, mpte->mpte_cellicon_increments);
	mptcp_flush_sopts(mpte);

	if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
		_FREE(mpte->mpte_itfinfo, M_TEMP);
	}
	mpte->mpte_itfinfo = NULL;

	m_freem_list(mpte->mpte_reinjectq);

	os_log(mptcp_log_handle, "%s - %lx: Destroying session\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
}

boolean_t
mptcp_ok_to_create_subflows(struct mptcb *mp_tp)
{
	return mp_tp->mpt_state >= MPTCPS_ESTABLISHED &&
	       mp_tp->mpt_state < MPTCPS_FIN_WAIT_1 &&
	       !(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP);
}

static int
mptcp_synthesize_nat64(struct in6_addr *addr, uint32_t len,
    const struct in_addr *addrv4)
{
	static const struct in6_addr well_known_prefix = {
		.__u6_addr.__u6_addr8 = {0x00, 0x64, 0xff, 0x9b, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			                 0x00, 0x00, 0x00, 0x00},
	};
	const char *ptrv4 = (const char *)addrv4;
	char buf[MAX_IPv6_STR_LEN];
	char *ptr = (char *)addr;

	if (IN_ZERONET(ntohl(addrv4->s_addr)) ||            // 0.0.0.0/8 Source hosts on local network
	    IN_LOOPBACK(ntohl(addrv4->s_addr)) ||           // 127.0.0.0/8 Loopback
	    IN_LINKLOCAL(ntohl(addrv4->s_addr)) ||          // 169.254.0.0/16 Link Local
	    IN_DS_LITE(ntohl(addrv4->s_addr)) ||            // 192.0.0.0/29 DS-Lite
	    IN_6TO4_RELAY_ANYCAST(ntohl(addrv4->s_addr)) || // 192.88.99.0/24 6to4 Relay Anycast
	    IN_MULTICAST(ntohl(addrv4->s_addr)) ||          // 224.0.0.0/4 Multicast
	    INADDR_BROADCAST == addrv4->s_addr) {           // 255.255.255.255/32 Limited Broadcast
		return -1;
	}

	/* Check for the well-known prefix */
	if (len == NAT64_PREFIX_LEN_96 &&
	    IN6_ARE_ADDR_EQUAL(addr, &well_known_prefix)) {
		if (IN_PRIVATE(ntohl(addrv4->s_addr)) ||              // 10.0.0.0/8, 172.16.0.0/12, 192.168.0.0/16 Private-Use
		    IN_SHARED_ADDRESS_SPACE(ntohl(addrv4->s_addr))) { // 100.64.0.0/10 Shared Address Space
			return -1;
		}
	}

	switch (len) {
	case NAT64_PREFIX_LEN_96:
		memcpy(ptr + 12, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_64:
		memcpy(ptr + 9, ptrv4, 4);
		break;
	case NAT64_PREFIX_LEN_56:
		memcpy(ptr + 7, ptrv4, 1);
		memcpy(ptr + 9, ptrv4 + 1, 3);
		break;
	case NAT64_PREFIX_LEN_48:
		memcpy(ptr + 6, ptrv4, 2);
		memcpy(ptr + 9, ptrv4 + 2, 2);
		break;
	case NAT64_PREFIX_LEN_40:
		memcpy(ptr + 5, ptrv4, 3);
		memcpy(ptr + 9, ptrv4 + 3, 1);
		break;
	case NAT64_PREFIX_LEN_32:
		memcpy(ptr + 4, ptrv4, 4);
		break;
	default:
		panic("NAT64-prefix len is wrong: %u\n", len);
	}

	os_log_info(mptcp_log_handle, "%s: nat64prefix-len %u synthesized %s\n",
	    __func__, len,
	    inet_ntop(AF_INET6, (void *)addr, buf, sizeof(buf)));

	return 0;
}
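
/*
 * Worked example (illustrative only; not used by the code): with the
 * well-known 96-bit prefix 64:ff9b::/96 from RFC 6052, the IPv4 address
 * 192.0.2.1 (bytes c0 00 02 01) is copied into the last four bytes of the
 * IPv6 address, yielding 64:ff9b::c000:201, i.e. 64:ff9b::192.0.2.1.
 * For the shorter prefix lengths, RFC 6052 requires bits 64-71 (the "u"
 * octet) to stay zero, which is why the 64/56/48/40-bit cases above split
 * the copy around byte offset 8 and resume at offset 9.
 */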

static void
mptcp_trigger_cell_bringup(struct mptses *mpte)
{
	struct socket *mp_so = mptetoso(mpte);

	if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
		uuid_string_t uuidstr;
		int err;

		socket_unlock(mp_so, 0);
		err = necp_client_assert_bb_radio_manager(mpsotomppcb(mp_so)->necp_client_uuid,
		    TRUE);
		socket_lock(mp_so, 0);

		if (err == 0) {
			mpte->mpte_triggered_cell = 1;
		}

		uuid_unparse_upper(mpsotomppcb(mp_so)->necp_client_uuid, uuidstr);
		os_log_info(mptcp_log_handle, "%s - %lx: asked irat to bringup cell for uuid %s, err %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), uuidstr, err);
	} else {
		os_log_info(mptcp_log_handle, "%s - %lx: UUID is already null\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
	}
}

static boolean_t
mptcp_subflow_disconnecting(struct mptsub *mpts)
{
	if (mpts->mpts_socket->so_state & SS_ISDISCONNECTED) {
		return true;
	}

	if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED | MPTSF_CLOSE_REQD)) {
		return true;
	}

	if (sototcpcb(mpts->mpts_socket)->t_state == TCPS_CLOSED) {
		return true;
	}

	return false;
}

void
mptcp_check_subflows_and_add(struct mptses *mpte)
{
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	boolean_t cellular_viable = FALSE;
	boolean_t want_cellular = TRUE;
	uint32_t i;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (mptcp_get_session_dst(mpte, false, false) == NULL) {
		return;
	}

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		boolean_t need_to_ask_symptoms = FALSE, found = FALSE;
		struct mpt_itf_info *info;
		struct sockaddr_in6 nat64pre;
		struct sockaddr *dst;
		struct mptsub *mpts;
		struct ifnet *ifp;
		uint32_t ifindex;

		info = &mpte->mpte_itfinfo[i];

		ifindex = info->ifindex;
		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: itf %u no support %u hasv4 %u has v6 %u hasnat64 %u\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), info->ifindex, info->no_mptcp_support,
		    info->has_v4_conn, info->has_v6_conn, info->has_nat64_conn);

		if (info->no_mptcp_support) {
			continue;
		}

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_CELLULAR(ifp)) {
			cellular_viable = TRUE;
		}

		TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
			const struct ifnet *subifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
			struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

			if (subifp == NULL) {
				continue;
			}

			/*
			 * If there is at least one functioning subflow on WiFi
			 * and we are checking for the cell interface, then
			 * we always need to ask symptoms for permission as
			 * cell is triggered even if WiFi is available.
			 */
			if (!IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    IFNET_IS_CELLULAR(ifp)) {
				need_to_ask_symptoms = TRUE;
			}

			/*
			 * In Handover mode, only create cell subflow if
			 * 1. Wi-Fi Assist is active
			 * 2. Symptoms marked WiFi as weak
			 * 3. We are experiencing RTOs or we are not sending data.
			 *
			 * This covers the scenario, where:
			 * 1. We send and get retransmission timeouts (thus,
			 *    we confirmed that WiFi is indeed bad).
			 * 2. We are not sending and the server tries to send.
			 *    Establishing a cell-subflow gives the server a
			 *    chance to send us some data over cell if WiFi
			 *    is dead. We establish the subflow with the
			 *    backup-bit set, so the server is not allowed to
			 *    send on this subflow as long as WiFi is providing
			 *    good performance.
			 */
			if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER &&
			    !IFNET_IS_CELLULAR(subifp) &&
			    !mptcp_subflow_disconnecting(mpts) &&
			    (mptcp_is_wifi_unusable_for_session(mpte) == 0 ||
			    (tp->t_rxtshift < mptcp_fail_thresh * 2 && mptetoso(mpte)->so_snd.sb_cc))) {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: handover, wifi state %d rxt %u first-party %u sb_cc %u ifindex %u this %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mptcp_is_wifi_unusable_for_session(mpte),
				    tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY),
				    mptetoso(mpte)->so_snd.sb_cc,
				    ifindex, subifp->if_index,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);
				found = TRUE;

				/* We found a proper subflow on WiFi - no need for cell */
				want_cellular = FALSE;
				break;
			} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
				uint64_t time_now = mach_continuous_time();

				os_log(mptcp_log_handle,
				    "%s - %lx: target-based: %llu now %llu unusable? %d cell %u sostat %#x mpts_flags %#x tcp-state %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_time_target,
				    time_now, mptcp_is_wifi_unusable_for_session(mpte),
				    IFNET_IS_CELLULAR(subifp), mpts->mpts_socket->so_state,
				    mpts->mpts_flags, sototcpcb(mpts->mpts_socket)->t_state);

				if (!IFNET_IS_CELLULAR(subifp) &&
				    !mptcp_subflow_disconnecting(mpts) &&
				    (mpte->mpte_time_target == 0 ||
				    (int64_t)(mpte->mpte_time_target - time_now) > 0 ||
				    !mptcp_is_wifi_unusable_for_session(mpte))) {
					found = TRUE;

					want_cellular = FALSE;
					break;
				}
			} else {
				os_log_debug(mptcp_log_handle,
				    "%s - %lx: svc %u cell %u flags %#x unusable %d rtx %u first %u sbcc %u rtt %u rttvar %u rto %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    mpte->mpte_svctype, IFNET_IS_CELLULAR(subifp), mpts->mpts_flags,
				    mptcp_is_wifi_unusable_for_session(mpte), tp->t_rxtshift,
				    !!(mpte->mpte_flags & MPTE_FIRSTPARTY), mptetoso(mpte)->so_snd.sb_cc,
				    tp->t_srtt >> TCP_RTT_SHIFT,
				    tp->t_rttvar >> TCP_RTTVAR_SHIFT,
				    tp->t_rxtcur);
			}

			if (subifp->if_index == ifindex &&
			    !mptcp_subflow_disconnecting(mpts)) {
				/*
				 * We found a subflow on this interface.
				 * No need to create a new one.
				 */
				found = TRUE;
				break;
			}
		}

		if (found) {
			continue;
		}

		if (need_to_ask_symptoms &&
		    !(mpte->mpte_flags & MPTE_FIRSTPARTY) &&
		    !(mpte->mpte_flags & MPTE_ACCESS_GRANTED) &&
		    mptcp_developer_mode == 0) {
			mptcp_ask_symptoms(mpte);
			return;
		}

		dst = mptcp_get_session_dst(mpte, info->has_v6_conn, info->has_v4_conn);

		if (dst->sa_family == AF_INET &&
		    !info->has_v4_conn && info->has_nat64_conn) {
			struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
			int error, j;

			bzero(&nat64pre, sizeof(struct sockaddr_in6));

			error = ifnet_get_nat64prefix(ifp, nat64prefixes);
			if (error) {
				os_log_error(mptcp_log_handle, "%s - %lx: no NAT64-prefix on itf %s, error %d\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ifp->if_name, error);
				continue;
			}

			for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
				if (nat64prefixes[j].prefix_len != 0) {
					break;
				}
			}

			VERIFY(j < NAT64_MAX_NUM_PREFIXES);

			error = mptcp_synthesize_nat64(&nat64prefixes[j].ipv6_prefix,
			    nat64prefixes[j].prefix_len,
			    &((struct sockaddr_in *)(void *)dst)->sin_addr);
			if (error != 0) {
				os_log_info(mptcp_log_handle, "%s - %lx: cannot synthesize this addr\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
				continue;
			}

			memcpy(&nat64pre.sin6_addr,
			    &nat64prefixes[j].ipv6_prefix,
			    sizeof(nat64pre.sin6_addr));
			nat64pre.sin6_len = sizeof(struct sockaddr_in6);
			nat64pre.sin6_family = AF_INET6;
			nat64pre.sin6_port = ((struct sockaddr_in *)(void *)dst)->sin_port;
			nat64pre.sin6_flowinfo = 0;
			nat64pre.sin6_scope_id = 0;

			dst = (struct sockaddr *)&nat64pre;
		}

		/* Initial subflow started on a NAT64'd address? */
		if (!(mpte->mpte_flags & MPTE_UNICAST_IP) &&
		    mpte->mpte_dst.sa_family == AF_INET6 &&
		    mpte->mpte_dst_v4_nat64.sin_family == AF_INET) {
			dst = (struct sockaddr *)&mpte->mpte_dst_v4_nat64;
		}

		if (dst->sa_family == AF_INET && !info->has_v4_conn) {
			continue;
		}
		if (dst->sa_family == AF_INET6 && !info->has_v6_conn) {
			continue;
		}

		mptcp_subflow_add(mpte, NULL, dst, ifindex, NULL);
	}

	if (!cellular_viable && want_cellular) {
		/* Trigger Cell Bringup */
		mptcp_trigger_cell_bringup(mpte);
	}
}

static void
mptcp_remove_cell_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;
	boolean_t found = false;

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		/* We have a functioning subflow on WiFi. No need for cell! */
		if (mpts->mpts_flags & MPTSF_CONNECTED &&
		    !mptcp_subflow_disconnecting(mpts)) {
			found = true;
		}
	}

	/* Didn't find a functioning subflow on WiFi - stay on cell */
	if (!found) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;

		/* Only remove cellular subflows */
		if (ifp == NULL || !IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		os_log(mptcp_log_handle, "%s - %lx: removing cell subflow\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));

		soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
	}

	return;
}

/* In handover mode, remove the cellular subflows once a working WiFi subflow exists */
static void
mptcp_handover_subflows_remove(struct mptses *mpte)
{
	int wifi_unusable = mptcp_is_wifi_unusable_for_session(mpte);
	boolean_t found_working_subflow = false;
	struct mptsub *mpts;

	/*
	 * Look for a subflow that is on a non-cellular interface
	 * and actually works (aka, no retransmission timeout).
	 */
	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		struct socket *so;
		struct tcpcb *tp;

		if (ifp == NULL || IFNET_IS_CELLULAR(ifp)) {
			continue;
		}

		so = mpts->mpts_socket;
		tp = sototcpcb(so);

		if (!(mpts->mpts_flags & MPTSF_CONNECTED) ||
		    tp->t_state != TCPS_ESTABLISHED) {
			continue;
		}

		os_log_debug(mptcp_log_handle, "%s - %lx: rxt %u sb_cc %u unusable %d\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), tp->t_rxtshift, mptetoso(mpte)->so_snd.sb_cc, wifi_unusable);

		/* Is this subflow in good condition? */
		if (tp->t_rxtshift == 0 && mptetoso(mpte)->so_snd.sb_cc) {
			found_working_subflow = true;
		}

		/* Or WiFi is fine */
		if (!wifi_unusable) {
			found_working_subflow = true;
		}
	}

	/*
	 * Couldn't find a working subflow, let's not remove those on a cellular
	 * interface.
	 */
	if (!found_working_subflow) {
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}

static void
mptcp_targetbased_subflows_remove(struct mptses *mpte)
{
	uint64_t time_now = mach_continuous_time();

	if (mpte->mpte_time_target != 0 &&
	    (int64_t)(mpte->mpte_time_target - time_now) <= 0 &&
	    mptcp_is_wifi_unusable_for_session(mpte)) {
		/* WiFi is bad and we are below the target - don't remove any subflows */
		return;
	}

	mptcp_remove_cell_subflows(mpte);
}

/*
 * Based on the MPTCP Service-type and the state of the subflows, we
 * will destroy subflows here.
 */
void
mptcp_check_subflows_and_remove(struct mptses *mpte)
{
	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	socket_lock_assert_owned(mptetoso(mpte));

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		mptcp_handover_subflows_remove(mpte);
	}

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
		mptcp_targetbased_subflows_remove(mpte);
	}
}

static void
mptcp_remove_subflows(struct mptses *mpte)
{
	struct mptsub *mpts, *tmpts;

	if (!mptcp_ok_to_create_subflows(mpte->mpte_mptcb)) {
		return;
	}

	TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
		const struct ifnet *ifp = sotoinpcb(mpts->mpts_socket)->inp_last_outifp;
		boolean_t found = false;
		uint32_t ifindex;
		uint32_t i;

		if (mpts->mpts_flags & MPTSF_CLOSE_REQD) {
			mpts->mpts_flags &= ~MPTSF_CLOSE_REQD;

			os_log(mptcp_log_handle, "%s - %lx: itf %u close_reqd last itf %d\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope,
			    ifp ? ifp->if_index : -1);
			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);

			continue;
		}

		if (ifp == NULL && mpts->mpts_ifscope == IFSCOPE_NONE) {
			continue;
		}

		if (ifp) {
			ifindex = ifp->if_index;
		} else {
			ifindex = mpts->mpts_ifscope;
		}

		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
				continue;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				if (mpts->mpts_dst.sa_family == AF_INET6 &&
				    (mpte->mpte_itfinfo[i].has_v6_conn || mpte->mpte_itfinfo[i].has_nat64_conn)) {
					found = true;
					break;
				}

				if (mpts->mpts_dst.sa_family == AF_INET &&
				    mpte->mpte_itfinfo[i].has_v4_conn) {
					found = true;
					break;
				}
			}
		}

		if (!found) {
			os_log(mptcp_log_handle, "%s - %lx: itf %u killing %#x\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    ifindex, mpts->mpts_flags);

			soevent(mpts->mpts_socket,
			    SO_FILT_HINT_LOCKED | SO_FILT_HINT_NOSRCADDR);
		}
	}
}

static void
mptcp_create_subflows(__unused void *arg)
{
	struct mppcb *mpp;

	/*
	 * Start with clearing, because we might be processing connections
	 * while a new event comes in.
	 */
	if (OSTestAndClear(0x01, &mptcp_create_subflows_scheduled)) {
		os_log_error(mptcp_log_handle, "%s: bit was already cleared!\n", __func__);
	}

	/* Iterate over all MPTCP connections */

	lck_mtx_lock(&mtcbinfo.mppi_lock);

	TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
		struct socket *mp_so = mpp->mpp_socket;
		struct mptses *mpte = mpp->mpp_pcbe;

		if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
			continue;
		}

		socket_lock(mp_so, 1);
		VERIFY(mp_so->so_usecount > 0);

		mpp->mpp_flags &= ~MPP_CREATE_SUBFLOWS;

		mptcp_check_subflows_and_add(mpte);
		mptcp_remove_subflows(mpte);

		mp_so->so_usecount--; /* See mptcp_sched_create_subflows */
		socket_unlock(mp_so, 1);
	}

	lck_mtx_unlock(&mtcbinfo.mppi_lock);
}

/*
 * We need this because we are coming from an NECP-event. This event gets posted
 * while holding NECP-locks. The creation of the subflow however leads us back
 * into NECP (e.g., to add the necp_cb and also from tcp_connect).
 * So, we would deadlock there as we already hold the NECP-lock.
 *
 * So, let's schedule this separately. It also gives NECP the chance to make
 * progress, without having to wait for MPTCP to finish its subflow creation.
 */
void
mptcp_sched_create_subflows(struct mptses *mpte)
{
	struct mppcb *mpp = mpte->mpte_mppcb;
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct socket *mp_so = mpp->mpp_socket;

	if (!mptcp_ok_to_create_subflows(mp_tp)) {
		os_log_debug(mptcp_log_handle, "%s - %lx: not a good time for subflows, state %u flags %#x",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state, mp_tp->mpt_flags);
		return;
	}

	if (!(mpp->mpp_flags & MPP_CREATE_SUBFLOWS)) {
		mp_so->so_usecount++; /* To prevent it from being free'd in-between */
		mpp->mpp_flags |= MPP_CREATE_SUBFLOWS;
	}

	if (OSTestAndSet(0x01, &mptcp_create_subflows_scheduled)) {
		return;
	}

	/* Do the call in 100ms to allow NECP to schedule it on all sockets */
	timeout(mptcp_create_subflows, NULL, hz / 10);
}

/*
 * Allocate an MPTCP socket option structure.
 */
struct mptopt *
mptcp_sopt_alloc(int how)
{
	struct mptopt *mpo;

	mpo = (how == M_WAITOK) ? zalloc(mptopt_zone) :
	    zalloc_noblock(mptopt_zone);
	if (mpo != NULL) {
		bzero(mpo, mptopt_zone_size);
	}

	return mpo;
}

/*
 * Free an MPTCP socket option structure.
 */
void
mptcp_sopt_free(struct mptopt *mpo)
{
	VERIFY(!(mpo->mpo_flags & MPOF_ATTACHED));

	zfree(mptopt_zone, mpo);
}

/*
 * Add a socket option to the MPTCP socket option list.
 */
void
mptcp_sopt_insert(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	mpo->mpo_flags |= MPOF_ATTACHED;
	TAILQ_INSERT_TAIL(&mpte->mpte_sopts, mpo, mpo_entry);
}

/*
 * Remove a socket option from the MPTCP socket option list.
 */
void
mptcp_sopt_remove(struct mptses *mpte, struct mptopt *mpo)
{
	socket_lock_assert_owned(mptetoso(mpte));
	VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
	mpo->mpo_flags &= ~MPOF_ATTACHED;
	TAILQ_REMOVE(&mpte->mpte_sopts, mpo, mpo_entry);
}

/*
 * Search for an existing <sopt_level,sopt_name> socket option.
 */
struct mptopt *
mptcp_sopt_find(struct mptses *mpte, struct sockopt *sopt)
{
	struct mptopt *mpo;

	socket_lock_assert_owned(mptetoso(mpte));

	TAILQ_FOREACH(mpo, &mpte->mpte_sopts, mpo_entry) {
		if (mpo->mpo_level == sopt->sopt_level &&
		    mpo->mpo_name == sopt->sopt_name) {
			break;
		}
	}
	return mpo;
}

/*
 * Allocate an MPTCP subflow structure.
 */
static struct mptsub *
mptcp_subflow_alloc(void)
{
	struct mptsub *mpts = zalloc(mptsub_zone);

	if (mpts == NULL) {
		return NULL;
	}

	bzero(mpts, mptsub_zone_size);
	return mpts;
}

/*
 * Deallocate a subflow structure, called when all of the references held
 * on it have been released. This implies that the subflow has been deleted.
 */
static void
mptcp_subflow_free(struct mptsub *mpts)
{
	VERIFY(mpts->mpts_refcnt == 0);
	VERIFY(!(mpts->mpts_flags & MPTSF_ATTACHED));
	VERIFY(mpts->mpts_mpte == NULL);
	VERIFY(mpts->mpts_socket == NULL);

	if (mpts->mpts_src != NULL) {
		FREE(mpts->mpts_src, M_SONAME);
		mpts->mpts_src = NULL;
	}

	zfree(mptsub_zone, mpts);
}

static void
mptcp_subflow_addref(struct mptsub *mpts)
{
	if (++mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p wraparound refcnt\n", __func__, mpts);
	}
	/* NOTREACHED */
}

static void
mptcp_subflow_remref(struct mptsub *mpts)
{
	if (mpts->mpts_refcnt == 0) {
		panic("%s: mpts %p negative refcnt\n", __func__, mpts);
		/* NOTREACHED */
	}
	if (--mpts->mpts_refcnt > 0) {
		return;
	}

	/* callee will unlock and destroy lock */
	mptcp_subflow_free(mpts);
}

static void
mptcp_subflow_attach(struct mptses *mpte, struct mptsub *mpts, struct socket *so)
{
	struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
	struct tcpcb *tp = sototcpcb(so);

	/*
	 * From this moment on, the subflow is linked to the MPTCP-connection.
	 * Locking,... happens now at the MPTCP-layer
	 */
	tp->t_mptcb = mpte->mpte_mptcb;
	so->so_flags |= SOF_MP_SUBFLOW;
	mp_so->so_usecount++;

	/*
	 * Insert the subflow into the list, and associate the MPTCP PCB
	 * as well as the subflow socket. From this point on, removing
	 * the subflow needs to be done via mptcp_subflow_del().
	 */
	TAILQ_INSERT_TAIL(&mpte->mpte_subflows, mpts, mpts_entry);
	mpte->mpte_numflows++;

	atomic_bitset_32(&mpts->mpts_flags, MPTSF_ATTACHED);
	mpts->mpts_mpte = mpte;
	mpts->mpts_socket = so;
	tp->t_mpsub = mpts;
	mptcp_subflow_addref(mpts);     /* for being in MPTCP subflow list */
	mptcp_subflow_addref(mpts);     /* for subflow socket */
}

static void
mptcp_subflow_necp_cb(void *handle, __unused int action,
    __unused uint32_t interface_index,
    uint32_t necp_flags, bool *viable)
{
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct inpcb *inp = (struct inpcb *)handle;
	struct socket *so = inp->inp_socket;
	struct mptsub *mpts;
	struct mptses *mpte;

	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action != NECP_CLIENT_CBACTION_NONVIABLE) {
		return;
	}

	/*
	 * The socket is being garbage-collected. There is nothing to be done
	 * here.
	 */
	if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) == WNT_STOPUSING) {
		return;
	}

	socket_lock(so, 1);

	/* Check again after we acquired the lock. */
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		goto out;
	}

	mpte = tptomptp(sototcpcb(so))->mpt_mpte;
	mpts = sototcpcb(so)->t_mpsub;

	os_log_debug(mptcp_log_handle, "%s - %lx: Subflow on itf %u became non-viable, power %u",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_ifscope, low_power);

	mpts->mpts_flags |= MPTSF_CLOSE_REQD;

	mptcp_sched_create_subflows(mpte);

	if ((mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER ||
	    mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) &&
	    viable != NULL) {
		*viable = 1;
	}

out:
	socket_unlock(so, 1);
}
1592
39236c6e
A
1593/*
1594 * Create an MPTCP subflow socket.
1595 */
1596static int
1597mptcp_subflow_socreate(struct mptses *mpte, struct mptsub *mpts, int dom,
5ba3f43e 1598 struct socket **so)
39236c6e 1599{
5ba3f43e 1600 lck_mtx_t *subflow_mtx;
39236c6e 1601 struct mptopt smpo, *mpo, *tmpo;
5ba3f43e 1602 struct proc *p;
39236c6e
A
1603 struct socket *mp_so;
1604 int error;
1605
1606 *so = NULL;
cb323159 1607
5ba3f43e
A
1608 mp_so = mptetoso(mpte);
1609
1610 p = proc_find(mp_so->last_pid);
1611 if (p == PROC_NULL) {
cb323159
A
1612 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1613 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);
5ba3f43e 1614
0a7de745 1615 return ESRCH;
5ba3f43e 1616 }
39236c6e
A
1617
1618 /*
1619 * Create the subflow socket (multipath subflow, non-blocking.)
1620 *
1621 * This will cause SOF_MP_SUBFLOW socket flag to be set on the subflow
1622 * socket; it will be cleared when the socket is peeled off or closed.
1623 * It also indicates to the underlying TCP to handle MPTCP options.
1624 * A multipath subflow socket implies SS_NOFDREF state.
1625 */
5ba3f43e
A
1626
1627 /*
1628 * Unlock, because tcp_usr_attach ends up in in_pcballoc, which takes
1629 * the ipi-lock. We cannot hold the socket-lock at that point.
1630 */
cb323159 1631 socket_unlock(mp_so, 0);
5ba3f43e 1632 error = socreate_internal(dom, so, SOCK_STREAM, IPPROTO_TCP, p,
cb323159
A
1633 SOCF_MPTCP, PROC_NULL);
1634 socket_lock(mp_so, 0);
5ba3f43e 1635 if (error) {
cb323159
A
1636 os_log_error(mptcp_log_handle, "%s - %lx: unable to create subflow socket error %d\n",
1637 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
5ba3f43e
A
1638
1639 proc_rele(p);
1640
1641 mptcp_subflow_free(mpts);
0a7de745 1642 return error;
39236c6e
A
1643 }
1644
5ba3f43e
A
1645 /*
1646 * We need to protect the setting of SOF_MP_SUBFLOW with a lock, because
1647 * this marks the moment of lock-switch from the TCP-lock to the MPTCP-lock.
1648 * That is also why we need to get the lock with pr_getlock, as after
1649 * setting the flag, socket_unlock will work on the MPTCP-level lock.
1650 */
1651 subflow_mtx = ((*so)->so_proto->pr_getlock)(*so, 0);
1652 lck_mtx_lock(subflow_mtx);
1653
1654 /*
1655 * Must be the first thing we do, to make sure all pointers for this
1656 * subflow are set.
1657 */
1658 mptcp_subflow_attach(mpte, mpts, *so);
1659
1660 /*
1661 * A multipath subflow socket is used internally in the kernel,
1662 * therefore it does not have a file descriptor associated by
1663 * default.
1664 */
1665 (*so)->so_state |= SS_NOFDREF;
1666
1667 lck_mtx_unlock(subflow_mtx);
39236c6e
A
1668
1669 /* prevent the socket buffers from being compressed */
1670 (*so)->so_rcv.sb_flags |= SB_NOCOMPRESS;
1671 (*so)->so_snd.sb_flags |= SB_NOCOMPRESS;
1672
490019cf 1673 /* Inherit preconnect and TFO data flags */
0a7de745 1674 if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA) {
490019cf 1675 (*so)->so_flags1 |= SOF1_PRECONNECT_DATA;
0a7de745
A
1676 }
1677 if (mp_so->so_flags1 & SOF1_DATA_IDEMPOTENT) {
490019cf 1678 (*so)->so_flags1 |= SOF1_DATA_IDEMPOTENT;
0a7de745 1679 }
490019cf 1680
5ba3f43e
A
1681 /* Inherit uuid and create the related flow. */
1682 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1683 struct mptcb *mp_tp = mpte->mpte_mptcb;
1684
1685 sotoinpcb(*so)->necp_cb = mptcp_subflow_necp_cb;
1686
1687 /*
1688 * A note on the unlock: With MPTCP, necp_client_register_socket_flow
1689 * is called multiple times. This is problematic, because the
1690 * lock-ordering guarantee (first necp-locks, then socket-locks)
1691 * is no longer respected. So, we need to
1692 * unlock here.
1693 */
cb323159 1694 socket_unlock(mp_so, 0);
5ba3f43e
A
1695 error = necp_client_register_socket_flow(mp_so->last_pid,
1696 mpsotomppcb(mp_so)->necp_client_uuid, sotoinpcb(*so));
cb323159 1697 socket_lock(mp_so, 0);
5ba3f43e 1698
0a7de745 1699 if (error) {
cb323159
A
1700 os_log_error(mptcp_log_handle, "%s - %lx: necp_client_register_socket_flow failed with error %d\n",
1701 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1702
5ba3f43e 1703 goto out_err;
0a7de745 1704 }
5ba3f43e
A
1705
1706 /* Possible state-change during the unlock above */
1707 if (mp_tp->mpt_state >= MPTCPS_TIME_WAIT ||
0a7de745 1708 (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
cb323159
A
1709 os_log_error(mptcp_log_handle, "%s - %lx: state changed during unlock: %u flags %#x\n",
1710 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1711 mp_tp->mpt_state, mp_tp->mpt_flags);
1712
1713 error = EINVAL;
5ba3f43e 1714 goto out_err;
0a7de745 1715 }
5ba3f43e
A
1716
1717 uuid_copy(sotoinpcb(*so)->necp_client_uuid, mpsotomppcb(mp_so)->necp_client_uuid);
cb323159
A
1718 }
1719
1720 /* Needs to happen prior to the delegation! */
1721 (*so)->last_pid = mp_so->last_pid;
1722
1723 if (mp_so->so_flags & SOF_DELEGATED) {
1724 if (mpte->mpte_epid) {
1725 error = so_set_effective_pid(*so, mpte->mpte_epid, p, false);
1726 if (error) {
1727 os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_pid failed with error %d\n",
1728 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1729 goto out_err;
1730 }
1731 }
1732 if (!uuid_is_null(mpte->mpte_euuid)) {
1733 error = so_set_effective_uuid(*so, mpte->mpte_euuid, p, false);
1734 if (error) {
1735 os_log_error(mptcp_log_handle, "%s - %lx: so_set_effective_uuid failed with error %d\n",
1736 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
1737 goto out_err;
1738 }
1739 }
5ba3f43e
A
1740 }
1741
1742 /* inherit the other socket options */
0a7de745 1743 bzero(&smpo, sizeof(smpo));
39236c6e
A
1744 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1745 smpo.mpo_level = SOL_SOCKET;
1746 smpo.mpo_intval = 1;
1747
1748 /* disable SIGPIPE */
1749 smpo.mpo_name = SO_NOSIGPIPE;
0a7de745 1750 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1751 goto out_err;
0a7de745 1752 }
39236c6e
A
1753
1754 /* find out if the subflow's source address goes away */
1755 smpo.mpo_name = SO_NOADDRERR;
0a7de745 1756 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1757 goto out_err;
0a7de745 1758 }
39236c6e 1759
5ba3f43e
A
1760 if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED) {
1761 /*
1762 * On secondary subflows we might need to set the cell-fallback
1763 * flag (see conditions in mptcp_subflow_sosetopt).
1764 */
1765 smpo.mpo_level = SOL_SOCKET;
1766 smpo.mpo_name = SO_MARK_CELLFALLBACK;
1767 smpo.mpo_intval = 1;
0a7de745 1768 if ((error = mptcp_subflow_sosetopt(mpte, mpts, &smpo)) != 0) {
5ba3f43e 1769 goto out_err;
0a7de745 1770 }
5ba3f43e 1771 }
39236c6e
A
1772
1773 /* replay setsockopt(2) on the subflow sockets for eligible options */
1774 TAILQ_FOREACH_SAFE(mpo, &mpte->mpte_sopts, mpo_entry, tmpo) {
1775 int interim;
1776
0a7de745 1777 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
39236c6e 1778 continue;
0a7de745 1779 }
39236c6e
A
1780
1781 /*
1782 * Skip those that are handled internally; these options
1783 * should not have been recorded and marked with the
1784 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1785 */
1786 if (mpo->mpo_level == SOL_SOCKET &&
1787 (mpo->mpo_name == SO_NOSIGPIPE ||
1788 mpo->mpo_name == SO_NOADDRERR ||
0a7de745 1789 mpo->mpo_name == SO_KEEPALIVE)) {
39236c6e 1790 continue;
0a7de745 1791 }
39236c6e
A
1792
1793 interim = (mpo->mpo_flags & MPOF_INTERIM);
5ba3f43e 1794 if (mptcp_subflow_sosetopt(mpte, mpts, mpo) != 0 && interim) {
cb323159
A
1795 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s val %d interim record removed\n",
1796 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
5ba3f43e 1797 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
cb323159 1798 mpo->mpo_intval);
39236c6e
A
1799 mptcp_sopt_remove(mpte, mpo);
1800 mptcp_sopt_free(mpo);
1801 continue;
1802 }
1803 }
1804
1805 /*
1806 * We need to receive everything that the subflow socket has,
1807 * so use a customized socket receive function. We will undo
1808 * this when the socket is peeled off or closed.
1809 */
39236c6e
A
1810 switch (dom) {
1811 case PF_INET:
1812 (*so)->so_proto = &mptcp_subflow_protosw;
1813 break;
1814#if INET6
1815 case PF_INET6:
1816 (*so)->so_proto = (struct protosw *)&mptcp_subflow_protosw6;
1817 break;
1818#endif /* INET6 */
1819 default:
1820 VERIFY(0);
1821 /* NOTREACHED */
1822 }
1823
5ba3f43e
A
1824 proc_rele(p);
1825
1826 DTRACE_MPTCP3(subflow__create, struct mptses *, mpte,
1827 int, dom, int, error);
1828
0a7de745 1829 return 0;
39236c6e 1830
5ba3f43e
A
1831out_err:
1832 mptcp_subflow_abort(mpts, error);
1833
1834 proc_rele(p);
1835
0a7de745 1836 return error;
39236c6e
A
1837}
1838
1839/*
1840 * Close an MPTCP subflow socket.
1841 *
1842 * Note that this may be called on an embryonic subflow, and the only
1843 * thing that is guaranteed valid is the protocol-user request.
1844 */
5ba3f43e
A
1845static void
1846mptcp_subflow_soclose(struct mptsub *mpts)
39236c6e 1847{
5ba3f43e
A
1848 struct socket *so = mpts->mpts_socket;
1849
0a7de745 1850 if (mpts->mpts_flags & MPTSF_CLOSED) {
5ba3f43e 1851 return;
0a7de745 1852 }
39236c6e 1853
5ba3f43e 1854 VERIFY(so != NULL);
39236c6e 1855 VERIFY(so->so_flags & SOF_MP_SUBFLOW);
0a7de745 1856 VERIFY((so->so_state & (SS_NBIO | SS_NOFDREF)) == (SS_NBIO | SS_NOFDREF));
39236c6e 1857
39236c6e
A
1858 DTRACE_MPTCP5(subflow__close, struct mptsub *, mpts,
1859 struct socket *, so,
1860 struct sockbuf *, &so->so_rcv,
1861 struct sockbuf *, &so->so_snd,
1862 struct mptses *, mpts->mpts_mpte);
1863
5ba3f43e
A
1864 mpts->mpts_flags |= MPTSF_CLOSED;
1865
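	/*
	 * If nobody else holds a retain count on the subflow socket, close it
	 * for good right away; otherwise only drop our use-count reference
	 * and leave the final close to the remaining holder.
	 */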
1866 if (so->so_retaincnt == 0) {
1867 soclose_locked(so);
1868
1869 return;
1870 } else {
1871 VERIFY(so->so_usecount > 0);
1872 so->so_usecount--;
1873 }
1874
1875 return;
39236c6e
A
1876}
1877
1878/*
1879 * Connect an MPTCP subflow socket.
1880 *
5ba3f43e
A
1881 * Note that in the pending connect case, the subflow socket may have been
1882 * bound to an interface and/or a source IP address which may no longer be
1883 * around by the time this routine is called; in that case the connect attempt
1884 * will most likely fail.
39236c6e
A
1885 */
1886static int
1887mptcp_subflow_soconnectx(struct mptses *mpte, struct mptsub *mpts)
1888{
5ba3f43e
A
1889 char dbuf[MAX_IPv6_STR_LEN];
1890 struct socket *mp_so, *so;
1891 struct mptcb *mp_tp;
1892 struct sockaddr *dst;
1893 struct proc *p;
a39ff7e2 1894 int af, error, dport;
39236c6e 1895
5ba3f43e
A
1896 mp_so = mptetoso(mpte);
1897 mp_tp = mpte->mpte_mptcb;
a39ff7e2
A
1898 so = mpts->mpts_socket;
1899 af = mpts->mpts_dst.sa_family;
1900 dst = &mpts->mpts_dst;
1901
0a7de745 1902 VERIFY((mpts->mpts_flags & (MPTSF_CONNECTING | MPTSF_CONNECTED)) == MPTSF_CONNECTING);
a39ff7e2
A
1903 VERIFY(mpts->mpts_socket != NULL);
1904 VERIFY(af == AF_INET || af == AF_INET6);
1905
1906 if (af == AF_INET) {
0a7de745 1907 inet_ntop(af, &SIN(dst)->sin_addr.s_addr, dbuf, sizeof(dbuf));
a39ff7e2
A
1908 dport = ntohs(SIN(dst)->sin_port);
1909 } else {
0a7de745 1910 inet_ntop(af, &SIN6(dst)->sin6_addr, dbuf, sizeof(dbuf));
a39ff7e2
A
1911 dport = ntohs(SIN6(dst)->sin6_port);
1912 }
1913
1914 os_log_info(mptcp_log_handle,
cb323159
A
1915 "%s - %lx: ifindex %u dst %s:%d pended %u\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
1916 mpts->mpts_ifscope, dbuf, dport, !!(mpts->mpts_flags & MPTSF_CONNECT_PENDING));
39236c6e 1917
5ba3f43e
A
1918 p = proc_find(mp_so->last_pid);
1919 if (p == PROC_NULL) {
cb323159
A
1920 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
1921 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_so->last_pid);
39236c6e 1922
0a7de745 1923 return ESRCH;
39236c6e
A
1924 }
1925
1926 mpts->mpts_flags &= ~MPTSF_CONNECT_PENDING;
1927
fe8ab488 1928 mptcp_attach_to_subf(so, mpte->mpte_mptcb, mpte->mpte_addrid_last);
39037602 1929
39236c6e 1930 /* connect the subflow socket */
5ba3f43e
A
1931 error = soconnectxlocked(so, mpts->mpts_src, &mpts->mpts_dst,
1932 p, mpts->mpts_ifscope,
1933 mpte->mpte_associd, NULL, 0, NULL, 0, NULL, NULL);
1934
1935 mpts->mpts_iss = sototcpcb(so)->iss;
1936
1937 /* See tcp_connect_complete */
1938 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED &&
1939 (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1940 mp_tp->mpt_sndwnd = sototcpcb(so)->snd_wnd;
1941 }
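	/*
	 * Presumably this lets preconnect/TFO data be sent before an
	 * MPTCP-level window has been learned, mirroring tcp_connect_complete().
	 */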
39236c6e 1942
fe8ab488
A
1943 /* Allocate a unique address id per subflow */
1944 mpte->mpte_addrid_last++;
0a7de745 1945 if (mpte->mpte_addrid_last == 0) {
fe8ab488 1946 mpte->mpte_addrid_last++;
0a7de745 1947 }
fe8ab488 1948
5ba3f43e
A
1949 proc_rele(p);
1950
39236c6e
A
1951 DTRACE_MPTCP3(subflow__connect, struct mptses *, mpte,
1952 struct mptsub *, mpts, int, error);
0a7de745 1953 if (error) {
cb323159
A
1954 os_log_error(mptcp_log_handle, "%s - %lx: connectx failed with error %d ifscope %u\n",
1955 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error, mpts->mpts_ifscope);
0a7de745 1956 }
39236c6e 1957
0a7de745 1958 return error;
39236c6e
A
1959}
1960
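/*
 * Validate and, if necessary, adjust the DSS mapping carried by a subflow
 * mbuf: a mapped mbuf has its DSN/SSN checked against the announced mapping
 * and advanced by the given offset; an unmapped mbuf either gets a mapping
 * synthesized (after a fallback to regular TCP) or causes the subflow to be
 * reset.
 */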
cb323159
A
1961static int
1962mptcp_adj_rmap(struct socket *so, struct mbuf *m, int off, uint64_t dsn,
1963 uint32_t rseq, uint16_t dlen)
1964{
1965 struct mptsub *mpts = sototcpcb(so)->t_mpsub;
1966
1967 if (m_pktlen(m) == 0) {
1968 return 0;
1969 }
1970
1971 if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
1972 if (off && (dsn != m->m_pkthdr.mp_dsn ||
1973 rseq != m->m_pkthdr.mp_rseq ||
1974 dlen != m->m_pkthdr.mp_rlen)) {
1975 os_log_error(mptcp_log_handle, "%s - %lx: Received incorrect second mapping: %u - %u, %u - %u, %u - %u\n",
1976 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpts->mpts_mpte),
1977 (uint32_t)dsn, (uint32_t)m->m_pkthdr.mp_dsn,
1978 rseq, m->m_pkthdr.mp_rseq,
1979 dlen, m->m_pkthdr.mp_rlen);
1980
1981 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1982 return -1;
1983 }
1984 m->m_pkthdr.mp_dsn += off;
1985 m->m_pkthdr.mp_rseq += off;
1986 m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
1987 } else {
1988 if (!(mpts->mpts_flags & MPTSF_FULLY_ESTABLISHED)) {
1989 /* data arrived without a DSS option mapping */
1990
1991 /* initial subflow can fallback right after SYN handshake */
1992 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
1993 mptcp_notify_mpfail(so);
1994 } else {
1995 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
1996
1997 return -1;
1998 }
1999 } else if (m->m_flags & M_PKTHDR) {
2000 /* We need to fake the DATA-mapping */
2001 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
2002 m->m_pkthdr.mp_dsn = dsn + off;
2003 m->m_pkthdr.mp_rseq = rseq + off;
2004 m->m_pkthdr.mp_rlen = m->m_pkthdr.len;
2005 }
2006 }
2007
2008 mpts->mpts_flags |= MPTSF_FULLY_ESTABLISHED;
2009
2010 return 0;
2011}
2012
39236c6e
A
2013/*
2014 * MPTCP subflow socket receive routine, derived from soreceive().
2015 */
2016static int
2017mptcp_subflow_soreceive(struct socket *so, struct sockaddr **psa,
2018 struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
2019{
2020#pragma unused(uio)
5ba3f43e 2021 struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
39236c6e
A
2022 int flags, error = 0;
2023 struct proc *p = current_proc();
2024 struct mbuf *m, **mp = mp0;
5ba3f43e 2025 boolean_t proc_held = FALSE;
39236c6e 2026
39236c6e
A
2027 VERIFY(so->so_proto->pr_flags & PR_CONNREQUIRED);
2028
2029#ifdef MORE_LOCKING_DEBUG
2030 if (so->so_usecount == 1) {
2031 panic("%s: so=%x no other reference on socket\n", __func__, so);
2032 /* NOTREACHED */
2033 }
2034#endif
2035 /*
2036 * We return all that is there in the subflow's socket receive buffer
2037 * to the MPTCP layer, so we require that the caller passes in the
2038 * expected parameters.
2039 */
0a7de745
A
2040 if (mp == NULL || controlp != NULL) {
2041 return EINVAL;
2042 }
5ba3f43e 2043
39236c6e 2044 *mp = NULL;
0a7de745 2045 if (psa != NULL) {
39236c6e 2046 *psa = NULL;
0a7de745
A
2047 }
2048 if (flagsp != NULL) {
2049 flags = *flagsp & ~MSG_EOR;
2050 } else {
39236c6e 2051 flags = 0;
0a7de745 2052 }
39236c6e 2053
0a7de745
A
2054 if (flags & (MSG_PEEK | MSG_OOB | MSG_NEEDSA | MSG_WAITALL | MSG_WAITSTREAM)) {
2055 return EOPNOTSUPP;
2056 }
5ba3f43e 2057
0a7de745 2058 flags |= (MSG_DONTWAIT | MSG_NBIO);
39236c6e
A
2059
2060 /*
2061 * If a recv attempt is made on a previously-accepted socket
2062 * that has been marked as inactive (disconnected), reject
2063 * the request.
2064 */
2065 if (so->so_flags & SOF_DEFUNCT) {
2066 struct sockbuf *sb = &so->so_rcv;
2067
2068 error = ENOTCONN;
39236c6e
A
2069 /*
2070 * This socket should have been disconnected and flushed
2071 * prior to being returned from sodefunct(); there should
2072 * be no data on its receive list, so panic otherwise.
2073 */
0a7de745 2074 if (so->so_state & SS_DEFUNCT) {
39236c6e 2075 sb_empty_assert(sb, __func__);
0a7de745
A
2076 }
2077 return error;
39236c6e
A
2078 }
2079
2080 /*
2081 * See if the socket has been closed (SS_NOFDREF|SS_CANTRCVMORE)
2082 * and if so just return to the caller. This could happen when
2083 * soreceive() is called by a socket upcall function during the
2084 * time the socket is freed. The socket buffer would have been
2085 * locked across the upcall, therefore we cannot put this thread
2086 * to sleep (else we will deadlock) or return EWOULDBLOCK (else
2087 * we may livelock), because the lock on the socket buffer will
2088 * only be released when the upcall routine returns to its caller.
2089 * Because the socket has been officially closed, there can be
2090 * no further read on it.
2091 *
2092 * A multipath subflow socket would have its SS_NOFDREF set by
2093 * default, so check for SOF_MP_SUBFLOW socket flag; when the
2094 * socket is closed for real, SOF_MP_SUBFLOW would be cleared.
2095 */
2096 if ((so->so_state & (SS_NOFDREF | SS_CANTRCVMORE)) ==
0a7de745
A
2097 (SS_NOFDREF | SS_CANTRCVMORE) && !(so->so_flags & SOF_MP_SUBFLOW)) {
2098 return 0;
2099 }
39236c6e
A
2100
2101 /*
2102 * For consistency with soreceive() semantics, we need to obey
2103 * SB_LOCK in case some other code path has locked the buffer.
2104 */
2105 error = sblock(&so->so_rcv, 0);
0a7de745
A
2106 if (error != 0) {
2107 return error;
2108 }
39236c6e
A
2109
2110 m = so->so_rcv.sb_mb;
2111 if (m == NULL) {
2112 /*
2113 * Panic if we notice inconsistencies in the socket's
2114 * receive list; both sb_mb and sb_cc should correctly
2115 * reflect the contents of the list, otherwise we may
2116 * end up with false positives during select() or poll()
2117 * which could put the application in a bad state.
2118 */
2119 SB_MB_CHECK(&so->so_rcv);
2120
2121 if (so->so_error != 0) {
2122 error = so->so_error;
2123 so->so_error = 0;
2124 goto release;
2125 }
2126
5ba3f43e
A
2127 if (so->so_state & SS_CANTRCVMORE) {
2128 goto release;
2129 }
2130
0a7de745 2131 if (!(so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING))) {
5ba3f43e
A
2132 error = ENOTCONN;
2133 goto release;
2134 }
2135
2136 /*
2137 * MSG_DONTWAIT is implicitly defined and this routine will
2138 * never block, so return EWOULDBLOCK when there is nothing.
2139 */
2140 error = EWOULDBLOCK;
2141 goto release;
2142 }
2143
2144 mptcp_update_last_owner(so, mp_so);
2145
2146 if (mp_so->last_pid != proc_pid(p)) {
2147 p = proc_find(mp_so->last_pid);
2148 if (p == PROC_NULL) {
2149 p = current_proc();
2150 } else {
2151 proc_held = TRUE;
2152 }
2153 }
2154
2155 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgrcv);
2156 SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
2157 SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 1");
2158
2159 while (m != NULL) {
5c9f4661 2160 int dlen = 0, dfin = 0, error_out = 0;
5ba3f43e
A
2161 struct mbuf *start = m;
2162 uint64_t dsn;
2163 uint32_t sseq;
2164 uint16_t orig_dlen;
2165 uint16_t csum;
2166
2167 VERIFY(m->m_nextpkt == NULL);
2168
2169 if ((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
2170 orig_dlen = dlen = m->m_pkthdr.mp_rlen;
2171 dsn = m->m_pkthdr.mp_dsn;
2172 sseq = m->m_pkthdr.mp_rseq;
2173 csum = m->m_pkthdr.mp_csum;
2174 } else {
2175 /* We did fallback */
cb323159
A
2176 if (mptcp_adj_rmap(so, m, 0, 0, 0, 0)) {
2177 error = EIO;
2178 *mp0 = NULL;
2179 goto release;
2180 }
5ba3f43e
A
2181
2182 sbfree(&so->so_rcv, m);
2183
2184 if (mp != NULL) {
2185 *mp = m;
2186 mp = &m->m_next;
2187 so->so_rcv.sb_mb = m = m->m_next;
2188 *mp = NULL;
5ba3f43e
A
2189 }
2190
2191 if (m != NULL) {
2192 so->so_rcv.sb_lastrecord = m;
2193 } else {
2194 SB_EMPTY_FIXUP(&so->so_rcv);
2195 }
2196
2197 continue;
39236c6e
A
2198 }
2199
0a7de745 2200 if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN) {
5c9f4661 2201 dfin = 1;
0a7de745 2202 }
5c9f4661 2203
5ba3f43e
A
2204 /*
2205 * Check if the full mapping is now present
2206 */
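		/*
		 * A DATA_FIN occupies one data-sequence number but carries no
		 * payload byte, hence the dfin adjustment when comparing the
		 * mapping length against what is queued in the receive buffer.
		 */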
5c9f4661 2207 if ((int)so->so_rcv.sb_cc < dlen - dfin) {
d9a64523 2208 mptcplog((LOG_INFO, "%s not enough data (%u) need %u for dsn %u\n",
0a7de745
A
2209 __func__, so->so_rcv.sb_cc, dlen, (uint32_t)dsn),
2210 MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_LOG);
5ba3f43e 2211
0a7de745 2212 if (*mp0 == NULL) {
5ba3f43e 2213 error = EWOULDBLOCK;
0a7de745 2214 }
39236c6e
A
2215 goto release;
2216 }
2217
5ba3f43e
A
2218 /* Now, get the full mapping */
2219 while (dlen > 0) {
5c9f4661
A
2220 if (mptcp_adj_rmap(so, m, orig_dlen - dlen, dsn, sseq, orig_dlen)) {
2221 error_out = 1;
2222 error = EIO;
2223 dlen = 0;
0a7de745 2224 *mp0 = NULL;
5c9f4661
A
2225 break;
2226 }
39236c6e 2227
5ba3f43e
A
2228 dlen -= m->m_len;
2229 sbfree(&so->so_rcv, m);
39236c6e 2230
5ba3f43e
A
2231 if (mp != NULL) {
2232 *mp = m;
2233 mp = &m->m_next;
2234 so->so_rcv.sb_mb = m = m->m_next;
2235 *mp = NULL;
2236 }
2237
0a7de745 2238 if (dlen - dfin == 0) {
5c9f4661 2239 dlen = 0;
0a7de745 2240 }
5c9f4661 2241
5ba3f43e 2242 VERIFY(dlen <= 0 || m);
39236c6e
A
2243 }
2244
5ba3f43e
A
2245 VERIFY(dlen == 0);
2246
39236c6e 2247 if (m != NULL) {
5ba3f43e 2248 so->so_rcv.sb_lastrecord = m;
39236c6e 2249 } else {
39236c6e
A
2250 SB_EMPTY_FIXUP(&so->so_rcv);
2251 }
5ba3f43e 2252
0a7de745 2253 if (error_out) {
5c9f4661 2254 goto release;
0a7de745 2255 }
5c9f4661
A
2256
2257 if (mptcp_validate_csum(sototcpcb(so), start, dsn, sseq, orig_dlen, csum, dfin)) {
5ba3f43e
A
2258 error = EIO;
2259 *mp0 = NULL;
2260 goto release;
2261 }
2262
39236c6e
A
2263 SBLASTRECORDCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
2264 SBLASTMBUFCHK(&so->so_rcv, "mptcp_subflow_soreceive 2");
2265 }
2266
2267 DTRACE_MPTCP3(subflow__receive, struct socket *, so,
2268 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd);
39236c6e 2269
0a7de745 2270 if (flagsp != NULL) {
39236c6e 2271 *flagsp |= flags;
0a7de745 2272 }
39236c6e
A
2273
2274release:
5ba3f43e
A
2275 sbunlock(&so->so_rcv, TRUE);
2276
0a7de745 2277 if (proc_held) {
5ba3f43e 2278 proc_rele(p);
0a7de745 2279 }
5ba3f43e 2280
0a7de745 2281 return error;
39236c6e
A
2282}
2283
39236c6e 2284/*
5ba3f43e 2285 * MPTCP subflow socket send routine, derived from sosend().
39236c6e 2286 */
5ba3f43e
A
2287static int
2288mptcp_subflow_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
2289 struct mbuf *top, struct mbuf *control, int flags)
39236c6e 2290{
5ba3f43e
A
2291 struct socket *mp_so = mptetoso(tptomptp(sototcpcb(so))->mpt_mpte);
2292 struct proc *p = current_proc();
2293 boolean_t en_tracing = FALSE, proc_held = FALSE;
2294 int en_tracing_val;
2295 int sblocked = 1; /* Pretend as if it is already locked, so we won't relock it */
2296 int error;
39236c6e 2297
5ba3f43e
A
2298 VERIFY(control == NULL);
2299 VERIFY(addr == NULL);
2300 VERIFY(uio == NULL);
2301 VERIFY(flags == 0);
2302 VERIFY((so->so_flags & SOF_CONTENT_FILTER) == 0);
39236c6e 2303
5ba3f43e
A
2304 VERIFY(top->m_pkthdr.len > 0 && top->m_pkthdr.len <= UINT16_MAX);
2305 VERIFY(top->m_pkthdr.pkt_flags & PKTF_MPTCP);
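	/*
	 * The MPTCP layer always hands down a single, pre-built mbuf chain
	 * covered by one DSS mapping (hence at most 64 KB), which is why the
	 * usual sosend() arguments (address, uio, control, flags) are unused.
	 */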
39236c6e
A
2306
2307 /*
5ba3f43e
A
2308 * trace if tracing & network (vs. unix) sockets & non-loopback
2309 * non-loopback
39236c6e 2310 */
5ba3f43e
A
2311 if (ENTR_SHOULDTRACE &&
2312 (SOCK_CHECK_DOM(so, AF_INET) || SOCK_CHECK_DOM(so, AF_INET6))) {
2313 struct inpcb *inp = sotoinpcb(so);
2314 if (inp->inp_last_outifp != NULL &&
2315 !(inp->inp_last_outifp->if_flags & IFF_LOOPBACK)) {
2316 en_tracing = TRUE;
2317 en_tracing_val = top->m_pkthdr.len;
2318 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_START,
cb323159 2319 (unsigned long)VM_KERNEL_ADDRPERM(so),
5ba3f43e
A
2320 ((so->so_state & SS_NBIO) ? kEnTrFlagNonBlocking : 0),
2321 (int64_t)en_tracing_val);
2322 }
2323 }
39236c6e 2324
5ba3f43e 2325 mptcp_update_last_owner(so, mp_so);
39236c6e 2326
5ba3f43e
A
2327 if (mp_so->last_pid != proc_pid(p)) {
2328 p = proc_find(mp_so->last_pid);
2329 if (p == PROC_NULL) {
2330 p = current_proc();
2331 } else {
2332 proc_held = TRUE;
2333 }
2334 }
39236c6e 2335
5ba3f43e
A
2336#if NECP
2337 inp_update_necp_policy(sotoinpcb(so), NULL, NULL, 0);
2338#endif /* NECP */
39236c6e 2339
5ba3f43e 2340 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
39236c6e 2341
5ba3f43e 2342 error = sosendcheck(so, NULL, top->m_pkthdr.len, 0, 1, 0, &sblocked, NULL);
0a7de745 2343 if (error) {
5ba3f43e 2344 goto out;
0a7de745 2345 }
39236c6e 2346
5ba3f43e
A
2347 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, top, NULL, NULL, p);
2348 top = NULL;
39236c6e 2349
5ba3f43e 2350out:
0a7de745 2351 if (top != NULL) {
5ba3f43e 2352 m_freem(top);
0a7de745 2353 }
39236c6e 2354
0a7de745 2355 if (proc_held) {
5ba3f43e 2356 proc_rele(p);
0a7de745 2357 }
5ba3f43e
A
2358
2359 soclearfastopen(so);
2360
2361 if (en_tracing) {
2362 KERNEL_ENERGYTRACE(kEnTrActKernSockWrite, DBG_FUNC_END,
cb323159 2363 (unsigned long)VM_KERNEL_ADDRPERM(so),
5ba3f43e
A
2364 ((error == EWOULDBLOCK) ? kEnTrFlagNoWork : 0),
2365 (int64_t)en_tracing_val);
2366 }
2367
0a7de745 2368 return error;
39236c6e
A
2369}
2370
2371/*
2372 * Establish an initial MPTCP connection (if first subflow and not yet
2373 * connected), or add a subflow to an existing MPTCP connection.
2374 */
2375int
5ba3f43e
A
2376mptcp_subflow_add(struct mptses *mpte, struct sockaddr *src,
2377 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
39236c6e 2378{
39236c6e 2379 struct socket *mp_so, *so = NULL;
39236c6e 2380 struct mptcb *mp_tp;
5ba3f43e 2381 struct mptsub *mpts = NULL;
39236c6e
A
2382 int af, error = 0;
2383
5ba3f43e 2384 mp_so = mptetoso(mpte);
39236c6e
A
2385 mp_tp = mpte->mpte_mptcb;
2386
cb323159
A
2387 socket_lock_assert_owned(mp_so);
2388
fe8ab488
A
2389 if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
2390 /* If the remote end sends Data FIN, refuse subflow adds */
cb323159
A
2391 os_log_error(mptcp_log_handle, "%s - %lx: state %u\n",
2392 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mp_tp->mpt_state);
fe8ab488 2393 error = ENOTCONN;
5ba3f43e 2394 goto out_err;
fe8ab488 2395 }
39236c6e 2396
5ba3f43e
A
2397 mpts = mptcp_subflow_alloc();
2398 if (mpts == NULL) {
cb323159
A
2399 os_log_error(mptcp_log_handle, "%s - %lx: malloc subflow failed\n",
2400 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e
A
2401 error = ENOMEM;
2402 goto out_err;
2403 }
39236c6e 2404
0a7de745
A
2405 if (src) {
2406 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
2407 error = EAFNOSUPPORT;
2408 goto out_err;
2409 }
813fb2f6 2410
0a7de745
A
2411 if (src->sa_family == AF_INET &&
2412 src->sa_len != sizeof(struct sockaddr_in)) {
2413 error = EINVAL;
2414 goto out_err;
2415 }
2416
2417 if (src->sa_family == AF_INET6 &&
2418 src->sa_len != sizeof(struct sockaddr_in6)) {
2419 error = EINVAL;
2420 goto out_err;
2421 }
2422
2423 MALLOC(mpts->mpts_src, struct sockaddr *, src->sa_len, M_SONAME,
813fb2f6
A
2424 M_WAITOK | M_ZERO);
2425 if (mpts->mpts_src == NULL) {
5ba3f43e
A
2426 error = ENOMEM;
2427 goto out_err;
39236c6e 2428 }
0a7de745
A
2429 bcopy(src, mpts->mpts_src, src->sa_len);
2430 }
2431
2432 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
2433 error = EAFNOSUPPORT;
2434 goto out_err;
2435 }
2436
2437 if (dst->sa_family == AF_INET &&
2438 dst->sa_len != sizeof(mpts->__mpts_dst_v4)) {
2439 error = EINVAL;
2440 goto out_err;
2441 }
2442
2443 if (dst->sa_family == AF_INET6 &&
2444 dst->sa_len != sizeof(mpts->__mpts_dst_v6)) {
2445 error = EINVAL;
2446 goto out_err;
39236c6e
A
2447 }
2448
cb323159 2449 memcpy(&mpts->mpts_u_dst, dst, dst->sa_len);
5ba3f43e
A
2450
2451 af = mpts->mpts_dst.sa_family;
2452
0a7de745
A
2453 ifnet_head_lock_shared();
2454 if ((ifscope > (unsigned)if_index)) {
2455 ifnet_head_done();
2456 error = ENXIO;
2457 goto out_err;
2458 }
2459 ifnet_head_done();
2460
5ba3f43e
A
2461 mpts->mpts_ifscope = ifscope;
2462
39236c6e 2463 /* create the subflow socket */
0a7de745 2464 if ((error = mptcp_subflow_socreate(mpte, mpts, af, &so)) != 0) {
5ba3f43e
A
2465 /*
2466 * Return (error) without cleaning up, because up to here
2467 * all we did was create mpts.
2468 *
2469 * The contract is that the call to mptcp_subflow_socreate
2470 * moves ownership of mpts to mptcp_subflow_socreate.
2471 */
0a7de745
A
2472 return error;
2473 }
5ba3f43e
A
2474
2475 /*
2476 * We may be called from within the kernel. Still need to account this
2477 * one to the real app.
2478 */
2479 mptcp_update_last_owner(mpts->mpts_socket, mp_so);
39236c6e
A
2480
2481 /*
3e170ce0
A
2482 * Increment the counter, while avoiding 0 (SAE_CONNID_ANY) and
2483 * -1 (SAE_CONNID_ALL).
39236c6e
A
2484 */
2485 mpte->mpte_connid_last++;
3e170ce0 2486 if (mpte->mpte_connid_last == SAE_CONNID_ALL ||
0a7de745 2487 mpte->mpte_connid_last == SAE_CONNID_ANY) {
39236c6e 2488 mpte->mpte_connid_last++;
0a7de745 2489 }
39236c6e
A
2490
2491 mpts->mpts_connid = mpte->mpte_connid_last;
490019cf
A
2492
2493 mpts->mpts_rel_seq = 1;
2494
fe8ab488
A
2495 /* Allocate a unique address id per subflow */
2496 mpte->mpte_addrid_last++;
0a7de745 2497 if (mpte->mpte_addrid_last == 0) {
fe8ab488 2498 mpte->mpte_addrid_last++;
0a7de745 2499 }
39236c6e 2500
39236c6e 2501 /* register for subflow socket read/write events */
cb323159 2502 sock_setupcalls_locked(so, NULL, NULL, mptcp_subflow_wupcall, mpts, 1);
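	/*
	 * Only a write upcall is installed; inbound data is instead pulled by
	 * mptcp_subflow_input(), driven from mptcp_handle_input(), so no read
	 * upcall is needed on the subflow socket.
	 */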
39236c6e 2503
5ba3f43e
A
2504 /* Register for subflow socket control events */
2505 sock_catchevents_locked(so, mptcp_subflow_eupcall1, mpts,
39236c6e 2506 SO_FILT_HINT_CONNRESET | SO_FILT_HINT_CANTRCVMORE |
5ba3f43e
A
2507 SO_FILT_HINT_TIMEOUT | SO_FILT_HINT_NOSRCADDR |
2508 SO_FILT_HINT_IFDENIED | SO_FILT_HINT_CONNECTED |
2509 SO_FILT_HINT_DISCONNECTED | SO_FILT_HINT_MPFAILOVER |
2510 SO_FILT_HINT_MPSTATUS | SO_FILT_HINT_MUSTRST |
2511 SO_FILT_HINT_MPCANTRCVMORE | SO_FILT_HINT_ADAPTIVE_RTIMO |
cb323159 2512 SO_FILT_HINT_ADAPTIVE_WTIMO | SO_FILT_HINT_MP_SUB_ERROR);
39236c6e
A
2513
2514 /* sanity check */
2515 VERIFY(!(mpts->mpts_flags &
0a7de745 2516 (MPTSF_CONNECTING | MPTSF_CONNECTED | MPTSF_CONNECT_PENDING)));
39236c6e 2517
39236c6e
A
2518 /*
2519 * Indicate to the TCP subflow whether or not it should establish
2520 * the initial MPTCP connection, or join an existing one. Fill
2521 * in the connection request structure with additional info needed
2522 * by the underlying TCP (to be used in the TCP options, etc.)
2523 */
39236c6e 2524 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED && mpte->mpte_numflows == 1) {
5ba3f43e
A
2525 mpts->mpts_flags |= MPTSF_INITIAL_SUB;
2526
39236c6e 2527 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
5ba3f43e 2528 mptcp_init_local_parms(mpte);
39236c6e 2529 }
39236c6e 2530 soisconnecting(mp_so);
5ba3f43e
A
2531
2532 /* If fastopen is requested, set state in mpts */
0a7de745 2533 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
5ba3f43e 2534 mpts->mpts_flags |= MPTSF_TFO_REQD;
0a7de745 2535 }
39236c6e 2536 } else {
0a7de745 2537 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY)) {
39236c6e 2538 mpts->mpts_flags |= MPTSF_CONNECT_PENDING;
0a7de745 2539 }
490019cf
A
2540 }
2541
39236c6e
A
2542 mpts->mpts_flags |= MPTSF_CONNECTING;
2543
39236c6e 2544 /* connect right away if first attempt, or if join can be done now */
0a7de745 2545 if (!(mpts->mpts_flags & MPTSF_CONNECT_PENDING)) {
39236c6e 2546 error = mptcp_subflow_soconnectx(mpte, mpts);
0a7de745 2547 }
39236c6e 2548
0a7de745 2549 if (error) {
5ba3f43e 2550 goto out_err_close;
0a7de745 2551 }
5ba3f43e 2552
0a7de745 2553 if (pcid) {
5ba3f43e 2554 *pcid = mpts->mpts_connid;
0a7de745 2555 }
5ba3f43e 2556
0a7de745 2557 return 0;
5ba3f43e
A
2558
2559out_err_close:
2560 mptcp_subflow_abort(mpts, error);
2561
0a7de745 2562 return error;
5ba3f43e
A
2563
2564out_err:
0a7de745 2565 if (mpts) {
5ba3f43e 2566 mptcp_subflow_free(mpts);
0a7de745 2567 }
5ba3f43e 2568
0a7de745 2569 return error;
39236c6e
A
2570}
2571
5ba3f43e 2572void
cb323159 2573mptcpstats_update(struct mptcp_itf_stats *stats, const struct mptsub *mpts)
5ba3f43e 2574{
cb323159 2575 int index = mptcpstats_get_index(stats, mpts);
5ba3f43e
A
2576
2577 if (index != -1) {
2578 struct inpcb *inp = sotoinpcb(mpts->mpts_socket);
2579
2580 stats[index].mpis_txbytes += inp->inp_stat->txbytes;
2581 stats[index].mpis_rxbytes += inp->inp_stat->rxbytes;
cb323159
A
2582
2583 stats[index].mpis_wifi_txbytes += inp->inp_wstat->txbytes;
2584 stats[index].mpis_wifi_rxbytes += inp->inp_wstat->rxbytes;
2585
2586 stats[index].mpis_wired_txbytes += inp->inp_Wstat->txbytes;
2587 stats[index].mpis_wired_rxbytes += inp->inp_Wstat->rxbytes;
2588
2589 stats[index].mpis_cell_txbytes += inp->inp_cstat->txbytes;
2590 stats[index].mpis_cell_rxbytes += inp->inp_cstat->rxbytes;
5ba3f43e
A
2591 }
2592}
2593
39236c6e
A
2594/*
2595 * Delete/remove a subflow from an MPTCP session. The underlying subflow socket
2596 * will no longer be accessible after a subflow is deleted, thus this
2597 * should occur only after the subflow socket has been disconnected.
39236c6e
A
2598 */
2599void
5ba3f43e 2600mptcp_subflow_del(struct mptses *mpte, struct mptsub *mpts)
39236c6e 2601{
5ba3f43e
A
2602 struct socket *mp_so = mptetoso(mpte);
2603 struct socket *so = mpts->mpts_socket;
2604 struct tcpcb *tp = sototcpcb(so);
39037602 2605
cb323159 2606 socket_lock_assert_owned(mp_so);
5ba3f43e
A
2607 VERIFY(mpts->mpts_mpte == mpte);
2608 VERIFY(mpts->mpts_flags & MPTSF_ATTACHED);
2609 VERIFY(mpte->mpte_numflows != 0);
2610 VERIFY(mp_so->so_usecount > 0);
39236c6e 2611
5ba3f43e 2612 mptcpstats_update(mpte->mpte_itfstats, mpts);
cb323159
A
2613
2614 mptcp_unset_cellicon(mpte, mpts, 1);
2615
5ba3f43e
A
2616 mpte->mpte_init_rxbytes = sotoinpcb(so)->inp_stat->rxbytes;
2617 mpte->mpte_init_txbytes = sotoinpcb(so)->inp_stat->txbytes;
39236c6e 2618
39236c6e
A
2619 atomic_bitclear_32(&mpts->mpts_flags, MPTSF_ATTACHED);
2620 TAILQ_REMOVE(&mpte->mpte_subflows, mpts, mpts_entry);
39236c6e 2621 mpte->mpte_numflows--;
0a7de745 2622 if (mpte->mpte_active_sub == mpts) {
fe8ab488 2623 mpte->mpte_active_sub = NULL;
0a7de745 2624 }
39236c6e
A
2625
2626 /*
2627 * Drop references held by this subflow socket; there
2628 * will be no further upcalls made from this point.
2629 */
5ba3f43e
A
2630 sock_setupcalls_locked(so, NULL, NULL, NULL, NULL, 0);
2631 sock_catchevents_locked(so, NULL, NULL, 0);
fe8ab488 2632
39236c6e 2633 mptcp_detach_mptcb_from_subf(mpte->mpte_mptcb, so);
39037602 2634
0a7de745 2635 mp_so->so_usecount--; /* for subflow socket */
39236c6e
A
2636 mpts->mpts_mpte = NULL;
2637 mpts->mpts_socket = NULL;
39236c6e 2638
0a7de745
A
2639 mptcp_subflow_remref(mpts); /* for MPTCP subflow list */
2640 mptcp_subflow_remref(mpts); /* for subflow socket */
5ba3f43e
A
2641
2642 so->so_flags &= ~SOF_MP_SUBFLOW;
2643 tp->t_mptcb = NULL;
2644 tp->t_mpsub = NULL;
2645}
2646
2647void
2648mptcp_subflow_shutdown(struct mptses *mpte, struct mptsub *mpts)
2649{
2650 struct socket *so = mpts->mpts_socket;
2651 struct mptcb *mp_tp = mpte->mpte_mptcb;
2652 int send_dfin = 0;
2653
0a7de745 2654 if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
5ba3f43e 2655 send_dfin = 1;
0a7de745 2656 }
5ba3f43e
A
2657
2658 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
2659 (so->so_state & SS_ISCONNECTED)) {
2660 mptcplog((LOG_DEBUG, "MPTCP subflow shutdown %s: cid %d fin %d\n",
2661 __func__, mpts->mpts_connid, send_dfin),
2662 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
2663
0a7de745 2664 if (send_dfin) {
5ba3f43e 2665 mptcp_send_dfin(so);
0a7de745 2666 }
5ba3f43e
A
2667 soshutdownlock(so, SHUT_WR);
2668 }
5ba3f43e
A
2669}
2670
2671static void
2672mptcp_subflow_abort(struct mptsub *mpts, int error)
2673{
2674 struct socket *so = mpts->mpts_socket;
2675 struct tcpcb *tp = sototcpcb(so);
2676
0a7de745 2677 if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
5ba3f43e 2678 return;
0a7de745 2679 }
5ba3f43e
A
2680
2681 mptcplog((LOG_DEBUG, "%s aborting connection state %u\n", __func__, tp->t_state),
0a7de745 2682 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2683
0a7de745 2684 if (tp->t_state != TCPS_CLOSED) {
5ba3f43e 2685 tcp_drop(tp, error);
0a7de745 2686 }
5ba3f43e
A
2687
2688 mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
2689}
2690
2691/*
2692 * Disconnect a subflow socket.
2693 */
2694void
5ba3f43e 2695mptcp_subflow_disconnect(struct mptses *mpte, struct mptsub *mpts)
39236c6e 2696{
94ff46dc 2697 struct socket *so, *mp_so;
39236c6e
A
2698 struct mptcb *mp_tp;
2699 int send_dfin = 0;
2700
94ff46dc
A
2701 so = mpts->mpts_socket;
2702 mp_tp = mpte->mpte_mptcb;
2703 mp_so = mptetoso(mpte);
2704
2705 socket_lock_assert_owned(mp_so);
39236c6e 2706
0a7de745 2707 if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
39236c6e 2708 return;
0a7de745 2709 }
39236c6e 2710
cb323159
A
2711 mptcp_unset_cellicon(mpte, mpts, 1);
2712
39236c6e
A
2713 mpts->mpts_flags |= MPTSF_DISCONNECTING;
2714
0a7de745 2715 if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
39236c6e 2716 send_dfin = 1;
0a7de745 2717 }
39236c6e 2718
39236c6e
A
2719 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
2720 (so->so_state & SS_ISCONNECTED)) {
a39ff7e2 2721 mptcplog((LOG_DEBUG, "%s: cid %d fin %d\n",
5ba3f43e
A
2722 __func__, mpts->mpts_connid, send_dfin),
2723 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2724
0a7de745 2725 if (send_dfin) {
39236c6e 2726 mptcp_send_dfin(so);
0a7de745 2727 }
94ff46dc
A
2728
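		/*
		 * If the MPTCP socket itself has been defuncted, defunct the
		 * subflow too instead of attempting a graceful shutdown and
		 * disconnect.
		 */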
2729 if (mp_so->so_flags & SOF_DEFUNCT) {
2730 errno_t ret;
2731
2732 ret = sosetdefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL, TRUE);
2733 if (ret == 0) {
2734 ret = sodefunct(NULL, so, SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL);
2735
2736 if (ret != 0) {
2737 os_log_error(mptcp_log_handle, "%s - %lx: sodefunct failed with %d\n",
2738 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
2739 }
2740 } else {
2741 os_log_error(mptcp_log_handle, "%s - %lx: sosetdefunct failed with %d\n",
2742 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), ret);
2743 }
2744 } else {
2745 (void) soshutdownlock(so, SHUT_RD);
2746 (void) soshutdownlock(so, SHUT_WR);
2747 (void) sodisconnectlocked(so);
2748 }
39236c6e 2749 }
94ff46dc 2750
39236c6e
A
2751 /*
2752 * Generate a disconnect event for this subflow socket, in case
2753 * the lower layer doesn't do it; this is needed because the
5ba3f43e 2754 * subflow socket deletion relies on it.
39236c6e 2755 */
5ba3f43e 2756 mptcp_subflow_eupcall1(so, mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
2757}
2758
39236c6e
A
2759/*
2760 * Subflow socket input.
39236c6e
A
2761 */
2762static void
2763mptcp_subflow_input(struct mptses *mpte, struct mptsub *mpts)
2764{
5ba3f43e 2765 struct socket *mp_so = mptetoso(mpte);
39236c6e
A
2766 struct mbuf *m = NULL;
2767 struct socket *so;
5ba3f43e 2768 int error, wakeup = 0;
39236c6e 2769
5ba3f43e
A
2770 VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_INPUT));
2771 mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_INPUT;
39236c6e 2772
39037602 2773 DTRACE_MPTCP2(subflow__input, struct mptses *, mpte,
39236c6e
A
2774 struct mptsub *, mpts);
2775
0a7de745 2776 if (!(mpts->mpts_flags & MPTSF_CONNECTED)) {
5ba3f43e 2777 goto out;
0a7de745 2778 }
39236c6e
A
2779
2780 so = mpts->mpts_socket;
2781
2782 error = sock_receive_internal(so, NULL, &m, 0, NULL);
2783 if (error != 0 && error != EWOULDBLOCK) {
cb323159
A
2784 os_log_error(mptcp_log_handle, "%s - %lx: cid %d error %d\n",
2785 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error);
5ba3f43e
A
2786 if (error == ENODATA) {
2787 /*
2788 * Don't ignore ENODATA so as to discover
2789 * nasty middleboxes.
2790 */
2791 mp_so->so_error = ENODATA;
2792
2793 wakeup = 1;
2794 goto out;
39236c6e 2795 }
39236c6e 2796 } else if (error == 0) {
5ba3f43e 2797 mptcplog((LOG_DEBUG, "%s: cid %d \n", __func__, mpts->mpts_connid),
3e170ce0 2798 MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e
A
2799 }
2800
2801 /* In fallback, make sure to accept data on all but one subflow */
5ba3f43e
A
2802 if (m && (mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
2803 !(mpts->mpts_flags & MPTSF_ACTIVE)) {
2804 mptcplog((LOG_DEBUG, "%s: degraded and got data on non-active flow\n",
2805 __func__), MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 2806 m_freem(m);
5ba3f43e 2807 goto out;
39236c6e
A
2808 }
2809
2810 if (m != NULL) {
5ba3f43e 2811 if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
cb323159 2812 mptcp_set_cellicon(mpte, mpts);
3e170ce0 2813
5ba3f43e
A
2814 mpte->mpte_used_cell = 1;
2815 } else {
cb323159
A
2816 /*
2817 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
2818 * explicitly set the cellicon, then we unset it again.
2819 */
2820 if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
2821 mptcp_unset_cellicon(mpte, NULL, 1);
2822 }
5ba3f43e
A
2823
2824 mpte->mpte_used_wifi = 1;
2825 }
3e170ce0 2826
39236c6e 2827 mptcp_input(mpte, m);
39236c6e 2828 }
5ba3f43e 2829
5ba3f43e 2830out:
0a7de745 2831 if (wakeup) {
5ba3f43e 2832 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
0a7de745 2833 }
5ba3f43e 2834
cb323159
A
2835 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_INPUT);
2836}
2837
2838void
2839mptcp_handle_input(struct socket *so)
2840{
2841 struct mptsub *mpts, *tmpts;
2842 struct mptses *mpte;
2843
2844 if (!(so->so_flags & SOF_MP_SUBFLOW)) {
2845 return;
2846 }
2847
2848 mpts = sototcpcb(so)->t_mpsub;
2849 mpte = mpts->mpts_mpte;
2850
2851 socket_lock_assert_owned(mptetoso(mpte));
2852
2853 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
2854 if (!(mpte->mpte_mppcb->mpp_flags & MPP_INPUT_HANDLE)) {
2855 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_RWAKEUP;
2856 }
2857 return;
2858 }
2859
2860 mpte->mpte_mppcb->mpp_flags |= MPP_INPUT_HANDLE;
2861 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
2862 if (mpts->mpts_socket->so_usecount == 0) {
2863 /* Will be removed soon by tcp_garbage_collect */
2864 continue;
2865 }
2866
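		/*
		 * Take a temporary reference on the subflow and bump the
		 * subflow socket's use count so neither can go away while
		 * mptcp_subflow_input() runs.
		 */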
2867 mptcp_subflow_addref(mpts);
2868 mpts->mpts_socket->so_usecount++;
2869
2870 mptcp_subflow_input(mpte, mpts);
2871
2872 mptcp_subflow_remref(mpts); /* ours */
2873
2874 VERIFY(mpts->mpts_socket->so_usecount != 0);
2875 mpts->mpts_socket->so_usecount--;
2876 }
2877
2878 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INPUT_HANDLE);
39236c6e
A
2879}
2880
2881/*
2882 * Subflow socket write upcall.
2883 *
5ba3f43e 2884 * Called when the associated subflow socket posted a write event.
39236c6e
A
2885 */
2886static void
2887mptcp_subflow_wupcall(struct socket *so, void *arg, int waitf)
2888{
2889#pragma unused(so, waitf)
2890 struct mptsub *mpts = arg;
2891 struct mptses *mpte = mpts->mpts_mpte;
2892
5ba3f43e
A
2893 VERIFY(mpte != NULL);
2894
2895 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
0a7de745 2896 if (!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL)) {
5ba3f43e 2897 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
0a7de745 2898 }
fe8ab488 2899 return;
5ba3f43e 2900 }
39236c6e 2901
5ba3f43e 2902 mptcp_output(mpte);
39236c6e
A
2903}
2904
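/*
 * Check whether the data-sequence number at the head of mbuf m is still
 * (partially) covered by data sitting in the subflow's send buffer; if so,
 * the segment should not be reinjected on this subflow.
 */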
a39ff7e2
A
2905static boolean_t
2906mptcp_search_seq_in_sub(struct mbuf *m, struct socket *so)
2907{
2908 struct mbuf *so_m = so->so_snd.sb_mb;
2909 uint64_t dsn = m->m_pkthdr.mp_dsn;
2910
2911 while (so_m) {
2912 VERIFY(so_m->m_flags & M_PKTHDR);
2913 VERIFY(so_m->m_pkthdr.pkt_flags & PKTF_MPTCP);
2914
2915 /* Part of the segment is covered, don't reinject here */
2916 if (so_m->m_pkthdr.mp_dsn <= dsn &&
0a7de745 2917 so_m->m_pkthdr.mp_dsn + so_m->m_pkthdr.mp_rlen > dsn) {
a39ff7e2 2918 return TRUE;
0a7de745 2919 }
a39ff7e2
A
2920
2921 so_m = so_m->m_next;
2922 }
2923
2924 return FALSE;
2925}
2926
39236c6e
A
2927/*
2928 * Subflow socket output.
2929 *
2930 * Called for sending data from MPTCP to the underlying subflow socket.
2931 */
2932int
5ba3f43e 2933mptcp_subflow_output(struct mptses *mpte, struct mptsub *mpts, int flags)
39236c6e 2934{
39236c6e 2935 struct mptcb *mp_tp = mpte->mpte_mptcb;
5ba3f43e
A
2936 struct mbuf *sb_mb, *m, *mpt_mbuf = NULL, *head, *tail;
2937 struct socket *mp_so, *so;
2938 struct tcpcb *tp;
2939 uint64_t mpt_dsn = 0, off = 0;
2940 int sb_cc = 0, error = 0, wakeup = 0;
2941 uint32_t dss_csum;
2942 uint16_t tot_sent = 0;
2943 boolean_t reinjected = FALSE;
2944
5ba3f43e 2945 mp_so = mptetoso(mpte);
39236c6e 2946 so = mpts->mpts_socket;
5ba3f43e 2947 tp = sototcpcb(so);
39236c6e 2948
cb323159
A
2949 socket_lock_assert_owned(mp_so);
2950
5ba3f43e
A
2951 VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_INSIDE_OUTPUT));
2952 mpte->mpte_mppcb->mpp_flags |= MPP_INSIDE_OUTPUT;
39236c6e 2953
5ba3f43e
A
2954 VERIFY(!INP_WAIT_FOR_IF_FEEDBACK(sotoinpcb(so)));
2955 VERIFY((mpts->mpts_flags & MPTSF_MP_CAPABLE) ||
0a7de745
A
2956 (mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
2957 (mpts->mpts_flags & MPTSF_TFO_REQD));
5ba3f43e 2958 VERIFY(mptcp_subflow_cwnd_space(mpts->mpts_socket) > 0);
39236c6e 2959
5ba3f43e 2960 mptcplog((LOG_DEBUG, "%s mpts_flags %#x, mpte_flags %#x cwnd_space %u\n",
0a7de745
A
2961 __func__, mpts->mpts_flags, mpte->mpte_flags,
2962 mptcp_subflow_cwnd_space(so)),
2963 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
2964 DTRACE_MPTCP2(subflow__output, struct mptses *, mpte,
2965 struct mptsub *, mpts);
39236c6e
A
2966
2967 /* Remove Addr Option is not sent reliably as per I-D */
2968 if (mpte->mpte_flags & MPTE_SND_REM_ADDR) {
39236c6e 2969 tp->t_rem_aid = mpte->mpte_lost_aid;
5ba3f43e 2970 tp->t_mpflags |= TMPF_SND_REM_ADDR;
39236c6e
A
2971 mpte->mpte_flags &= ~MPTE_SND_REM_ADDR;
2972 }
2973
2974 /*
2975 * The mbuf chains containing the metadata (as well as pointing to
2976 * the user data sitting at the MPTCP output queue) would then be
2977 * sent down to the subflow socket.
2978 *
2979 * Some notes on data sequencing:
2980 *
2981 * a. Each mbuf must be a M_PKTHDR.
2982 * b. MPTCP metadata is stored in the mptcp_pktinfo structure
2983 * in the mbuf pkthdr structure.
2984 * c. Each mbuf containing the MPTCP metadata must have its
2985 * pkt_flags marked with the PKTF_MPTCP flag.
2986 */
2987
0a7de745 2988 if (mpte->mpte_reinjectq) {
5ba3f43e 2989 sb_mb = mpte->mpte_reinjectq;
0a7de745 2990 } else {
5ba3f43e 2991 sb_mb = mp_so->so_snd.sb_mb;
0a7de745 2992 }
5ba3f43e 2993
39236c6e 2994 if (sb_mb == NULL) {
cb323159
A
2995 os_log_error(mptcp_log_handle, "%s - %lx: No data in MPTCP-sendbuffer! smax %u snxt %u suna %u state %u flags %#x\n",
2996 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
2997 (uint32_t)mp_tp->mpt_sndmax, (uint32_t)mp_tp->mpt_sndnxt,
2998 (uint32_t)mp_tp->mpt_snduna, mp_tp->mpt_state, mp_so->so_flags1);
a39ff7e2
A
2999
3000 /* Fix it to prevent looping */
0a7de745 3001 if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
a39ff7e2 3002 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
0a7de745 3003 }
39236c6e
A
3004 goto out;
3005 }
3006
3007 VERIFY(sb_mb->m_pkthdr.pkt_flags & PKTF_MPTCP);
3008
5ba3f43e
A
3009 if (sb_mb->m_pkthdr.mp_rlen == 0 &&
3010 !(so->so_state & SS_ISCONNECTED) &&
3011 (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
3012 tp->t_mpflags |= TMPF_TFO_REQUEST;
3013 goto zero_len_write;
39236c6e
A
3014 }
3015
5ba3f43e
A
3016 mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
3017
3018 /* First, drop acknowledged data */
39236c6e 3019 if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
cb323159 3020 os_log_error(mptcp_log_handle, "%s - %lx: dropping data, should have been done earlier "
0a7de745 3021 "dsn %u suna %u reinject? %u\n",
cb323159
A
3022 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mpt_dsn,
3023 (uint32_t)mp_tp->mpt_snduna, !!mpte->mpte_reinjectq);
5ba3f43e
A
3024 if (mpte->mpte_reinjectq) {
3025 mptcp_clean_reinjectq(mpte);
3026 } else {
3027 uint64_t len = 0;
3028 len = mp_tp->mpt_snduna - mpt_dsn;
3029 sbdrop(&mp_so->so_snd, (int)len);
3030 wakeup = 1;
3031 }
3032 }
3033
3034 /* Check again because of above sbdrop */
3035 if (mp_so->so_snd.sb_mb == NULL && mpte->mpte_reinjectq == NULL) {
cb323159
A
3036 os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is empty\n",
3037 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3038 goto out;
39236c6e
A
3039 }
3040
3041 /*
3042 * In degraded mode, we don't receive data acks, so force free
3043 * mbufs less than snd_nxt
3044 */
39236c6e 3045 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
fe8ab488 3046 (mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC) &&
5ba3f43e
A
3047 mp_so->so_snd.sb_mb) {
3048 mpt_dsn = mp_so->so_snd.sb_mb->m_pkthdr.mp_dsn;
3049 if (MPTCP_SEQ_LT(mpt_dsn, mp_tp->mpt_snduna)) {
3050 uint64_t len = 0;
3051 len = mp_tp->mpt_snduna - mpt_dsn;
3052 sbdrop(&mp_so->so_snd, (int)len);
3053 wakeup = 1;
3054
cb323159
A
3055 os_log_error(mptcp_log_handle, "%s - %lx: dropping data in degraded mode, should have been done earlier dsn %u sndnxt %u suna %u\n",
3056 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
3057 (uint32_t)mpt_dsn, (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_snduna);
5ba3f43e 3058 }
39236c6e
A
3059 }
3060
fe8ab488
A
3061 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) &&
3062 !(mp_tp->mpt_flags & MPTCPF_POST_FALLBACK_SYNC)) {
3063 mp_tp->mpt_flags |= MPTCPF_POST_FALLBACK_SYNC;
3064 so->so_flags1 |= SOF1_POST_FALLBACK_SYNC;
39236c6e
A
3065 }
3066
3067 /*
3068 * Adjust the top level notion of next byte used for retransmissions
3069 * and sending FINs.
3070 */
0a7de745 3071 if (MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_snduna)) {
39236c6e 3072 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
0a7de745 3073 }
39236c6e
A
3074
3075 /* Now determine the offset from which to start transmitting data */
0a7de745 3076 if (mpte->mpte_reinjectq) {
5ba3f43e 3077 sb_mb = mpte->mpte_reinjectq;
0a7de745 3078 } else {
a39ff7e2 3079dont_reinject:
5ba3f43e 3080 sb_mb = mp_so->so_snd.sb_mb;
0a7de745 3081 }
39236c6e 3082 if (sb_mb == NULL) {
cb323159
A
3083 os_log_error(mptcp_log_handle, "%s - %lx: send-buffer is still empty\n", __func__,
3084 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e
A
3085 goto out;
3086 }
5ba3f43e 3087
a39ff7e2 3088 if (sb_mb == mpte->mpte_reinjectq) {
5ba3f43e 3089 sb_cc = sb_mb->m_pkthdr.mp_rlen;
a39ff7e2
A
3090 off = 0;
3091
3092 if (mptcp_search_seq_in_sub(sb_mb, so)) {
3093 if (mptcp_can_send_more(mp_tp, TRUE)) {
3094 goto dont_reinject;
3095 }
3096
3097 error = ECANCELED;
3098 goto out;
3099 }
3100
3101 reinjected = TRUE;
5ba3f43e
A
3102 } else if (flags & MPTCP_SUBOUT_PROBING) {
3103 sb_cc = sb_mb->m_pkthdr.mp_rlen;
3104 off = 0;
39236c6e 3105 } else {
5ba3f43e
A
3106 sb_cc = min(mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd);
3107
3108 /*
3109 * With TFO, there might be no data at all, thus still go into this
3110 * code-path here.
3111 */
3112 if ((mp_so->so_flags1 & SOF1_PRECONNECT_DATA) ||
3113 MPTCP_SEQ_LT(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
3114 off = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
3115 sb_cc -= off;
3116 } else {
cb323159
A
3117 os_log_error(mptcp_log_handle, "%s - %lx: this should not happen: sndnxt %u sndmax %u\n",
3118 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (uint32_t)mp_tp->mpt_sndnxt,
3119 (uint32_t)mp_tp->mpt_sndmax);
5ba3f43e
A
3120
3121 goto out;
3122 }
39236c6e 3123 }
39236c6e 3124
5ba3f43e
A
3125 sb_cc = min(sb_cc, mptcp_subflow_cwnd_space(so));
3126 if (sb_cc <= 0) {
cb323159
A
3127 os_log_error(mptcp_log_handle, "%s - %lx: sb_cc is %d, mp_so->sb_cc %u, sndwnd %u, sndnxt %u sndmax %u cwnd %u\n",
3128 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), sb_cc, mp_so->so_snd.sb_cc, mp_tp->mpt_sndwnd,
0a7de745 3129 (uint32_t)mp_tp->mpt_sndnxt, (uint32_t)mp_tp->mpt_sndmax,
cb323159 3130 mptcp_subflow_cwnd_space(so));
5ba3f43e
A
3131 }
3132
3133 sb_cc = min(sb_cc, UINT16_MAX);
3134
3135 /*
3136 * Create a DSN mapping for the data we are about to send. It all
3137 * has the same mapping.
3138 */
0a7de745 3139 if (reinjected) {
5ba3f43e 3140 mpt_dsn = sb_mb->m_pkthdr.mp_dsn;
0a7de745 3141 } else {
5ba3f43e 3142 mpt_dsn = mp_tp->mpt_snduna + off;
0a7de745 3143 }
39236c6e 3144
5ba3f43e 3145 mpt_mbuf = sb_mb;
a39ff7e2 3146 while (mpt_mbuf && reinjected == FALSE &&
0a7de745
A
3147 (mpt_mbuf->m_pkthdr.mp_rlen == 0 ||
3148 mpt_mbuf->m_pkthdr.mp_rlen <= (uint32_t)off)) {
39236c6e
A
3149 off -= mpt_mbuf->m_pkthdr.mp_rlen;
3150 mpt_mbuf = mpt_mbuf->m_next;
39236c6e 3151 }
0a7de745 3152 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
5ba3f43e
A
3153 mptcplog((LOG_DEBUG, "%s: %u snduna = %u sndnxt = %u probe %d\n",
3154 __func__, mpts->mpts_connid, (uint32_t)mp_tp->mpt_snduna, (uint32_t)mp_tp->mpt_sndnxt,
3e170ce0 3155 mpts->mpts_probecnt),
5ba3f43e 3156 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
0a7de745 3157 }
39236c6e 3158
ecc0ceb4 3159 VERIFY((mpt_mbuf == NULL) || (mpt_mbuf->m_pkthdr.pkt_flags & PKTF_MPTCP));
39236c6e 3160
fe8ab488
A
3161 head = tail = NULL;
3162
39236c6e 3163 while (tot_sent < sb_cc) {
5ba3f43e 3164 ssize_t mlen;
39236c6e 3165
5ba3f43e 3166 mlen = mpt_mbuf->m_len;
39236c6e 3167 mlen -= off;
5ba3f43e 3168 mlen = min(mlen, sb_cc - tot_sent);
39236c6e 3169
5ba3f43e 3170 if (mlen < 0) {
cb323159
A
3171 os_log_error(mptcp_log_handle, "%s - %lx: mlen %d mp_rlen %u off %u sb_cc %u tot_sent %u\n",
3172 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), (int)mlen, mpt_mbuf->m_pkthdr.mp_rlen,
3173 (uint32_t)off, sb_cc, tot_sent);
5ba3f43e 3174 goto out;
39236c6e
A
3175 }
3176
0a7de745 3177 if (mlen == 0) {
5ba3f43e 3178 goto next;
0a7de745 3179 }
5ba3f43e 3180
fe8ab488
A
3181 m = m_copym_mode(mpt_mbuf, (int)off, mlen, M_DONTWAIT,
3182 M_COPYM_MUST_COPY_HDR);
39236c6e 3183 if (m == NULL) {
cb323159
A
3184 os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode failed\n", __func__,
3185 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e
A
3186 error = ENOBUFS;
3187 break;
3188 }
3189
3190 /* Create a DSN mapping for the data (m_copym does it) */
fe8ab488 3191 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e
A
3192 VERIFY(m->m_next == NULL);
3193
39236c6e
A
3194 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
3195 m->m_pkthdr.pkt_flags &= ~PKTF_MPSO;
5ba3f43e 3196 m->m_pkthdr.mp_dsn = mpt_dsn;
39236c6e 3197 m->m_pkthdr.mp_rseq = mpts->mpts_rel_seq;
39236c6e
A
3198 m->m_pkthdr.len = mlen;
3199
fe8ab488 3200 if (head == NULL) {
0a7de745 3201 head = tail = m;
fe8ab488
A
3202 } else {
3203 tail->m_next = m;
3204 tail = m;
3205 }
3206
fe8ab488
A
3207 tot_sent += mlen;
3208 off = 0;
5ba3f43e 3209next:
fe8ab488
A
3210 mpt_mbuf = mpt_mbuf->m_next;
3211 }
3212
a39ff7e2 3213 if (reinjected) {
5ba3f43e
A
3214 if (sb_cc < sb_mb->m_pkthdr.mp_rlen) {
3215 struct mbuf *n = sb_mb;
3216
3217 while (n) {
3218 n->m_pkthdr.mp_dsn += sb_cc;
3219 n->m_pkthdr.mp_rlen -= sb_cc;
3220 n = n->m_next;
3221 }
3222 m_adj(sb_mb, sb_cc);
3223 } else {
3224 mpte->mpte_reinjectq = sb_mb->m_nextpkt;
3225 m_freem(sb_mb);
3226 }
3227 }
3228
3229 mptcplog((LOG_DEBUG, "%s: Queued dsn %u ssn %u len %u on sub %u\n",
0a7de745
A
3230 __func__, (uint32_t)mpt_dsn, mpts->mpts_rel_seq,
3231 tot_sent, mpts->mpts_connid), MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3232
3233 if (head && (mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
3234 dss_csum = mptcp_output_csum(head, mpt_dsn, mpts->mpts_rel_seq,
0a7de745 3235 tot_sent);
5ba3f43e
A
3236 }
3237
3238 /* Now, let's update rel-seq and the data-level length */
3239 mpts->mpts_rel_seq += tot_sent;
3240 m = head;
3241 while (m) {
0a7de745 3242 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) {
5ba3f43e 3243 m->m_pkthdr.mp_csum = dss_csum;
0a7de745 3244 }
5ba3f43e
A
3245 m->m_pkthdr.mp_rlen = tot_sent;
3246 m = m->m_next;
3247 }
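	/*
	 * Every mbuf of the chain now carries the full mapping length (and,
	 * with checksumming enabled, the same DSS checksum), since the whole
	 * chain is covered by a single DSS mapping.
	 */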
3248
3249 if (head != NULL) {
490019cf 3250 if ((mpts->mpts_flags & MPTSF_TFO_REQD) &&
0a7de745 3251 (tp->t_tfo_stats == 0)) {
39037602 3252 tp->t_mpflags |= TMPF_TFO_REQUEST;
0a7de745 3253 }
fe8ab488
A
3254
3255 error = sock_sendmbuf(so, NULL, head, 0, NULL);
3256
5ba3f43e 3257 DTRACE_MPTCP7(send, struct mbuf *, m, struct socket *, so,
39236c6e
A
3258 struct sockbuf *, &so->so_rcv,
3259 struct sockbuf *, &so->so_snd,
3260 struct mptses *, mpte, struct mptsub *, mpts,
fe8ab488
A
3261 size_t, tot_sent);
3262 }
3263
5ba3f43e
A
3264done_sending:
3265 if (error == 0 ||
3266 (error == EWOULDBLOCK && (tp->t_mpflags & TMPF_TFO_REQUEST))) {
3267 uint64_t new_sndnxt = mp_tp->mpt_sndnxt + tot_sent;
3e170ce0
A
3268
3269 if (mpts->mpts_probesoon && mpts->mpts_maxseg && tot_sent) {
3270 tcpstat.tcps_mp_num_probes++;
0a7de745 3271 if ((uint32_t)tot_sent < mpts->mpts_maxseg) {
3e170ce0 3272 mpts->mpts_probecnt += 1;
0a7de745 3273 } else {
3e170ce0 3274 mpts->mpts_probecnt +=
0a7de745
A
3275 tot_sent / mpts->mpts_maxseg;
3276 }
3e170ce0
A
3277 }
3278
5ba3f43e
A
3279 if (!reinjected && !(flags & MPTCP_SUBOUT_PROBING)) {
3280 if (MPTCP_DATASEQ_HIGH32(new_sndnxt) >
0a7de745 3281 MPTCP_DATASEQ_HIGH32(mp_tp->mpt_sndnxt)) {
39236c6e 3282 mp_tp->mpt_flags |= MPTCPF_SND_64BITDSN;
0a7de745 3283 }
5ba3f43e 3284 mp_tp->mpt_sndnxt = new_sndnxt;
39236c6e 3285 }
fe8ab488 3286
5ba3f43e 3287 mptcp_cancel_timer(mp_tp, MPTT_REXMT);
490019cf 3288
5ba3f43e
A
3289 /* Must be here as mptcp_can_send_more() checks for this */
3290 soclearfastopen(mp_so);
39236c6e 3291
3e170ce0 3292 if ((mpts->mpts_flags & MPTSF_MP_DEGRADED) ||
0a7de745 3293 (mpts->mpts_probesoon != 0)) {
5ba3f43e
A
3294 mptcplog((LOG_DEBUG, "%s %u degraded %u wrote %d %d probe %d probedelta %d\n",
3295 __func__, mpts->mpts_connid,
3296 !!(mpts->mpts_flags & MPTSF_MP_DEGRADED),
3297 tot_sent, (int) sb_cc, mpts->mpts_probecnt,
3e170ce0 3298 (tcp_now - mpts->mpts_probesoon)),
5ba3f43e 3299 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
0a7de745 3300 }
5ba3f43e
A
3301
3302 if (IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
cb323159 3303 mptcp_set_cellicon(mpte, mpts);
5ba3f43e
A
3304
3305 mpte->mpte_used_cell = 1;
3306 } else {
cb323159
A
3307 /*
3308 * If during the past MPTCP_CELLICON_TOGGLE_RATE seconds we didn't
3309 * explicitly set the cellicon, then we unset it again.
3310 */
3311 if (TSTMP_LT(mpte->mpte_last_cellicon_set + MPTCP_CELLICON_TOGGLE_RATE, tcp_now)) {
3312 mptcp_unset_cellicon(mpte, NULL, 1);
3313 }
5ba3f43e
A
3314
3315 mpte->mpte_used_wifi = 1;
3316 }
3317
3318 /*
3319 * Don't propagate EWOULDBLOCK - it's already taken care of
3320 * in mptcp_usr_send for TFO.
3321 */
3322 error = 0;
fe8ab488 3323 } else {
cb323159
A
3324 os_log_error(mptcp_log_handle, "%s - %lx: %u error %d len %d subflags %#x sostate %#x soerror %u hiwat %u lowat %u\n",
3325 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, error, tot_sent, so->so_flags, so->so_state, so->so_error, so->so_snd.sb_hiwat, so->so_snd.sb_lowat);
39236c6e
A
3326 }
3327out:
5ba3f43e 3328
0a7de745 3329 if (wakeup) {
5ba3f43e 3330 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WWAKEUP;
0a7de745 3331 }
39037602 3332
5ba3f43e 3333 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_INSIDE_OUTPUT);
0a7de745 3334 return error;
5ba3f43e
A
3335
3336zero_len_write:
3337 /* Zero-length write: call pru_send directly, as there is no mbuf to hand to the subflow */
3338 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, NULL, NULL,
0a7de745 3339 NULL, current_proc());
5ba3f43e
A
3340
3341 goto done_sending;
39236c6e
A
3342}
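The done_sending path above decides whether the DSS option must carry the full 64-bit data sequence number: it compares the upper 32 bits of mpt_sndnxt before and after adding tot_sent and sets MPTCPF_SND_64BITDSN when the send point crosses a 2^32 boundary. The following stand-alone sketch (user-space C with a hypothetical helper, not part of this file) illustrates that check:

#include <stdint.h>
#include <stdio.h>

#define DATASEQ_HIGH32(seq)	((seq) & 0xffffffff00000000ULL)

/*
 * Nonzero when advancing by "sent" bytes crosses a 2^32 boundary of the
 * 64-bit data-sequence space, i.e. when the full 64-bit DSN must be sent.
 */
static int
needs_64bit_dsn(uint64_t snd_nxt, uint32_t sent)
{
	return DATASEQ_HIGH32(snd_nxt + sent) > DATASEQ_HIGH32(snd_nxt);
}

int
main(void)
{
	printf("%d\n", needs_64bit_dsn(0xfffffff0ULL, 0x20));	/* 1: crosses the boundary */
	printf("%d\n", needs_64bit_dsn(0x00001000ULL, 0x20));	/* 0: stays in the same epoch */
	return 0;
}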
3343
39236c6e 3344static void
5ba3f43e 3345mptcp_add_reinjectq(struct mptses *mpte, struct mbuf *m)
39236c6e 3346{
5ba3f43e 3347 struct mbuf *n, *prev = NULL;
39236c6e 3348
5ba3f43e 3349 mptcplog((LOG_DEBUG, "%s reinjecting dsn %u dlen %u rseq %u\n",
0a7de745
A
3350 __func__, (uint32_t)m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
3351 m->m_pkthdr.mp_rseq),
3352 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3353
3354 n = mpte->mpte_reinjectq;
3355
3356 /* First, look for an mbuf n whose data-sequence number is greater
3357 * than or equal to m's sequence number.
3358 */
3359 while (n) {
0a7de745 3360 if (MPTCP_SEQ_GEQ(n->m_pkthdr.mp_dsn, m->m_pkthdr.mp_dsn)) {
5ba3f43e 3361 break;
0a7de745 3362 }
5ba3f43e
A
3363
3364 prev = n;
3365
3366 n = n->m_nextpkt;
3367 }
3368
3369 if (n) {
3370 /* m is already fully covered by the next mbuf in the queue */
3371 if (n->m_pkthdr.mp_dsn == m->m_pkthdr.mp_dsn &&
3372 n->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_rlen) {
3373 mptcplog((LOG_DEBUG, "%s fully covered with len %u\n",
0a7de745
A
3374 __func__, n->m_pkthdr.mp_rlen),
3375 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3376 goto dont_queue;
3377 }
3378
3379 /* m covers the next mbuf in the queue entirely, so remove that one */
3380 if (m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen >= n->m_pkthdr.mp_dsn + n->m_pkthdr.mp_rlen) {
3381 struct mbuf *tmp = n->m_nextpkt;
3382
3383 mptcplog((LOG_DEBUG, "%s m is covering that guy dsn %u len %u dsn %u len %u\n",
0a7de745
A
3384 __func__, m->m_pkthdr.mp_dsn, m->m_pkthdr.mp_rlen,
3385 n->m_pkthdr.mp_dsn, n->m_pkthdr.mp_rlen),
3386 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3387
3388 m->m_nextpkt = NULL;
0a7de745 3389 if (prev == NULL) {
5ba3f43e 3390 mpte->mpte_reinjectq = tmp;
0a7de745 3391 } else {
5ba3f43e 3392 prev->m_nextpkt = tmp;
0a7de745 3393 }
5ba3f43e
A
3394
3395 m_freem(n);
3396 n = tmp;
3397 }
5ba3f43e
A
3398 }
3399
3400 if (prev) {
3401 /* m is already fully covered by the previous mbuf in the queue */
3402 if (prev->m_pkthdr.mp_dsn + prev->m_pkthdr.mp_rlen >= m->m_pkthdr.mp_dsn + m->m_pkthdr.len) {
3403 mptcplog((LOG_DEBUG, "%s prev covers us from %u with len %u\n",
0a7de745
A
3404 __func__, prev->m_pkthdr.mp_dsn, prev->m_pkthdr.mp_rlen),
3405 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3406 goto dont_queue;
3407 }
3408 }
3409
0a7de745 3410 if (prev == NULL) {
5ba3f43e 3411 mpte->mpte_reinjectq = m;
0a7de745 3412 } else {
5ba3f43e 3413 prev->m_nextpkt = m;
0a7de745 3414 }
39236c6e 3415
5ba3f43e
A
3416 m->m_nextpkt = n;
3417
3418 return;
3419
3420dont_queue:
3421 m_freem(m);
3422 return;
39236c6e
A
3423}
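mptcp_add_reinjectq() above keeps mpte_reinjectq sorted by data-sequence number and drops a segment that is already fully covered by a neighbour. A simplified user-space model of that insertion is sketched below; struct seg is hypothetical, plain 64-bit comparisons replace the wraparound-safe MPTCP_SEQ_* macros, and the step that frees a following segment entirely covered by the new one is omitted for brevity:

#include <stdint.h>
#include <stdlib.h>

struct seg {
	struct seg *next;
	uint64_t    dsn;	/* data-sequence number */
	uint32_t    len;	/* data-level length */
};

static void
reinjectq_add(struct seg **head, struct seg *m)
{
	struct seg *n = *head, *prev = NULL;

	/* Find the first queued segment whose DSN is >= the new segment's DSN. */
	while (n != NULL && n->dsn < m->dsn) {
		prev = n;
		n = n->next;
	}

	/* Already fully covered by the segment that follows? Drop the new one. */
	if (n != NULL && n->dsn == m->dsn && n->len >= m->len) {
		free(m);
		return;
	}

	/* Already fully covered by the segment that precedes it? Drop it as well. */
	if (prev != NULL && prev->dsn + prev->len >= m->dsn + m->len) {
		free(m);
		return;
	}

	/* Otherwise splice it in, keeping the queue sorted. */
	m->next = n;
	if (prev == NULL) {
		*head = m;
	} else {
		prev->next = m;
	}
}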
3424
5ba3f43e
A
3425static struct mbuf *
3426mptcp_lookup_dsn(struct mptses *mpte, uint64_t dsn)
39236c6e 3427{
5ba3f43e
A
3428 struct socket *mp_so = mptetoso(mpte);
3429 struct mbuf *m;
39236c6e 3430
5ba3f43e 3431 m = mp_so->so_snd.sb_mb;
39236c6e 3432
5ba3f43e
A
3433 while (m) {
3434 /* If this segment covers what we are looking for, return it. */
3435 if (MPTCP_SEQ_LEQ(m->m_pkthdr.mp_dsn, dsn) &&
0a7de745 3436 MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, dsn)) {
5ba3f43e 3437 break;
0a7de745 3438 }
5ba3f43e
A
3439
3440
3441 /* The segment is no longer in the queue */
0a7de745 3442 if (MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn, dsn)) {
5ba3f43e 3443 return NULL;
0a7de745 3444 }
5ba3f43e
A
3445
3446 m = m->m_next;
39236c6e
A
3447 }
3448
5ba3f43e
A
3449 return m;
3450}
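mptcp_lookup_dsn() above returns the first mbuf in the MPTCP send buffer whose DSS mapping covers the requested data-sequence number, i.e. dsn lies in the half-open interval [mp_dsn, mp_dsn + mp_rlen). A trivial illustration of that predicate (plain comparisons instead of the wraparound-safe MPTCP_SEQ_* macros):

#include <stdbool.h>
#include <stdint.h>

/* True when a mapping starting at map_dsn with length map_len covers dsn. */
static bool
mapping_covers(uint64_t map_dsn, uint32_t map_len, uint64_t dsn)
{
	return map_dsn <= dsn && dsn < map_dsn + map_len;
}

/* Example: mapping_covers(1000, 500, 1499) is true; mapping_covers(1000, 500, 1500) is false. */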
fe8ab488 3451
5ba3f43e 3452static struct mbuf *
cb323159 3453mptcp_copy_mbuf_list(struct mptses *mpte, struct mbuf *m, int len)
5ba3f43e
A
3454{
3455 struct mbuf *top = NULL, *tail = NULL;
3456 uint64_t dsn;
3457 uint32_t dlen, rseq;
39236c6e 3458
5ba3f43e
A
3459 dsn = m->m_pkthdr.mp_dsn;
3460 dlen = m->m_pkthdr.mp_rlen;
3461 rseq = m->m_pkthdr.mp_rseq;
3e170ce0 3462
5ba3f43e
A
3463 while (len > 0) {
3464 struct mbuf *n;
3465
3466 VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));
3467
3468 n = m_copym_mode(m, 0, m->m_len, M_DONTWAIT, M_COPYM_MUST_COPY_HDR);
3469 if (n == NULL) {
cb323159
A
3470 os_log_error(mptcp_log_handle, "%s - %lx: m_copym_mode returned NULL\n",
3471 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3472 goto err;
3e170ce0 3473 }
fe8ab488 3474
5ba3f43e
A
3475 VERIFY(n->m_flags & M_PKTHDR);
3476 VERIFY(n->m_next == NULL);
3477 VERIFY(n->m_pkthdr.mp_dsn == dsn);
3478 VERIFY(n->m_pkthdr.mp_rlen == dlen);
3479 VERIFY(n->m_pkthdr.mp_rseq == rseq);
3480 VERIFY(n->m_len == m->m_len);
3481
3482 n->m_pkthdr.pkt_flags |= (PKTF_MPSO | PKTF_MPTCP);
3483
0a7de745 3484 if (top == NULL) {
5ba3f43e 3485 top = n;
0a7de745 3486 }
5ba3f43e 3487
0a7de745 3488 if (tail != NULL) {
5ba3f43e 3489 tail->m_next = n;
0a7de745 3490 }
5ba3f43e
A
3491
3492 tail = n;
3493
3494 len -= m->m_len;
3495 m = m->m_next;
39236c6e
A
3496 }
3497
5ba3f43e
A
3498 return top;
3499
3500err:
0a7de745 3501 if (top) {
5ba3f43e 3502 m_freem(top);
0a7de745 3503 }
5ba3f43e
A
3504
3505 return NULL;
39236c6e
A
3506}
3507
5ba3f43e
A
3508static void
3509mptcp_reinject_mbufs(struct socket *so)
39236c6e 3510{
5ba3f43e
A
3511 struct tcpcb *tp = sototcpcb(so);
3512 struct mptsub *mpts = tp->t_mpsub;
3513 struct mptcb *mp_tp = tptomptp(tp);
3514 struct mptses *mpte = mp_tp->mpt_mpte;
3515 struct sockbuf *sb = &so->so_snd;
3516 struct mbuf *m;
39236c6e 3517
5ba3f43e
A
3518 m = sb->sb_mb;
3519 while (m) {
3520 struct mbuf *n = m->m_next, *orig = m;
39236c6e 3521
5ba3f43e 3522 mptcplog((LOG_DEBUG, "%s working on suna %u relseq %u iss %u len %u pktflags %#x\n",
0a7de745
A
3523 __func__, tp->snd_una, m->m_pkthdr.mp_rseq, mpts->mpts_iss,
3524 m->m_pkthdr.mp_rlen, m->m_pkthdr.pkt_flags),
3525 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 3526
5ba3f43e 3527 VERIFY((m->m_flags & M_PKTHDR) && (m->m_pkthdr.pkt_flags & PKTF_MPTCP));
39236c6e 3528
0a7de745 3529 if (m->m_pkthdr.pkt_flags & PKTF_MPTCP_REINJ) {
5ba3f43e 3530 goto next;
0a7de745 3531 }
39236c6e 3532
5ba3f43e 3533 /* Has it all already been acknowledged at the data-level? */
0a7de745 3534 if (MPTCP_SEQ_GEQ(mp_tp->mpt_snduna, m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen)) {
5ba3f43e 3535 goto next;
0a7de745 3536 }
5ba3f43e
A
3537
3538 /* Part of this has already been acknowledged at the subflow level -
3539 * look the segment up in the MPTCP socket's send buffer.
3540 */
3541 if (SEQ_GT(tp->snd_una - mpts->mpts_iss, m->m_pkthdr.mp_rseq)) {
3542 m = mptcp_lookup_dsn(mpte, m->m_pkthdr.mp_dsn);
0a7de745 3543 if (m == NULL) {
5ba3f43e 3544 goto next;
0a7de745 3545 }
5ba3f43e
A
3546 }
3547
3548 /* Copy the mbuf chain together with its packet headers (i.e., the DSN mapping) */
cb323159 3549 m = mptcp_copy_mbuf_list(mpte, m, m->m_pkthdr.mp_rlen);
0a7de745 3550 if (m == NULL) {
5ba3f43e 3551 break;
0a7de745 3552 }
5ba3f43e
A
3553
3554 VERIFY(m->m_nextpkt == NULL);
3555
3556 /* Now, add to the reinject-queue, eliminating overlapping
3557 * segments
3558 */
3559 mptcp_add_reinjectq(mpte, m);
3560
3561 orig->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
3562
3563next:
3564 /* mp_rlen can cover multiple mbufs, so advance to the end of it. */
3565 while (n) {
3566 VERIFY((n->m_flags & M_PKTHDR) && (n->m_pkthdr.pkt_flags & PKTF_MPTCP));
3567
0a7de745 3568 if (n->m_pkthdr.mp_dsn != orig->m_pkthdr.mp_dsn) {
5ba3f43e 3569 break;
0a7de745 3570 }
5ba3f43e
A
3571
3572 n->m_pkthdr.pkt_flags |= PKTF_MPTCP_REINJ;
3573 n = n->m_next;
3574 }
3575
3576 m = n;
39236c6e 3577 }
5ba3f43e 3578}
39236c6e 3579
5ba3f43e
A
3580void
3581mptcp_clean_reinjectq(struct mptses *mpte)
3582{
3583 struct mptcb *mp_tp = mpte->mpte_mptcb;
3584
cb323159 3585 socket_lock_assert_owned(mptetoso(mpte));
5ba3f43e
A
3586
3587 while (mpte->mpte_reinjectq) {
3588 struct mbuf *m = mpte->mpte_reinjectq;
3589
3590 if (MPTCP_SEQ_GEQ(m->m_pkthdr.mp_dsn, mp_tp->mpt_snduna) ||
0a7de745 3591 MPTCP_SEQ_GT(m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen, mp_tp->mpt_snduna)) {
5ba3f43e 3592 break;
0a7de745 3593 }
5ba3f43e
A
3594
3595 mpte->mpte_reinjectq = m->m_nextpkt;
3596 m->m_nextpkt = NULL;
3597 m_freem(m);
3598 }
39236c6e
A
3599}
3600
3601/*
5ba3f43e 3602 * Subflow socket control event upcall.
39236c6e 3603 */
5ba3f43e
A
3604static void
3605mptcp_subflow_eupcall1(struct socket *so, void *arg, uint32_t events)
39236c6e 3606{
5ba3f43e
A
3607#pragma unused(so)
3608 struct mptsub *mpts = arg;
3609 struct mptses *mpte = mpts->mpts_mpte;
39236c6e 3610
cb323159 3611 socket_lock_assert_owned(mptetoso(mpte));
39236c6e 3612
0a7de745 3613 if ((mpts->mpts_evctl & events) == events) {
5ba3f43e 3614 return;
0a7de745 3615 }
39236c6e 3616
5ba3f43e
A
3617 mpts->mpts_evctl |= events;
3618
3619 if (mptcp_should_defer_upcall(mpte->mpte_mppcb)) {
3620 mpte->mpte_mppcb->mpp_flags |= MPP_SHOULD_WORKLOOP;
3621 return;
39037602 3622 }
39236c6e 3623
5ba3f43e 3624 mptcp_subflow_workloop(mpte);
39236c6e
A
3625}
3626
3627/*
5ba3f43e
A
3628 * Subflow socket control events.
3629 *
3630 * Called for handling events related to the underlying subflow socket.
39236c6e
A
3631 */
3632static ev_ret_t
5ba3f43e 3633mptcp_subflow_events(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3634 uint64_t *p_mpsofilt_hint)
39236c6e 3635{
5ba3f43e
A
3636 ev_ret_t ret = MPTS_EVRET_OK;
3637 int i, mpsub_ev_entry_count = sizeof(mpsub_ev_entry_tbl) /
0a7de745 3638 sizeof(mpsub_ev_entry_tbl[0]);
39236c6e 3639
5ba3f43e 3640 /* bail if there's nothing to process */
0a7de745
A
3641 if (!mpts->mpts_evctl) {
3642 return ret;
3643 }
39236c6e 3644
0a7de745
A
3645 if (mpts->mpts_evctl & (SO_FILT_HINT_CONNRESET | SO_FILT_HINT_MUSTRST |
3646 SO_FILT_HINT_CANTSENDMORE | SO_FILT_HINT_TIMEOUT |
3647 SO_FILT_HINT_NOSRCADDR | SO_FILT_HINT_IFDENIED |
5ba3f43e
A
3648 SO_FILT_HINT_DISCONNECTED)) {
3649 mpts->mpts_evctl |= SO_FILT_HINT_MPFAILOVER;
3650 }
3e170ce0 3651
5ba3f43e
A
3652 DTRACE_MPTCP3(subflow__events, struct mptses *, mpte,
3653 struct mptsub *, mpts, uint32_t, mpts->mpts_evctl);
3654
3655 mptcplog((LOG_DEBUG, "%s cid %d events=%b\n", __func__,
0a7de745
A
3656 mpts->mpts_connid, mpts->mpts_evctl, SO_FILT_HINT_BITS),
3657 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
3658
3659 /*
3660 * Process all the socket filter hints and reset the hint
3661 * once it is handled
3662 */
3663 for (i = 0; i < mpsub_ev_entry_count && mpts->mpts_evctl; i++) {
3664 /*
3665 * Always execute the DISCONNECTED event, because it will wakeup
3666 * the app.
3667 */
3668 if ((mpts->mpts_evctl & mpsub_ev_entry_tbl[i].sofilt_hint_mask) &&
3669 (ret >= MPTS_EVRET_OK ||
0a7de745 3670 mpsub_ev_entry_tbl[i].sofilt_hint_mask == SO_FILT_HINT_DISCONNECTED)) {
5ba3f43e
A
3671 mpts->mpts_evctl &= ~mpsub_ev_entry_tbl[i].sofilt_hint_mask;
3672 ev_ret_t error =
0a7de745 3673 mpsub_ev_entry_tbl[i].sofilt_hint_ev_hdlr(mpte, mpts, p_mpsofilt_hint, mpsub_ev_entry_tbl[i].sofilt_hint_mask);
5ba3f43e
A
3674 ret = ((error >= MPTS_EVRET_OK) ? MAX(error, ret) : error);
3675 }
3676 }
3677
3678 /*
3679 * We should be getting only events specified via sock_catchevents(),
3680 * so loudly complain if we have any unprocessed one(s).
3681 */
0a7de745 3682 if (mpts->mpts_evctl || ret < MPTS_EVRET_OK) {
cb323159 3683 mptcplog((LOG_WARNING, "%s%s: cid %d evret %d unhandled events=%b\n", __func__,
5ba3f43e
A
3684 (mpts->mpts_evctl && ret == MPTS_EVRET_OK) ? "MPTCP_ERROR " : "",
3685 mpts->mpts_connid,
cb323159 3686 ret, mpts->mpts_evctl, SO_FILT_HINT_BITS),
5ba3f43e 3687 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
0a7de745 3688 } else {
5ba3f43e 3689 mptcplog((LOG_DEBUG, "%s: Done, events %b\n", __func__,
0a7de745
A
3690 mpts->mpts_evctl, SO_FILT_HINT_BITS),
3691 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_VERBOSE);
3692 }
5ba3f43e 3693
0a7de745 3694 return ret;
39236c6e
A
3695}
3696
39236c6e 3697static ev_ret_t
5ba3f43e 3698mptcp_subflow_propagate_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3699 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e
A
3700{
3701 struct socket *mp_so, *so;
3702 struct mptcb *mp_tp;
39236c6e 3703
5ba3f43e 3704 mp_so = mptetoso(mpte);
39236c6e
A
3705 mp_tp = mpte->mpte_mptcb;
3706 so = mpts->mpts_socket;
3707
5ba3f43e
A
3708 mptcplog((LOG_DEBUG, "%s: cid %d event %d\n", __func__,
3709 mpts->mpts_connid, event),
3e170ce0 3710 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e 3711
39236c6e 3712 /*
5ba3f43e
A
3713 * We got an event for this subflow that might need to be propagated,
3714 * based on the state of the MPTCP connection.
39236c6e 3715 */
5ba3f43e 3716 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
cb323159 3717 (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) && !(mpts->mpts_flags & MPTSF_MP_READY)) ||
5ba3f43e
A
3718 ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
3719 mp_so->so_error = so->so_error;
3720 *p_mpsofilt_hint |= event;
39236c6e 3721 }
39236c6e 3722
0a7de745 3723 return MPTS_EVRET_OK;
39236c6e
A
3724}
3725
3726/*
3727 * Handle SO_FILT_HINT_NOSRCADDR subflow socket event.
3728 */
3729static ev_ret_t
3e170ce0 3730mptcp_subflow_nosrcaddr_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3731 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 3732{
5ba3f43e
A
3733#pragma unused(p_mpsofilt_hint, event)
3734 struct socket *mp_so;
3735 struct tcpcb *tp;
39236c6e 3736
5ba3f43e
A
3737 mp_so = mptetoso(mpte);
3738 tp = intotcpcb(sotoinpcb(mpts->mpts_socket));
39236c6e 3739
39236c6e
A
3740 /*
3741 * This overwrites any previous mpte_lost_aid to avoid storing
3742 * too much state; the typical case has only two subflows.
3743 */
3744 mpte->mpte_flags |= MPTE_SND_REM_ADDR;
3745 mpte->mpte_lost_aid = tp->t_local_aid;
3746
5ba3f43e 3747 mptcplog((LOG_DEBUG, "%s cid %d\n", __func__, mpts->mpts_connid),
0a7de745 3748 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
3749
3750 /*
3751 * The subflow connection has lost its source address.
39236c6e 3752 */
5ba3f43e 3753 mptcp_subflow_abort(mpts, EADDRNOTAVAIL);
39236c6e 3754
0a7de745 3755 if (mp_so->so_flags & SOF_NOADDRAVAIL) {
5ba3f43e 3756 mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
0a7de745 3757 }
39236c6e 3758
0a7de745 3759 return MPTS_EVRET_DELETE;
39236c6e
A
3760}
3761
cb323159
A
3762static ev_ret_t
3763mptcp_subflow_mpsuberror_ev(struct mptses *mpte, struct mptsub *mpts,
3764 uint64_t *p_mpsofilt_hint, uint64_t event)
3765{
3766#pragma unused(event, p_mpsofilt_hint)
3767 struct socket *so, *mp_so;
3768
3769 so = mpts->mpts_socket;
3770
3771 if (so->so_error != ENODATA) {
3772 return MPTS_EVRET_OK;
3773 }
3774
3775
3776 mp_so = mptetoso(mpte);
3777
3778 mp_so->so_error = ENODATA;
3779
3780 sorwakeup(mp_so);
3781 sowwakeup(mp_so);
3782
3783 return MPTS_EVRET_OK;
3784}
3785
3786
fe8ab488
A
3787/*
3788 * Handle SO_FILT_HINT_MPCANTRCVMORE subflow socket event that
3789 * indicates that the remote side sent a Data FIN
3790 */
3791static ev_ret_t
3e170ce0 3792mptcp_subflow_mpcantrcvmore_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3793 uint64_t *p_mpsofilt_hint, uint64_t event)
fe8ab488 3794{
5ba3f43e 3795#pragma unused(event)
cb323159 3796 struct mptcb *mp_tp = mpte->mpte_mptcb;
fe8ab488 3797
5ba3f43e 3798 mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__, mpts->mpts_connid),
3e170ce0 3799 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39037602 3800
fe8ab488 3801 /*
0a7de745
A
3802 * We got a Data FIN for the MPTCP connection.
3803 * The FIN may arrive with data. The data is handed up to the
3804 * mptcp socket and the user is notified so that it may close
3805 * the socket if needed.
3806 */
3807 if (mp_tp->mpt_state == MPTCPS_CLOSE_WAIT) {
5ba3f43e 3808 *p_mpsofilt_hint |= SO_FILT_HINT_CANTRCVMORE;
0a7de745 3809 }
39037602 3810
0a7de745 3811 return MPTS_EVRET_OK; /* keep the subflow socket around */
fe8ab488
A
3812}
3813
39236c6e
A
3814/*
3815 * Handle SO_FILT_HINT_MPFAILOVER subflow socket event
3816 */
3817static ev_ret_t
3e170ce0 3818mptcp_subflow_failover_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3819 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 3820{
5ba3f43e 3821#pragma unused(event, p_mpsofilt_hint)
39236c6e 3822 struct mptsub *mpts_alt = NULL;
5ba3f43e 3823 struct socket *alt_so = NULL;
39236c6e
A
3824 struct socket *mp_so;
3825 int altpath_exists = 0;
3826
5ba3f43e 3827 mp_so = mptetoso(mpte);
cb323159 3828 os_log_info(mptcp_log_handle, "%s - %lx\n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
39236c6e 3829
5ba3f43e 3830 mptcp_reinject_mbufs(mpts->mpts_socket);
39236c6e 3831
cb323159
A
3832 mpts_alt = mptcp_get_subflow(mpte, NULL);
3833
3834 /* If there is no alternate eligible subflow, ignore the failover hint. */
3835 if (mpts_alt == NULL || mpts_alt == mpts) {
3836 os_log(mptcp_log_handle, "%s - %lx no alternate path\n", __func__,
3837 (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e 3838
39236c6e
A
3839 goto done;
3840 }
5ba3f43e 3841
39236c6e 3842 altpath_exists = 1;
5ba3f43e 3843 alt_so = mpts_alt->mpts_socket;
39236c6e 3844 if (mpts_alt->mpts_flags & MPTSF_FAILINGOVER) {
fe8ab488 3845 /* All data acknowledged and no RTT spike */
5ba3f43e 3846 if (alt_so->so_snd.sb_cc == 0 && mptcp_no_rto_spike(alt_so)) {
39236c6e
A
3847 mpts_alt->mpts_flags &= ~MPTSF_FAILINGOVER;
3848 } else {
3849 /* no alternate path available */
3850 altpath_exists = 0;
3851 }
39236c6e 3852 }
39236c6e
A
3853
3854 if (altpath_exists) {
5ba3f43e 3855 mpts_alt->mpts_flags |= MPTSF_ACTIVE;
39236c6e 3856
5ba3f43e 3857 mpte->mpte_active_sub = mpts_alt;
39236c6e
A
3858 mpts->mpts_flags |= MPTSF_FAILINGOVER;
3859 mpts->mpts_flags &= ~MPTSF_ACTIVE;
5ba3f43e 3860
cb323159
A
3861 os_log_info(mptcp_log_handle, "%s - %lx: switched from %d to %d\n",
3862 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpts->mpts_connid, mpts_alt->mpts_connid);
5ba3f43e
A
3863
3864 mptcpstats_inc_switch(mpte, mpts);
3865
3866 sowwakeup(alt_so);
39236c6e 3867 } else {
5ba3f43e 3868 mptcplog((LOG_DEBUG, "%s: no alt cid = %d\n", __func__,
0a7de745
A
3869 mpts->mpts_connid),
3870 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 3871done:
5ba3f43e 3872 mpts->mpts_socket->so_flags &= ~SOF_MP_TRYFAILOVER;
39236c6e 3873 }
5ba3f43e 3874
0a7de745 3875 return MPTS_EVRET_OK;
39236c6e
A
3876}
3877
3878/*
3879 * Handle SO_FILT_HINT_IFDENIED subflow socket event.
3880 */
3881static ev_ret_t
3e170ce0 3882mptcp_subflow_ifdenied_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3883 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 3884{
5ba3f43e
A
3885 mptcplog((LOG_DEBUG, "%s: cid %d\n", __func__,
3886 mpts->mpts_connid), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e 3887
39236c6e 3888 /*
5ba3f43e
A
3889 * The subflow connection cannot use the outgoing interface, let's
3890 * close this subflow.
39236c6e 3891 */
5ba3f43e 3892 mptcp_subflow_abort(mpts, EPERM);
39236c6e 3893
5ba3f43e 3894 mptcp_subflow_propagate_ev(mpte, mpts, p_mpsofilt_hint, event);
39236c6e 3895
0a7de745 3896 return MPTS_EVRET_DELETE;
39236c6e
A
3897}
3898
a39ff7e2
A
3899/*
3900 * https://tools.ietf.org/html/rfc6052#section-2
3901 * https://tools.ietf.org/html/rfc6147#section-5.2
3902 */
3903static boolean_t
3904mptcp_desynthesize_ipv6_addr(const struct in6_addr *addr,
0a7de745
A
3905 const struct ipv6_prefix *prefix,
3906 struct in_addr *addrv4)
a39ff7e2
A
3907{
3908 char buf[MAX_IPv4_STR_LEN];
3909 char *ptrv4 = (char *)addrv4;
3910 const char *ptr = (const char *)addr;
3911
0a7de745 3912 if (memcmp(addr, &prefix->ipv6_prefix, prefix->prefix_len) != 0) {
a39ff7e2 3913 return false;
0a7de745 3914 }
a39ff7e2
A
3915
3916 switch (prefix->prefix_len) {
0a7de745
A
3917 case NAT64_PREFIX_LEN_96:
3918 memcpy(ptrv4, ptr + 12, 4);
3919 break;
3920 case NAT64_PREFIX_LEN_64:
3921 memcpy(ptrv4, ptr + 9, 4);
3922 break;
3923 case NAT64_PREFIX_LEN_56:
3924 memcpy(ptrv4, ptr + 7, 1);
3925 memcpy(ptrv4 + 1, ptr + 9, 3);
3926 break;
3927 case NAT64_PREFIX_LEN_48:
3928 memcpy(ptrv4, ptr + 6, 2);
3929 memcpy(ptrv4 + 2, ptr + 9, 2);
3930 break;
3931 case NAT64_PREFIX_LEN_40:
3932 memcpy(ptrv4, ptr + 5, 3);
3933 memcpy(ptrv4 + 3, ptr + 9, 1);
3934 break;
3935 case NAT64_PREFIX_LEN_32:
3936 memcpy(ptrv4, ptr + 4, 4);
3937 break;
3938 default:
3939 panic("NAT64-prefix len is wrong: %u\n",
3940 prefix->prefix_len);
a39ff7e2
A
3941 }
3942
3943 os_log_info(mptcp_log_handle, "%s desynthesized to %s\n", __func__,
0a7de745 3944 inet_ntop(AF_INET, (void *)addrv4, buf, sizeof(buf)));
a39ff7e2
A
3945
3946 return true;
3947}
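mptcp_desynthesize_ipv6_addr() reverses RFC 6052 address synthesis for each supported NAT64 prefix length. As a concrete illustration of the 96-bit case above, with the well-known prefix 64:ff9b::/96 the IPv4 address is simply the last four bytes of the synthesized IPv6 address, so extraction is a 4-byte copy from offset 12. A small stand-alone example (user space, not kernel code):

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct in6_addr v6;
	struct in_addr v4;
	char buf[INET_ADDRSTRLEN];

	/* 64:ff9b::192.0.2.33 was synthesized from 192.0.2.33 (RFC 6052). */
	inet_pton(AF_INET6, "64:ff9b::192.0.2.33", &v6);
	memcpy(&v4, (const unsigned char *)&v6 + 12, 4);	/* bytes 12..15 hold the IPv4 address */
	printf("%s\n", inet_ntop(AF_INET, &v4, buf, sizeof(buf)));	/* prints 192.0.2.33 */

	return 0;
}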
3948
3949static void
3950mptcp_handle_ipv6_connection(struct mptses *mpte, const struct mptsub *mpts)
3951{
3952 struct ipv6_prefix nat64prefixes[NAT64_MAX_NUM_PREFIXES];
3953 struct socket *so = mpts->mpts_socket;
3954 struct ifnet *ifp;
3955 int j;
3956
cb323159
A
3957 /* Subflow IPs will be steered directly by the server - no need to
3958 * desynthesize.
3959 */
3960 if (mpte->mpte_flags & MPTE_UNICAST_IP) {
3961 return;
3962 }
3963
a39ff7e2
A
3964 ifp = sotoinpcb(so)->inp_last_outifp;
3965
3966 if (ifnet_get_nat64prefix(ifp, nat64prefixes) == ENOENT) {
3967 mptcp_ask_for_nat64(ifp);
3968 return;
3969 }
3970
3971
3972 for (j = 0; j < NAT64_MAX_NUM_PREFIXES; j++) {
3973 int success;
3974
0a7de745 3975 if (nat64prefixes[j].prefix_len == 0) {
a39ff7e2 3976 continue;
0a7de745 3977 }
a39ff7e2
A
3978
3979 success = mptcp_desynthesize_ipv6_addr(&mpte->__mpte_dst_v6.sin6_addr,
0a7de745
A
3980 &nat64prefixes[j],
3981 &mpte->mpte_dst_v4_nat64.sin_addr);
a39ff7e2
A
3982 if (success) {
3983 mpte->mpte_dst_v4_nat64.sin_len = sizeof(mpte->mpte_dst_v4_nat64);
3984 mpte->mpte_dst_v4_nat64.sin_family = AF_INET;
3985 mpte->mpte_dst_v4_nat64.sin_port = mpte->__mpte_dst_v6.sin6_port;
3986 break;
3987 }
3988 }
3989}
3990
39236c6e
A
3991/*
3992 * Handle SO_FILT_HINT_CONNECTED subflow socket event.
3993 */
3994static ev_ret_t
3e170ce0 3995mptcp_subflow_connected_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 3996 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 3997{
5ba3f43e 3998#pragma unused(event, p_mpsofilt_hint)
39236c6e 3999 struct socket *mp_so, *so;
5ba3f43e
A
4000 struct inpcb *inp;
4001 struct tcpcb *tp;
39236c6e 4002 struct mptcb *mp_tp;
5ba3f43e 4003 int af;
39236c6e
A
4004 boolean_t mpok = FALSE;
4005
5ba3f43e
A
4006 mp_so = mptetoso(mpte);
4007 mp_tp = mpte->mpte_mptcb;
39236c6e 4008 so = mpts->mpts_socket;
5ba3f43e
A
4009 tp = sototcpcb(so);
4010 af = mpts->mpts_dst.sa_family;
39236c6e 4011
0a7de745
A
4012 if (mpts->mpts_flags & MPTSF_CONNECTED) {
4013 return MPTS_EVRET_OK;
4014 }
39236c6e
A
4015
4016 if ((mpts->mpts_flags & MPTSF_DISCONNECTED) ||
4017 (mpts->mpts_flags & MPTSF_DISCONNECTING)) {
fe8ab488
A
4018 if (!(so->so_state & (SS_ISDISCONNECTING | SS_ISDISCONNECTED)) &&
4019 (so->so_state & SS_ISCONNECTED)) {
0a7de745
A
4020 mptcplog((LOG_DEBUG, "%s: cid %d disconnect before tcp connect\n",
4021 __func__, mpts->mpts_connid),
4022 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488
A
4023 (void) soshutdownlock(so, SHUT_RD);
4024 (void) soshutdownlock(so, SHUT_WR);
4025 (void) sodisconnectlocked(so);
4026 }
0a7de745 4027 return MPTS_EVRET_OK;
39236c6e
A
4028 }
4029
4030 /*
4031 * The subflow connection has been connected. Find out whether it
4032 * is connected as a regular TCP or as a MPTCP subflow. The idea is:
4033 *
4034 * a. If MPTCP connection is not yet established, then this must be
4035 * the first subflow connection. If MPTCP failed to negotiate,
5ba3f43e 4036 * fallback to regular TCP by degrading this subflow.
39236c6e
A
4037 *
4038 * b. If MPTCP connection has been established, then this must be
4039 * one of the subsequent subflow connections. If MPTCP failed
5ba3f43e 4040 * to negotiate, disconnect the connection.
39236c6e
A
4041 *
4042 * Right now, we simply unblock any waiters at the MPTCP socket layer
4043 * if the MPTCP connection has not been established.
4044 */
39236c6e
A
4045
4046 if (so->so_state & SS_ISDISCONNECTED) {
4047 /*
4048 * With MPTCP joins, a connection is connected at the subflow
4049 * level, but the 4th ACK from the server elevates the MPTCP
490019cf
A
4050 * subflow to connected state. So there is a small window
4051 * where the subflow could get disconnected before the
39236c6e
A
4052 * connected event is processed.
4053 */
0a7de745 4054 return MPTS_EVRET_OK;
39236c6e
A
4055 }
4056
0a7de745 4057 if (mpts->mpts_flags & MPTSF_TFO_REQD) {
5ba3f43e 4058 mptcp_drop_tfo_data(mpte, mpts);
0a7de745 4059 }
490019cf 4060
5ba3f43e
A
4061 mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_TFO_REQD);
4062 mpts->mpts_flags |= MPTSF_CONNECTED;
490019cf 4063
0a7de745 4064 if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
39236c6e 4065 mpts->mpts_flags |= MPTSF_MP_CAPABLE;
0a7de745 4066 }
39236c6e 4067
490019cf
A
4068 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
4069
39236c6e 4070 /* get/verify the outbound interface */
5ba3f43e 4071 inp = sotoinpcb(so);
3e170ce0 4072
5ba3f43e 4073 mpts->mpts_maxseg = tp->t_maxseg;
3e170ce0 4074
5ba3f43e
A
4075 mptcplog((LOG_DEBUG, "%s: cid %d outif %s is %s\n", __func__, mpts->mpts_connid,
4076 ((inp->inp_last_outifp != NULL) ? inp->inp_last_outifp->if_xname : "NULL"),
4077 ((mpts->mpts_flags & MPTSF_MP_CAPABLE) ? "MPTCP capable" : "a regular TCP")),
3e170ce0 4078 (MPTCP_SOCKET_DBG | MPTCP_EVENTS_DBG), MPTCP_LOGLVL_LOG);
39236c6e
A
4079
4080 mpok = (mpts->mpts_flags & MPTSF_MP_CAPABLE);
39236c6e 4081
39236c6e 4082 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
5ba3f43e
A
4083 mp_tp->mpt_state = MPTCPS_ESTABLISHED;
4084 mpte->mpte_associd = mpts->mpts_connid;
4085 DTRACE_MPTCP2(state__change,
4086 struct mptcb *, mp_tp,
4087 uint32_t, 0 /* event */);
4088
4089 if (SOCK_DOM(so) == AF_INET) {
4090 in_getsockaddr_s(so, &mpte->__mpte_src_v4);
4091 } else {
4092 in6_getsockaddr_s(so, &mpte->__mpte_src_v6);
4093 }
4094
a39ff7e2
A
4095 mpts->mpts_flags |= MPTSF_ACTIVE;
4096
39236c6e
A
4097 /* case (a) above */
4098 if (!mpok) {
5ba3f43e
A
4099 tcpstat.tcps_mpcap_fallback++;
4100
4101 tp->t_mpflags |= TMPF_INFIN_SENT;
4102 mptcp_notify_mpfail(so);
39236c6e 4103 } else {
5ba3f43e 4104 if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
cb323159 4105 mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
5ba3f43e 4106 tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
39037602
A
4107 } else {
4108 mpts->mpts_flags |= MPTSF_PREFERRED;
4109 }
39236c6e
A
4110 mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
4111 mpte->mpte_nummpcapflows++;
5ba3f43e 4112
0a7de745 4113 if (SOCK_DOM(so) == AF_INET6) {
a39ff7e2 4114 mptcp_handle_ipv6_connection(mpte, mpts);
0a7de745 4115 }
a39ff7e2 4116
5ba3f43e
A
4117 mptcp_check_subflows_and_add(mpte);
4118
0a7de745 4119 if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
5ba3f43e 4120 mpte->mpte_initial_cell = 1;
0a7de745 4121 }
5ba3f43e
A
4122
4123 mpte->mpte_handshake_success = 1;
39236c6e 4124 }
5ba3f43e
A
4125
4126 mp_tp->mpt_sndwnd = tp->snd_wnd;
4127 mp_tp->mpt_sndwl1 = mp_tp->mpt_rcvnxt;
4128 mp_tp->mpt_sndwl2 = mp_tp->mpt_snduna;
4129 soisconnected(mp_so);
39236c6e 4130 } else if (mpok) {
39236c6e
A
4131 /*
4132 * case (b) above
4133 * In case of additional flows, the MPTCP socket is not
4134 * MPTSF_MP_CAPABLE until an ACK is received from server
4135 * for 3-way handshake. TCP would have guaranteed that this
4136 * is an MPTCP subflow.
4137 */
5ba3f43e
A
4138 if (IFNET_IS_CELLULAR(inp->inp_last_outifp) &&
4139 !(tp->t_mpflags & TMPF_BACKUP_PATH) &&
cb323159 4140 mpte->mpte_svctype < MPTCP_SVCTYPE_AGGREGATE) {
5ba3f43e
A
4141 tp->t_mpflags |= (TMPF_BACKUP_PATH | TMPF_SND_MPPRIO);
4142 mpts->mpts_flags &= ~MPTSF_PREFERRED;
4143 } else {
4144 mpts->mpts_flags |= MPTSF_PREFERRED;
4145 }
4146
39236c6e
A
4147 mpts->mpts_flags |= MPTSF_MPCAP_CTRSET;
4148 mpte->mpte_nummpcapflows++;
5ba3f43e
A
4149
4150 mpts->mpts_rel_seq = 1;
4151
4152 mptcp_check_subflows_and_remove(mpte);
fe8ab488 4153 } else {
5ba3f43e
A
4154 unsigned int i;
4155
a39ff7e2
A
4156 /* Should we try the alternate port? */
4157 if (mpte->mpte_alternate_port &&
4158 inp->inp_fport != mpte->mpte_alternate_port) {
4159 union sockaddr_in_4_6 dst;
4160 struct sockaddr_in *dst_in = (struct sockaddr_in *)&dst;
5ba3f43e 4161
a39ff7e2
A
4162 memcpy(&dst, &mpts->mpts_dst, mpts->mpts_dst.sa_len);
4163
4164 dst_in->sin_port = mpte->mpte_alternate_port;
4165
4166 mptcp_subflow_add(mpte, NULL, (struct sockaddr *)&dst,
0a7de745 4167 mpts->mpts_ifscope, NULL);
a39ff7e2
A
4168 } else { /* Else, we tried all we could, mark this interface as non-MPTCP */
4169 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
4170 struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
4171
4172 if (inp->inp_last_outifp->if_index == info->ifindex) {
4173 info->no_mptcp_support = 1;
4174 break;
4175 }
5ba3f43e
A
4176 }
4177 }
4178
4179 tcpstat.tcps_join_fallback++;
0a7de745 4180 if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
5ba3f43e 4181 tcpstat.tcps_mptcp_cell_proxy++;
0a7de745 4182 } else {
5ba3f43e 4183 tcpstat.tcps_mptcp_wifi_proxy++;
0a7de745 4184 }
5ba3f43e
A
4185
4186 soevent(mpts->mpts_socket, SO_FILT_HINT_LOCKED | SO_FILT_HINT_MUSTRST);
4187
0a7de745 4188 return MPTS_EVRET_OK;
39236c6e 4189 }
fe8ab488 4190
5ba3f43e 4191 /* This call just reserves an entry in the stats-table for this ifindex */
cb323159 4192 mptcpstats_get_index(mpte->mpte_itfstats, mpts);
5ba3f43e
A
4193
4194 mptcp_output(mpte);
39236c6e 4195
0a7de745 4196 return MPTS_EVRET_OK; /* keep the subflow socket around */
39236c6e
A
4197}
4198
4199/*
4200 * Handle SO_FILT_HINT_DISCONNECTED subflow socket event.
4201 */
4202static ev_ret_t
3e170ce0 4203mptcp_subflow_disconnected_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 4204 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 4205{
5ba3f43e 4206#pragma unused(event, p_mpsofilt_hint)
39236c6e
A
4207 struct socket *mp_so, *so;
4208 struct mptcb *mp_tp;
39236c6e 4209
5ba3f43e 4210 mp_so = mptetoso(mpte);
39236c6e
A
4211 mp_tp = mpte->mpte_mptcb;
4212 so = mpts->mpts_socket;
4213
5ba3f43e
A
4214 mptcplog((LOG_DEBUG, "%s: cid %d, so_err %d, mpt_state %u fallback %u active %u flags %#x\n",
4215 __func__, mpts->mpts_connid, so->so_error, mp_tp->mpt_state,
4216 !!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP),
4217 !!(mpts->mpts_flags & MPTSF_ACTIVE), sototcpcb(so)->t_mpflags),
3e170ce0 4218 MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
39236c6e 4219
0a7de745
A
4220 if (mpts->mpts_flags & MPTSF_DISCONNECTED) {
4221 return MPTS_EVRET_DELETE;
4222 }
39236c6e 4223
39236c6e
A
4224 mpts->mpts_flags |= MPTSF_DISCONNECTED;
4225
5ba3f43e 4226 /* The subflow connection has been disconnected. */
39236c6e
A
4227
4228 if (mpts->mpts_flags & MPTSF_MPCAP_CTRSET) {
4229 mpte->mpte_nummpcapflows--;
fe8ab488
A
4230 if (mpte->mpte_active_sub == mpts) {
4231 mpte->mpte_active_sub = NULL;
5ba3f43e 4232 mptcplog((LOG_DEBUG, "%s: resetting active subflow \n",
3e170ce0 4233 __func__), MPTCP_EVENTS_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 4234 }
39236c6e
A
4235 mpts->mpts_flags &= ~MPTSF_MPCAP_CTRSET;
4236 }
4237
5ba3f43e 4238 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED ||
0a7de745 4239 ((mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && (mpts->mpts_flags & MPTSF_ACTIVE))) {
5ba3f43e 4240 mptcp_drop(mpte, mp_tp, so->so_error);
39236c6e
A
4241 }
4242
39236c6e 4243 /*
5ba3f43e
A
4244 * Clear flags that are used by getconninfo to return state.
4245 * Retain flags such as MPTSF_DELETEOK for internal purposes.
39236c6e 4246 */
0a7de745
A
4247 mpts->mpts_flags &= ~(MPTSF_CONNECTING | MPTSF_CONNECT_PENDING |
4248 MPTSF_CONNECTED | MPTSF_DISCONNECTING | MPTSF_PREFERRED |
4249 MPTSF_MP_CAPABLE | MPTSF_MP_READY | MPTSF_MP_DEGRADED | MPTSF_ACTIVE);
5ba3f43e 4250
0a7de745 4251 return MPTS_EVRET_DELETE;
39236c6e
A
4252}
4253
4254/*
4255 * Handle SO_FILT_HINT_MPSTATUS subflow socket event
4256 */
4257static ev_ret_t
3e170ce0 4258mptcp_subflow_mpstatus_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 4259 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 4260{
5ba3f43e 4261#pragma unused(event, p_mpsofilt_hint)
cb323159 4262 ev_ret_t ret = MPTS_EVRET_OK;
39236c6e
A
4263 struct socket *mp_so, *so;
4264 struct mptcb *mp_tp;
39236c6e 4265
5ba3f43e 4266 mp_so = mptetoso(mpte);
39236c6e 4267 mp_tp = mpte->mpte_mptcb;
39236c6e
A
4268 so = mpts->mpts_socket;
4269
0a7de745 4270 if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_TRUE) {
39236c6e 4271 mpts->mpts_flags |= MPTSF_MP_CAPABLE;
0a7de745 4272 } else {
39236c6e 4273 mpts->mpts_flags &= ~MPTSF_MP_CAPABLE;
0a7de745 4274 }
39236c6e
A
4275
4276 if (sototcpcb(so)->t_mpflags & TMPF_TCP_FALLBACK) {
0a7de745 4277 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
39236c6e 4278 goto done;
0a7de745 4279 }
39236c6e 4280 mpts->mpts_flags |= MPTSF_MP_DEGRADED;
d9a64523 4281 } else {
39236c6e 4282 mpts->mpts_flags &= ~MPTSF_MP_DEGRADED;
d9a64523 4283 }
39236c6e 4284
0a7de745 4285 if (sototcpcb(so)->t_mpflags & TMPF_MPTCP_READY) {
39236c6e 4286 mpts->mpts_flags |= MPTSF_MP_READY;
0a7de745 4287 } else {
39236c6e 4288 mpts->mpts_flags &= ~MPTSF_MP_READY;
0a7de745 4289 }
39236c6e
A
4290
4291 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
4292 mp_tp->mpt_flags |= MPTCPF_FALLBACK_TO_TCP;
4293 mp_tp->mpt_flags &= ~MPTCPF_JOIN_READY;
4294 }
4295
4296 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
39236c6e 4297 ret = MPTS_EVRET_DISCONNECT_FALLBACK;
d9a64523
A
4298
4299 m_freem_list(mpte->mpte_reinjectq);
4300 mpte->mpte_reinjectq = NULL;
39236c6e
A
4301 } else if (mpts->mpts_flags & MPTSF_MP_READY) {
4302 mp_tp->mpt_flags |= MPTCPF_JOIN_READY;
4303 ret = MPTS_EVRET_CONNECT_PENDING;
4304 }
4305
39236c6e 4306done:
0a7de745 4307 return ret;
39236c6e
A
4308}
4309
4310/*
4311 * Handle SO_FILT_HINT_MUSTRST subflow socket event
4312 */
4313static ev_ret_t
3e170ce0 4314mptcp_subflow_mustrst_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 4315 uint64_t *p_mpsofilt_hint, uint64_t event)
39236c6e 4316{
5ba3f43e 4317#pragma unused(event)
39236c6e
A
4318 struct socket *mp_so, *so;
4319 struct mptcb *mp_tp;
5ba3f43e 4320 boolean_t is_fastclose;
39236c6e 4321
5ba3f43e 4322 mp_so = mptetoso(mpte);
39236c6e
A
4323 mp_tp = mpte->mpte_mptcb;
4324 so = mpts->mpts_socket;
4325
39236c6e 4326 /* We got an invalid option or a fast close */
39236c6e
A
4327 struct tcptemp *t_template;
4328 struct inpcb *inp = sotoinpcb(so);
4329 struct tcpcb *tp = NULL;
4330
4331 tp = intotcpcb(inp);
fe8ab488 4332 so->so_error = ECONNABORTED;
39236c6e 4333
39037602
A
4334 is_fastclose = !!(tp->t_mpflags & TMPF_FASTCLOSERCV);
4335
cb323159
A
4336 tp->t_mpflags |= TMPF_RESET;
4337
39236c6e
A
4338 t_template = tcp_maketemplate(tp);
4339 if (t_template) {
fe8ab488 4340 struct tcp_respond_args tra;
39236c6e 4341
fe8ab488 4342 bzero(&tra, sizeof(tra));
0a7de745 4343 if (inp->inp_flags & INP_BOUND_IF) {
fe8ab488 4344 tra.ifscope = inp->inp_boundifp->if_index;
0a7de745 4345 } else {
fe8ab488 4346 tra.ifscope = IFSCOPE_NONE;
0a7de745 4347 }
fe8ab488 4348 tra.awdl_unrestricted = 1;
39236c6e
A
4349
4350 tcp_respond(tp, t_template->tt_ipgen,
4351 &t_template->tt_t, (struct mbuf *)NULL,
fe8ab488 4352 tp->rcv_nxt, tp->snd_una, TH_RST, &tra);
39236c6e 4353 (void) m_free(dtom(t_template));
39236c6e 4354 }
39037602
A
4355
4356 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) && is_fastclose) {
cb323159
A
4357 struct mptsub *iter, *tmp;
4358
3e170ce0 4359 *p_mpsofilt_hint |= SO_FILT_HINT_CONNRESET;
39236c6e 4360
cb323159
A
4361 mp_so->so_error = ECONNRESET;
4362
4363 TAILQ_FOREACH_SAFE(iter, &mpte->mpte_subflows, mpts_entry, tmp) {
4364 if (iter == mpts) {
4365 continue;
4366 }
4367 mptcp_subflow_abort(iter, ECONNABORTED);
0a7de745 4368 }
39037602
A
4369
4370 /*
4371 * mptcp_drop is being called after processing the events, to fully
4372 * close the MPTCP connection
4373 */
cb323159 4374 mptcp_drop(mpte, mp_tp, mp_so->so_error);
39236c6e 4375 }
39037602 4376
cb323159
A
4377 mptcp_subflow_abort(mpts, ECONNABORTED);
4378
4379
0a7de745 4380 if (mp_tp->mpt_gc_ticks == MPT_GC_TICKS) {
3e170ce0 4381 mp_tp->mpt_gc_ticks = MPT_GC_TICKS_FAST;
0a7de745 4382 }
39236c6e 4383
0a7de745 4384 return MPTS_EVRET_DELETE;
39236c6e
A
4385}
4386
fe8ab488 4387static ev_ret_t
5ba3f43e 4388mptcp_subflow_adaptive_rtimo_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 4389 uint64_t *p_mpsofilt_hint, uint64_t event)
fe8ab488 4390{
5ba3f43e
A
4391#pragma unused(event)
4392 bool found_active = false;
4393
4394 mpts->mpts_flags |= MPTSF_READ_STALL;
39037602 4395
5ba3f43e
A
4396 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
4397 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
3e170ce0 4398
5ba3f43e 4399 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 4400 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 4401 continue;
0a7de745 4402 }
5ba3f43e
A
4403
4404 if (!(mpts->mpts_flags & MPTSF_READ_STALL)) {
4405 found_active = true;
4406 break;
fe8ab488 4407 }
fe8ab488
A
4408 }
4409
0a7de745 4410 if (!found_active) {
5ba3f43e 4411 *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_RTIMO;
0a7de745 4412 }
5ba3f43e 4413
0a7de745 4414 return MPTS_EVRET_OK;
fe8ab488
A
4415}
4416
4417static ev_ret_t
5ba3f43e 4418mptcp_subflow_adaptive_wtimo_ev(struct mptses *mpte, struct mptsub *mpts,
0a7de745 4419 uint64_t *p_mpsofilt_hint, uint64_t event)
fe8ab488 4420{
5ba3f43e
A
4421#pragma unused(event)
4422 bool found_active = false;
3e170ce0 4423
5ba3f43e 4424 mpts->mpts_flags |= MPTSF_WRITE_STALL;
fe8ab488 4425
5ba3f43e
A
4426 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
4427 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
4428
4429 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 4430 tp->t_state > TCPS_CLOSE_WAIT) {
5ba3f43e 4431 continue;
0a7de745 4432 }
5ba3f43e
A
4433
4434 if (!(mpts->mpts_flags & MPTSF_WRITE_STALL)) {
4435 found_active = true;
4436 break;
4437 }
4438 }
4439
0a7de745 4440 if (!found_active) {
5ba3f43e 4441 *p_mpsofilt_hint |= SO_FILT_HINT_ADAPTIVE_WTIMO;
0a7de745 4442 }
5ba3f43e 4443
0a7de745 4444 return MPTS_EVRET_OK;
fe8ab488
A
4445}
4446
39236c6e
A
4447/*
4448 * Issues SOPT_SET on an MPTCP subflow socket; socket must already be locked,
4449 * caller must ensure that the option can be issued on subflow sockets, via
4450 * MPOF_SUBFLOW_OK flag.
4451 */
4452int
5ba3f43e 4453mptcp_subflow_sosetopt(struct mptses *mpte, struct mptsub *mpts, struct mptopt *mpo)
39236c6e 4454{
5ba3f43e 4455 struct socket *mp_so, *so;
39236c6e 4456 struct sockopt sopt;
39236c6e
A
4457 int error;
4458
4459 VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
5ba3f43e
A
4460
4461 mp_so = mptetoso(mpte);
4462 so = mpts->mpts_socket;
4463
cb323159
A
4464 socket_lock_assert_owned(mp_so);
4465
5ba3f43e
A
4466 if (mpte->mpte_mptcb->mpt_state >= MPTCPS_ESTABLISHED &&
4467 mpo->mpo_level == SOL_SOCKET &&
4468 mpo->mpo_name == SO_MARK_CELLFALLBACK) {
d9a64523
A
4469 struct ifnet *ifp = ifindex2ifnet[mpts->mpts_ifscope];
4470
4471 mptcplog((LOG_DEBUG, "%s Setting CELL_FALLBACK, mpte_flags %#x, svctype %u wifi unusable %d lastcell? %d boundcell? %d\n",
cb323159 4472 __func__, mpte->mpte_flags, mpte->mpte_svctype, mptcp_is_wifi_unusable_for_session(mpte),
0a7de745
A
4473 sotoinpcb(so)->inp_last_outifp ? IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp) : -1,
4474 mpts->mpts_ifscope != IFSCOPE_NONE && ifp ? IFNET_IS_CELLULAR(ifp) : -1),
4475 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e
A
4476
4477 /*
4478 * When we open a new subflow, mark it as cell fallback, if
4479 * this subflow goes over cell.
4480 *
4481 * (except for first-party apps)
4482 */
4483
0a7de745
A
4484 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
4485 return 0;
4486 }
39236c6e 4487
5ba3f43e 4488 if (sotoinpcb(so)->inp_last_outifp &&
0a7de745
A
4489 !IFNET_IS_CELLULAR(sotoinpcb(so)->inp_last_outifp)) {
4490 return 0;
4491 }
5ba3f43e
A
4492
4493 /*
4494 * This check is deliberately an OR: if the app is not binding to the
4495 * interface, then it definitely is not a cell-fallback
4496 * connection.
4497 */
d9a64523 4498 if (mpts->mpts_ifscope == IFSCOPE_NONE || ifp == NULL ||
0a7de745
A
4499 !IFNET_IS_CELLULAR(ifp)) {
4500 return 0;
4501 }
5ba3f43e
A
4502 }
4503
4504 mpo->mpo_flags &= ~MPOF_INTERIM;
39236c6e 4505
0a7de745 4506 bzero(&sopt, sizeof(sopt));
39236c6e
A
4507 sopt.sopt_dir = SOPT_SET;
4508 sopt.sopt_level = mpo->mpo_level;
4509 sopt.sopt_name = mpo->mpo_name;
4510 sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
0a7de745 4511 sopt.sopt_valsize = sizeof(int);
39236c6e
A
4512 sopt.sopt_p = kernproc;
4513
5ba3f43e 4514 error = sosetoptlock(so, &sopt, 0);
cb323159
A
4515 if (error) {
4516 os_log_error(mptcp_log_handle, "%s - %lx: sopt %s "
39236c6e 4517 "val %d set error %d\n", __func__,
cb323159 4518 (unsigned long)VM_KERNEL_ADDRPERM(mpte),
5ba3f43e 4519 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name),
cb323159 4520 mpo->mpo_intval, error);
39236c6e 4521 }
0a7de745 4522 return error;
39236c6e
A
4523}
4524
4525/*
4526 * Issues SOPT_GET on an MPTCP subflow socket; socket must already be locked,
4527 * caller must ensure that the option can be issued on subflow sockets, via
4528 * MPOF_SUBFLOW_OK flag.
4529 */
4530int
4531mptcp_subflow_sogetopt(struct mptses *mpte, struct socket *so,
4532 struct mptopt *mpo)
4533{
4534 struct socket *mp_so;
4535 struct sockopt sopt;
39236c6e
A
4536 int error;
4537
4538 VERIFY(mpo->mpo_flags & MPOF_SUBFLOW_OK);
5ba3f43e 4539 mp_so = mptetoso(mpte);
39236c6e 4540
cb323159
A
4541 socket_lock_assert_owned(mp_so);
4542
0a7de745 4543 bzero(&sopt, sizeof(sopt));
39236c6e
A
4544 sopt.sopt_dir = SOPT_GET;
4545 sopt.sopt_level = mpo->mpo_level;
4546 sopt.sopt_name = mpo->mpo_name;
4547 sopt.sopt_val = CAST_USER_ADDR_T(&mpo->mpo_intval);
0a7de745 4548 sopt.sopt_valsize = sizeof(int);
39236c6e
A
4549 sopt.sopt_p = kernproc;
4550
0a7de745 4551 error = sogetoptlock(so, &sopt, 0); /* already locked */
cb323159
A
4552 if (error) {
4553 os_log_error(mptcp_log_handle,
4554 "%s - %lx: sopt %s get error %d\n",
4555 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
4556 mptcp_sopt2str(mpo->mpo_level, mpo->mpo_name), error);
39236c6e 4557 }
0a7de745 4558 return error;
39236c6e
A
4559}
4560
4561
4562/*
4563 * MPTCP garbage collector.
4564 *
4565 * This routine is called by the MP domain on-demand, periodic callout,
4566 * which is triggered when a MPTCP socket is closed. The callout will
4567 * repeat as long as this routine returns a non-zero value.
4568 */
4569static uint32_t
4570mptcp_gc(struct mppcbinfo *mppi)
4571{
4572 struct mppcb *mpp, *tmpp;
4573 uint32_t active = 0;
4574
5ba3f43e 4575 LCK_MTX_ASSERT(&mppi->mppi_lock, LCK_MTX_ASSERT_OWNED);
39236c6e 4576
39236c6e
A
4577 TAILQ_FOREACH_SAFE(mpp, &mppi->mppi_pcbs, mpp_entry, tmpp) {
4578 struct socket *mp_so;
4579 struct mptses *mpte;
4580 struct mptcb *mp_tp;
4581
39236c6e 4582 mp_so = mpp->mpp_socket;
39236c6e 4583 mpte = mptompte(mpp);
39236c6e 4584 mp_tp = mpte->mpte_mptcb;
39236c6e 4585
cb323159 4586 if (!mpp_try_lock(mpp)) {
39236c6e
A
4587 active++;
4588 continue;
4589 }
4590
cb323159
A
4591 VERIFY(mpp->mpp_flags & MPP_ATTACHED);
4592
39236c6e 4593 /* check again under the lock */
5ba3f43e 4594 if (mp_so->so_usecount > 0) {
39236c6e
A
4595 boolean_t wakeup = FALSE;
4596 struct mptsub *mpts, *tmpts;
4597
39236c6e 4598 if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_1) {
0a7de745 4599 if (mp_tp->mpt_gc_ticks > 0) {
39236c6e 4600 mp_tp->mpt_gc_ticks--;
0a7de745 4601 }
39236c6e
A
4602 if (mp_tp->mpt_gc_ticks == 0) {
4603 wakeup = TRUE;
39236c6e
A
4604 }
4605 }
39236c6e
A
4606 if (wakeup) {
4607 TAILQ_FOREACH_SAFE(mpts,
4608 &mpte->mpte_subflows, mpts_entry, tmpts) {
5ba3f43e 4609 mptcp_subflow_eupcall1(mpts->mpts_socket,
39236c6e 4610 mpts, SO_FILT_HINT_DISCONNECTED);
39236c6e
A
4611 }
4612 }
cb323159 4613 socket_unlock(mp_so, 0);
39236c6e
A
4614 active++;
4615 continue;
4616 }
4617
4618 if (mpp->mpp_state != MPPCB_STATE_DEAD) {
cb323159 4619 panic("%s - %lx: skipped state "
0a7de745 4620 "[u=%d,r=%d,s=%d]\n", __func__,
cb323159 4621 (unsigned long)VM_KERNEL_ADDRPERM(mpte),
0a7de745
A
4622 mp_so->so_usecount, mp_so->so_retaincnt,
4623 mpp->mpp_state);
39236c6e
A
4624 }
4625
0a7de745 4626 if (mp_tp->mpt_state == MPTCPS_TIME_WAIT) {
5ba3f43e 4627 mptcp_close(mpte, mp_tp);
0a7de745 4628 }
3e170ce0 4629
5ba3f43e 4630 mptcp_session_destroy(mpte);
39236c6e 4631
39037602 4632 DTRACE_MPTCP4(dispose, struct socket *, mp_so,
39236c6e
A
4633 struct sockbuf *, &mp_so->so_rcv,
4634 struct sockbuf *, &mp_so->so_snd,
4635 struct mppcb *, mpp);
4636
4637 mp_pcbdispose(mpp);
39037602 4638 sodealloc(mp_so);
39236c6e
A
4639 }
4640
0a7de745 4641 return active;
39236c6e
A
4642}
4643
4644/*
4645 * Drop a MPTCP connection, reporting the specified error.
4646 */
4647struct mptses *
4648mptcp_drop(struct mptses *mpte, struct mptcb *mp_tp, int errno)
4649{
cb323159 4650 struct socket *mp_so = mptetoso(mpte);
39236c6e 4651
39236c6e 4652 VERIFY(mpte->mpte_mptcb == mp_tp);
cb323159
A
4653
4654 socket_lock_assert_owned(mp_so);
39236c6e 4655
39037602 4656 DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
39236c6e
A
4657 uint32_t, 0 /* event */);
4658
0a7de745 4659 if (errno == ETIMEDOUT && mp_tp->mpt_softerror != 0) {
39236c6e 4660 errno = mp_tp->mpt_softerror;
0a7de745 4661 }
39236c6e
A
4662 mp_so->so_error = errno;
4663
0a7de745 4664 return mptcp_close(mpte, mp_tp);
39236c6e
A
4665}
4666
4667/*
4668 * Close a MPTCP control block.
4669 */
4670struct mptses *
4671mptcp_close(struct mptses *mpte, struct mptcb *mp_tp)
4672{
3e170ce0 4673 struct mptsub *mpts = NULL, *tmpts = NULL;
cb323159 4674 struct socket *mp_so = mptetoso(mpte);
39236c6e 4675
cb323159 4676 socket_lock_assert_owned(mp_so);
39236c6e 4677 VERIFY(mpte->mpte_mptcb == mp_tp);
39236c6e 4678
5ba3f43e 4679 mp_tp->mpt_state = MPTCPS_TERMINATE;
39236c6e 4680
5ba3f43e
A
4681 mptcp_freeq(mp_tp);
4682
4683 soisdisconnected(mp_so);
39236c6e
A
4684
4685 /* Clean up all subflows */
4686 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
5ba3f43e 4687 mptcp_subflow_disconnect(mpte, mpts);
39236c6e 4688 }
39236c6e 4689
0a7de745 4690 return NULL;
39236c6e
A
4691}
4692
4693void
4694mptcp_notify_close(struct socket *so)
4695{
4696 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_DISCONNECTED));
4697}
4698
4699/*
5ba3f43e 4700 * MPTCP workloop.
39236c6e
A
4701 */
4702void
5ba3f43e 4703mptcp_subflow_workloop(struct mptses *mpte)
39236c6e 4704{
39236c6e 4705 boolean_t connect_pending = FALSE, disconnect_fallback = FALSE;
cb323159 4706 uint64_t mpsofilt_hint_mask = SO_FILT_HINT_LOCKED;
0a7de745
A
4707 struct mptsub *mpts, *tmpts;
4708 struct socket *mp_so;
39236c6e 4709
cb323159
A
4710 mp_so = mptetoso(mpte);
4711
4712 socket_lock_assert_owned(mp_so);
0a7de745
A
4713
4714 if (mpte->mpte_flags & MPTE_IN_WORKLOOP) {
4715 mpte->mpte_flags |= MPTE_WORKLOOP_RELAUNCH;
4716 return;
4717 }
4718 mpte->mpte_flags |= MPTE_IN_WORKLOOP;
4719
0a7de745 4720relaunch:
0a7de745 4721 mpte->mpte_flags &= ~MPTE_WORKLOOP_RELAUNCH;
39236c6e
A
4722
4723 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
4724 ev_ret_t ret;
4725
5ba3f43e
A
4726 if (mpts->mpts_socket->so_usecount == 0) {
4727 /* Will be removed soon by tcp_garbage_collect */
4728 continue;
4729 }
3e170ce0 4730
5ba3f43e
A
4731 mptcp_subflow_addref(mpts);
4732 mpts->mpts_socket->so_usecount++;
3e170ce0
A
4733
4734 ret = mptcp_subflow_events(mpte, mpts, &mpsofilt_hint_mask);
39236c6e 4735
39236c6e
A
4736 /*
4737 * If MPTCP socket is closed, disconnect all subflows.
4738 * This will generate a disconnect event which will
4739 * be handled during the next iteration, causing a
4740 * non-zero error to be returned above.
4741 */
0a7de745 4742 if (mp_so->so_flags & SOF_PCBCLEARING) {
5ba3f43e 4743 mptcp_subflow_disconnect(mpte, mpts);
0a7de745 4744 }
39236c6e
A
4745
4746 switch (ret) {
39236c6e
A
4747 case MPTS_EVRET_OK:
4748 /* nothing to do */
4749 break;
4750 case MPTS_EVRET_DELETE:
5ba3f43e 4751 mptcp_subflow_soclose(mpts);
39236c6e
A
4752 break;
4753 case MPTS_EVRET_CONNECT_PENDING:
4754 connect_pending = TRUE;
4755 break;
4756 case MPTS_EVRET_DISCONNECT_FALLBACK:
4757 disconnect_fallback = TRUE;
4758 break;
3e170ce0
A
4759 default:
4760 mptcplog((LOG_DEBUG,
4761 "MPTCP Socket: %s: mptcp_subflow_events "
0a7de745 4762 "returned invalid value: %d\n", __func__,
3e170ce0
A
4763 ret),
4764 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
4765 break;
39236c6e 4766 }
0a7de745 4767 mptcp_subflow_remref(mpts); /* ours */
5ba3f43e
A
4768
4769 VERIFY(mpts->mpts_socket->so_usecount != 0);
4770 mpts->mpts_socket->so_usecount--;
39236c6e
A
4771 }
4772
5ba3f43e 4773 if (mpsofilt_hint_mask != SO_FILT_HINT_LOCKED) {
5ba3f43e
A
4774 VERIFY(mpsofilt_hint_mask & SO_FILT_HINT_LOCKED);
4775
cb323159
A
4776 if (mpsofilt_hint_mask & SO_FILT_HINT_CANTRCVMORE) {
4777 mp_so->so_state |= SS_CANTRCVMORE;
4778 sorwakeup(mp_so);
4779 }
4780
3e170ce0 4781 soevent(mp_so, mpsofilt_hint_mask);
39236c6e
A
4782 }
4783
0a7de745
A
4784 if (!connect_pending && !disconnect_fallback) {
4785 goto exit;
4786 }
39236c6e
A
4787
4788 TAILQ_FOREACH_SAFE(mpts, &mpte->mpte_subflows, mpts_entry, tmpts) {
39236c6e
A
4789 if (disconnect_fallback) {
4790 struct socket *so = NULL;
4791 struct inpcb *inp = NULL;
4792 struct tcpcb *tp = NULL;
4793
0a7de745 4794 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
39236c6e 4795 continue;
0a7de745 4796 }
39236c6e
A
4797
4798 mpts->mpts_flags |= MPTSF_MP_DEGRADED;
4799
0a7de745
A
4800 if (mpts->mpts_flags & (MPTSF_DISCONNECTING |
4801 MPTSF_DISCONNECTED | MPTSF_CONNECT_PENDING)) {
39236c6e 4802 continue;
0a7de745 4803 }
490019cf 4804
39236c6e
A
4805 so = mpts->mpts_socket;
4806
4807 /*
4808 * The MPTCP connection has degraded to a fallback
4809 * mode, so there is no point in keeping this subflow
4810 * regardless of its MPTCP-readiness state, unless it
4811 * is the primary one which we use for fallback. This
4812 * assumes that the subflow used for fallback is the
4813 * ACTIVE one.
4814 */
4815
39236c6e
A
4816 inp = sotoinpcb(so);
4817 tp = intotcpcb(inp);
4818 tp->t_mpflags &=
0a7de745 4819 ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
39236c6e 4820 tp->t_mpflags |= TMPF_TCP_FALLBACK;
490019cf 4821
5ba3f43e 4822 soevent(so, SO_FILT_HINT_MUSTRST);
39236c6e
A
4823 } else if (connect_pending) {
4824 /*
4825 * The MPTCP connection has progressed to a state
4826 * where it supports full multipath semantics; allow
4827 * additional joins to be attempted for all subflows
4828 * that are in the PENDING state.
4829 */
4830 if (mpts->mpts_flags & MPTSF_CONNECT_PENDING) {
5ba3f43e 4831 int error = mptcp_subflow_soconnectx(mpte, mpts);
39236c6e 4832
0a7de745 4833 if (error) {
5ba3f43e 4834 mptcp_subflow_abort(mpts, error);
0a7de745 4835 }
5ba3f43e 4836 }
39236c6e 4837 }
39236c6e 4838 }
0a7de745
A
4839
4840exit:
4841 if (mpte->mpte_flags & MPTE_WORKLOOP_RELAUNCH) {
4842 goto relaunch;
4843 }
4844
4845 mpte->mpte_flags &= ~MPTE_IN_WORKLOOP;
39236c6e
A
4846}
4847
39236c6e
A
4848/*
4849 * Protocol pr_lock callback.
4850 */
4851int
4852mptcp_lock(struct socket *mp_so, int refcount, void *lr)
4853{
5ba3f43e 4854 struct mppcb *mpp = mpsotomppcb(mp_so);
39236c6e
A
4855 void *lr_saved;
4856
0a7de745 4857 if (lr == NULL) {
39236c6e 4858 lr_saved = __builtin_return_address(0);
0a7de745 4859 } else {
39236c6e 4860 lr_saved = lr;
0a7de745 4861 }
39236c6e
A
4862
4863 if (mpp == NULL) {
4864 panic("%s: so=%p NO PCB! lr=%p lrh= %s\n", __func__,
4865 mp_so, lr_saved, solockhistory_nr(mp_so));
4866 /* NOTREACHED */
4867 }
5ba3f43e 4868 mpp_lock(mpp);
39236c6e
A
4869
4870 if (mp_so->so_usecount < 0) {
4871 panic("%s: so=%p so_pcb=%p lr=%p ref=%x lrh= %s\n", __func__,
4872 mp_so, mp_so->so_pcb, lr_saved, mp_so->so_usecount,
4873 solockhistory_nr(mp_so));
4874 /* NOTREACHED */
4875 }
0a7de745 4876 if (refcount != 0) {
39236c6e 4877 mp_so->so_usecount++;
cb323159 4878 mpp->mpp_inside++;
0a7de745 4879 }
39236c6e
A
4880 mp_so->lock_lr[mp_so->next_lock_lr] = lr_saved;
4881 mp_so->next_lock_lr = (mp_so->next_lock_lr + 1) % SO_LCKDBG_MAX;
4882
0a7de745 4883 return 0;
39236c6e
A
4884}
4885
4886/*
4887 * Protocol pr_unlock callback.
4888 */
4889int
5ba3f43e 4890mptcp_unlock(struct socket *mp_so, int refcount, void *lr)
39236c6e 4891{
5ba3f43e
A
4892 struct mppcb *mpp = mpsotomppcb(mp_so);
4893 void *lr_saved;
39236c6e 4894
0a7de745 4895 if (lr == NULL) {
5ba3f43e 4896 lr_saved = __builtin_return_address(0);
0a7de745 4897 } else {
5ba3f43e 4898 lr_saved = lr;
0a7de745 4899 }
39236c6e 4900
5ba3f43e
A
4901 if (mpp == NULL) {
4902 panic("%s: so=%p NO PCB usecount=%x lr=%p lrh= %s\n", __func__,
4903 mp_so, mp_so->so_usecount, lr_saved,
4904 solockhistory_nr(mp_so));
4905 /* NOTREACHED */
4906 }
cb323159 4907 socket_lock_assert_owned(mp_so);
39236c6e 4908
0a7de745 4909 if (refcount != 0) {
5ba3f43e 4910 mp_so->so_usecount--;
cb323159 4911 mpp->mpp_inside--;
0a7de745 4912 }
39236c6e 4913
5ba3f43e
A
4914 if (mp_so->so_usecount < 0) {
4915 panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
4916 mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
4917 /* NOTREACHED */
39236c6e 4918 }
cb323159
A
4919 if (mpp->mpp_inside < 0) {
4920 panic("%s: mpp=%p inside=%x lrh= %s\n", __func__,
4921 mpp, mpp->mpp_inside, solockhistory_nr(mp_so));
4922 /* NOTREACHED */
4923 }
5ba3f43e
A
4924 mp_so->unlock_lr[mp_so->next_unlock_lr] = lr_saved;
4925 mp_so->next_unlock_lr = (mp_so->next_unlock_lr + 1) % SO_LCKDBG_MAX;
4926 mpp_unlock(mpp);
4927
0a7de745 4928 return 0;
39236c6e
A
4929}
4930
5ba3f43e
A
4931/*
4932 * Protocol pr_getlock callback.
4933 */
4934lck_mtx_t *
4935mptcp_getlock(struct socket *mp_so, int flags)
39236c6e 4936{
5ba3f43e
A
4937 struct mppcb *mpp = mpsotomppcb(mp_so);
4938
4939 if (mpp == NULL) {
4940 panic("%s: so=%p NULL so_pcb %s\n", __func__, mp_so,
4941 solockhistory_nr(mp_so));
39236c6e
A
4942 /* NOTREACHED */
4943 }
5ba3f43e
A
4944 if (mp_so->so_usecount < 0) {
4945 panic("%s: so=%p usecount=%x lrh= %s\n", __func__,
4946 mp_so, mp_so->so_usecount, solockhistory_nr(mp_so));
4947 /* NOTREACHED */
39236c6e 4948 }
0a7de745 4949 return mpp_getlock(mpp, flags);
39236c6e
A
4950}
4951
4952/*
4953 * MPTCP Join support
4954 */
4955
4956static void
cb323159 4957mptcp_attach_to_subf(struct socket *so, struct mptcb *mp_tp, uint8_t addr_id)
39236c6e
A
4958{
4959 struct tcpcb *tp = sototcpcb(so);
4960 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 4961
39236c6e 4962 /*
39236c6e
A
4963 * The address ID of the first flow is implicitly 0.
4964 */
4965 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
4966 tp->t_local_aid = 0;
4967 } else {
fe8ab488 4968 tp->t_local_aid = addr_id;
39236c6e
A
4969 tp->t_mpflags |= (TMPF_PREESTABLISHED | TMPF_JOINED_FLOW);
4970 so->so_flags |= SOF_MP_SEC_SUBFLOW;
4971 }
4972 sauth_entry = zalloc(mpt_subauth_zone);
4973 sauth_entry->msae_laddr_id = tp->t_local_aid;
4974 sauth_entry->msae_raddr_id = 0;
4975 sauth_entry->msae_raddr_rand = 0;
4976try_again:
4977 sauth_entry->msae_laddr_rand = RandomULong();
0a7de745 4978 if (sauth_entry->msae_laddr_rand == 0) {
39236c6e 4979 goto try_again;
0a7de745 4980 }
39236c6e
A
4981 LIST_INSERT_HEAD(&mp_tp->mpt_subauth_list, sauth_entry, msae_next);
4982}
4983
4984static void
4985mptcp_detach_mptcb_from_subf(struct mptcb *mp_tp, struct socket *so)
4986{
4987 struct mptcp_subf_auth_entry *sauth_entry;
fe8ab488 4988 struct tcpcb *tp = NULL;
39236c6e
A
4989 int found = 0;
4990
fe8ab488 4991 tp = sototcpcb(so);
0a7de745 4992 if (tp == NULL) {
39236c6e 4993 return;
0a7de745 4994 }
39236c6e 4995
39236c6e
A
4996 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
4997 if (sauth_entry->msae_laddr_id == tp->t_local_aid) {
4998 found = 1;
4999 break;
5000 }
5001 }
5002 if (found) {
5003 LIST_REMOVE(sauth_entry, msae_next);
39236c6e 5004 }
fe8ab488 5005
0a7de745 5006 if (found) {
3e170ce0 5007 zfree(mpt_subauth_zone, sauth_entry);
0a7de745 5008 }
39236c6e
A
5009}
5010
5011void
5012mptcp_get_rands(mptcp_addr_id addr_id, struct mptcb *mp_tp, u_int32_t *lrand,
5013 u_int32_t *rrand)
5014{
5015 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 5016
39236c6e
A
5017 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
5018 if (sauth_entry->msae_laddr_id == addr_id) {
0a7de745 5019 if (lrand) {
39236c6e 5020 *lrand = sauth_entry->msae_laddr_rand;
0a7de745
A
5021 }
5022 if (rrand) {
39236c6e 5023 *rrand = sauth_entry->msae_raddr_rand;
0a7de745 5024 }
39236c6e
A
5025 break;
5026 }
5027 }
39236c6e
A
5028}
5029
5030void
5031mptcp_set_raddr_rand(mptcp_addr_id laddr_id, struct mptcb *mp_tp,
5032 mptcp_addr_id raddr_id, u_int32_t raddr_rand)
5033{
5034 struct mptcp_subf_auth_entry *sauth_entry;
39236c6e 5035
39236c6e
A
5036 LIST_FOREACH(sauth_entry, &mp_tp->mpt_subauth_list, msae_next) {
5037 if (sauth_entry->msae_laddr_id == laddr_id) {
5038 if ((sauth_entry->msae_raddr_id != 0) &&
5039 (sauth_entry->msae_raddr_id != raddr_id)) {
cb323159
A
5040 os_log_error(mptcp_log_handle, "%s - %lx: mismatched"
5041 " address ids %d %d \n", __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5042 raddr_id, sauth_entry->msae_raddr_id);
39236c6e
A
5043 return;
5044 }
5045 sauth_entry->msae_raddr_id = raddr_id;
5046 if ((sauth_entry->msae_raddr_rand != 0) &&
5047 (sauth_entry->msae_raddr_rand != raddr_rand)) {
cb323159
A
5048 os_log_error(mptcp_log_handle, "%s - %lx: "
5049 "dup SYN_ACK %d %d \n",
5050 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte),
5051 raddr_rand, sauth_entry->msae_raddr_rand);
39236c6e
A
5052 return;
5053 }
5054 sauth_entry->msae_raddr_rand = raddr_rand;
39236c6e
A
5055 return;
5056 }
5057 }
39236c6e
A
5058}
5059
5060/*
5061 * SHA1 support for MPTCP
5062 */
5ba3f43e
A
5063static void
5064mptcp_do_sha1(mptcp_key_t *key, char *sha_digest)
39236c6e
A
5065{
5066 SHA1_CTX sha1ctxt;
5067 const unsigned char *sha1_base;
5068 int sha1_size;
5069
39236c6e 5070 sha1_base = (const unsigned char *) key;
0a7de745 5071 sha1_size = sizeof(mptcp_key_t);
39236c6e
A
5072 SHA1Init(&sha1ctxt);
5073 SHA1Update(&sha1ctxt, sha1_base, sha1_size);
5074 SHA1Final(sha_digest, &sha1ctxt);
39236c6e
A
5075}
5076
5077void
5078mptcp_hmac_sha1(mptcp_key_t key1, mptcp_key_t key2,
0a7de745 5079 u_int32_t rand1, u_int32_t rand2, u_char *digest)
39236c6e
A
5080{
5081 SHA1_CTX sha1ctxt;
5082 mptcp_key_t key_ipad[8] = {0}; /* key XOR'd with inner pad */
5083 mptcp_key_t key_opad[8] = {0}; /* key XOR'd with outer pad */
5084 u_int32_t data[2];
5085 int i;
5086
5ba3f43e 5087 bzero(digest, SHA1_RESULTLEN);
39236c6e
A
5088
5089 /* Set up the Key for HMAC */
5090 key_ipad[0] = key1;
5091 key_ipad[1] = key2;
5092
5093 key_opad[0] = key1;
5094 key_opad[1] = key2;
5095
5096 /* Set up the message for HMAC */
5097 data[0] = rand1;
5098 data[1] = rand2;
5099
5100 /* Key fits within one 512-bit block, so no need to hash it first */
5101
5102 /* Compute SHA1(Key XOR opad, SHA1(Key XOR ipad, data)) */
5103
5104 for (i = 0; i < 8; i++) {
5105 key_ipad[i] ^= 0x3636363636363636;
5106 key_opad[i] ^= 0x5c5c5c5c5c5c5c5c;
5107 }
5108
5109 /* Perform inner SHA1 */
5110 SHA1Init(&sha1ctxt);
0a7de745
A
5111 SHA1Update(&sha1ctxt, (unsigned char *)key_ipad, sizeof(key_ipad));
5112 SHA1Update(&sha1ctxt, (unsigned char *)data, sizeof(data));
39236c6e
A
5113 SHA1Final(digest, &sha1ctxt);
5114
5115 /* Perform outer SHA1 */
5116 SHA1Init(&sha1ctxt);
0a7de745 5117 SHA1Update(&sha1ctxt, (unsigned char *)key_opad, sizeof(key_opad));
39236c6e
A
5118 SHA1Update(&sha1ctxt, (unsigned char *)digest, SHA1_RESULTLEN);
5119 SHA1Final(digest, &sha1ctxt);
5120}
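
/*
 * A standalone sketch of the same HMAC-SHA1 construction, assuming user-space
 * CommonCrypto (CC_SHA1) in place of the libkern SHA1 routines. The key is
 * Key-A||Key-B (16 bytes) zero-padded to the 64-byte SHA-1 block, the message
 * is R-A||R-B (8 bytes); hmac_sha1_sketch() is an illustrative name only.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>
#include <string.h>
#include <CommonCrypto/CommonDigest.h>

static void
hmac_sha1_sketch(const uint8_t key[16], const uint8_t msg[8],
    uint8_t digest[CC_SHA1_DIGEST_LENGTH])
{
	uint8_t k_ipad[64], k_opad[64];	/* key padded to the SHA-1 block size */
	uint8_t inner[CC_SHA1_DIGEST_LENGTH];
	uint8_t buf[64 + CC_SHA1_DIGEST_LENGTH];
	int i;

	/* Zero-pad the 16-byte key to 64 bytes, then XOR with ipad/opad. */
	memset(k_ipad, 0, sizeof(k_ipad));
	memset(k_opad, 0, sizeof(k_opad));
	memcpy(k_ipad, key, 16);
	memcpy(k_opad, key, 16);
	for (i = 0; i < 64; i++) {
		k_ipad[i] ^= 0x36;
		k_opad[i] ^= 0x5c;
	}

	/* inner = SHA1((K XOR ipad) || msg) */
	memcpy(buf, k_ipad, 64);
	memcpy(buf + 64, msg, 8);
	CC_SHA1(buf, 64 + 8, inner);

	/* digest = SHA1((K XOR opad) || inner) */
	memcpy(buf, k_opad, 64);
	memcpy(buf + 64, inner, CC_SHA1_DIGEST_LENGTH);
	CC_SHA1(buf, 64 + CC_SHA1_DIGEST_LENGTH, digest);
}
#endif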
5121
5122/*
5123 * corresponds to MAC-B = MAC (Key=(Key-B+Key-A), Msg=(R-B+R-A))
5124 * corresponds to MAC-A = MAC (Key=(Key-A+Key-B), Msg=(R-A+R-B))
5125 */
5126void
5ba3f43e 5127mptcp_get_hmac(mptcp_addr_id aid, struct mptcb *mp_tp, u_char *digest)
39236c6e
A
5128{
5129 uint32_t lrand, rrand;
39236c6e 5130
39236c6e
A
5131 lrand = rrand = 0;
5132 mptcp_get_rands(aid, mp_tp, &lrand, &rrand);
5ba3f43e
A
5133 mptcp_hmac_sha1(mp_tp->mpt_localkey, mp_tp->mpt_remotekey, lrand, rrand,
5134 digest);
39236c6e
A
5135}
5136
5137/*
5138 * Authentication data generation
5139 */
5ba3f43e 5140static void
39236c6e
A
5141mptcp_generate_token(char *sha_digest, int sha_digest_len, caddr_t token,
5142 int token_len)
5143{
0a7de745 5144 VERIFY(token_len == sizeof(u_int32_t));
39236c6e
A
5145 VERIFY(sha_digest_len == SHA1_RESULTLEN);
5146
5147 /* Most significant 32 bits of the SHA1 hash */
0a7de745 5148 bcopy(sha_digest, token, sizeof(u_int32_t));
490019cf 5149 return;
39236c6e
A
5150}
5151
5ba3f43e 5152static void
39236c6e
A
5153mptcp_generate_idsn(char *sha_digest, int sha_digest_len, caddr_t idsn,
5154 int idsn_len)
5155{
0a7de745 5156 VERIFY(idsn_len == sizeof(u_int64_t));
39236c6e
A
5157 VERIFY(sha_digest_len == SHA1_RESULTLEN);
5158
5159 /*
5160 * Least significant 64 bits of the SHA1 hash
5161 */
5162
5163 idsn[7] = sha_digest[12];
5164 idsn[6] = sha_digest[13];
5165 idsn[5] = sha_digest[14];
5166 idsn[4] = sha_digest[15];
5167 idsn[3] = sha_digest[16];
5168 idsn[2] = sha_digest[17];
5169 idsn[1] = sha_digest[18];
5170 idsn[0] = sha_digest[19];
490019cf 5171 return;
39236c6e
A
5172}
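
/*
 * A compact restatement of the two generators above, assuming a 20-byte
 * SHA-1 digest of the key is already at hand: the token is the most
 * significant 32 bits, the initial DSN the least significant 64 bits with
 * the byte reversal shown in mptcp_generate_idsn(). derive_token_and_idsn()
 * is an illustrative name, not part of this file.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>
#include <string.h>

static void
derive_token_and_idsn(const uint8_t digest[20], uint8_t token[4],
    uint8_t idsn[8])
{
	int i;

	memcpy(token, digest, 4);		/* digest[0..3]   */
	for (i = 0; i < 8; i++) {
		idsn[i] = digest[19 - i];	/* digest[19..12] */
	}
}
#endif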
5173
490019cf
A
5174static void
5175mptcp_conn_properties(struct mptcb *mp_tp)
5176{
5177 /* There is only Version 0 at this time */
5178 mp_tp->mpt_version = MPTCP_STD_VERSION_0;
5179
5180 /* Set DSS checksum flag */
0a7de745 5181 if (mptcp_dss_csum) {
490019cf 5182 mp_tp->mpt_flags |= MPTCPF_CHECKSUM;
0a7de745 5183 }
490019cf
A
5184
5185 /* Set up receive window */
5186 mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
5187
5188 /* Set up gc ticks */
5189 mp_tp->mpt_gc_ticks = MPT_GC_TICKS;
5190}
5191
5192static void
5ba3f43e 5193mptcp_init_local_parms(struct mptses *mpte)
39236c6e 5194{
5ba3f43e
A
5195 struct mptcb *mp_tp = mpte->mpte_mptcb;
5196 char key_digest[SHA1_RESULTLEN];
490019cf 5197
5ba3f43e
A
5198 read_frandom(&mp_tp->mpt_localkey, sizeof(mp_tp->mpt_localkey));
5199 mptcp_do_sha1(&mp_tp->mpt_localkey, key_digest);
5200
5201 mptcp_generate_token(key_digest, SHA1_RESULTLEN,
0a7de745 5202 (caddr_t)&mp_tp->mpt_localtoken, sizeof(mp_tp->mpt_localtoken));
5ba3f43e 5203 mptcp_generate_idsn(key_digest, SHA1_RESULTLEN,
0a7de745 5204 (caddr_t)&mp_tp->mpt_local_idsn, sizeof(u_int64_t));
490019cf
A
5205
5206 /* The subflow SYN is also first MPTCP byte */
5207 mp_tp->mpt_snduna = mp_tp->mpt_sndmax = mp_tp->mpt_local_idsn + 1;
5208 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
5209
5210 mptcp_conn_properties(mp_tp);
5211}
5212
5213int
5214mptcp_init_remote_parms(struct mptcb *mp_tp)
5215{
5ba3f43e 5216 char remote_digest[SHA1_RESULTLEN];
39236c6e
A
5217
5218 /* Only Version 0 is supported for auth purposes */
0a7de745
A
5219 if (mp_tp->mpt_version != MPTCP_STD_VERSION_0) {
5220 return -1;
5221 }
39236c6e
A
5222
5223 /* Setup local and remote tokens and Initial DSNs */
5ba3f43e 5224 mptcp_do_sha1(&mp_tp->mpt_remotekey, remote_digest);
39236c6e 5225 mptcp_generate_token(remote_digest, SHA1_RESULTLEN,
0a7de745 5226 (caddr_t)&mp_tp->mpt_remotetoken, sizeof(mp_tp->mpt_remotetoken));
39236c6e 5227 mptcp_generate_idsn(remote_digest, SHA1_RESULTLEN,
0a7de745 5228 (caddr_t)&mp_tp->mpt_remote_idsn, sizeof(u_int64_t));
5ba3f43e 5229 mp_tp->mpt_rcvnxt = mp_tp->mpt_remote_idsn + 1;
cb323159 5230 mp_tp->mpt_rcvadv = mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd;
39236c6e 5231
0a7de745 5232 return 0;
39236c6e
A
5233}
5234
5ba3f43e 5235static void
39236c6e
A
5236mptcp_send_dfin(struct socket *so)
5237{
5238 struct tcpcb *tp = NULL;
5239 struct inpcb *inp = NULL;
5240
5241 inp = sotoinpcb(so);
0a7de745 5242 if (!inp) {
39236c6e 5243 return;
0a7de745 5244 }
39236c6e
A
5245
5246 tp = intotcpcb(inp);
0a7de745 5247 if (!tp) {
39236c6e 5248 return;
0a7de745 5249 }
39236c6e 5250
0a7de745 5251 if (!(tp->t_mpflags & TMPF_RESET)) {
39236c6e 5252 tp->t_mpflags |= TMPF_SEND_DFIN;
0a7de745 5253 }
39236c6e
A
5254}
5255
5256/*
5257 * Data Sequence Mapping routines
5258 */
5259void
5260mptcp_insert_dsn(struct mppcb *mpp, struct mbuf *m)
5261{
5262 struct mptcb *mp_tp;
5263
0a7de745 5264 if (m == NULL) {
39236c6e 5265 return;
0a7de745 5266 }
39236c6e 5267
3e170ce0 5268 __IGNORE_WCASTALIGN(mp_tp = &((struct mpp_mtp *)mpp)->mtcb);
5ba3f43e 5269
39236c6e
A
5270 while (m) {
5271 VERIFY(m->m_flags & M_PKTHDR);
5272 m->m_pkthdr.pkt_flags |= (PKTF_MPTCP | PKTF_MPSO);
5273 m->m_pkthdr.mp_dsn = mp_tp->mpt_sndmax;
5274 m->m_pkthdr.mp_rlen = m_pktlen(m);
5275 mp_tp->mpt_sndmax += m_pktlen(m);
5276 m = m->m_next;
5277 }
5ba3f43e
A
5278}
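
/*
 * A standalone sketch of the stamping loop above over a hypothetical
 * segment chain (struct seg is a stand-in for an mbuf chain, not a real
 * type): every segment is tagged with the current send-max DSN and the
 * send-max advances by the segment's length.
 */
#if 0 /* illustration only; build separately in user space */
#include <stddef.h>
#include <stdint.h>

struct seg {
	struct seg *next;
	uint64_t dsn;		/* DSN assigned to this segment */
	uint32_t len;		/* payload bytes in this segment */
};

static uint64_t
stamp_dsn(struct seg *head, uint64_t sndmax)
{
	struct seg *s;

	for (s = head; s != NULL; s = s->next) {
		s->dsn = sndmax;
		sndmax += s->len;
	}
	return sndmax;		/* the caller's new mpt_sndmax */
}
#endif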
5279
5280void
5281mptcp_fallback_sbdrop(struct socket *so, struct mbuf *m, int len)
5282{
5283 struct mptcb *mp_tp = tptomptp(sototcpcb(so));
5284 uint64_t data_ack;
5285 uint64_t dsn;
5286
0a7de745 5287 if (!m || len == 0) {
5ba3f43e 5288 return;
0a7de745 5289 }
5ba3f43e
A
5290
5291 while (m && len > 0) {
5292 VERIFY(m->m_flags & M_PKTHDR);
5293 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
5294
5295 data_ack = m->m_pkthdr.mp_dsn + m->m_pkthdr.mp_rlen;
5296 dsn = m->m_pkthdr.mp_dsn;
5297
5298 len -= m->m_len;
5299 m = m->m_next;
5300 }
5301
5302 if (m && len == 0) {
5303 /*
5304 * If there is one more mbuf in the chain, it automatically means
5305 * that up to m->mp_dsn has been ack'ed.
5306 *
5307 * This means we actually correct data_ack back down (compared
5308 * to what we set inside the loop, dsn + data_len), because in
5309 * the loop we are "optimistic" and assume that the full mapping
5310 * will be acked. If that's not the case and we get out of the
5311 * loop with m != NULL, it means only up to m->mp_dsn has been
5312 * really acked.
5313 */
5314 data_ack = m->m_pkthdr.mp_dsn;
5315 }
5316
5317 if (len < 0) {
5318 /*
5319 * If len is negative, meaning we acked in the middle of an mbuf,
5320 * only up to this mbuf's data-sequence number has been acked
5321 * at the MPTCP-level.
5322 */
5323 data_ack = dsn;
5324 }
5325
5326 mptcplog((LOG_DEBUG, "%s inferred ack up to %u\n", __func__, (uint32_t)data_ack),
0a7de745 5327 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
cb323159
A
5328
5329 /* We can have data in the subflow's send-queue that is being acked,
5330 * while the DATA_ACK has already advanced. Thus, we should check whether
5331 * or not the DATA_ACK is actually new here.
5332 */
5333 if (MPTCP_SEQ_LEQ(data_ack, mp_tp->mpt_sndmax) &&
5334 MPTCP_SEQ_GEQ(data_ack, mp_tp->mpt_snduna)) {
5335 mptcp_data_ack_rcvd(mp_tp, sototcpcb(so), data_ack);
5336 }
39236c6e
A
5337}
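
/*
 * A standalone sketch of the DATA_ACK inference above, over a hypothetical
 * list of (subflow bytes, DSN, mapping length) entries (struct map is not a
 * real type). It walks optimistically, then corrects back down exactly as
 * the comments in mptcp_fallback_sbdrop() describe; 64-bit sequence
 * wraparound is ignored for brevity.
 */
#if 0 /* illustration only; build separately in user space */
#include <stddef.h>
#include <stdint.h>

struct map {
	struct map *next;
	uint64_t dsn;		/* start of the DSS mapping */
	uint32_t rlen;		/* length of the DSS mapping */
	int len;		/* subflow bytes carried by this buffer */
};

static uint64_t
infer_data_ack(struct map *m, int acked)
{
	uint64_t data_ack = 0, dsn = 0;

	while (m != NULL && acked > 0) {
		data_ack = m->dsn + m->rlen;	/* optimistic: whole mapping acked */
		dsn = m->dsn;
		acked -= m->len;
		m = m->next;
	}
	if (m != NULL && acked == 0) {
		data_ack = m->dsn;	/* a buffer remains: only up to its DSN */
	}
	if (acked < 0) {
		data_ack = dsn;		/* the ack ended mid-buffer */
	}
	return data_ack;
}
#endif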
5338
5339void
490019cf 5340mptcp_preproc_sbdrop(struct socket *so, struct mbuf *m, unsigned int len)
39236c6e 5341{
490019cf
A
5342 int rewinding = 0;
5343
5ba3f43e
A
5344 /* TFO makes things complicated. */
5345 if (so->so_flags1 & SOF1_TFO_REWIND) {
5346 rewinding = 1;
5347 so->so_flags1 &= ~SOF1_TFO_REWIND;
490019cf 5348 }
39236c6e 5349
5ba3f43e
A
5350 while (m && (!(so->so_flags & SOF_MP_SUBFLOW) || rewinding)) {
5351 u_int32_t sub_len;
39236c6e 5352 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e 5353 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
39236c6e 5354
5ba3f43e 5355 sub_len = m->m_pkthdr.mp_rlen;
39236c6e 5356
5ba3f43e
A
5357 if (sub_len < len) {
5358 m->m_pkthdr.mp_dsn += sub_len;
5359 if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
5360 m->m_pkthdr.mp_rseq += sub_len;
39236c6e 5361 }
5ba3f43e
A
5362 m->m_pkthdr.mp_rlen = 0;
5363 len -= sub_len;
39236c6e 5364 } else {
5ba3f43e 5365 /* sub_len >= len */
0a7de745 5366 if (rewinding == 0) {
5ba3f43e 5367 m->m_pkthdr.mp_dsn += len;
0a7de745 5368 }
5ba3f43e 5369 if (!(m->m_pkthdr.pkt_flags & PKTF_MPSO)) {
0a7de745 5370 if (rewinding == 0) {
5ba3f43e 5371 m->m_pkthdr.mp_rseq += len;
0a7de745 5372 }
5ba3f43e
A
5373 }
5374 mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u len %d %d\n",
5375 __func__, (u_int32_t)m->m_pkthdr.mp_dsn,
5376 m->m_pkthdr.mp_rseq, m->m_pkthdr.mp_rlen, len),
5377 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5378 m->m_pkthdr.mp_rlen -= len;
5379 break;
39236c6e
A
5380 }
5381 m = m->m_next;
5382 }
39037602
A
5383
5384 if (so->so_flags & SOF_MP_SUBFLOW &&
5385 !(sototcpcb(so)->t_mpflags & TMPF_TFO_REQUEST) &&
5386 !(sototcpcb(so)->t_mpflags & TMPF_RCVD_DACK)) {
5387 /*
5388 * Received an ack without receiving a DATA_ACK.
5389 * Need to fallback to regular TCP (or destroy this subflow).
5390 */
5ba3f43e 5391 sototcpcb(so)->t_mpflags |= TMPF_INFIN_SENT;
39037602
A
5392 mptcp_notify_mpfail(so);
5393 }
39236c6e
A
5394}
5395
5396/* Obtain the DSN mapping stored in the mbuf */
5397void
5ba3f43e
A
5398mptcp_output_getm_dsnmap32(struct socket *so, int off,
5399 uint32_t *dsn, uint32_t *relseq, uint16_t *data_len, uint16_t *dss_csum)
39236c6e
A
5400{
5401 u_int64_t dsn64;
5402
5ba3f43e 5403 mptcp_output_getm_dsnmap64(so, off, &dsn64, relseq, data_len, dss_csum);
39236c6e 5404 *dsn = (u_int32_t)MPTCP_DATASEQ_LOW32(dsn64);
39236c6e
A
5405}
5406
5407void
5ba3f43e 5408mptcp_output_getm_dsnmap64(struct socket *so, int off, uint64_t *dsn,
0a7de745
A
5409 uint32_t *relseq, uint16_t *data_len,
5410 uint16_t *dss_csum)
39236c6e
A
5411{
5412 struct mbuf *m = so->so_snd.sb_mb;
5ba3f43e 5413 int off_orig = off;
39236c6e 5414
5ba3f43e 5415 VERIFY(off >= 0);
39236c6e 5416
4ba76501
A
5417 if (m == NULL && (so->so_flags & SOF_DEFUNCT)) {
5418 *dsn = 0;
5419 *relseq = 0;
5420 *data_len = 0;
5421 *dss_csum = 0;
5422 return;
5423 }
5424
39236c6e
A
5425 /*
5426 * In the subflow socket, the DSN sequencing can be discontiguous,
5427 * but the subflow sequence mapping is contiguous. Use the subflow
5428 * sequence property to find the right mbuf and corresponding dsn
5429 * mapping.
5430 */
5431
5432 while (m) {
39236c6e 5433 VERIFY(m->m_flags & M_PKTHDR);
5ba3f43e 5434 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
39236c6e 5435
5ba3f43e
A
5436 if (off >= m->m_len) {
5437 off -= m->m_len;
39236c6e
A
5438 m = m->m_next;
5439 } else {
5440 break;
5441 }
5442 }
5443
5ba3f43e
A
5444 VERIFY(off >= 0);
5445 VERIFY(m->m_pkthdr.mp_rlen <= UINT16_MAX);
39236c6e 5446
5ba3f43e
A
5447 *dsn = m->m_pkthdr.mp_dsn;
5448 *relseq = m->m_pkthdr.mp_rseq;
5449 *data_len = m->m_pkthdr.mp_rlen;
5450 *dss_csum = m->m_pkthdr.mp_csum;
39236c6e 5451
5ba3f43e 5452 mptcplog((LOG_DEBUG, "%s: dsn %u ssn %u data_len %d off %d off_orig %d\n",
0a7de745
A
5453 __func__, (u_int32_t)(*dsn), *relseq, *data_len, off, off_orig),
5454 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e
A
5455}
5456
5457/*
3e170ce0
A
5458 * Note that this is called only from tcp_input() via mptcp_input_preproc()
5459 * tcp_input() may trim data after the dsn mapping is inserted into the mbuf.
5460 * When it trims data tcp_input calls m_adj() which does not remove the
5461 * m_pkthdr even if the m_len becomes 0 as a result of trimming the mbuf.
5462 * The dsn map insertion cannot be delayed after trim, because data can be in
5463 * the reassembly queue for a while and the DSN option info in tp will be
5464 * overwritten for every new packet received.
39236c6e
A
5465 * The dsn map will be adjusted just prior to appending to subflow sockbuf
5466 * with mptcp_adj_rmap()
5467 */
5468void
5c9f4661 5469mptcp_insert_rmap(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th)
39236c6e 5470{
5c9f4661 5471 VERIFY(m->m_flags & M_PKTHDR);
39236c6e
A
5472 VERIFY(!(m->m_pkthdr.pkt_flags & PKTF_MPTCP));
5473
5474 if (tp->t_mpflags & TMPF_EMBED_DSN) {
39236c6e
A
5475 m->m_pkthdr.mp_dsn = tp->t_rcv_map.mpt_dsn;
5476 m->m_pkthdr.mp_rseq = tp->t_rcv_map.mpt_sseq;
5477 m->m_pkthdr.mp_rlen = tp->t_rcv_map.mpt_len;
5ba3f43e 5478 m->m_pkthdr.mp_csum = tp->t_rcv_map.mpt_csum;
0a7de745 5479 if (tp->t_rcv_map.mpt_dfin) {
5c9f4661 5480 m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
0a7de745 5481 }
5c9f4661 5482
39236c6e 5483 m->m_pkthdr.pkt_flags |= PKTF_MPTCP;
5c9f4661 5484
39236c6e
A
5485 tp->t_mpflags &= ~TMPF_EMBED_DSN;
5486 tp->t_mpflags |= TMPF_MPTCP_ACKNOW;
5c9f4661 5487 } else if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
0a7de745 5488 if (th->th_flags & TH_FIN) {
5c9f4661 5489 m->m_pkthdr.pkt_flags |= PKTF_MPTCP_DFIN;
0a7de745 5490 }
39236c6e
A
5491 }
5492}
5493
39236c6e
A
5494/*
5495 * Following routines help with failure detection and failover of data
5496 * transfer from one subflow to another.
5497 */
5498void
5499mptcp_act_on_txfail(struct socket *so)
5500{
5501 struct tcpcb *tp = NULL;
5502 struct inpcb *inp = sotoinpcb(so);
5503
0a7de745 5504 if (inp == NULL) {
39236c6e 5505 return;
0a7de745 5506 }
39236c6e
A
5507
5508 tp = intotcpcb(inp);
0a7de745 5509 if (tp == NULL) {
39236c6e 5510 return;
0a7de745 5511 }
39236c6e 5512
0a7de745 5513 if (so->so_flags & SOF_MP_TRYFAILOVER) {
39236c6e 5514 return;
0a7de745 5515 }
39236c6e
A
5516
5517 so->so_flags |= SOF_MP_TRYFAILOVER;
5518 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPFAILOVER));
5519}
5520
5521/*
5522 * Support for MP_FAIL option
5523 */
5524int
5525mptcp_get_map_for_dsn(struct socket *so, u_int64_t dsn_fail, u_int32_t *tcp_seq)
5526{
5527 struct mbuf *m = so->so_snd.sb_mb;
5528 u_int64_t dsn;
5529 int off = 0;
5530 u_int32_t datalen;
5531
0a7de745
A
5532 if (m == NULL) {
5533 return -1;
5534 }
39236c6e
A
5535
5536 while (m != NULL) {
5537 VERIFY(m->m_pkthdr.pkt_flags & PKTF_MPTCP);
5538 VERIFY(m->m_flags & M_PKTHDR);
5539 dsn = m->m_pkthdr.mp_dsn;
5540 datalen = m->m_pkthdr.mp_rlen;
5541 if (MPTCP_SEQ_LEQ(dsn, dsn_fail) &&
5542 (MPTCP_SEQ_GEQ(dsn + datalen, dsn_fail))) {
5543 off = dsn_fail - dsn;
5544 *tcp_seq = m->m_pkthdr.mp_rseq + off;
5ba3f43e 5545 mptcplog((LOG_DEBUG, "%s: %llu %llu \n", __func__, dsn,
0a7de745
A
5546 dsn_fail), MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
5547 return 0;
39236c6e
A
5548 }
5549
5550 m = m->m_next;
5551 }
5552
5553 /*
5554 * If there was no mbuf data and a fallback to TCP occurred, there's
5555 * not much else to do.
5556 */
5557
cb323159 5558 os_log_error(mptcp_log_handle, "%s: %llu not found \n", __func__, dsn_fail);
0a7de745 5559 return -1;
5ba3f43e
A
5560}
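
/*
 * A standalone sketch of the MP_FAIL translation above: given a list of
 * DSN mappings (struct dmap is a hypothetical stand-in for the send
 * socket-buffer walk), find the mapping that covers the failing DSN and
 * return the corresponding relative subflow sequence number. Plain
 * comparisons are used here; the kernel uses the wraparound-safe
 * MPTCP_SEQ_* macros.
 */
#if 0 /* illustration only; build separately in user space */
#include <stddef.h>
#include <stdint.h>

struct dmap {
	struct dmap *next;
	uint64_t dsn;		/* start of mapping in data-sequence space */
	uint32_t rlen;		/* mapping length */
	uint32_t rseq;		/* corresponding relative subflow sequence */
};

static int
map_dsn_to_subflow_seq(struct dmap *m, uint64_t dsn_fail, uint32_t *tcp_seq)
{
	for (; m != NULL; m = m->next) {
		if (dsn_fail >= m->dsn && dsn_fail <= m->dsn + m->rlen) {
			*tcp_seq = m->rseq + (uint32_t)(dsn_fail - m->dsn);
			return 0;
		}
	}
	return -1;		/* not found, e.g. after a fallback to TCP */
}
#endif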
5561
5562/*
5563 * Support for sending contiguous MPTCP bytes in subflow
5564 * Also for preventing sending data with ACK in 3-way handshake
5565 */
5566int32_t
5567mptcp_adj_sendlen(struct socket *so, int32_t off)
5568{
5569 struct tcpcb *tp = sototcpcb(so);
5570 struct mptsub *mpts = tp->t_mpsub;
5571 uint64_t mdss_dsn;
5572 uint32_t mdss_subflow_seq;
5573 int mdss_subflow_off;
5574 uint16_t mdss_data_len;
5575 uint16_t dss_csum;
5576
4ba76501
A
5577 if (so->so_snd.sb_mb == NULL && (so->so_flags & SOF_DEFUNCT)) {
5578 return 0;
5579 }
5580
5ba3f43e 5581 mptcp_output_getm_dsnmap64(so, off, &mdss_dsn, &mdss_subflow_seq,
0a7de745 5582 &mdss_data_len, &dss_csum);
5ba3f43e
A
5583
5584 /*
5585 * We need to compute how much of the mapping still remains.
5586 * So, we compute the offset in the send-buffer of the dss-sub-seq.
5587 */
5588 mdss_subflow_off = (mdss_subflow_seq + mpts->mpts_iss) - tp->snd_una;
5589
5590 /*
5591 * When TFO is used, data is sent on the SYN itself (at mpts->mpts_iss),
5592 * although the relative seq has been set to 1 (while it should be 0).
5593 */
0a7de745 5594 if (tp->t_mpflags & TMPF_TFO_REQUEST) {
5ba3f43e 5595 mdss_subflow_off--;
0a7de745 5596 }
5ba3f43e 5597
5ba3f43e
A
5598 VERIFY(off >= mdss_subflow_off);
5599
0a7de745 5600 return mdss_data_len - (off - mdss_subflow_off);
5ba3f43e
A
5601}
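
/*
 * The arithmetic of mptcp_adj_sendlen() in isolation, under the assumption
 * that the TFO off-by-one adjustment is not needed: the mapping starts at
 * send-buffer offset (subflow_seq + iss) - snd_una and spans data_len bytes,
 * so the bytes of the mapping still left at offset 'off' are as computed
 * below. remaining_in_mapping() is an illustrative name only.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>

static int32_t
remaining_in_mapping(uint32_t subflow_seq, uint32_t iss, uint32_t snd_una,
    uint16_t data_len, int32_t off)
{
	int32_t map_off = (int32_t)((subflow_seq + iss) - snd_una);

	return (int32_t)data_len - (off - map_off);
}
#endif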
5602
5603static uint32_t
5604mptcp_get_maxseg(struct mptses *mpte)
5605{
5606 struct mptsub *mpts;
5607 uint32_t maxseg = 0;
5608
5609 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5610 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5611
5612 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 5613 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 5614 continue;
0a7de745 5615 }
5ba3f43e 5616
0a7de745 5617 if (tp->t_maxseg > maxseg) {
5ba3f43e 5618 maxseg = tp->t_maxseg;
0a7de745 5619 }
5ba3f43e
A
5620 }
5621
0a7de745 5622 return maxseg;
5ba3f43e
A
5623}
5624
5625static uint8_t
5626mptcp_get_rcvscale(struct mptses *mpte)
5627{
5628 struct mptsub *mpts;
5629 uint8_t rcvscale = UINT8_MAX;
5630
5631 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5632 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5633
5634 if (!TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 5635 TCPS_HAVERCVDFIN2(tp->t_state)) {
5ba3f43e 5636 continue;
0a7de745 5637 }
5ba3f43e 5638
0a7de745 5639 if (tp->rcv_scale < rcvscale) {
5ba3f43e 5640 rcvscale = tp->rcv_scale;
0a7de745 5641 }
5ba3f43e
A
5642 }
5643
0a7de745 5644 return rcvscale;
5ba3f43e
A
5645}
5646
5647/* Similar to tcp_sbrcv_reserve */
5648static void
5649mptcp_sbrcv_reserve(struct mptcb *mp_tp, struct sockbuf *sbrcv,
0a7de745 5650 u_int32_t newsize, u_int32_t idealsize)
5ba3f43e
A
5651{
5652 uint8_t rcvscale = mptcp_get_rcvscale(mp_tp->mpt_mpte);
5653
5654 /* newsize should not exceed max */
5655 newsize = min(newsize, tcp_autorcvbuf_max);
5656
5657 /* The receive window scale negotiated at the
5658 * beginning of the connection will also set a
5659 * limit on the socket buffer size
5660 */
5661 newsize = min(newsize, TCP_MAXWIN << rcvscale);
5662
5663 /* Set new socket buffer size */
5664 if (newsize > sbrcv->sb_hiwat &&
0a7de745 5665 (sbreserve(sbrcv, newsize) == 1)) {
5ba3f43e
A
5666 sbrcv->sb_idealsize = min(max(sbrcv->sb_idealsize,
5667 (idealsize != 0) ? idealsize : newsize), tcp_autorcvbuf_max);
5668
5669 /* Again check the limit set by the advertised
5670 * window scale
5671 */
5672 sbrcv->sb_idealsize = min(sbrcv->sb_idealsize,
0a7de745 5673 TCP_MAXWIN << rcvscale);
5ba3f43e
A
5674 }
5675}
5676
5677void
5678mptcp_sbrcv_grow(struct mptcb *mp_tp)
5679{
5680 struct mptses *mpte = mp_tp->mpt_mpte;
5681 struct socket *mp_so = mpte->mpte_mppcb->mpp_socket;
5682 struct sockbuf *sbrcv = &mp_so->so_rcv;
5683 uint32_t hiwat_sum = 0;
5684 uint32_t ideal_sum = 0;
5685 struct mptsub *mpts;
5686
5687 /*
5688 * Do not grow the receive socket buffer if
5689 * - auto resizing is disabled, globally or on this socket
5690 * - the high water mark already reached the maximum
5691 * - the stream is in background and receive side is being
5692 * throttled
5693 * - if there are segments in reassembly queue indicating loss,
5694 * do not need to increase recv window during recovery as more
5695 * data is not going to be sent. A duplicate ack sent during
5696 * recovery should not change the receive window
5697 */
5698 if (tcp_do_autorcvbuf == 0 ||
5699 (sbrcv->sb_flags & SB_AUTOSIZE) == 0 ||
5700 tcp_cansbgrow(sbrcv) == 0 ||
5701 sbrcv->sb_hiwat >= tcp_autorcvbuf_max ||
5702 (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) ||
5703 !LIST_EMPTY(&mp_tp->mpt_segq)) {
5704 /* Can not resize the socket buffer, just return */
5705 return;
5706 }
5707
5708 /*
5709 * Ideally, we want the rbuf to be (sum_i {bw_i} * rtt_max * 2)
5710 *
5711 * But, for this we first need accurate receiver-RTT estimations, which
5712 * we currently don't have.
5713 *
5714 * Let's use a dummy algorithm for now, just taking the sum of all
5715 * subflow's receive-buffers. It's too low, but that's all we can get
5716 * for now.
5717 */
5718
5719 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5720 hiwat_sum += mpts->mpts_socket->so_rcv.sb_hiwat;
5721 ideal_sum += mpts->mpts_socket->so_rcv.sb_idealsize;
5722 }
5723
5724 mptcp_sbrcv_reserve(mp_tp, sbrcv, hiwat_sum, ideal_sum);
39236c6e
A
5725}
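
/*
 * The sizing rule used above, reduced to plain arithmetic: the MPTCP
 * receive buffer aims for the sum of the subflows' buffer sizes, clamped
 * by the autotuning maximum and by the largest window the smallest
 * negotiated receive-window scale can advertise. mptcp_rcvbuf_target() is
 * an illustrative name, and 65535 is TCP_MAXWIN.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>

static uint32_t
min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

static uint32_t
mptcp_rcvbuf_target(uint32_t subflow_hiwat_sum, uint32_t autorcvbuf_max,
    uint8_t min_rcvscale)
{
	uint32_t max_window = (uint32_t)65535 << min_rcvscale;	/* TCP_MAXWIN */

	return min_u32(min_u32(subflow_hiwat_sum, autorcvbuf_max), max_window);
}
#endif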
5726
5727/*
5ba3f43e
A
5728 * Determine if we can grow the receive socket buffer to avoid sending
5729 * a zero window update to the peer. We allow even socket buffers that
5730 * have fixed size (set by the application) to grow if the resource
5731 * constraints are met. They will also be trimmed after the application
5732 * reads data.
5733 *
5734 * Similar to tcp_sbrcv_grow_rwin
39236c6e 5735 */
5ba3f43e
A
5736static void
5737mptcp_sbrcv_grow_rwin(struct mptcb *mp_tp, struct sockbuf *sb)
39236c6e 5738{
5ba3f43e
A
5739 struct socket *mp_so = mp_tp->mpt_mpte->mpte_mppcb->mpp_socket;
5740 u_int32_t rcvbufinc = mptcp_get_maxseg(mp_tp->mpt_mpte) << 4;
5741 u_int32_t rcvbuf = sb->sb_hiwat;
39236c6e 5742
0a7de745 5743 if (tcp_recv_bg == 1 || IS_TCP_RECV_BG(mp_so)) {
5ba3f43e 5744 return;
0a7de745 5745 }
39236c6e 5746
5ba3f43e
A
5747 if (tcp_do_autorcvbuf == 1 &&
5748 tcp_cansbgrow(sb) &&
5749 /* Diff to tcp_sbrcv_grow_rwin */
5750 (mp_so->so_flags1 & SOF1_EXTEND_BK_IDLE_WANTED) == 0 &&
5751 (rcvbuf - sb->sb_cc) < rcvbufinc &&
5752 rcvbuf < tcp_autorcvbuf_max &&
5753 (sb->sb_idealsize > 0 &&
5754 sb->sb_hiwat <= (sb->sb_idealsize + rcvbufinc))) {
5755 sbreserve(sb, min((sb->sb_hiwat + rcvbufinc), tcp_autorcvbuf_max));
490019cf 5756 }
39236c6e
A
5757}
5758
5ba3f43e 5759/* Similar to tcp_sbspace */
39236c6e 5760int32_t
5ba3f43e 5761mptcp_sbspace(struct mptcb *mp_tp)
39236c6e 5762{
5ba3f43e 5763 struct sockbuf *sb = &mp_tp->mpt_mpte->mpte_mppcb->mpp_socket->so_rcv;
39236c6e
A
5764 uint32_t rcvbuf;
5765 int32_t space;
5ba3f43e
A
5766 int32_t pending = 0;
5767
cb323159 5768 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
39236c6e 5769
5ba3f43e 5770 mptcp_sbrcv_grow_rwin(mp_tp, sb);
39236c6e 5771
5ba3f43e 5772 /* hiwat might have changed */
39236c6e 5773 rcvbuf = sb->sb_hiwat;
5ba3f43e
A
5774
5775 space = ((int32_t) imin((rcvbuf - sb->sb_cc),
0a7de745
A
5776 (sb->sb_mbmax - sb->sb_mbcnt)));
5777 if (space < 0) {
39236c6e 5778 space = 0;
0a7de745 5779 }
5ba3f43e
A
5780
5781#if CONTENT_FILTER
5782 /* Compensate for data being processed by content filters */
5783 pending = cfil_sock_data_space(sb);
5784#endif /* CONTENT_FILTER */
0a7de745 5785 if (pending > space) {
5ba3f43e 5786 space = 0;
0a7de745 5787 } else {
5ba3f43e 5788 space -= pending;
0a7de745 5789 }
39236c6e 5790
0a7de745 5791 return space;
39236c6e
A
5792}
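
/*
 * The space computation above as a pure function: advertisable space is
 * the smaller of the free data room and the free mbuf room, never negative,
 * reduced by whatever the content filters still hold. sbspace_sketch() is
 * an illustrative name only.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>

static int32_t
sbspace_sketch(uint32_t hiwat, uint32_t cc, uint32_t mbmax, uint32_t mbcnt,
    int32_t cfil_pending)
{
	int32_t data_room = (int32_t)(hiwat - cc);
	int32_t mbuf_room = (int32_t)(mbmax - mbcnt);
	int32_t space = data_room < mbuf_room ? data_room : mbuf_room;

	if (space < 0) {
		space = 0;
	}
	if (cfil_pending > space) {
		return 0;
	}
	return space - cfil_pending;
}
#endif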
5793
5794/*
5795 * Support Fallback to Regular TCP
5796 */
5797void
5798mptcp_notify_mpready(struct socket *so)
5799{
5800 struct tcpcb *tp = NULL;
5801
0a7de745 5802 if (so == NULL) {
39236c6e 5803 return;
0a7de745 5804 }
39236c6e
A
5805
5806 tp = intotcpcb(sotoinpcb(so));
5807
0a7de745 5808 if (tp == NULL) {
39236c6e 5809 return;
0a7de745 5810 }
39236c6e
A
5811
5812 DTRACE_MPTCP4(multipath__ready, struct socket *, so,
5813 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
5814 struct tcpcb *, tp);
5815
0a7de745 5816 if (!(tp->t_mpflags & TMPF_MPTCP_TRUE)) {
39236c6e 5817 return;
0a7de745 5818 }
39236c6e 5819
0a7de745 5820 if (tp->t_mpflags & TMPF_MPTCP_READY) {
39236c6e 5821 return;
0a7de745 5822 }
39236c6e
A
5823
5824 tp->t_mpflags &= ~TMPF_TCP_FALLBACK;
5825 tp->t_mpflags |= TMPF_MPTCP_READY;
5826
5827 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
5828}
5829
5830void
5831mptcp_notify_mpfail(struct socket *so)
5832{
5833 struct tcpcb *tp = NULL;
5834
0a7de745 5835 if (so == NULL) {
39236c6e 5836 return;
0a7de745 5837 }
39236c6e
A
5838
5839 tp = intotcpcb(sotoinpcb(so));
5840
0a7de745 5841 if (tp == NULL) {
39236c6e 5842 return;
0a7de745 5843 }
39236c6e
A
5844
5845 DTRACE_MPTCP4(multipath__failed, struct socket *, so,
5846 struct sockbuf *, &so->so_rcv, struct sockbuf *, &so->so_snd,
5847 struct tcpcb *, tp);
5848
0a7de745 5849 if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
39236c6e 5850 return;
0a7de745 5851 }
39236c6e 5852
0a7de745 5853 tp->t_mpflags &= ~(TMPF_MPTCP_READY | TMPF_MPTCP_TRUE);
39236c6e
A
5854 tp->t_mpflags |= TMPF_TCP_FALLBACK;
5855
5856 soevent(so, (SO_FILT_HINT_LOCKED | SO_FILT_HINT_MPSTATUS));
5857}
5858
5859/*
5860 * Keepalive helper function
5861 */
5862boolean_t
5863mptcp_ok_to_keepalive(struct mptcb *mp_tp)
5864{
5865 boolean_t ret = 1;
cb323159
A
5866
5867 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
5ba3f43e 5868
39236c6e
A
5869 if (mp_tp->mpt_state >= MPTCPS_CLOSE_WAIT) {
5870 ret = 0;
5871 }
0a7de745 5872 return ret;
39236c6e
A
5873}
5874
5875/*
5876 * MPTCP t_maxseg adjustment function
5877 */
5878int
5879mptcp_adj_mss(struct tcpcb *tp, boolean_t mtudisc)
5880{
5881 int mss_lower = 0;
5882 struct mptcb *mp_tp = tptomptp(tp);
5883
0a7de745
A
5884#define MPTCP_COMPUTE_LEN { \
5885 mss_lower = sizeof (struct mptcp_dss_ack_opt); \
5886 if (mp_tp->mpt_flags & MPTCPF_CHECKSUM) \
5887 mss_lower += 2; \
5888 else \
5889 /* adjust to 32-bit boundary + EOL */ \
5890 mss_lower += 2; \
39236c6e 5891}
0a7de745
A
5892 if (mp_tp == NULL) {
5893 return 0;
5894 }
39236c6e 5895
cb323159 5896 socket_lock_assert_owned(mptetoso(mp_tp->mpt_mpte));
5ba3f43e 5897
39236c6e
A
5898 /*
5899 * For the first subflow and subsequent subflows, adjust mss for
5900 * most common MPTCP option size, for case where tcp_mss is called
5901 * during option processing and MTU discovery.
5902 */
5ba3f43e
A
5903 if (!mtudisc) {
5904 if (tp->t_mpflags & TMPF_MPTCP_TRUE &&
5905 !(tp->t_mpflags & TMPF_JOINED_FLOW)) {
5906 MPTCP_COMPUTE_LEN;
5907 }
39236c6e 5908
5ba3f43e
A
5909 if (tp->t_mpflags & TMPF_PREESTABLISHED &&
5910 tp->t_mpflags & TMPF_SENT_JOIN) {
5911 MPTCP_COMPUTE_LEN;
5912 }
5913 } else {
5914 if (tp->t_mpflags & TMPF_MPTCP_TRUE) {
5915 MPTCP_COMPUTE_LEN;
5916 }
39236c6e
A
5917 }
5918
0a7de745 5919 return mss_lower;
39236c6e
A
5920}
5921
5922/*
5923 * Update the pid, upid, uuid of the subflow so, based on parent so
5924 */
5925void
5ba3f43e 5926mptcp_update_last_owner(struct socket *so, struct socket *mp_so)
39236c6e 5927{
5ba3f43e
A
5928 if (so->last_pid != mp_so->last_pid ||
5929 so->last_upid != mp_so->last_upid) {
5930 so->last_upid = mp_so->last_upid;
5931 so->last_pid = mp_so->last_pid;
5932 uuid_copy(so->last_uuid, mp_so->last_uuid);
39236c6e 5933 }
5ba3f43e 5934 so_update_policy(so);
39236c6e
A
5935}
5936
5937static void
5938fill_mptcp_subflow(struct socket *so, mptcp_flow_t *flow, struct mptsub *mpts)
5939{
5940 struct inpcb *inp;
5941
5942 tcp_getconninfo(so, &flow->flow_ci);
5943 inp = sotoinpcb(so);
5944#if INET6
5945 if ((inp->inp_vflag & INP_IPV6) != 0) {
5946 flow->flow_src.ss_family = AF_INET6;
5947 flow->flow_dst.ss_family = AF_INET6;
5948 flow->flow_src.ss_len = sizeof(struct sockaddr_in6);
5949 flow->flow_dst.ss_len = sizeof(struct sockaddr_in6);
5950 SIN6(&flow->flow_src)->sin6_port = inp->in6p_lport;
5951 SIN6(&flow->flow_dst)->sin6_port = inp->in6p_fport;
5952 SIN6(&flow->flow_src)->sin6_addr = inp->in6p_laddr;
5953 SIN6(&flow->flow_dst)->sin6_addr = inp->in6p_faddr;
39037602 5954 } else
39236c6e 5955#endif
3e170ce0 5956 if ((inp->inp_vflag & INP_IPV4) != 0) {
39236c6e
A
5957 flow->flow_src.ss_family = AF_INET;
5958 flow->flow_dst.ss_family = AF_INET;
5959 flow->flow_src.ss_len = sizeof(struct sockaddr_in);
5960 flow->flow_dst.ss_len = sizeof(struct sockaddr_in);
5961 SIN(&flow->flow_src)->sin_port = inp->inp_lport;
5962 SIN(&flow->flow_dst)->sin_port = inp->inp_fport;
5963 SIN(&flow->flow_src)->sin_addr = inp->inp_laddr;
5964 SIN(&flow->flow_dst)->sin_addr = inp->inp_faddr;
5965 }
3e170ce0
A
5966 flow->flow_len = sizeof(*flow);
5967 flow->flow_tcpci_offset = offsetof(mptcp_flow_t, flow_ci);
39236c6e
A
5968 flow->flow_flags = mpts->mpts_flags;
5969 flow->flow_cid = mpts->mpts_connid;
3e170ce0 5970 flow->flow_relseq = mpts->mpts_rel_seq;
5ba3f43e 5971 flow->flow_soerror = mpts->mpts_socket->so_error;
3e170ce0 5972 flow->flow_probecnt = mpts->mpts_probecnt;
39236c6e
A
5973}
5974
5975static int
5976mptcp_pcblist SYSCTL_HANDLER_ARGS
5977{
5978#pragma unused(oidp, arg1, arg2)
5979 int error = 0, f;
5ba3f43e 5980 size_t len;
39236c6e
A
5981 struct mppcb *mpp;
5982 struct mptses *mpte;
5983 struct mptcb *mp_tp;
5984 struct mptsub *mpts;
5985 struct socket *so;
5986 conninfo_mptcp_t mptcpci;
fe8ab488 5987 mptcp_flow_t *flows = NULL;
39236c6e 5988
0a7de745
A
5989 if (req->newptr != USER_ADDR_NULL) {
5990 return EPERM;
5991 }
39236c6e
A
5992
5993 lck_mtx_lock(&mtcbinfo.mppi_lock);
39236c6e 5994 if (req->oldptr == USER_ADDR_NULL) {
5ba3f43e 5995 size_t n = mtcbinfo.mppi_count;
39236c6e 5996 lck_mtx_unlock(&mtcbinfo.mppi_lock);
0a7de745
A
5997 req->oldidx = (n + n / 8) * sizeof(conninfo_mptcp_t) +
5998 4 * (n + n / 8) * sizeof(mptcp_flow_t);
5999 return 0;
39236c6e
A
6000 }
6001 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
fe8ab488 6002 flows = NULL;
cb323159 6003 socket_lock(mpp->mpp_socket, 1);
39236c6e
A
6004 VERIFY(mpp->mpp_flags & MPP_ATTACHED);
6005 mpte = mptompte(mpp);
cb323159
A
6006
6007 socket_lock_assert_owned(mptetoso(mpte));
39236c6e 6008 mp_tp = mpte->mpte_mptcb;
3e170ce0
A
6009
6010 bzero(&mptcpci, sizeof(mptcpci));
39236c6e 6011 mptcpci.mptcpci_state = mp_tp->mpt_state;
3e170ce0
A
6012 mptcpci.mptcpci_flags = mp_tp->mpt_flags;
6013 mptcpci.mptcpci_ltoken = mp_tp->mpt_localtoken;
6014 mptcpci.mptcpci_rtoken = mp_tp->mpt_remotetoken;
6015 mptcpci.mptcpci_notsent_lowat = mp_tp->mpt_notsent_lowat;
6016 mptcpci.mptcpci_snduna = mp_tp->mpt_snduna;
6017 mptcpci.mptcpci_sndnxt = mp_tp->mpt_sndnxt;
6018 mptcpci.mptcpci_sndmax = mp_tp->mpt_sndmax;
6019 mptcpci.mptcpci_lidsn = mp_tp->mpt_local_idsn;
6020 mptcpci.mptcpci_sndwnd = mp_tp->mpt_sndwnd;
6021 mptcpci.mptcpci_rcvnxt = mp_tp->mpt_rcvnxt;
5ba3f43e 6022 mptcpci.mptcpci_rcvatmark = mp_tp->mpt_rcvnxt;
3e170ce0
A
6023 mptcpci.mptcpci_ridsn = mp_tp->mpt_remote_idsn;
6024 mptcpci.mptcpci_rcvwnd = mp_tp->mpt_rcvwnd;
3e170ce0 6025
39236c6e 6026 mptcpci.mptcpci_nflows = mpte->mpte_numflows;
3e170ce0
A
6027 mptcpci.mptcpci_mpte_flags = mpte->mpte_flags;
6028 mptcpci.mptcpci_mpte_addrid = mpte->mpte_addrid_last;
6029 mptcpci.mptcpci_flow_offset =
6030 offsetof(conninfo_mptcp_t, mptcpci_flows);
6031
fe8ab488
A
6032 len = sizeof(*flows) * mpte->mpte_numflows;
6033 if (mpte->mpte_numflows != 0) {
6034 flows = _MALLOC(len, M_TEMP, M_WAITOK | M_ZERO);
6035 if (flows == NULL) {
cb323159 6036 socket_unlock(mpp->mpp_socket, 1);
fe8ab488
A
6037 break;
6038 }
6039 mptcpci.mptcpci_len = sizeof(mptcpci) +
6040 sizeof(*flows) * (mptcpci.mptcpci_nflows - 1);
6041 error = SYSCTL_OUT(req, &mptcpci,
6042 sizeof(mptcpci) - sizeof(mptcp_flow_t));
6043 } else {
6044 mptcpci.mptcpci_len = sizeof(mptcpci);
3e170ce0 6045 error = SYSCTL_OUT(req, &mptcpci, sizeof(mptcpci));
39037602 6046 }
39236c6e 6047 if (error) {
cb323159 6048 socket_unlock(mpp->mpp_socket, 1);
39236c6e
A
6049 FREE(flows, M_TEMP);
6050 break;
6051 }
6052 f = 0;
6053 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
39236c6e 6054 so = mpts->mpts_socket;
39236c6e 6055 fill_mptcp_subflow(so, &flows[f], mpts);
39236c6e
A
6056 f++;
6057 }
cb323159 6058 socket_unlock(mpp->mpp_socket, 1);
fe8ab488
A
6059 if (flows) {
6060 error = SYSCTL_OUT(req, flows, len);
6061 FREE(flows, M_TEMP);
0a7de745 6062 if (error) {
fe8ab488 6063 break;
0a7de745 6064 }
fe8ab488 6065 }
39236c6e
A
6066 }
6067 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6068
0a7de745 6069 return error;
39236c6e
A
6070}
6071
6072SYSCTL_PROC(_net_inet_mptcp, OID_AUTO, pcblist, CTLFLAG_RD | CTLFLAG_LOCKED,
39037602 6073 0, 0, mptcp_pcblist, "S,conninfo_mptcp_t",
39236c6e 6074 "List of active MPTCP connections");
fe8ab488 6075
fe8ab488
A
6076/*
6077 * Set notsent lowat mark on the MPTCB
6078 */
6079int
6080mptcp_set_notsent_lowat(struct mptses *mpte, int optval)
6081{
6082 struct mptcb *mp_tp = NULL;
6083 int error = 0;
6084
0a7de745 6085 if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
fe8ab488 6086 mp_tp = mpte->mpte_mptcb;
0a7de745 6087 }
fe8ab488 6088
0a7de745 6089 if (mp_tp) {
fe8ab488 6090 mp_tp->mpt_notsent_lowat = optval;
0a7de745 6091 } else {
fe8ab488 6092 error = EINVAL;
0a7de745 6093 }
fe8ab488 6094
0a7de745 6095 return error;
fe8ab488
A
6096}
6097
6098u_int32_t
6099mptcp_get_notsent_lowat(struct mptses *mpte)
6100{
6101 struct mptcb *mp_tp = NULL;
6102
0a7de745 6103 if (mpte->mpte_mppcb->mpp_flags & MPP_ATTACHED) {
fe8ab488 6104 mp_tp = mpte->mpte_mptcb;
0a7de745 6105 }
fe8ab488 6106
0a7de745
A
6107 if (mp_tp) {
6108 return mp_tp->mpt_notsent_lowat;
6109 } else {
6110 return 0;
6111 }
fe8ab488
A
6112}
6113
39037602 6114int
5ba3f43e
A
6115mptcp_notsent_lowat_check(struct socket *so)
6116{
fe8ab488
A
6117 struct mptses *mpte;
6118 struct mppcb *mpp;
6119 struct mptcb *mp_tp;
6120 struct mptsub *mpts;
6121
6122 int notsent = 0;
6123
5ba3f43e 6124 mpp = mpsotomppcb(so);
fe8ab488 6125 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
0a7de745 6126 return 0;
fe8ab488
A
6127 }
6128
6129 mpte = mptompte(mpp);
cb323159 6130 socket_lock_assert_owned(mptetoso(mpte));
fe8ab488
A
6131 mp_tp = mpte->mpte_mptcb;
6132
fe8ab488
A
6133 notsent = so->so_snd.sb_cc;
6134
6135 if ((notsent == 0) ||
6136 ((notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)) <=
6137 mp_tp->mpt_notsent_lowat)) {
3e170ce0
A
6138 mptcplog((LOG_DEBUG, "MPTCP Sender: "
6139 "lowat %d notsent %d actual %d \n",
6140 mp_tp->mpt_notsent_lowat, notsent,
6141 notsent - (mp_tp->mpt_sndnxt - mp_tp->mpt_snduna)),
0a7de745
A
6142 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
6143 return 1;
fe8ab488 6144 }
fe8ab488
A
6145
6146 /* When Nagle's algorithm is not disabled, it is better
6147 * to wake up the client even when there is less than one
6148 * maxseg of data left to write.
6149 */
6150 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
6151 int retval = 0;
fe8ab488
A
6152 if (mpts->mpts_flags & MPTSF_ACTIVE) {
6153 struct socket *subf_so = mpts->mpts_socket;
fe8ab488 6154 struct tcpcb *tp = intotcpcb(sotoinpcb(subf_so));
39037602 6155
fe8ab488 6156 notsent = so->so_snd.sb_cc -
0a7de745 6157 (tp->snd_nxt - tp->snd_una);
39037602 6158
fe8ab488
A
6159 if ((tp->t_flags & TF_NODELAY) == 0 &&
6160 notsent > 0 && (notsent <= (int)tp->t_maxseg)) {
6161 retval = 1;
6162 }
3e170ce0 6163 mptcplog((LOG_DEBUG, "MPTCP Sender: lowat %d notsent %d"
fe8ab488 6164 " nodelay false \n",
3e170ce0 6165 mp_tp->mpt_notsent_lowat, notsent),
0a7de745
A
6166 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
6167 return retval;
fe8ab488 6168 }
fe8ab488 6169 }
0a7de745 6170 return 0;
fe8ab488
A
6171}
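
/*
 * The core NOTSENT_LOWAT test in isolation: the writer is woken when
 * nothing is queued, or when the bytes not yet handed to a subflow (queued
 * bytes minus the in-flight window sndnxt - snduna) have dropped to the
 * low-water mark or below. notsent_lowat_reached() is an illustrative
 * name; the per-subflow Nagle special case above is omitted.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdint.h>

static int
notsent_lowat_reached(uint32_t sb_cc, uint64_t sndnxt, uint64_t snduna,
    int32_t lowat)
{
	int64_t not_yet_sent = (int64_t)sb_cc - (int64_t)(sndnxt - snduna);

	return sb_cc == 0 || not_yet_sent <= lowat;
}
#endif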
6172
3e170ce0
A
6173static errno_t
6174mptcp_symptoms_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
0a7de745 6175 void **unitinfo)
3e170ce0
A
6176{
6177#pragma unused(kctlref, sac, unitinfo)
5ba3f43e 6178
0a7de745 6179 if (OSIncrementAtomic(&mptcp_kern_skt_inuse) > 0) {
cb323159 6180 os_log_error(mptcp_log_handle, "%s: MPTCP kernel-control socket for Symptoms already open!", __func__);
0a7de745 6181 }
5ba3f43e
A
6182
6183 mptcp_kern_skt_unit = sac->sc_unit;
6184
0a7de745 6185 return 0;
5ba3f43e
A
6186}
6187
6188static void
cb323159 6189mptcp_allow_uuid(uuid_t uuid, int32_t rssi)
5ba3f43e
A
6190{
6191 struct mppcb *mpp;
6192
6193 /* Iterate over all MPTCP connections */
6194
6195 lck_mtx_lock(&mtcbinfo.mppi_lock);
6196
6197 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
cb323159
A
6198 struct socket *mp_so = mpp->mpp_socket;
6199 struct mptses *mpte = mpp->mpp_pcbe;
5ba3f43e 6200
cb323159 6201 socket_lock(mp_so, 1);
5ba3f43e
A
6202
6203 if (mp_so->so_flags & SOF_DELEGATED &&
0a7de745 6204 uuid_compare(uuid, mp_so->e_uuid)) {
5ba3f43e 6205 goto next;
0a7de745
A
6206 } else if (!(mp_so->so_flags & SOF_DELEGATED) &&
6207 uuid_compare(uuid, mp_so->last_uuid)) {
5ba3f43e 6208 goto next;
0a7de745
A
6209 }
6210
cb323159
A
6211 os_log(mptcp_log_handle, "%s - %lx: Got allowance for useApp with rssi %d\n",
6212 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), rssi);
5ba3f43e
A
6213
6214 mpte->mpte_flags |= MPTE_ACCESS_GRANTED;
6215
cb323159
A
6216 if (rssi > MPTCP_TARGET_BASED_RSSI_THRESHOLD) {
6217 mpte->mpte_flags |= MPTE_CELL_PROHIBITED;
6218 }
6219
5ba3f43e
A
6220 mptcp_check_subflows_and_add(mpte);
6221 mptcp_remove_subflows(mpte);
6222
cb323159 6223 mpte->mpte_flags &= ~(MPTE_ACCESS_GRANTED | MPTE_CELL_PROHIBITED);
5ba3f43e
A
6224
6225next:
cb323159 6226 socket_unlock(mp_so, 1);
5ba3f43e
A
6227 }
6228
6229 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6230}
6231
6232static void
6233mptcp_wifi_status_changed(void)
6234{
6235 struct mppcb *mpp;
6236
6237 /* Iterate over all MPTCP connections */
6238
6239 lck_mtx_lock(&mtcbinfo.mppi_lock);
6240
6241 TAILQ_FOREACH(mpp, &mtcbinfo.mppi_pcbs, mpp_entry) {
cb323159
A
6242 struct socket *mp_so = mpp->mpp_socket;
6243 struct mptses *mpte = mpp->mpp_pcbe;
5ba3f43e 6244
cb323159 6245 socket_lock(mp_so, 1);
5ba3f43e 6246
cb323159
A
6247 /* Only handover- and target-based modes are purely driven by Symptoms' Wi-Fi status */
6248 if (mpte->mpte_svctype != MPTCP_SVCTYPE_HANDOVER &&
6249 mpte->mpte_svctype != MPTCP_SVCTYPE_TARGET_BASED) {
5ba3f43e 6250 goto next;
0a7de745 6251 }
5ba3f43e
A
6252
6253 mptcp_check_subflows_and_add(mpte);
6254 mptcp_check_subflows_and_remove(mpte);
6255
6256next:
cb323159 6257 socket_unlock(mp_so, 1);
5ba3f43e
A
6258 }
6259
6260 lck_mtx_unlock(&mtcbinfo.mppi_lock);
6261}
6262
6263void
6264mptcp_ask_symptoms(struct mptses *mpte)
6265{
6266 struct mptcp_symptoms_ask_uuid ask;
6267 struct socket *mp_so;
6268 struct proc *p;
6269 int pid, prio, err;
6270
6271 if (mptcp_kern_skt_unit == 0) {
cb323159
A
6272 os_log_error(mptcp_log_handle, "%s - %lx: skt_unit is still 0\n",
6273 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
5ba3f43e
A
6274 return;
6275 }
6276
6277 mp_so = mptetoso(mpte);
6278
0a7de745 6279 if (mp_so->so_flags & SOF_DELEGATED) {
5ba3f43e 6280 pid = mp_so->e_pid;
0a7de745 6281 } else {
5ba3f43e 6282 pid = mp_so->last_pid;
0a7de745 6283 }
5ba3f43e
A
6284
6285 p = proc_find(pid);
6286 if (p == PROC_NULL) {
cb323159
A
6287 os_log_error(mptcp_log_handle, "%s - %lx: Couldn't find proc for pid %u\n",
6288 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid);
5ba3f43e
A
6289 return;
6290 }
6291
6292 ask.cmd = MPTCP_SYMPTOMS_ASK_UUID;
6293
0a7de745 6294 if (mp_so->so_flags & SOF_DELEGATED) {
5ba3f43e 6295 uuid_copy(ask.uuid, mp_so->e_uuid);
0a7de745 6296 } else {
5ba3f43e 6297 uuid_copy(ask.uuid, mp_so->last_uuid);
0a7de745 6298 }
5ba3f43e
A
6299
6300 prio = proc_get_effective_task_policy(proc_task(p), TASK_POLICY_ROLE);
6301
cb323159
A
6302 if (prio == TASK_BACKGROUND_APPLICATION || prio == TASK_NONUI_APPLICATION ||
6303 prio == TASK_DARWINBG_APPLICATION) {
5ba3f43e 6304 ask.priority = MPTCP_SYMPTOMS_BACKGROUND;
0a7de745 6305 } else if (prio == TASK_FOREGROUND_APPLICATION) {
5ba3f43e 6306 ask.priority = MPTCP_SYMPTOMS_FOREGROUND;
0a7de745 6307 } else {
5ba3f43e 6308 ask.priority = MPTCP_SYMPTOMS_UNKNOWN;
0a7de745 6309 }
5ba3f43e 6310
5ba3f43e 6311 err = ctl_enqueuedata(mptcp_kern_ctrl_ref, mptcp_kern_skt_unit,
0a7de745 6312 &ask, sizeof(ask), CTL_DATA_EOR);
d9a64523 6313
cb323159
A
6314 os_log(mptcp_log_handle, "%s - %lx: asked symptoms about pid %u, taskprio %u, prio %u, err %d\n",
6315 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), pid, prio, ask.priority, err);
d9a64523 6316
5ba3f43e
A
6317
6318 proc_rele(p);
3e170ce0
A
6319}
6320
6321static errno_t
6322mptcp_symptoms_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t kcunit,
0a7de745 6323 void *unitinfo)
3e170ce0
A
6324{
6325#pragma unused(kctlref, kcunit, unitinfo)
5ba3f43e
A
6326
6327 OSDecrementAtomic(&mptcp_kern_skt_inuse);
6328
0a7de745 6329 return 0;
3e170ce0
A
6330}
6331
6332static errno_t
6333mptcp_symptoms_ctl_send(kern_ctl_ref kctlref, u_int32_t kcunit, void *unitinfo,
0a7de745 6334 mbuf_t m, int flags)
3e170ce0 6335{
5ba3f43e 6336#pragma unused(kctlref, unitinfo, flags)
0a7de745 6337 symptoms_advisory_t *sa = NULL;
3e170ce0 6338
0a7de745 6339 if (kcunit != mptcp_kern_skt_unit) {
cb323159 6340 os_log_error(mptcp_log_handle, "%s: kcunit %u is different from expected one %u\n",
0a7de745
A
6341 __func__, kcunit, mptcp_kern_skt_unit);
6342 }
5ba3f43e 6343
3e170ce0
A
6344 if (mbuf_pkthdr_len(m) < sizeof(*sa)) {
6345 mbuf_freem(m);
0a7de745 6346 return EINVAL;
3e170ce0
A
6347 }
6348
d9a64523 6349 if (mbuf_len(m) < sizeof(*sa)) {
0a7de745
A
6350 os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu\n",
6351 __func__, mbuf_len(m), sizeof(*sa));
d9a64523 6352 mbuf_freem(m);
0a7de745 6353 return EINVAL;
d9a64523
A
6354 }
6355
6356 sa = mbuf_data(m);
3e170ce0 6357
cb323159
A
6358 if (sa->sa_nwk_status != SYMPTOMS_ADVISORY_USEAPP) {
6359 os_log(mptcp_log_handle, "%s: wifi new,old: %d,%d, cell new, old: %d,%d\n", __func__,
6360 sa->sa_wifi_status, mptcp_advisory.sa_wifi_status,
6361 sa->sa_cell_status, mptcp_advisory.sa_cell_status);
3e170ce0 6362
cb323159 6363 if (sa->sa_wifi_status != mptcp_advisory.sa_wifi_status) {
3e170ce0 6364 mptcp_advisory.sa_wifi_status = sa->sa_wifi_status;
5ba3f43e 6365 mptcp_wifi_status_changed();
0a7de745 6366 }
cb323159
A
6367 } else {
6368 struct mptcp_symptoms_answer answer;
0a7de745 6369 errno_t err;
5ba3f43e 6370
cb323159
A
6371 /* We temporarily allow different sizes for ease of submission */
6372 if (mbuf_len(m) != sizeof(uuid_t) + sizeof(*sa) &&
6373 mbuf_len(m) != sizeof(answer)) {
6374 os_log_error(mptcp_log_handle, "%s: mbuf is %lu but need %lu or %lu\n",
6375 __func__, mbuf_len(m), sizeof(uuid_t) + sizeof(*sa),
6376 sizeof(answer));
0a7de745
A
6377 mbuf_free(m);
6378 return EINVAL;
6379 }
5ba3f43e 6380
cb323159
A
6381 memset(&answer, 0, sizeof(answer));
6382
6383 err = mbuf_copydata(m, 0, mbuf_len(m), &answer);
0a7de745
A
6384 if (err) {
6385 os_log_error(mptcp_log_handle, "%s: mbuf_copydata returned %d\n", __func__, err);
6386 mbuf_free(m);
6387 return err;
6388 }
5ba3f43e 6389
cb323159 6390 mptcp_allow_uuid(answer.uuid, answer.rssi);
3e170ce0 6391 }
5ba3f43e 6392
d9a64523 6393 mbuf_freem(m);
0a7de745 6394 return 0;
3e170ce0
A
6395}
6396
6397void
6398mptcp_control_register(void)
6399{
6400 /* Set up the advisory control socket */
6401 struct kern_ctl_reg mptcp_kern_ctl;
6402
6403 bzero(&mptcp_kern_ctl, sizeof(mptcp_kern_ctl));
6404 strlcpy(mptcp_kern_ctl.ctl_name, MPTCP_KERN_CTL_NAME,
6405 sizeof(mptcp_kern_ctl.ctl_name));
6406 mptcp_kern_ctl.ctl_connect = mptcp_symptoms_ctl_connect;
6407 mptcp_kern_ctl.ctl_disconnect = mptcp_symptoms_ctl_disconnect;
6408 mptcp_kern_ctl.ctl_send = mptcp_symptoms_ctl_send;
6409 mptcp_kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED;
6410
6411 (void)ctl_register(&mptcp_kern_ctl, &mptcp_kern_ctrl_ref);
6412}
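
/*
 * A sketch of how a privileged user-space client (such as symptomsd) could
 * attach to a kernel control like the one registered above: resolve the
 * control name to an id with CTLIOCGINFO, then connect a PF_SYSTEM socket.
 * The control name macro MPTCP_KERN_CTL_NAME comes from the MPTCP headers;
 * connect_kern_ctl() is an illustrative name, and since the control is
 * CTL_FLAG_PRIVILEGED the connect will fail without root.
 */
#if 0 /* illustration only; build separately in user space */
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sys_domain.h>
#include <sys/kern_control.h>

static int
connect_kern_ctl(const char *name)
{
	struct sockaddr_ctl sc;
	struct ctl_info info;
	int fd;

	fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
	if (fd < 0) {
		return -1;
	}

	/* Translate the control name into its dynamically assigned id. */
	memset(&info, 0, sizeof(info));
	strlcpy(info.ctl_name, name, sizeof(info.ctl_name));
	if (ioctl(fd, CTLIOCGINFO, &info) < 0) {
		close(fd);
		return -1;
	}

	memset(&sc, 0, sizeof(sc));
	sc.sc_len = sizeof(sc);
	sc.sc_family = AF_SYSTEM;
	sc.ss_sysaddr = AF_SYS_CONTROL;
	sc.sc_id = info.ctl_id;
	sc.sc_unit = 0;		/* let the kernel pick the unit */

	if (connect(fd, (struct sockaddr *)&sc, sizeof(sc)) < 0) {
		close(fd);
		return -1;
	}
	return fd;		/* exchange symptoms_advisory_t messages on fd */
}
#endif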
6413
d9a64523
A
6414/*
6415 * Three return-values:
6416 * 1 : WiFi is bad
6417 * 0 : WiFi is good
cb323159 6418 * -1 : WiFi-state is unknown
d9a64523 6419 */
3e170ce0 6420int
cb323159 6421mptcp_is_wifi_unusable_for_session(struct mptses *mpte)
3e170ce0 6422{
d9a64523 6423 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
0a7de745 6424 if (mptcp_advisory.sa_wifi_status) {
cb323159 6425 return symptoms_is_wifi_lossy() ? 1 : 0;
0a7de745 6426 }
d9a64523
A
6427
6428 /*
6429 * If it's a first-party app and we don't have any info
6430 * about the Wi-Fi state, let's be pessimistic.
6431 */
0a7de745 6432 return -1;
cb323159
A
6433 } else {
6434 if (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_BAD) {
6435 return 1;
6436 }
d9a64523 6437
cb323159
A
6438 /*
6439 * If we are in target-based mode, we can be more lax about declaring
6440 * Wi-Fi "unusable". We only *know* the state once we have received
6441 * the allowance from Symptoms (MPTE_ACCESS_GRANTED).
6442 *
6443 * If the RSSI is not bad enough, MPTE_CELL_PROHIBITED will then
6444 * be set.
6445 *
6446 * In any other case (while in target-mode), consider Wi-Fi bad;
6447 * we are going to ask Symptoms for allowance anyway.
6448 */
6449 if (mpte->mpte_svctype == MPTCP_SVCTYPE_TARGET_BASED) {
6450 if (mpte->mpte_flags & MPTE_ACCESS_GRANTED &&
6451 mpte->mpte_flags & MPTE_CELL_PROHIBITED) {
6452 return 0;
6453 }
d9a64523 6454
cb323159
A
6455 return 1;
6456 }
d9a64523 6457
cb323159 6458 return 0;
0a7de745 6459 }
cb323159 6460}
d9a64523 6461
cb323159
A
6462boolean_t
6463symptoms_is_wifi_lossy(void)
6464{
6465 return (mptcp_advisory.sa_wifi_status & SYMPTOMS_ADVISORY_WIFI_OK) ? false : true;
3e170ce0
A
6466}
6467
490019cf
A
6468/* If TFO data is successfully acked, it must be dropped from the mptcp so */
6469static void
5ba3f43e 6470mptcp_drop_tfo_data(struct mptses *mpte, struct mptsub *mpts)
490019cf 6471{
5ba3f43e 6472 struct socket *mp_so = mptetoso(mpte);
490019cf
A
6473 struct socket *so = mpts->mpts_socket;
6474 struct tcpcb *tp = intotcpcb(sotoinpcb(so));
6475 struct mptcb *mp_tp = mpte->mpte_mptcb;
6476
6477 /* If data was sent with SYN, rewind state */
6478 if (tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED) {
5ba3f43e 6479 u_int64_t mp_droplen = mp_tp->mpt_sndnxt - mp_tp->mpt_snduna;
490019cf 6480 unsigned int tcp_droplen = tp->snd_una - tp->iss - 1;
5ba3f43e 6481
490019cf
A
6482 VERIFY(mp_droplen <= (UINT_MAX));
6483 VERIFY(mp_droplen >= tcp_droplen);
6484
5ba3f43e
A
6485 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
6486 mpts->mpts_iss += tcp_droplen;
6487 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
6488
490019cf
A
6489 if (mp_droplen > tcp_droplen) {
6490 /* handle partial TCP ack */
6491 mp_so->so_flags1 |= SOF1_TFO_REWIND;
6492 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna + (mp_droplen - tcp_droplen);
490019cf
A
6493 mp_droplen = tcp_droplen;
6494 } else {
6495 /* all data on SYN was acked */
6496 mpts->mpts_rel_seq = 1;
6497 mp_tp->mpt_sndnxt = mp_tp->mpt_snduna;
490019cf
A
6498 }
6499 mp_tp->mpt_sndmax -= tcp_droplen;
6500
490019cf
A
6501 if (mp_droplen != 0) {
6502 VERIFY(mp_so->so_snd.sb_mb != NULL);
6503 sbdrop(&mp_so->so_snd, (int)mp_droplen);
6504 }
5ba3f43e
A
6505 }
6506}
6507
6508int
6509mptcp_freeq(struct mptcb *mp_tp)
6510{
6511 struct tseg_qent *q;
6512 int rv = 0;
6513
6514 while ((q = LIST_FIRST(&mp_tp->mpt_segq)) != NULL) {
6515 LIST_REMOVE(q, tqe_q);
6516 m_freem(q->tqe_m);
6517 zfree(tcp_reass_zone, q);
6518 rv = 1;
6519 }
6520 mp_tp->mpt_reassqlen = 0;
0a7de745 6521 return rv;
5ba3f43e
A
6522}
6523
6524static int
6525mptcp_post_event(u_int32_t event_code, int value)
6526{
6527 struct kev_mptcp_data event_data;
6528 struct kev_msg ev_msg;
6529
6530 memset(&ev_msg, 0, sizeof(ev_msg));
6531
0a7de745
A
6532 ev_msg.vendor_code = KEV_VENDOR_APPLE;
6533 ev_msg.kev_class = KEV_NETWORK_CLASS;
6534 ev_msg.kev_subclass = KEV_MPTCP_SUBCLASS;
6535 ev_msg.event_code = event_code;
5ba3f43e
A
6536
6537 event_data.value = value;
6538
0a7de745 6539 ev_msg.dv[0].data_ptr = &event_data;
5ba3f43e
A
6540 ev_msg.dv[0].data_length = sizeof(event_data);
6541
6542 return kev_post_msg(&ev_msg);
6543}
6544
cb323159
A
6545static void
6546mptcp_set_cellicon(struct mptses *mpte, struct mptsub *mpts)
5ba3f43e 6547{
94ff46dc 6548 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
5ba3f43e
A
6549 int error;
6550
6551 /* First-party apps (Siri) don't flip the cellicon */
0a7de745 6552 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
5ba3f43e 6553 return;
0a7de745 6554 }
5ba3f43e 6555
cb323159
A
6556 /* Subflow is disappearing - don't set it on this one */
6557 if (mpts->mpts_flags & (MPTSF_DISCONNECTING | MPTSF_DISCONNECTED)) {
6558 return;
6559 }
6560
94ff46dc
A
6561 /* Fallen back connections are not triggering the cellicon */
6562 if (mpte->mpte_mptcb->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
6563 return;
6564 }
6565
cb323159
A
6566 /* Remember the last time we set the cellicon. Needed for debouncing */
6567 mpte->mpte_last_cellicon_set = tcp_now;
6568
94ff46dc
A
6569 tp->t_timer[TCPT_CELLICON] = OFFSET_FROM_START(tp, MPTCP_CELLICON_TOGGLE_RATE);
6570 tcp_sched_timers(tp);
6571
cb323159
A
6572 if (mpts->mpts_flags & MPTSF_CELLICON_SET &&
6573 mpte->mpte_cellicon_increments != 0) {
6574 if (mptcp_cellicon_refcount == 0) {
6575 os_log_error(mptcp_log_handle, "%s - %lx: Cell should be set (count is %u), but it's zero!\n",
6576 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
6577
6578 /* Continue, so that the icon gets set... */
6579 } else {
6580 /*
6581 * In this case, the cellicon is already set. No need to bump it
6582 * even higher
6583 */
6584
6585 return;
6586 }
6587 }
6588
6589 /* When tearing down this subflow, we need to decrement the
6590 * reference counter
6591 */
6592 mpts->mpts_flags |= MPTSF_CELLICON_SET;
6593
6594 /* Bump this counter so that, when the session gets destroyed, we can
6595 * decrement the global reference counter by whatever is left
6596 */
6597 mpte->mpte_cellicon_increments++;
5ba3f43e 6598
cb323159
A
6599 if (OSIncrementAtomic(&mptcp_cellicon_refcount)) {
6600 /* If cellicon is already set, get out of here! */
5ba3f43e 6601 return;
0a7de745 6602 }
5ba3f43e
A
6603
6604 error = mptcp_post_event(KEV_MPTCP_CELLUSE, 1);
6605
0a7de745 6606 if (error) {
cb323159
A
6607 os_log_error(mptcp_log_handle, "%s - %lx: Setting cellicon failed with %d\n",
6608 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), error);
0a7de745 6609 } else {
cb323159
A
6610 os_log(mptcp_log_handle, "%s - %lx: successfully set the cellicon\n",
6611 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte));
0a7de745 6612 }
5ba3f43e
A
6613}
6614
6615void
cb323159 6616mptcp_clear_cellicon(void)
5ba3f43e 6617{
cb323159
A
6618 int error = mptcp_post_event(KEV_MPTCP_CELLUSE, 0);
6619
6620 if (error) {
6621 os_log_error(mptcp_log_handle, "%s: Unsetting cellicon failed with %d\n",
6622 __func__, error);
6623 } else {
6624 os_log(mptcp_log_handle, "%s: successfully unset the cellicon\n",
6625 __func__);
6626 }
6627}
6628
6629/*
6630 * Returns true if the icon has been flipped to WiFi.
6631 */
6632static boolean_t
6633__mptcp_unset_cellicon(long val)
6634{
6635 if (OSAddAtomic(-val, &mptcp_cellicon_refcount) != 1) {
6636 return false;
6637 }
6638
6639 mptcp_clear_cellicon();
6640
6641 return true;
6642}
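
/*
 * A simplified sketch of the "first one in sets the icon, last one out
 * clears it" counting used by mptcp_cellicon_refcount, written with C11
 * atomics. The kernel relies on OSIncrementAtomic()/OSAddAtomic(), which
 * likewise return the value prior to the update; its exact clearing
 * condition differs slightly (it requires the previous count to be 1).
 * cell_ref()/cell_unref() are illustrative names only.
 */
#if 0 /* illustration only; build separately in user space */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int cell_refcount;

static bool
cell_ref(void)			/* true: caller must turn the icon on */
{
	return atomic_fetch_add(&cell_refcount, 1) == 0;
}

static bool
cell_unref(int val)		/* true: caller must turn the icon off */
{
	return atomic_fetch_sub(&cell_refcount, val) == val;
}
#endif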
5ba3f43e 6643
94ff46dc
A
6644void
6645mptcp_unset_cellicon(struct mptses *mpte, struct mptsub *mpts, uint32_t val)
cb323159
A
6646{
6647 /* First-party apps (Siri) don't flip the cellicon */
6648 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
5ba3f43e 6649 return;
0a7de745 6650 }
5ba3f43e 6651
cb323159
A
6652 if (mpte->mpte_cellicon_increments == 0) {
6653 /* This flow never used cell - get out of here! */
5ba3f43e 6654 return;
490019cf 6655 }
5ba3f43e 6656
cb323159
A
6657 if (mptcp_cellicon_refcount == 0) {
6658 os_log_error(mptcp_log_handle, "%s - %lx: Cell is off, but should be at least %u\n",
6659 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
5ba3f43e 6660
cb323159
A
6661 return;
6662 }
6663
6664 if (mpts) {
6665 if (!(mpts->mpts_flags & MPTSF_CELLICON_SET)) {
6666 return;
6667 }
6668
6669 mpts->mpts_flags &= ~MPTSF_CELLICON_SET;
6670 }
6671
94ff46dc
A
6672 if (mpte->mpte_cellicon_increments < val) {
6673 os_log_error(mptcp_log_handle, "%s - %lx: Increments is %u but want to dec by %u.\n",
6674 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments, val);
6675 val = mpte->mpte_cellicon_increments;
6676 }
6677
6678 mpte->mpte_cellicon_increments -= val;
cb323159
A
6679
6680 if (__mptcp_unset_cellicon(val) == false) {
6681 return;
6682 }
6683
6684 /* All flows are gone - our counter should be at zero too! */
6685 if (mpte->mpte_cellicon_increments != 0) {
6686 os_log_error(mptcp_log_handle, "%s - %lx: Inconsistent state! Cell refcount is zero but increments are at %u\n",
6687 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), mpte->mpte_cellicon_increments);
0a7de745 6688 }
5ba3f43e
A
6689}
6690
6691void
6692mptcp_reset_rexmit_state(struct tcpcb *tp)
6693{
6694 struct mptsub *mpts;
6695 struct inpcb *inp;
6696 struct socket *so;
6697
6698 inp = tp->t_inpcb;
0a7de745 6699 if (inp == NULL) {
5ba3f43e 6700 return;
0a7de745 6701 }
5ba3f43e
A
6702
6703 so = inp->inp_socket;
0a7de745 6704 if (so == NULL) {
5ba3f43e 6705 return;
0a7de745 6706 }
5ba3f43e 6707
0a7de745 6708 if (!(so->so_flags & SOF_MP_SUBFLOW)) {
5ba3f43e 6709 return;
0a7de745 6710 }
5ba3f43e
A
6711
6712 mpts = tp->t_mpsub;
6713
6714 mpts->mpts_flags &= ~MPTSF_WRITE_STALL;
6715 so->so_flags &= ~SOF_MP_TRYFAILOVER;
6716}
6717
6718void
6719mptcp_reset_keepalive(struct tcpcb *tp)
6720{
6721 struct mptsub *mpts = tp->t_mpsub;
6722
6723 mpts->mpts_flags &= ~MPTSF_READ_STALL;
490019cf 6724}