/*
 * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * A note on the MPTCP/NECP-interactions:
 *
 * MPTCP uses NECP-callbacks to get notified of interface/policy events.
 * MPTCP registers to these events at the MPTCP-layer for interface-events
 * through a call to necp_client_register_multipath_cb.
 * To get per-flow events (aka per TCP-subflow), we register to it with
 * necp_client_register_socket_flow. Both registrations happen by using the
 * necp-client-uuid that comes from the app.
 *
 * The locking is rather tricky. In general, we expect the lock-ordering to
 * happen from necp-fd -> necp-client -> mpp_lock.
 *
 * There are however some subtleties.
 *
 * 1. When registering the multipath_cb, we are holding the mpp_lock. This is
 * safe, because it is the very first time this MPTCP-connection goes into NECP.
 * As we go into NECP we take the NECP-locks and thus are guaranteed that no
 * NECP-locks will deadlock us, because these NECP-events will also first take
 * the NECP-locks. Either they win the race and thus won't find our
 * MPTCP-connection, or MPTCP wins the race and will safely install
 * the callbacks while holding the NECP lock.
 *
 * 2. When registering the subflow-callbacks we must unlock the mpp_lock. This
 * is because we have already registered callbacks and we might race against an
 * NECP-event that will match on our socket. So, we have to unlock to be safe.
 *
 * 3. When removing the multipath_cb, we do it in mp_pcbdispose(). At that
 * point the so_usecount has reached 0. We must be careful to not remove the
 * mpp_socket pointers before we unregistered the callback, because, again, we
 * might be racing against an NECP-event. Unregistering must happen with an
 * unlocked mpp_lock, because of the lock-ordering constraint. It could be that
 * an NECP-event triggers before we had a chance to unregister. That's why
 * we need to check for the so_usecount in mptcp_session_necp_cb. If we get
 * there while the socket is being garbage-collected, the use-count will go
 * down to 0 and we exit. Removal of the multipath_cb again happens by taking
 * the NECP-locks so any running NECP-events will finish first and exit cleanly.
 *
 * 4. When removing the subflow-callback, we do it in in_pcbdispose(). Again,
 * the socket-lock must be unlocked for lock-ordering constraints. This gets a
 * bit tricky here, as in tcp_garbage_collect we hold the mp_so and so lock.
 * So, we drop the mp_so-lock as soon as the subflow is unlinked with
 * mptcp_subflow_del. Then, in in_pcbdispose we drop the subflow-lock.
 * If an NECP-event was waiting on the lock in mptcp_subflow_necp_cb, when it
 * gets it, it will realize that the subflow became non-MPTCP and retry (see
 * tcp_lock). Then it waits again on the subflow-lock. When we drop this lock
 * in in_pcbdispose, and enter necp_inpcb_dispose, this one will have to wait
 * for the NECP-lock (held by the other thread that is taking care of the NECP-
 * event). So, the event now finally gets the subflow-lock and then hits an
 * so_usecount that is 0 and exits. Eventually, we can remove the subflow from
 * the NECP callback.
 */
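
/*
 * Illustrative sketch of point (3) above -- this mirrors the check that
 * mptcp_session_necp_cb() performs further below before touching any
 * MPTCP state:
 *
 *	if (mp->mpp_socket->so_usecount == 0)
 *		return;		(socket is being garbage-collected, bail)
 */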

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/mcache.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/protosw.h>

#include <kern/zalloc.h>
#include <kern/locks.h>

#include <mach/sdt.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/mptcp_var.h>
#include <netinet/mptcp.h>
#include <netinet/mptcp_seq.h>
#include <netinet/mptcp_opt.h>
#include <netinet/mptcp_timer.h>

int mptcp_enable = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_enable, 0, "Enable Multipath TCP Support");
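
/*
 * Usage sketch: the knob above surfaces as net.inet.mptcp.enable, so
 * e.g. "sysctl -w net.inet.mptcp.enable=0" turns off MPTCP negotiation
 * for new connections.
 */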

/*
 * Number of times to try negotiating MPTCP on SYN retransmissions.
 * We haven't seen any reports of a middlebox that is dropping all SYN-segments
 * that have an MPTCP-option. Thus, let's be generous and retransmit it 4 times.
 */
int mptcp_mpcap_retries = 4;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
    CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");

/*
 * By default, DSS checksum is turned off; revisit if we ever do
 * MPTCP for non-SSL traffic.
 */
int mptcp_dss_csum = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_dss_csum, 0, "Enable DSS checksum");

/*
 * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
 * is attempted on a different path.
 */
int mptcp_fail_thresh = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_fail_thresh, 0, "Failover threshold");


/*
 * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime,
 * as carrier networks mostly have a 30-minute to 60-minute NAT timeout.
 * Some carrier networks have a timeout of 10 or 15 minutes.
 */
int mptcp_subflow_keeptime = 60 * 14;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
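/* 60 * 14 = 840 seconds, i.e. a 14-minute keeptime. */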

int mptcp_rtthist_rtthresh = 600;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_rtthist_rtthresh, 0, "Rtt threshold");

/*
 * Use RTO history for sending new data
 */
int mptcp_use_rto = 1;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, userto, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_use_rto, 0, "Disable RTO for subflow selection");

int mptcp_rtothresh = 1500;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_rtothresh, 0, "RTO threshold");

/*
 * Probe the preferred path, when it is not in use
 */
uint32_t mptcp_probeto = 1000;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_probeto, 0, "Disable probing by setting to 0");

uint32_t mptcp_probecnt = 5;
SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
    &mptcp_probecnt, 0, "Number of probe writes");

/*
 * Static declarations
 */
static uint16_t mptcp_input_csum(struct tcpcb *, struct mbuf *, uint64_t,
    uint32_t, uint16_t, uint16_t, uint16_t);

static int
mptcp_reass_present(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);
	struct mptcb *mp_tp = mpte->mpte_mptcb;
	struct tseg_qent *q;
	int dowakeup = 0;
	int flags = 0;

	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
		return flags;
	}
	q = LIST_FIRST(&mp_tp->mpt_segq);
	if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
		return flags;
	}

	/*
	 * If there is already another thread doing reassembly for this
	 * connection, it is better to let it finish the job --
	 * (radar 16316196)
	 */
	if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
		return flags;
	}

	mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;

	do {
		mp_tp->mpt_rcvnxt += q->tqe_len;
		LIST_REMOVE(q, tqe_q);
		if (mp_so->so_state & SS_CANTRCVMORE) {
			m_freem(q->tqe_m);
		} else {
			flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
			if (sbappendstream_rcvdemux(mp_so, q->tqe_m, 0, 0)) {
				dowakeup = 1;
			}
		}
		zfree(tcp_reass_zone, q);
		mp_tp->mpt_reassqlen--;
		q = LIST_FIRST(&mp_tp->mpt_segq);
	} while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
	mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;

	if (dowakeup) {
		sorwakeup(mp_so); /* done with socket lock held */
	}
	return flags;
}

static int
mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *m)
{
	struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
	u_int64_t mb_dsn = phdr->mp_dsn;
	struct tseg_qent *q;
	struct tseg_qent *p = NULL;
	struct tseg_qent *nq;
	struct tseg_qent *te = NULL;
	u_int16_t qlimit;

	/*
	 * Limit the number of segments in the reassembly queue to prevent
	 * holding on to too many segments (and thus running out of mbufs).
	 * Make sure to let through the missing segment that caused this
	 * queue to build up. Always keep one global queue entry spare to be
	 * able to process the missing segment.
	 */
	qlimit = min(max(100, mp_so->so_rcv.sb_hiwat >> 10),
	    (tcp_autorcvbuf_max >> 10));
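	/*
	 * Worked example with assumed numbers: a 512 KB receive buffer gives
	 * sb_hiwat >> 10 == 512, so qlimit = min(max(100, 512),
	 * tcp_autorcvbuf_max >> 10) -- i.e. 512 entries unless the global
	 * auto-rcvbuf ceiling is smaller.
	 */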
	if (mb_dsn != mp_tp->mpt_rcvnxt &&
	    (mp_tp->mpt_reassqlen + 1) >= qlimit) {
		tcpstat.tcps_mptcp_rcvmemdrop++;
		m_freem(m);
		*tlenp = 0;
		return 0;
	}

	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
	te = (struct tseg_qent *) zalloc(tcp_reass_zone);
	if (te == NULL) {
		tcpstat.tcps_mptcp_rcvmemdrop++;
		m_freem(m);
		return 0;
	}

	mp_tp->mpt_reassqlen++;

	/*
	 * Find a segment which begins after this one does.
	 */
	LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
		if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
			break;
		}
		p = q;
	}

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already. If so, drop the data from the incoming
	 * segment. If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		int64_t i;
		/* conversion to int (in i) handles seq wraparound */
		i = p->tqe_m->m_pkthdr.mp_dsn + p->tqe_len - mb_dsn;
		if (i > 0) {
			if (i >= *tlenp) {
				tcpstat.tcps_mptcp_rcvduppack++;
				m_freem(m);
				zfree(tcp_reass_zone, te);
				te = NULL;
				mp_tp->mpt_reassqlen--;
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto out;
			}
			m_adj(m, i);
			*tlenp -= i;
			phdr->mp_dsn += i;
		}
	}

	tcpstat.tcps_mp_oodata++;

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (q) {
		int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
		if (i <= 0) {
			break;
		}

		if (i < q->tqe_len) {
			q->tqe_m->m_pkthdr.mp_dsn += i;
			q->tqe_len -= i;
			m_adj(q->tqe_m, i);
			break;
		}

		nq = LIST_NEXT(q, tqe_q);
		LIST_REMOVE(q, tqe_q);
		m_freem(q->tqe_m);
		zfree(tcp_reass_zone, q);
		mp_tp->mpt_reassqlen--;
		q = nq;
	}

	/* Insert the new segment queue entry into place. */
	te->tqe_m = m;
	te->tqe_th = NULL;
	te->tqe_len = *tlenp;

	if (p == NULL) {
		LIST_INSERT_HEAD(&mp_tp->mpt_segq, te, tqe_q);
	} else {
		LIST_INSERT_AFTER(p, te, tqe_q);
	}

out:
	return mptcp_reass_present(mp_so);
}

/*
 * MPTCP input, called when data has been read from a subflow socket.
 */
void
mptcp_input(struct mptses *mpte, struct mbuf *m)
{
	struct socket *mp_so;
	struct mptcb *mp_tp = NULL;
	int count = 0, wakeup = 0;
	struct mbuf *save = NULL, *prev = NULL;
	struct mbuf *freelist = NULL, *tail = NULL;

	VERIFY(m->m_flags & M_PKTHDR);

	mp_so = mptetoso(mpte);
	mp_tp = mpte->mpte_mptcb;

	socket_lock_assert_owned(mp_so);

	DTRACE_MPTCP(input);

	mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);

	/*
	 * Each mbuf contains an MPTCP Data Sequence Map.
	 * Process the data for reassembly, delivery to the MPTCP socket
	 * client, etc.
	 */
	count = mp_so->so_rcv.sb_cc;

	/*
	 * In the degraded fallback case, data is accepted without DSS map
	 */
	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		struct mbuf *iter;
		int mb_dfin = 0;
fallback:
		mptcp_sbrcv_grow(mp_tp);

		iter = m;
		while (iter) {
			if ((iter->m_flags & M_PKTHDR) &&
			    (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
				mb_dfin = 1;
			}

			if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
				/* Don't add zero-length packets, so skip it! */
				if (prev == NULL) {
					m = iter->m_next;
					m_free(iter);
					iter = m;
				} else {
					prev->m_next = iter->m_next;
					m_free(iter);
					iter = prev->m_next;
				}

				/* It was a zero-length packet so next one must be a pkthdr */
				VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
			} else {
				prev = iter;
				iter = iter->m_next;
			}
		}

		/*
		 * Assume a degraded flow, as this may be the first packet
		 * without DSS, and the subflow state is not updated yet.
		 */
		if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
			sorwakeup(mp_so);
		}

		DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
		    struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mptses *, mpte);
		count = mp_so->so_rcv.sb_cc - count;

		mp_tp->mpt_rcvnxt += count;

		if (mb_dfin) {
			mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
			socantrcvmore(mp_so);
		}
		return;
	}

	do {
		u_int64_t mb_dsn;
		int32_t mb_datalen;
		int64_t todrop;
		int mb_dfin = 0;

		VERIFY(m->m_flags & M_PKTHDR);

		/* If fallback occurs, mbufs will not have PKTF_MPTCP set */
		if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
			goto fallback;
		}

		save = m->m_next;
		/*
		 * A single TCP packet formed of multiple mbufs
		 * holds the DSS mapping in the first mbuf of the chain.
		 * Other mbufs in the chain may have M_PKTHDR set
		 * even though they belong to the same TCP packet
		 * and therefore use the DSS mapping stored in the
		 * first mbuf of the mbuf chain. mptcp_input() can
		 * get an mbuf chain with multiple TCP packets.
		 */
		while (save && (!(save->m_flags & M_PKTHDR) ||
		    !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) {
			prev = save;
			save = save->m_next;
		}
		if (prev) {
			prev->m_next = NULL;
		} else {
			m->m_next = NULL;
		}

		mb_dsn = m->m_pkthdr.mp_dsn;
		mb_datalen = m->m_pkthdr.mp_rlen;

		todrop = (mb_dsn + mb_datalen) - (mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd);
		if (todrop > 0) {
			tcpstat.tcps_mptcp_rcvpackafterwin++;

			os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
			    (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
			    mp_tp->mpt_rcvwnd, todrop);

			if (todrop >= mb_datalen) {
				if (freelist == NULL) {
					freelist = m;
				} else {
					tail->m_next = m;
				}

				if (prev != NULL) {
					tail = prev;
				} else {
					tail = m;
				}

				m = save;
				prev = save = NULL;
				continue;
			} else {
				m_adj(m, -todrop);
				mb_datalen -= todrop;
				m->m_pkthdr.mp_rlen -= todrop;
			}

			/*
			 * We drop from the right edge of the mbuf, thus the
			 * DATA_FIN is dropped as well
			 */
			m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
		}

		if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
			if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
			    mp_tp->mpt_rcvnxt)) {
				if (freelist == NULL) {
					freelist = m;
				} else {
					tail->m_next = m;
				}

				if (prev != NULL) {
					tail = prev;
				} else {
					tail = m;
				}

				m = save;
				prev = save = NULL;
				continue;
			} else {
				m_adj(m, (mp_tp->mpt_rcvnxt - mb_dsn));
				mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
				mb_dsn = mp_tp->mpt_rcvnxt;
				m->m_pkthdr.mp_rlen = mb_datalen;
				m->m_pkthdr.mp_dsn = mb_dsn;
			}
		}

		if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
		    !LIST_EMPTY(&mp_tp->mpt_segq)) {
			mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);

			goto next;
		}
		mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);

		mptcp_sbrcv_grow(mp_tp);

		if (sbappendstream_rcvdemux(mp_so, m, 0, 0)) {
			wakeup = 1;
		}

		DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
		    struct sockbuf *, &mp_so->so_rcv,
		    struct sockbuf *, &mp_so->so_snd,
		    struct mptses *, mpte,
		    struct mptcb *, mp_tp);
		count = mp_so->so_rcv.sb_cc - count;
		tcpstat.tcps_mp_rcvtotal++;
		tcpstat.tcps_mp_rcvbytes += count;

		mp_tp->mpt_rcvnxt += count;

next:
		if (mb_dfin) {
			mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
			socantrcvmore(mp_so);
		}
		m = save;
		prev = save = NULL;
		count = mp_so->so_rcv.sb_cc;
	} while (m);

	if (freelist) {
		m_freem(freelist);
	}

	if (wakeup) {
		sorwakeup(mp_so);
	}
}

boolean_t
mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
{
	struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);

	/*
	 * Always send if there is data in the reinject-queue.
	 */
	if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
		return TRUE;
	}

	/*
	 * Don't send, if:
	 *
	 * 1. snd_nxt >= snd_max: Means, basically everything has been sent.
	 *    Except when using TFO, we might be doing a 0-byte write.
	 * 2. snd_una + snd_wnd <= snd_nxt: No space in the receiver's window.
	 * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
	 */

	if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
		return FALSE;
	}

	if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
		return FALSE;
	}

	if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		return FALSE;
	}

	if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
		return FALSE;
	}

	return TRUE;
}

/*
 * MPTCP output.
 */
int
mptcp_output(struct mptses *mpte)
{
	struct mptcb *mp_tp;
	struct mptsub *mpts;
	struct mptsub *mpts_tried = NULL;
	struct socket *mp_so;
	struct mptsub *preferred_mpts = NULL;
	uint64_t old_snd_nxt;
	int error = 0;

	mp_so = mptetoso(mpte);
	socket_lock_assert_owned(mp_so);
	mp_tp = mpte->mpte_mptcb;

	VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
	mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;

	old_snd_nxt = mp_tp->mpt_sndnxt;
	while (mptcp_can_send_more(mp_tp, FALSE)) {
		/* get the "best" subflow to be used for transmission */
		mpts = mptcp_get_subflow(mpte, &preferred_mpts);
		if (mpts == NULL) {
			mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			break;
		}

		/* In case there's just one flow, we reattempt later */
		if (mpts_tried != NULL &&
		    (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
			mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
			mpts_tried->mpts_flags |= MPTSF_ACTIVE;
			mptcp_start_timer(mpte, MPTT_REXMT);
			break;
		}

		/*
		 * Automatic sizing of send socket buffer. Increase the send
		 * socket buffer size if all of the following criteria are met
		 * 1. the receiver has enough buffer space for this data
		 * 2. send buffer is filled to 7/8th with data (so we actually
		 *    have data to make use of it);
		 */
		if (tcp_do_autosendbuf == 1 &&
		    (mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
		    tcp_cansbgrow(&mp_so->so_snd)) {
			if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat &&
			    mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) {
				if (sbreserve(&mp_so->so_snd,
				    min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
				    tcp_autosndbuf_max)) == 1) {
					mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
				}
			}
		}

		DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts,
		    struct socket *, mp_so);
		error = mptcp_subflow_output(mpte, mpts, 0);
		if (error) {
			/* can be a temporary loss of source address or other error */
			mpts->mpts_flags |= MPTSF_FAILINGOVER;
			mpts->mpts_flags &= ~MPTSF_ACTIVE;
			mpts_tried = mpts;
			if (error != ECANCELED) {
				os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
				    error, mpts->mpts_flags);
			}
			break;
		}
		/* The model is to have only one active flow at a time */
		mpts->mpts_flags |= MPTSF_ACTIVE;
		mpts->mpts_probesoon = mpts->mpts_probecnt = 0;

		/* Allows us to update the smoothed rtt */
		if (mptcp_probeto && mpts != preferred_mpts && preferred_mpts != NULL) {
			if (preferred_mpts->mpts_probesoon) {
				if ((tcp_now - preferred_mpts->mpts_probesoon) > mptcp_probeto) {
					mptcp_subflow_output(mpte, preferred_mpts, MPTCP_SUBOUT_PROBING);
					if (preferred_mpts->mpts_probecnt >= mptcp_probecnt) {
						preferred_mpts->mpts_probesoon = 0;
						preferred_mpts->mpts_probecnt = 0;
					}
				}
			} else {
				preferred_mpts->mpts_probesoon = tcp_now;
				preferred_mpts->mpts_probecnt = 0;
			}
		}

		if (mpte->mpte_active_sub == NULL) {
			mpte->mpte_active_sub = mpts;
		} else if (mpte->mpte_active_sub != mpts) {
			mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
			mpte->mpte_active_sub = mpts;

			mptcpstats_inc_switch(mpte, mpts);
		}
	}

	if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
		if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
		    mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
			mptcp_finish_usrclosed(mpte);
		}
	}

	mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);

	/* subflow errors should not be percolated back up */
	return 0;
}

static struct mptsub *
mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
{
	struct tcpcb *tp = sototcpcb(mpts->mpts_socket);

	/*
	 * Lower RTT? Take it, if it's our first one, or
	 * it doesn't have any loss, or the current one has
	 * loss as well.
	 */
	if (tp->t_srtt && *currtt > tp->t_srtt &&
	    (curbest == NULL || tp->t_rxtshift == 0 ||
	    sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
		*currtt = tp->t_srtt;
		return mpts;
	}

	/*
	 * If we find a subflow without loss, take it always!
	 */
	if (curbest &&
	    sototcpcb(curbest->mpts_socket)->t_rxtshift &&
	    tp->t_rxtshift == 0) {
		*currtt = tp->t_srtt;
		return mpts;
	}

	return curbest != NULL ? curbest : mpts;
}

static struct mptsub *
mptcp_return_subflow(struct mptsub *mpts)
{
	if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
		return NULL;
	}

	return mpts;
}

static boolean_t
mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
{
	struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
	int fail_thresh = mptcp_fail_thresh;

	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		fail_thresh *= 2;
	}

	return tp->t_rxtshift >= fail_thresh &&
	       (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
}

/*
 * Return the most eligible subflow to be used for sending data.
 */
struct mptsub *
mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
{
	struct tcpcb *besttp, *secondtp;
	struct inpcb *bestinp, *secondinp;
	struct mptsub *mpts;
	struct mptsub *best = NULL;
	struct mptsub *second_best = NULL;
	int exp_rtt = INT_MAX, cheap_rtt = INT_MAX;

	/*
	 * First Step:
	 * Choose the best subflow for cellular and non-cellular interfaces.
	 */

	TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
		struct socket *so = mpts->mpts_socket;
		struct tcpcb *tp = sototcpcb(so);
		struct inpcb *inp = sotoinpcb(so);

		mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
		    __func__, mpts->mpts_connid, mpts->mpts_flags,
		    INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
		    inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
		    tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
		    mptcp_subflow_cwnd_space(so)),
		    MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);

		/*
		 * First, the hard conditions to reject subflows
		 * (e.g., not connected, ...)
		 */
		if (inp->inp_last_outifp == NULL) {
			continue;
		}

		if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
			continue;
		}

		/* There can only be one subflow in degraded state */
		if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
			best = mpts;
			break;
		}

		/*
		 * If this subflow is waiting to finally send, do it!
		 */
		if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
			return mptcp_return_subflow(mpts);
		}

		/*
		 * Only send if the subflow is MP_CAPABLE. The exceptions to
		 * this rule (degraded or TFO) have been taken care of above.
		 */
		if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
			continue;
		}

		if ((so->so_state & SS_ISDISCONNECTED) ||
		    !(so->so_state & SS_ISCONNECTED) ||
		    !TCPS_HAVEESTABLISHED(tp->t_state) ||
		    tp->t_state > TCPS_CLOSE_WAIT) {
			continue;
		}

		/*
		 * Second, the soft conditions to find the subflow with best
		 * conditions for each set (aka cellular vs non-cellular)
		 */
		if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
			second_best = mptcp_choose_subflow(mpts, second_best,
			    &exp_rtt);
		} else {
			best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
		}
	}

	/*
	 * If there is no preferred or backup subflow, and there is no active
	 * subflow, use the last usable subflow.
	 */
	if (best == NULL) {
		return mptcp_return_subflow(second_best);
	}

	if (second_best == NULL) {
		return mptcp_return_subflow(best);
	}

	besttp = sototcpcb(best->mpts_socket);
	bestinp = sotoinpcb(best->mpts_socket);
	secondtp = sototcpcb(second_best->mpts_socket);
	secondinp = sotoinpcb(second_best->mpts_socket);

	if (preferred != NULL) {
		*preferred = mptcp_return_subflow(best);
	}

	/*
	 * Second Step: Among best and second_best, choose the one that is
	 * most appropriate for this particular service-type.
	 */
	if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
		/*
		 * Only handover if Symptoms tells us to do so.
		 */
		if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
		    mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
			return mptcp_return_subflow(second_best);
		}

		return mptcp_return_subflow(best);
	} else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
		int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
		int rto_thresh = mptcp_rtothresh;

		/* Adjust with symptoms information */
		if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
		    mptcp_is_wifi_unusable_for_session(mpte) != 0) {
			rtt_thresh /= 2;
			rto_thresh /= 2;
		}

		if (besttp->t_srtt && secondtp->t_srtt &&
		    besttp->t_srtt >= rtt_thresh &&
		    secondtp->t_srtt < rtt_thresh) {
			tcpstat.tcps_mp_sel_rtt++;
			mptcplog((LOG_DEBUG, "%s: best cid %d at rtt %d, second cid %d at rtt %d\n", __func__,
			    best->mpts_connid, besttp->t_srtt >> TCP_RTT_SHIFT,
			    second_best->mpts_connid,
			    secondtp->t_srtt >> TCP_RTT_SHIFT),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
			return mptcp_return_subflow(second_best);
		}

		if (mptcp_subflow_is_slow(mpte, best) &&
		    secondtp->t_rxtshift == 0) {
			return mptcp_return_subflow(second_best);
		}

		/* Compare RTOs, select second_best if best's rto exceeds rtothresh */
		if (besttp->t_rxtcur && secondtp->t_rxtcur &&
		    besttp->t_rxtcur >= rto_thresh &&
		    secondtp->t_rxtcur < rto_thresh) {
			tcpstat.tcps_mp_sel_rto++;
			mptcplog((LOG_DEBUG, "%s: best cid %d at rto %d, second cid %d at rto %d\n", __func__,
			    best->mpts_connid, besttp->t_rxtcur,
			    second_best->mpts_connid, secondtp->t_rxtcur),
			    MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);

			return mptcp_return_subflow(second_best);
		}

		/*
		 * None of the above conditions for sending on the secondary
		 * were true. So, let's schedule on the best one, if it still
		 * has some space in the congestion-window.
		 */
		return mptcp_return_subflow(best);
	} else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
		struct mptsub *tmp;

		/*
		 * We only care about RTT when aggregating
		 */
		if (besttp->t_srtt > secondtp->t_srtt) {
			tmp = best;
			best = second_best;
			besttp = secondtp;
			bestinp = secondinp;

			second_best = tmp;
			secondtp = sototcpcb(second_best->mpts_socket);
			secondinp = sotoinpcb(second_best->mpts_socket);
		}

		/* Is there still space in the congestion window? */
		if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
			return mptcp_return_subflow(second_best);
		}

		return mptcp_return_subflow(best);
	} else {
		panic("Unknown service-type configured for MPTCP");
	}

	return NULL;
}

static const char *
mptcp_event_to_str(uint32_t event)
{
	const char *c = "UNDEFINED";
	switch (event) {
	case MPCE_CLOSE:
		c = "MPCE_CLOSE";
		break;
	case MPCE_RECV_DATA_ACK:
		c = "MPCE_RECV_DATA_ACK";
		break;
	case MPCE_RECV_DATA_FIN:
		c = "MPCE_RECV_DATA_FIN";
		break;
	}
	return c;
}

static const char *
mptcp_state_to_str(mptcp_state_t state)
{
	const char *c = "UNDEFINED";
	switch (state) {
	case MPTCPS_CLOSED:
		c = "MPTCPS_CLOSED";
		break;
	case MPTCPS_LISTEN:
		c = "MPTCPS_LISTEN";
		break;
	case MPTCPS_ESTABLISHED:
		c = "MPTCPS_ESTABLISHED";
		break;
	case MPTCPS_CLOSE_WAIT:
		c = "MPTCPS_CLOSE_WAIT";
		break;
	case MPTCPS_FIN_WAIT_1:
		c = "MPTCPS_FIN_WAIT_1";
		break;
	case MPTCPS_CLOSING:
		c = "MPTCPS_CLOSING";
		break;
	case MPTCPS_LAST_ACK:
		c = "MPTCPS_LAST_ACK";
		break;
	case MPTCPS_FIN_WAIT_2:
		c = "MPTCPS_FIN_WAIT_2";
		break;
	case MPTCPS_TIME_WAIT:
		c = "MPTCPS_TIME_WAIT";
		break;
	case MPTCPS_TERMINATE:
		c = "MPTCPS_TERMINATE";
		break;
	}
	return c;
}

void
mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
{
	struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);

	socket_lock_assert_owned(mp_so);

	mptcp_state_t old_state = mp_tp->mpt_state;

	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);

	switch (mp_tp->mpt_state) {
	case MPTCPS_CLOSED:
	case MPTCPS_LISTEN:
		mp_tp->mpt_state = MPTCPS_TERMINATE;
		break;

	case MPTCPS_ESTABLISHED:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_1;
			mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
		} else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSE_WAIT;
		}
		break;

	case MPTCPS_CLOSE_WAIT:
		if (event == MPCE_CLOSE) {
			mp_tp->mpt_state = MPTCPS_LAST_ACK;
			mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
		}
		break;

	case MPTCPS_FIN_WAIT_1:
		if (event == MPCE_RECV_DATA_ACK) {
			mp_tp->mpt_state = MPTCPS_FIN_WAIT_2;
		} else if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_CLOSING;
		}
		break;

	case MPTCPS_CLOSING:
		if (event == MPCE_RECV_DATA_ACK) {
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		}
		break;

	case MPTCPS_LAST_ACK:
		if (event == MPCE_RECV_DATA_ACK) {
			mptcp_close(mp_tp->mpt_mpte, mp_tp);
		}
		break;

	case MPTCPS_FIN_WAIT_2:
		if (event == MPCE_RECV_DATA_FIN) {
			mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
			mp_tp->mpt_state = MPTCPS_TIME_WAIT;
		}
		break;

	case MPTCPS_TIME_WAIT:
	case MPTCPS_TERMINATE:
		break;

	default:
		VERIFY(0);
		/* NOTREACHED */
	}
	DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
	    uint32_t, event);
	mptcplog((LOG_INFO, "%s: %s to %s on event %s\n", __func__,
	    mptcp_state_to_str(old_state),
	    mptcp_state_to_str(mp_tp->mpt_state),
	    mptcp_event_to_str(event)),
	    MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
}

/* If you change this function, match up mptcp_update_rcv_state_f */
void
mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
    uint16_t csum)
{
	struct mptcb *mp_tp = tptomptp(tp);
	u_int64_t full_dsn = 0;

	NTOHL(dss_info->mdss_dsn);
	NTOHL(dss_info->mdss_subflow_seqn);
	NTOHS(dss_info->mdss_data_len);

	/* XXX for autosndbuf grow sb here */
	MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
	mptcp_update_rcv_state_meat(mp_tp, tp,
	    full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
	    csum);
}

void
mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
    u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len,
    uint16_t csum)
{
	if (mdss_data_len == 0) {
		os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));

		if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
			os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
			    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
		}
		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
		return;
	}

	mptcp_notify_mpready(tp->t_inpcb->inp_socket);

	tp->t_rcv_map.mpt_dsn = full_dsn;
	tp->t_rcv_map.mpt_sseq = seqn;
	tp->t_rcv_map.mpt_len = mdss_data_len;
	tp->t_rcv_map.mpt_csum = csum;
	tp->t_mpflags |= TMPF_EMBED_DSN;
}

static int
mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
    int hdrlen)
{
	u_int32_t datalen;

	if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
		return 0;
	}

	datalen = m->m_pkthdr.mp_rlen;

	/* unacceptable DSS option, fallback to TCP */
	if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
		os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
		    __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
	} else {
		return 0;
	}
	tp->t_mpflags |= TMPF_SND_MPFAIL;
	mptcp_notify_mpfail(so);
	m_freem(m);
	return -1;
}

int
mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
    int drop_hdrlen)
{
	mptcp_insert_rmap(tp, m, th);
	if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
	    drop_hdrlen) != 0) {
		return -1;
	}
	return 0;
}

/*
 * MPTCP Checksum support
 * The checksum is calculated whenever the MPTCP DSS option is included
 * in the TCP packet. The checksum includes the sum of the MPTCP pseudo
 * header and the actual data indicated by the length specified in the
 * DSS option.
 */
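
/*
 * For reference (RFC 6824, Section 3.3): the DSS checksum covers a
 * pseudo-header followed by the data; the pseudo-header is
 *
 *	data sequence number (DSN)	8 bytes
 *	subflow sequence number (SSN)	4 bytes
 *	data-level length		2 bytes
 *	checksum			2 bytes
 *
 * which is what the in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum)
 * calls below fold into the sum.
 */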

int
mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
    uint32_t sseq, uint16_t dlen, uint16_t csum, uint16_t dfin)
{
	uint16_t mptcp_csum;

	mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin);
	if (mptcp_csum) {
		tp->t_mpflags |= TMPF_SND_MPFAIL;
		mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
		m_freem(m);
		tcpstat.tcps_mp_badcsum++;
		return -1;
	}
	return 0;
}

static uint16_t
mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
    uint16_t dlen, uint16_t csum, uint16_t dfin)
{
	struct mptcb *mp_tp = tptomptp(tp);
	uint16_t real_len = dlen - dfin;
	uint32_t sum = 0;

	if (mp_tp == NULL) {
		return 0;
	}

	if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
		return 0;
	}

	if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
		return 0;
	}

	/*
	 * The remote side may send a packet with fewer bytes than the
	 * claimed DSS checksum length.
	 */
	if ((int)m_length2(m, NULL) < real_len) {
		return 0xffff;
	}

	if (real_len != 0) {
		sum = m_sum16(m, 0, real_len);
	}

	sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
	ADDCARRY(sum);
	DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
	    uint32_t, sum);

	mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
	    MPTCP_RECEIVER_DBG, MPTCP_LOGLVL_VERBOSE);
	return ~sum & 0xffff;
}

uint32_t
mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
{
	uint32_t sum = 0;

	if (dlen) {
		sum = m_sum16(m, 0, dlen);
	}

	dss_val = mptcp_hton64(dss_val);
	sseq = htonl(sseq);
	dlen = htons(dlen);
	sum += in_pseudo64(dss_val, sseq, dlen);

	ADDCARRY(sum);
	sum = ~sum & 0xffff;
	DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
	mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
	    MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);

	return sum;
}

/*
 * When WiFi signal starts fading, there's more loss and RTT spikes.
 * Check if there has been a large spike by comparing against
 * a tolerable RTT spike threshold.
 */
boolean_t
mptcp_no_rto_spike(struct socket *so)
{
	struct tcpcb *tp = intotcpcb(sotoinpcb(so));
	int32_t spike = 0;

	if (tp->t_rxtcur > mptcp_rtothresh) {
		spike = tp->t_rxtcur - mptcp_rtothresh;

		mptcplog((LOG_DEBUG, "%s: spike = %d rto = %d best = %d cur = %d\n",
		    __func__, spike,
		    tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
		    tp->t_rttcur),
		    (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
	}

	if (spike > 0) {
		return FALSE;
	} else {
		return TRUE;
	}
}

void
mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
{
	VERIFY(mpp->mpp_flags & flag);
	mpp->mpp_flags &= ~flag;

	if (mptcp_should_defer_upcall(mpp)) {
		return;
	}

	if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
		mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;

		mptcp_subflow_workloop(mpp->mpp_pcbe);
	}

	if (mpp->mpp_flags & MPP_SHOULD_RWAKEUP) {
		mpp->mpp_flags &= ~MPP_SHOULD_RWAKEUP;

		sorwakeup(mpp->mpp_socket);
	}

	if (mpp->mpp_flags & MPP_SHOULD_WWAKEUP) {
		mpp->mpp_flags &= ~MPP_SHOULD_WWAKEUP;

		sowwakeup(mpp->mpp_socket);
	}
}

void
mptcp_ask_for_nat64(struct ifnet *ifp)
{
	in6_post_msg(ifp, KEV_INET6_REQUEST_NAT64_PREFIX, NULL, NULL);

	os_log_info(mptcp_log_handle,
	    "%s: asked for NAT64-prefix on %s\n", __func__,
	    ifp->if_name);
}

static void
mptcp_reset_itfinfo(struct mpt_itf_info *info)
{
	memset(info, 0, sizeof(*info));
}

void
mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
    uint32_t necp_flags, __unused bool *viable)
{
	boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
	boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
	boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
	boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
	struct mppcb *mp = (struct mppcb *)handle;
	struct mptses *mpte = mptompte(mp);
	struct socket *mp_so;
	struct mptcb *mp_tp;
	int locked = 0;
	uint32_t i, ifindex;

	ifindex = interface_index;
	VERIFY(ifindex != IFSCOPE_NONE);

	/* About to be garbage-collected (see note about MPTCP/NECP interactions) */
	if (mp->mpp_socket->so_usecount == 0) {
		return;
	}

	mp_so = mptetoso(mpte);

	if (action != NECP_CLIENT_CBACTION_INITIAL) {
		socket_lock(mp_so, 1);
		locked = 1;

		/* Check again, because it might have changed while waiting */
		if (mp->mpp_socket->so_usecount == 0) {
			goto out;
		}
	}

	socket_lock_assert_owned(mp_so);

	mp_tp = mpte->mpte_mptcb;

	os_log_info(mptcp_log_handle, "%s - %lx: action: %u ifindex %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
	    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
	    mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
	    has_v4, has_v6, has_nat64, low_power);

	/* No need on fallen-back sockets */
	if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
		goto out;
	}

	/*
	 * When the interface goes into low-power mode we don't want to
	 * establish new subflows on it. Thus, mark it internally as non-viable.
	 */
	if (low_power) {
		action = NECP_CLIENT_CBACTION_NONVIABLE;
	}

	if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
				continue;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
			}
		}

		mptcp_sched_create_subflows(mpte);
	} else if (action == NECP_CLIENT_CBACTION_VIABLE ||
	    action == NECP_CLIENT_CBACTION_INITIAL) {
		int found_slot = 0, slot_index = -1;
		struct sockaddr *dst;
		struct ifnet *ifp;

		ifnet_head_lock_shared();
		ifp = ifindex2ifnet[ifindex];
		ifnet_head_done();

		if (ifp == NULL) {
			goto out;
		}

		if (IFNET_IS_COMPANION_LINK(ifp)) {
			goto out;
		}

		if (IFNET_IS_EXPENSIVE(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
			goto out;
		}

		if (IFNET_IS_CONSTRAINED(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
			goto out;
		}

		if (IFNET_IS_CELLULAR(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
			goto out;
		}

		if (IS_INTF_CLAT46(ifp)) {
			has_v4 = FALSE;
		}

		/* Look for the slot in which to store/update the interface-info. */
		for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
			/* Found a potential empty slot where we can put it */
			if (mpte->mpte_itfinfo[i].ifindex == 0) {
				found_slot = 1;
				slot_index = i;
			}

			/*
			 * The interface is already in our array. Check if we
			 * need to update it.
			 */
			if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
			    (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
			    mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
			    mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
				found_slot = 1;
				slot_index = i;
				break;
			}

			if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
				/*
				 * Ok, it's already there and we don't need
				 * to update it
				 */
				goto out;
			}
		}

		dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
		if (dst && (dst->sa_family == AF_INET || dst->sa_family == 0) &&
		    has_v6 && !has_nat64 && !has_v4) {
			if (found_slot) {
				mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
				mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
				mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
			}
			mptcp_ask_for_nat64(ifp);
			goto out;
		}

		if (found_slot == 0) {
			int new_size = mpte->mpte_itfinfo_size * 2;
			struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);

			if (info == NULL) {
				os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
				    __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
				goto out;
			}

			memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));

			if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
				_FREE(mpte->mpte_itfinfo, M_TEMP);
			}

			/* We allocated a new array, thus the first new slot must be empty */
			slot_index = mpte->mpte_itfinfo_size;

			mpte->mpte_itfinfo = info;
			mpte->mpte_itfinfo_size = new_size;
		}

		VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
		mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
		mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
		mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
		mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;

		mptcp_sched_create_subflows(mpte);
	}

out:
	if (locked) {
		socket_unlock(mp_so, 1);
	}
}

void
mptcp_set_restrictions(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);
	uint32_t i;

	socket_lock_assert_owned(mp_so);

	ifnet_head_lock_shared();

	for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
		struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
		uint32_t ifindex = info->ifindex;
		struct ifnet *ifp;

		if (ifindex == IFSCOPE_NONE) {
			continue;
		}

		ifp = ifindex2ifnet[ifindex];
		if (ifp == NULL) {
			continue;
		}

		if (IFNET_IS_EXPENSIVE(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
			info->ifindex = IFSCOPE_NONE;
		}

		if (IFNET_IS_CONSTRAINED(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
			info->ifindex = IFSCOPE_NONE;
		}

		if (IFNET_IS_CELLULAR(ifp) &&
		    (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
			info->ifindex = IFSCOPE_NONE;
		}
	}

	ifnet_head_done();
}