]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/mptcp.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp.c
CommitLineData
39236c6e 1/*
d9a64523 2 * Copyright (c) 2012-2018 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
5ba3f43e
A
29/*
30 * A note on the MPTCP/NECP-interactions:
31 *
32 * MPTCP uses NECP-callbacks to get notified of interface/policy events.
33 * MPTCP registers to these events at the MPTCP-layer for interface-events
34 * through a call to necp_client_register_multipath_cb.
35 * To get per-flow events (aka per TCP-subflow), we register to it with
36 * necp_client_register_socket_flow. Both registrations happen by using the
37 * necp-client-uuid that comes from the app.
38 *
39 * The locking is rather tricky. In general, we expect the lock-ordering to
40 * happen from necp-fd -> necp->client -> mpp_lock.
41 *
42 * There are however some subtleties.
43 *
44 * 1. When registering the multipath_cb, we are holding the mpp_lock. This is
45 * safe, because it is the very first time this MPTCP-connection goes into NECP.
46 * As we go into NECP we take the NECP-locks and thus are guaranteed that no
47 * NECP-locks will deadlock us. Because these NECP-events will also first take
48 * the NECP-locks. Either they win the race and thus won't find our
49 * MPTCP-connection. Or, MPTCP wins the race and thus it will safely install
50 * the callbacks while holding the NECP lock.
51 *
52 * 2. When registering the subflow-callbacks we must unlock the mpp_lock. This,
53 * because we have already registered callbacks and we might race against an
54 * NECP-event that will match on our socket. So, we have to unlock to be safe.
55 *
56 * 3. When removing the multipath_cb, we do it in mp_pcbdispose(). The
57 * so_usecount has reached 0. We must be careful to not remove the mpp_socket
58 * pointers before we unregistered the callback. Because, again we might be
59 * racing against an NECP-event. Unregistering must happen with an unlocked
60 * mpp_lock, because of the lock-ordering constraint. It could be that
61 * before we had a chance to unregister an NECP-event triggers. That's why
62 * we need to check for the so_usecount in mptcp_session_necp_cb. If we get
63 * there while the socket is being garbage-collected, the use-count will go
64 * down to 0 and we exit. Removal of the multipath_cb again happens by taking
65 * the NECP-locks so any running NECP-events will finish first and exit cleanly.
66 *
67 * 4. When removing the subflow-callback, we do it in in_pcbdispose(). Again,
68 * the socket-lock must be unlocked for lock-ordering constraints. This gets a
69 * bit tricky here, as in tcp_garbage_collect we hold the mp_so and so lock.
70 * So, we drop the mp_so-lock as soon as the subflow is unlinked with
71 * mptcp_subflow_del. Then, in in_pcbdispose we drop the subflow-lock.
72 * If an NECP-event was waiting on the lock in mptcp_subflow_necp_cb, when it
73 * gets it, it will realize that the subflow became non-MPTCP and retry (see
74 * tcp_lock). Then it waits again on the subflow-lock. When we drop this lock
75 * in in_pcbdispose, and enter necp_inpcb_dispose, this one will have to wait
76 * for the NECP-lock (held by the other thread that is taking care of the NECP-
77 * event). So, the event now finally gets the subflow-lock and then hits an
78 * so_usecount that is 0 and exits. Eventually, we can remove the subflow from
79 * the NECP callback.
80 */
81
39236c6e
A
82#include <sys/param.h>
83#include <sys/systm.h>
84#include <sys/kernel.h>
85#include <sys/mbuf.h>
86#include <sys/mcache.h>
87#include <sys/socket.h>
88#include <sys/socketvar.h>
89#include <sys/syslog.h>
90#include <sys/protosw.h>
91
92#include <kern/zalloc.h>
93#include <kern/locks.h>
94
39236c6e
A
95#include <mach/sdt.h>
96
39236c6e
A
97#include <net/if.h>
98#include <netinet/in.h>
99#include <netinet/in_var.h>
100#include <netinet/tcp.h>
101#include <netinet/tcp_fsm.h>
102#include <netinet/tcp_seq.h>
103#include <netinet/tcp_var.h>
104#include <netinet/mptcp_var.h>
105#include <netinet/mptcp.h>
106#include <netinet/mptcp_seq.h>
107#include <netinet/mptcp_opt.h>
108#include <netinet/mptcp_timer.h>
109
110int mptcp_enable = 1;
111SYSCTL_INT(_net_inet_mptcp, OID_AUTO, enable, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 112 &mptcp_enable, 0, "Enable Multipath TCP Support");
39236c6e 113
cb323159
A
114/*
115 * Number of times to try negotiating MPTCP on SYN retransmissions.
116 * We haven't seen any reports of a middlebox that is dropping all SYN-segments
117 * that have an MPTCP-option. Thus, let's be generous and retransmit it 4 times.
118 */
119int mptcp_mpcap_retries = 4;
39236c6e 120SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mptcp_cap_retr,
0a7de745
A
121 CTLFLAG_RW | CTLFLAG_LOCKED,
122 &mptcp_mpcap_retries, 0, "Number of MP Capable SYN Retries");
39236c6e
A
123
124/*
125 * By default, DSS checksum is turned off, revisit if we ever do
126 * MPTCP for non SSL Traffic.
127 */
128int mptcp_dss_csum = 0;
129SYSCTL_INT(_net_inet_mptcp, OID_AUTO, dss_csum, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 130 &mptcp_dss_csum, 0, "Enable DSS checksum");
39236c6e
A
131
132/*
133 * When mptcp_fail_thresh number of retransmissions are sent, subflow failover
134 * is attempted on a different path.
135 */
136int mptcp_fail_thresh = 1;
137SYSCTL_INT(_net_inet_mptcp, OID_AUTO, fail, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 138 &mptcp_fail_thresh, 0, "Failover threshold");
39236c6e 139
39236c6e 140/*
fe8ab488
A
141 * MPTCP subflows have TCP keepalives set to ON. Set a conservative keeptime
142 * as carrier networks mostly have a 30 minute to 60 minute NAT Timeout.
143 * Some carrier networks have a timeout of 10 or 15 minutes.
39236c6e 144 */
0a7de745 145int mptcp_subflow_keeptime = 60 * 14;
39236c6e 146SYSCTL_INT(_net_inet_mptcp, OID_AUTO, keepalive, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 147 &mptcp_subflow_keeptime, 0, "Keepalive in seconds");
39236c6e 148
3e170ce0
A
149int mptcp_rtthist_rtthresh = 600;
150SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rtthist_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 151 &mptcp_rtthist_rtthresh, 0, "Rtt threshold");
3e170ce0 152
3e170ce0
A
153int mptcp_rtothresh = 1500;
154SYSCTL_INT(_net_inet_mptcp, OID_AUTO, rto_thresh, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 155 &mptcp_rtothresh, 0, "RTO threshold");
3e170ce0 156
3e170ce0
A
157/*
158 * Probe the preferred path, when it is not in use
159 */
3e170ce0
A
160uint32_t mptcp_probeto = 1000;
161SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probeto, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 162 &mptcp_probeto, 0, "Disable probing by setting to 0");
3e170ce0 163
3e170ce0
A
164uint32_t mptcp_probecnt = 5;
165SYSCTL_UINT(_net_inet_mptcp, OID_AUTO, probecnt, CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 166 &mptcp_probecnt, 0, "Number of probe writes");
3e170ce0 167
5ba3f43e
A
168static int
169mptcp_reass_present(struct socket *mp_so)
170{
cb323159
A
171 struct mptses *mpte = mpsotompte(mp_so);
172 struct mptcb *mp_tp = mpte->mpte_mptcb;
5ba3f43e
A
173 struct tseg_qent *q;
174 int dowakeup = 0;
5c9f4661 175 int flags = 0;
5ba3f43e
A
176
177 /*
178 * Present data to user, advancing rcv_nxt through
179 * completed sequence space.
180 */
0a7de745
A
181 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
182 return flags;
183 }
5ba3f43e 184 q = LIST_FIRST(&mp_tp->mpt_segq);
0a7de745
A
185 if (!q || q->tqe_m->m_pkthdr.mp_dsn != mp_tp->mpt_rcvnxt) {
186 return flags;
187 }
5ba3f43e
A
188
189 /*
190 * If there is already another thread doing reassembly for this
191 * connection, it is better to let it finish the job --
192 * (radar 16316196)
193 */
0a7de745
A
194 if (mp_tp->mpt_flags & MPTCPF_REASS_INPROG) {
195 return flags;
196 }
5ba3f43e
A
197
198 mp_tp->mpt_flags |= MPTCPF_REASS_INPROG;
199
200 do {
201 mp_tp->mpt_rcvnxt += q->tqe_len;
202 LIST_REMOVE(q, tqe_q);
203 if (mp_so->so_state & SS_CANTRCVMORE) {
204 m_freem(q->tqe_m);
205 } else {
5c9f4661 206 flags = !!(q->tqe_m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
f427ee49 207 if (sbappendstream_rcvdemux(mp_so, q->tqe_m)) {
5ba3f43e 208 dowakeup = 1;
0a7de745 209 }
5ba3f43e
A
210 }
211 zfree(tcp_reass_zone, q);
212 mp_tp->mpt_reassqlen--;
213 q = LIST_FIRST(&mp_tp->mpt_segq);
214 } while (q && q->tqe_m->m_pkthdr.mp_dsn == mp_tp->mpt_rcvnxt);
215 mp_tp->mpt_flags &= ~MPTCPF_REASS_INPROG;
216
0a7de745 217 if (dowakeup) {
5ba3f43e 218 sorwakeup(mp_so); /* done with socket lock held */
0a7de745
A
219 }
220 return flags;
5ba3f43e
A
221}
222
223static int
224mptcp_reass(struct socket *mp_so, struct pkthdr *phdr, int *tlenp, struct mbuf *m)
225{
226 struct mptcb *mp_tp = mpsotomppcb(mp_so)->mpp_pcbe->mpte_mptcb;
227 u_int64_t mb_dsn = phdr->mp_dsn;
228 struct tseg_qent *q;
229 struct tseg_qent *p = NULL;
230 struct tseg_qent *nq;
231 struct tseg_qent *te = NULL;
f427ee49 232 uint32_t qlimit;
5ba3f43e
A
233
234 /*
235 * Limit the number of segments in the reassembly queue to prevent
236 * holding on to too many segments (and thus running out of mbufs).
237 * Make sure to let the missing segment through which caused this
238 * queue. Always keep one global queue entry spare to be able to
239 * process the missing segment.
240 */
f427ee49 241 qlimit = MIN(MAX(100, mp_so->so_rcv.sb_hiwat >> 10),
5ba3f43e
A
242 (tcp_autorcvbuf_max >> 10));
243 if (mb_dsn != mp_tp->mpt_rcvnxt &&
244 (mp_tp->mpt_reassqlen + 1) >= qlimit) {
245 tcpstat.tcps_mptcp_rcvmemdrop++;
246 m_freem(m);
247 *tlenp = 0;
0a7de745 248 return 0;
5ba3f43e
A
249 }
250
251 /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
252 te = (struct tseg_qent *) zalloc(tcp_reass_zone);
253 if (te == NULL) {
254 tcpstat.tcps_mptcp_rcvmemdrop++;
255 m_freem(m);
0a7de745 256 return 0;
5ba3f43e
A
257 }
258
259 mp_tp->mpt_reassqlen++;
260
261 /*
262 * Find a segment which begins after this one does.
263 */
264 LIST_FOREACH(q, &mp_tp->mpt_segq, tqe_q) {
0a7de745 265 if (MPTCP_SEQ_GT(q->tqe_m->m_pkthdr.mp_dsn, mb_dsn)) {
5ba3f43e 266 break;
0a7de745 267 }
5ba3f43e
A
268 p = q;
269 }
270
271 /*
272 * If there is a preceding segment, it may provide some of
273 * our data already. If so, drop the data from the incoming
274 * segment. If it provides all of our data, drop us.
275 */
276 if (p != NULL) {
277 int64_t i;
278 /* conversion to int (in i) handles seq wraparound */
279 i = p->tqe_m->m_pkthdr.mp_dsn + p->tqe_len - mb_dsn;
280 if (i > 0) {
281 if (i >= *tlenp) {
282 tcpstat.tcps_mptcp_rcvduppack++;
283 m_freem(m);
284 zfree(tcp_reass_zone, te);
285 te = NULL;
286 mp_tp->mpt_reassqlen--;
287 /*
288 * Try to present any queued data
289 * at the left window edge to the user.
290 * This is needed after the 3-WHS
291 * completes.
292 */
293 goto out;
294 }
f427ee49
A
295 VERIFY(i <= INT_MAX);
296 m_adj(m, (int)i);
5ba3f43e
A
297 *tlenp -= i;
298 phdr->mp_dsn += i;
299 }
300 }
301
302 tcpstat.tcps_mp_oodata++;
303
304 /*
305 * While we overlap succeeding segments trim them or,
306 * if they are completely covered, dequeue them.
307 */
308 while (q) {
309 int64_t i = (mb_dsn + *tlenp) - q->tqe_m->m_pkthdr.mp_dsn;
0a7de745 310 if (i <= 0) {
5ba3f43e 311 break;
0a7de745 312 }
5ba3f43e
A
313
314 if (i < q->tqe_len) {
315 q->tqe_m->m_pkthdr.mp_dsn += i;
316 q->tqe_len -= i;
f427ee49
A
317
318 VERIFY(i <= INT_MAX);
319 m_adj(q->tqe_m, (int)i);
5ba3f43e
A
320 break;
321 }
322
323 nq = LIST_NEXT(q, tqe_q);
324 LIST_REMOVE(q, tqe_q);
325 m_freem(q->tqe_m);
326 zfree(tcp_reass_zone, q);
327 mp_tp->mpt_reassqlen--;
328 q = nq;
329 }
330
331 /* Insert the new segment queue entry into place. */
332 te->tqe_m = m;
333 te->tqe_th = NULL;
334 te->tqe_len = *tlenp;
335
336 if (p == NULL) {
337 LIST_INSERT_HEAD(&mp_tp->mpt_segq, te, tqe_q);
338 } else {
339 LIST_INSERT_AFTER(p, te, tqe_q);
340 }
341
342out:
0a7de745 343 return mptcp_reass_present(mp_so);
5ba3f43e 344}
3e170ce0 345
39236c6e
A
346/*
347 * MPTCP input, called when data has been read from a subflow socket.
348 */
349void
350mptcp_input(struct mptses *mpte, struct mbuf *m)
351{
352 struct socket *mp_so;
353 struct mptcb *mp_tp = NULL;
5ba3f43e 354 int count = 0, wakeup = 0;
fe8ab488 355 struct mbuf *save = NULL, *prev = NULL;
39236c6e
A
356 struct mbuf *freelist = NULL, *tail = NULL;
357
358 VERIFY(m->m_flags & M_PKTHDR);
359
5ba3f43e
A
360 mp_so = mptetoso(mpte);
361 mp_tp = mpte->mpte_mptcb;
39236c6e 362
cb323159
A
363 socket_lock_assert_owned(mp_so);
364
39236c6e
A
365 DTRACE_MPTCP(input);
366
5ba3f43e
A
367 mp_tp->mpt_rcvwnd = mptcp_sbspace(mp_tp);
368
39236c6e
A
369 /*
370 * Each mbuf contains MPTCP Data Sequence Map
371 * Process the data for reassembly, delivery to MPTCP socket
372 * client, etc.
373 *
374 */
375 count = mp_so->so_rcv.sb_cc;
376
39236c6e
A
377 /*
378 * In the degraded fallback case, data is accepted without DSS map
379 */
5ba3f43e 380 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
5c9f4661
A
381 struct mbuf *iter;
382 int mb_dfin = 0;
39037602 383fallback:
5ba3f43e
A
384 mptcp_sbrcv_grow(mp_tp);
385
a39ff7e2
A
386 iter = m;
387 while (iter) {
5c9f4661
A
388 if ((iter->m_flags & M_PKTHDR) &&
389 (iter->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN)) {
390 mb_dfin = 1;
a39ff7e2
A
391 }
392
393 if ((iter->m_flags & M_PKTHDR) && m_pktlen(iter) == 0) {
394 /* Don't add zero-length packets, so jump it! */
395 if (prev == NULL) {
396 m = iter->m_next;
397 m_free(iter);
398 iter = m;
399 } else {
400 prev->m_next = iter->m_next;
401 m_free(iter);
402 iter = prev->m_next;
403 }
404
405 /* It was a zero-length packet so next one must be a pkthdr */
406 VERIFY(iter == NULL || iter->m_flags & M_PKTHDR);
407 } else {
408 prev = iter;
409 iter = iter->m_next;
5c9f4661
A
410 }
411 }
412
39037602
A
413 /*
414 * assume degraded flow as this may be the first packet
415 * without DSS, and the subflow state is not updated yet.
fe8ab488 416 */
f427ee49 417 if (sbappendstream_rcvdemux(mp_so, m)) {
39236c6e 418 sorwakeup(mp_so);
0a7de745 419 }
5c9f4661 420
39236c6e
A
421 DTRACE_MPTCP5(receive__degraded, struct mbuf *, m,
422 struct socket *, mp_so,
423 struct sockbuf *, &mp_so->so_rcv,
424 struct sockbuf *, &mp_so->so_snd,
425 struct mptses *, mpte);
426 count = mp_so->so_rcv.sb_cc - count;
5c9f4661
A
427
428 mp_tp->mpt_rcvnxt += count;
429
430 if (mb_dfin) {
431 mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
432 socantrcvmore(mp_so);
433 }
39236c6e
A
434 return;
435 }
436
39236c6e 437 do {
5ba3f43e
A
438 u_int64_t mb_dsn;
439 int32_t mb_datalen;
440 int64_t todrop;
5c9f4661 441 int mb_dfin = 0;
5ba3f43e 442
cb323159
A
443 VERIFY(m->m_flags & M_PKTHDR);
444
fe8ab488 445 /* If fallback occurs, mbufs will not have PKTF_MPTCP set */
0a7de745 446 if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
fe8ab488 447 goto fallback;
0a7de745 448 }
fe8ab488 449
39236c6e 450 save = m->m_next;
fe8ab488
A
451 /*
452 * A single TCP packet formed of multiple mbufs
453 * holds DSS mapping in the first mbuf of the chain.
454 * Other mbufs in the chain may have M_PKTHDR set
455 * even though they belong to the same TCP packet
456 * and therefore use the DSS mapping stored in the
457 * first mbuf of the mbuf chain. mptcp_input() can
458 * get an mbuf chain with multiple TCP packets.
459 */
460 while (save && (!(save->m_flags & M_PKTHDR) ||
461 !(save->m_pkthdr.pkt_flags & PKTF_MPTCP))) {
462 prev = save;
463 save = save->m_next;
464 }
0a7de745 465 if (prev) {
fe8ab488 466 prev->m_next = NULL;
0a7de745 467 } else {
fe8ab488 468 m->m_next = NULL;
0a7de745 469 }
39236c6e
A
470
471 mb_dsn = m->m_pkthdr.mp_dsn;
472 mb_datalen = m->m_pkthdr.mp_rlen;
473
5ba3f43e
A
474 todrop = (mb_dsn + mb_datalen) - (mp_tp->mpt_rcvnxt + mp_tp->mpt_rcvwnd);
475 if (todrop > 0) {
476 tcpstat.tcps_mptcp_rcvpackafterwin++;
477
cb323159
A
478 os_log_info(mptcp_log_handle, "%s - %lx: dropping dsn %u dlen %u rcvnxt %u rcvwnd %u todrop %lld\n",
479 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
480 (uint32_t)mb_dsn, mb_datalen, (uint32_t)mp_tp->mpt_rcvnxt,
481 mp_tp->mpt_rcvwnd, todrop);
482
5ba3f43e 483 if (todrop >= mb_datalen) {
0a7de745 484 if (freelist == NULL) {
5ba3f43e 485 freelist = m;
0a7de745 486 } else {
5ba3f43e 487 tail->m_next = m;
0a7de745 488 }
5ba3f43e 489
0a7de745 490 if (prev != NULL) {
5ba3f43e 491 tail = prev;
0a7de745 492 } else {
5ba3f43e 493 tail = m;
0a7de745 494 }
5ba3f43e
A
495
496 m = save;
497 prev = save = NULL;
498 continue;
499 } else {
f427ee49
A
500 VERIFY(todrop <= INT_MAX);
501 m_adj(m, (int)-todrop);
5ba3f43e 502 mb_datalen -= todrop;
cb323159 503 m->m_pkthdr.mp_rlen -= todrop;
5ba3f43e 504 }
5c9f4661
A
505
506 /*
507 * We drop from the right edge of the mbuf, thus the
508 * DATA_FIN is dropped as well
509 */
510 m->m_pkthdr.pkt_flags &= ~PKTF_MPTCP_DFIN;
39236c6e
A
511 }
512
5ba3f43e 513 if (MPTCP_SEQ_LT(mb_dsn, mp_tp->mpt_rcvnxt)) {
39236c6e 514 if (MPTCP_SEQ_LEQ((mb_dsn + mb_datalen),
5ba3f43e 515 mp_tp->mpt_rcvnxt)) {
0a7de745 516 if (freelist == NULL) {
fe8ab488 517 freelist = m;
0a7de745 518 } else {
39236c6e 519 tail->m_next = m;
0a7de745 520 }
fe8ab488 521
0a7de745 522 if (prev != NULL) {
fe8ab488 523 tail = prev;
0a7de745 524 } else {
39236c6e 525 tail = m;
0a7de745 526 }
fe8ab488 527
39236c6e 528 m = save;
fe8ab488 529 prev = save = NULL;
39236c6e
A
530 continue;
531 } else {
f427ee49
A
532 VERIFY((mp_tp->mpt_rcvnxt - mb_dsn) <= INT_MAX);
533 m_adj(m, (int)(mp_tp->mpt_rcvnxt - mb_dsn));
cb323159
A
534 mb_datalen -= (mp_tp->mpt_rcvnxt - mb_dsn);
535 mb_dsn = mp_tp->mpt_rcvnxt;
f427ee49
A
536 VERIFY(mb_datalen >= 0 && mb_datalen <= USHRT_MAX);
537 m->m_pkthdr.mp_rlen = (uint16_t)mb_datalen;
cb323159 538 m->m_pkthdr.mp_dsn = mb_dsn;
39236c6e 539 }
39236c6e
A
540 }
541
d9a64523
A
542 if (MPTCP_SEQ_GT(mb_dsn, mp_tp->mpt_rcvnxt) ||
543 !LIST_EMPTY(&mp_tp->mpt_segq)) {
544 mb_dfin = mptcp_reass(mp_so, &m->m_pkthdr, &mb_datalen, m);
545
546 goto next;
547 }
548 mb_dfin = !!(m->m_pkthdr.pkt_flags & PKTF_MPTCP_DFIN);
549
5ba3f43e
A
550 mptcp_sbrcv_grow(mp_tp);
551
f427ee49 552 if (sbappendstream_rcvdemux(mp_so, m)) {
5ba3f43e 553 wakeup = 1;
0a7de745 554 }
5ba3f43e 555
39236c6e
A
556 DTRACE_MPTCP6(receive, struct mbuf *, m, struct socket *, mp_so,
557 struct sockbuf *, &mp_so->so_rcv,
558 struct sockbuf *, &mp_so->so_snd,
559 struct mptses *, mpte,
560 struct mptcb *, mp_tp);
39236c6e
A
561 count = mp_so->so_rcv.sb_cc - count;
562 tcpstat.tcps_mp_rcvtotal++;
563 tcpstat.tcps_mp_rcvbytes += count;
3e170ce0 564
5ba3f43e
A
565 mp_tp->mpt_rcvnxt += count;
566
567next:
5c9f4661
A
568 if (mb_dfin) {
569 mptcp_close_fsm(mp_tp, MPCE_RECV_DATA_FIN);
570 socantrcvmore(mp_so);
571 }
39236c6e 572 m = save;
fe8ab488 573 prev = save = NULL;
39236c6e
A
574 count = mp_so->so_rcv.sb_cc;
575 } while (m);
39236c6e 576
0a7de745 577 if (freelist) {
39236c6e 578 m_freem(freelist);
0a7de745 579 }
5ba3f43e 580
0a7de745 581 if (wakeup) {
5ba3f43e 582 sorwakeup(mp_so);
0a7de745 583 }
5ba3f43e
A
584}
585
a39ff7e2
A
586boolean_t
587mptcp_can_send_more(struct mptcb *mp_tp, boolean_t ignore_reinject)
5ba3f43e
A
588{
589 struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
590
591 /*
592 * Always send if there is data in the reinject-queue.
593 */
0a7de745
A
594 if (!ignore_reinject && mp_tp->mpt_mpte->mpte_reinjectq) {
595 return TRUE;
596 }
5ba3f43e
A
597
598 /*
599 * Don't send, if:
600 *
601 * 1. snd_nxt >= snd_max : Means, basically everything has been sent.
602 * Except when using TFO, we might be doing a 0-byte write.
603 * 2. snd_una + snd_wnd <= snd_nxt: No space in the receiver's window
604 * 3. snd_nxt + 1 == snd_max and we are closing: A DATA_FIN is scheduled.
605 */
606
0a7de745
A
607 if (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA) && MPTCP_SEQ_GEQ(mp_tp->mpt_sndnxt, mp_tp->mpt_sndmax)) {
608 return FALSE;
609 }
5ba3f43e 610
0a7de745
A
611 if (MPTCP_SEQ_LEQ(mp_tp->mpt_snduna + mp_tp->mpt_sndwnd, mp_tp->mpt_sndnxt)) {
612 return FALSE;
613 }
5ba3f43e 614
0a7de745
A
615 if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax && mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
616 return FALSE;
617 }
5ba3f43e 618
0a7de745
A
619 if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
620 return FALSE;
621 }
5ba3f43e 622
0a7de745 623 return TRUE;
39236c6e
A
624}
625
626/*
627 * MPTCP output.
628 */
629int
630mptcp_output(struct mptses *mpte)
631{
5ba3f43e 632 struct mptcb *mp_tp;
39236c6e
A
633 struct mptsub *mpts;
634 struct mptsub *mpts_tried = NULL;
635 struct socket *mp_so;
3e170ce0 636 struct mptsub *preferred_mpts = NULL;
5ba3f43e 637 uint64_t old_snd_nxt;
39236c6e
A
638 int error = 0;
639
5ba3f43e
A
640 mp_so = mptetoso(mpte);
641 mp_tp = mpte->mpte_mptcb;
39236c6e 642
4ba76501
A
643 socket_lock_assert_owned(mp_so);
644
645 if (mp_so->so_flags & SOF_DEFUNCT) {
646 return 0;
647 }
648
5ba3f43e
A
649 VERIFY(!(mpte->mpte_mppcb->mpp_flags & MPP_WUPCALL));
650 mpte->mpte_mppcb->mpp_flags |= MPP_WUPCALL;
651
5ba3f43e 652 old_snd_nxt = mp_tp->mpt_sndnxt;
a39ff7e2 653 while (mptcp_can_send_more(mp_tp, FALSE)) {
5ba3f43e 654 /* get the "best" subflow to be used for transmission */
cb323159 655 mpts = mptcp_get_subflow(mpte, &preferred_mpts);
5ba3f43e
A
656 if (mpts == NULL) {
657 mptcplog((LOG_INFO, "%s: no subflow\n", __func__),
658 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
659 break;
660 }
39236c6e 661
5ba3f43e
A
662 /* In case there's just one flow, we reattempt later */
663 if (mpts_tried != NULL &&
664 (mpts == mpts_tried || (mpts->mpts_flags & MPTSF_FAILINGOVER))) {
665 mpts_tried->mpts_flags &= ~MPTSF_FAILINGOVER;
666 mpts_tried->mpts_flags |= MPTSF_ACTIVE;
667 mptcp_start_timer(mpte, MPTT_REXMT);
5ba3f43e
A
668 break;
669 }
670
671 /*
672 * Automatic sizing of send socket buffer. Increase the send
673 * socket buffer size if all of the following criteria are met
674 * 1. the receiver has enough buffer space for this data
675 * 2. send buffer is filled to 7/8th with data (so we actually
676 * have data to make use of it);
677 */
f427ee49 678 if ((mp_so->so_snd.sb_flags & (SB_AUTOSIZE | SB_TRIM)) == SB_AUTOSIZE &&
5ba3f43e
A
679 tcp_cansbgrow(&mp_so->so_snd)) {
680 if ((mp_tp->mpt_sndwnd / 4 * 5) >= mp_so->so_snd.sb_hiwat &&
681 mp_so->so_snd.sb_cc >= (mp_so->so_snd.sb_hiwat / 8 * 7)) {
682 if (sbreserve(&mp_so->so_snd,
683 min(mp_so->so_snd.sb_hiwat + tcp_autosndbuf_inc,
684 tcp_autosndbuf_max)) == 1) {
685 mp_so->so_snd.sb_idealsize = mp_so->so_snd.sb_hiwat;
3e170ce0
A
686 }
687 }
3e170ce0 688 }
3e170ce0 689
5ba3f43e
A
690 DTRACE_MPTCP3(output, struct mptses *, mpte, struct mptsub *, mpts,
691 struct socket *, mp_so);
692 error = mptcp_subflow_output(mpte, mpts, 0);
693 if (error) {
694 /* can be a temporary loss of source address or other error */
695 mpts->mpts_flags |= MPTSF_FAILINGOVER;
696 mpts->mpts_flags &= ~MPTSF_ACTIVE;
697 mpts_tried = mpts;
0a7de745 698 if (error != ECANCELED) {
cb323159
A
699 os_log_error(mptcp_log_handle, "%s - %lx: Error = %d mpts_flags %#x\n",
700 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte),
701 error, mpts->mpts_flags);
0a7de745 702 }
5ba3f43e
A
703 break;
704 }
705 /* The model is to have only one active flow at a time */
706 mpts->mpts_flags |= MPTSF_ACTIVE;
707 mpts->mpts_probesoon = mpts->mpts_probecnt = 0;
708
709 /* Allows us to update the smoothed rtt */
710 if (mptcp_probeto && mpts != preferred_mpts && preferred_mpts != NULL) {
711 if (preferred_mpts->mpts_probesoon) {
712 if ((tcp_now - preferred_mpts->mpts_probesoon) > mptcp_probeto) {
713 mptcp_subflow_output(mpte, preferred_mpts, MPTCP_SUBOUT_PROBING);
714 if (preferred_mpts->mpts_probecnt >= mptcp_probecnt) {
715 preferred_mpts->mpts_probesoon = 0;
716 preferred_mpts->mpts_probecnt = 0;
717 }
718 }
719 } else {
720 preferred_mpts->mpts_probesoon = tcp_now;
721 preferred_mpts->mpts_probecnt = 0;
722 }
723 }
724
725 if (mpte->mpte_active_sub == NULL) {
726 mpte->mpte_active_sub = mpts;
727 } else if (mpte->mpte_active_sub != mpts) {
5ba3f43e
A
728 mpte->mpte_active_sub->mpts_flags &= ~MPTSF_ACTIVE;
729 mpte->mpte_active_sub = mpts;
730
731 mptcpstats_inc_switch(mpte, mpts);
732 }
39236c6e 733 }
5ba3f43e 734
a39ff7e2
A
735 if (mp_tp->mpt_state > MPTCPS_CLOSE_WAIT) {
736 if (mp_tp->mpt_sndnxt + 1 == mp_tp->mpt_sndmax &&
0a7de745 737 mp_tp->mpt_snduna == mp_tp->mpt_sndnxt) {
a39ff7e2 738 mptcp_finish_usrclosed(mpte);
0a7de745 739 }
a39ff7e2
A
740 }
741
5ba3f43e
A
742 mptcp_handle_deferred_upcalls(mpte->mpte_mppcb, MPP_WUPCALL);
743
39236c6e 744 /* subflow errors should not be percolated back up */
0a7de745 745 return 0;
39236c6e
A
746}
747
5ba3f43e
A
748
749static struct mptsub *
750mptcp_choose_subflow(struct mptsub *mpts, struct mptsub *curbest, int *currtt)
751{
752 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
753
754 /*
755 * Lower RTT? Take it, if it's our first one, or
756 * it doesn't has any loss, or the current one has
757 * loss as well.
758 */
759 if (tp->t_srtt && *currtt > tp->t_srtt &&
760 (curbest == NULL || tp->t_rxtshift == 0 ||
0a7de745 761 sototcpcb(curbest->mpts_socket)->t_rxtshift)) {
5ba3f43e 762 *currtt = tp->t_srtt;
0a7de745 763 return mpts;
5ba3f43e
A
764 }
765
766 /*
767 * If we find a subflow without loss, take it always!
768 */
769 if (curbest &&
770 sototcpcb(curbest->mpts_socket)->t_rxtshift &&
771 tp->t_rxtshift == 0) {
772 *currtt = tp->t_srtt;
0a7de745 773 return mpts;
5ba3f43e
A
774 }
775
0a7de745 776 return curbest != NULL ? curbest : mpts;
5ba3f43e
A
777}
778
779static struct mptsub *
780mptcp_return_subflow(struct mptsub *mpts)
781{
0a7de745
A
782 if (mpts && mptcp_subflow_cwnd_space(mpts->mpts_socket) <= 0) {
783 return NULL;
784 }
5ba3f43e 785
0a7de745 786 return mpts;
5ba3f43e
A
787}
788
cb323159
A
789static boolean_t
790mptcp_subflow_is_slow(struct mptses *mpte, struct mptsub *mpts)
791{
792 struct tcpcb *tp = sototcpcb(mpts->mpts_socket);
793 int fail_thresh = mptcp_fail_thresh;
794
c3c9b80d 795 if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER || mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
cb323159
A
796 fail_thresh *= 2;
797 }
798
799 return tp->t_rxtshift >= fail_thresh &&
800 (mptetoso(mpte)->so_snd.sb_cc || mpte->mpte_reinjectq);
801}
802
39236c6e
A
803/*
804 * Return the most eligible subflow to be used for sending data.
39236c6e
A
805 */
806struct mptsub *
cb323159 807mptcp_get_subflow(struct mptses *mpte, struct mptsub **preferred)
39236c6e 808{
5ba3f43e
A
809 struct tcpcb *besttp, *secondtp;
810 struct inpcb *bestinp, *secondinp;
39236c6e 811 struct mptsub *mpts;
3e170ce0
A
812 struct mptsub *best = NULL;
813 struct mptsub *second_best = NULL;
5ba3f43e 814 int exp_rtt = INT_MAX, cheap_rtt = INT_MAX;
39236c6e 815
5ba3f43e
A
816 /*
817 * First Step:
818 * Choose the best subflow for cellular and non-cellular interfaces.
819 */
39236c6e
A
820
821 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
5ba3f43e
A
822 struct socket *so = mpts->mpts_socket;
823 struct tcpcb *tp = sototcpcb(so);
824 struct inpcb *inp = sotoinpcb(so);
825
cb323159
A
826 mptcplog((LOG_DEBUG, "%s mpts %u mpts_flags %#x, suspended %u sostate %#x tpstate %u cellular %d rtt %u rxtshift %u cheap %u exp %u cwnd %d\n",
827 __func__, mpts->mpts_connid, mpts->mpts_flags,
0a7de745
A
828 INP_WAIT_FOR_IF_FEEDBACK(inp), so->so_state, tp->t_state,
829 inp->inp_last_outifp ? IFNET_IS_CELLULAR(inp->inp_last_outifp) : -1,
830 tp->t_srtt, tp->t_rxtshift, cheap_rtt, exp_rtt,
831 mptcp_subflow_cwnd_space(so)),
832 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 833
5ba3f43e
A
834 /*
835 * First, the hard conditions to reject subflows
836 * (e.g., not connected,...)
837 */
cb323159 838 if (inp->inp_last_outifp == NULL) {
5ba3f43e 839 continue;
0a7de745 840 }
5ba3f43e 841
0a7de745 842 if (INP_WAIT_FOR_IF_FEEDBACK(inp)) {
39236c6e 843 continue;
0a7de745 844 }
39236c6e
A
845
846 /* There can only be one subflow in degraded state */
847 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
3e170ce0 848 best = mpts;
39236c6e
A
849 break;
850 }
851
fe8ab488 852 /*
5ba3f43e 853 * If this subflow is waiting to finally send, do it!
fe8ab488 854 */
0a7de745
A
855 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
856 return mptcp_return_subflow(mpts);
857 }
39236c6e 858
5ba3f43e
A
859 /*
860 * Only send if the subflow is MP_CAPABLE. The exceptions to
861 * this rule (degraded or TFO) have been taken care of above.
862 */
0a7de745 863 if (!(mpts->mpts_flags & MPTSF_MP_CAPABLE)) {
39236c6e 864 continue;
0a7de745 865 }
39236c6e 866
5ba3f43e
A
867 if ((so->so_state & SS_ISDISCONNECTED) ||
868 !(so->so_state & SS_ISCONNECTED) ||
869 !TCPS_HAVEESTABLISHED(tp->t_state) ||
0a7de745 870 tp->t_state > TCPS_CLOSE_WAIT) {
fe8ab488 871 continue;
0a7de745 872 }
39236c6e 873
5ba3f43e
A
874 /*
875 * Second, the soft conditions to find the subflow with best
876 * conditions for each set (aka cellular vs non-cellular)
877 */
0a7de745 878 if (IFNET_IS_CELLULAR(inp->inp_last_outifp)) {
5ba3f43e 879 second_best = mptcp_choose_subflow(mpts, second_best,
0a7de745
A
880 &exp_rtt);
881 } else {
5ba3f43e 882 best = mptcp_choose_subflow(mpts, best, &cheap_rtt);
0a7de745 883 }
39236c6e 884 }
3e170ce0 885
39236c6e
A
886 /*
887 * If there is no preferred or backup subflow, and there is no active
888 * subflow use the last usable subflow.
889 */
0a7de745
A
890 if (best == NULL) {
891 return mptcp_return_subflow(second_best);
892 }
39236c6e 893
0a7de745
A
894 if (second_best == NULL) {
895 return mptcp_return_subflow(best);
896 }
5ba3f43e
A
897
898 besttp = sototcpcb(best->mpts_socket);
899 bestinp = sotoinpcb(best->mpts_socket);
900 secondtp = sototcpcb(second_best->mpts_socket);
901 secondinp = sotoinpcb(second_best->mpts_socket);
3e170ce0 902
0a7de745 903 if (preferred != NULL) {
5ba3f43e 904 *preferred = mptcp_return_subflow(best);
0a7de745 905 }
3e170ce0 906
5ba3f43e
A
907 /*
908 * Second Step: Among best and second_best. Choose the one that is
909 * most appropriate for this particular service-type.
910 */
c3c9b80d
A
911 if (mpte->mpte_svctype == MPTCP_SVCTYPE_PURE_HANDOVER) {
912 return mptcp_return_subflow(best);
913 } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_HANDOVER) {
5ba3f43e
A
914 /*
915 * Only handover if Symptoms tells us to do so.
916 */
d9a64523 917 if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
cb323159 918 mptcp_is_wifi_unusable_for_session(mpte) != 0 && mptcp_subflow_is_slow(mpte, best)) {
0a7de745
A
919 return mptcp_return_subflow(second_best);
920 }
5ba3f43e 921
0a7de745 922 return mptcp_return_subflow(best);
5ba3f43e
A
923 } else if (mpte->mpte_svctype == MPTCP_SVCTYPE_INTERACTIVE) {
924 int rtt_thresh = mptcp_rtthist_rtthresh << TCP_RTT_SHIFT;
925 int rto_thresh = mptcp_rtothresh;
926
927 /* Adjust with symptoms information */
d9a64523 928 if (!IFNET_IS_CELLULAR(bestinp->inp_last_outifp) &&
cb323159 929 mptcp_is_wifi_unusable_for_session(mpte) != 0) {
5ba3f43e
A
930 rtt_thresh /= 2;
931 rto_thresh /= 2;
932 }
3e170ce0 933
5ba3f43e
A
934 if (besttp->t_srtt && secondtp->t_srtt &&
935 besttp->t_srtt >= rtt_thresh &&
936 secondtp->t_srtt < rtt_thresh) {
937 tcpstat.tcps_mp_sel_rtt++;
938 mptcplog((LOG_DEBUG, "%s: best cid %d at rtt %d, second cid %d at rtt %d\n", __func__,
939 best->mpts_connid, besttp->t_srtt >> TCP_RTT_SHIFT,
940 second_best->mpts_connid,
941 secondtp->t_srtt >> TCP_RTT_SHIFT),
942 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
0a7de745 943 return mptcp_return_subflow(second_best);
5ba3f43e 944 }
39236c6e 945
cb323159 946 if (mptcp_subflow_is_slow(mpte, best) &&
5ba3f43e 947 secondtp->t_rxtshift == 0) {
0a7de745 948 return mptcp_return_subflow(second_best);
5ba3f43e 949 }
39037602 950
5ba3f43e
A
951 /* Compare RTOs, select second_best if best's rto exceeds rtothresh */
952 if (besttp->t_rxtcur && secondtp->t_rxtcur &&
953 besttp->t_rxtcur >= rto_thresh &&
954 secondtp->t_rxtcur < rto_thresh) {
955 tcpstat.tcps_mp_sel_rto++;
956 mptcplog((LOG_DEBUG, "%s: best cid %d at rto %d, second cid %d at rto %d\n", __func__,
957 best->mpts_connid, besttp->t_rxtcur,
958 second_best->mpts_connid, secondtp->t_rxtcur),
959 MPTCP_SENDER_DBG, MPTCP_LOGLVL_LOG);
960
0a7de745 961 return mptcp_return_subflow(second_best);
5ba3f43e 962 }
fe8ab488 963
5ba3f43e
A
964 /*
965 * None of the above conditions for sending on the secondary
966 * were true. So, let's schedule on the best one, if he still
967 * has some space in the congestion-window.
968 */
0a7de745 969 return mptcp_return_subflow(best);
cb323159 970 } else if (mpte->mpte_svctype >= MPTCP_SVCTYPE_AGGREGATE) {
5ba3f43e 971 struct mptsub *tmp;
fe8ab488 972
5ba3f43e
A
973 /*
974 * We only care about RTT when aggregating
975 */
976 if (besttp->t_srtt > secondtp->t_srtt) {
977 tmp = best;
978 best = second_best;
979 besttp = secondtp;
980 bestinp = secondinp;
981
982 second_best = tmp;
983 secondtp = sototcpcb(second_best->mpts_socket);
984 secondinp = sotoinpcb(second_best->mpts_socket);
fe8ab488
A
985 }
986
5ba3f43e 987 /* Is there still space in the congestion window? */
0a7de745
A
988 if (mptcp_subflow_cwnd_space(bestinp->inp_socket) <= 0) {
989 return mptcp_return_subflow(second_best);
990 }
fe8ab488 991
0a7de745 992 return mptcp_return_subflow(best);
5ba3f43e
A
993 } else {
994 panic("Unknown service-type configured for MPTCP");
fe8ab488 995 }
5ba3f43e 996
0a7de745 997 return NULL;
fe8ab488
A
998}
999
3e170ce0
A
1000static const char *
1001mptcp_event_to_str(uint32_t event)
1002{
1003 const char *c = "UNDEFINED";
1004 switch (event) {
1005 case MPCE_CLOSE:
1006 c = "MPCE_CLOSE";
1007 break;
1008 case MPCE_RECV_DATA_ACK:
1009 c = "MPCE_RECV_DATA_ACK";
1010 break;
1011 case MPCE_RECV_DATA_FIN:
1012 c = "MPCE_RECV_DATA_FIN";
1013 break;
1014 }
0a7de745 1015 return c;
3e170ce0
A
1016}
1017
1018static const char *
1019mptcp_state_to_str(mptcp_state_t state)
1020{
0a7de745 1021 const char *c = "UNDEFINED";
3e170ce0
A
1022 switch (state) {
1023 case MPTCPS_CLOSED:
1024 c = "MPTCPS_CLOSED";
1025 break;
1026 case MPTCPS_LISTEN:
1027 c = "MPTCPS_LISTEN";
1028 break;
1029 case MPTCPS_ESTABLISHED:
1030 c = "MPTCPS_ESTABLISHED";
1031 break;
1032 case MPTCPS_CLOSE_WAIT:
1033 c = "MPTCPS_CLOSE_WAIT";
1034 break;
1035 case MPTCPS_FIN_WAIT_1:
1036 c = "MPTCPS_FIN_WAIT_1";
1037 break;
1038 case MPTCPS_CLOSING:
1039 c = "MPTCPS_CLOSING";
1040 break;
1041 case MPTCPS_LAST_ACK:
1042 c = "MPTCPS_LAST_ACK";
1043 break;
1044 case MPTCPS_FIN_WAIT_2:
1045 c = "MPTCPS_FIN_WAIT_2";
1046 break;
1047 case MPTCPS_TIME_WAIT:
1048 c = "MPTCPS_TIME_WAIT";
1049 break;
3e170ce0
A
1050 case MPTCPS_TERMINATE:
1051 c = "MPTCPS_TERMINATE";
1052 break;
1053 }
0a7de745 1054 return c;
3e170ce0
A
1055}
1056
39236c6e
A
1057void
1058mptcp_close_fsm(struct mptcb *mp_tp, uint32_t event)
1059{
cb323159
A
1060 struct socket *mp_so = mptetoso(mp_tp->mpt_mpte);
1061
1062 socket_lock_assert_owned(mp_so);
1063
3e170ce0 1064 mptcp_state_t old_state = mp_tp->mpt_state;
39236c6e 1065
39037602 1066 DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
39236c6e
A
1067 uint32_t, event);
1068
1069 switch (mp_tp->mpt_state) {
1070 case MPTCPS_CLOSED:
1071 case MPTCPS_LISTEN:
a39ff7e2 1072 mp_tp->mpt_state = MPTCPS_TERMINATE;
39236c6e
A
1073 break;
1074
1075 case MPTCPS_ESTABLISHED:
fe8ab488 1076 if (event == MPCE_CLOSE) {
39236c6e 1077 mp_tp->mpt_state = MPTCPS_FIN_WAIT_1;
fe8ab488 1078 mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
39037602 1079 } else if (event == MPCE_RECV_DATA_FIN) {
fe8ab488 1080 mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
39236c6e 1081 mp_tp->mpt_state = MPTCPS_CLOSE_WAIT;
39037602 1082 }
39236c6e
A
1083 break;
1084
1085 case MPTCPS_CLOSE_WAIT:
fe8ab488 1086 if (event == MPCE_CLOSE) {
39236c6e 1087 mp_tp->mpt_state = MPTCPS_LAST_ACK;
fe8ab488 1088 mp_tp->mpt_sndmax += 1; /* adjust for Data FIN */
39037602 1089 }
39236c6e
A
1090 break;
1091
1092 case MPTCPS_FIN_WAIT_1:
39037602 1093 if (event == MPCE_RECV_DATA_ACK) {
39236c6e 1094 mp_tp->mpt_state = MPTCPS_FIN_WAIT_2;
39037602 1095 } else if (event == MPCE_RECV_DATA_FIN) {
fe8ab488 1096 mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
39236c6e 1097 mp_tp->mpt_state = MPTCPS_CLOSING;
39037602 1098 }
39236c6e
A
1099 break;
1100
1101 case MPTCPS_CLOSING:
0a7de745 1102 if (event == MPCE_RECV_DATA_ACK) {
39236c6e 1103 mp_tp->mpt_state = MPTCPS_TIME_WAIT;
0a7de745 1104 }
39236c6e
A
1105 break;
1106
1107 case MPTCPS_LAST_ACK:
0a7de745 1108 if (event == MPCE_RECV_DATA_ACK) {
5ba3f43e 1109 mptcp_close(mp_tp->mpt_mpte, mp_tp);
0a7de745 1110 }
39236c6e
A
1111 break;
1112
1113 case MPTCPS_FIN_WAIT_2:
fe8ab488
A
1114 if (event == MPCE_RECV_DATA_FIN) {
1115 mp_tp->mpt_rcvnxt += 1; /* adj remote data FIN */
39236c6e 1116 mp_tp->mpt_state = MPTCPS_TIME_WAIT;
39037602 1117 }
39236c6e
A
1118 break;
1119
1120 case MPTCPS_TIME_WAIT:
fe8ab488 1121 case MPTCPS_TERMINATE:
39236c6e 1122 break;
5ba3f43e 1123
39236c6e
A
1124 default:
1125 VERIFY(0);
1126 /* NOTREACHED */
1127 }
39037602 1128 DTRACE_MPTCP2(state__change, struct mptcb *, mp_tp,
39236c6e 1129 uint32_t, event);
5ba3f43e 1130 mptcplog((LOG_INFO, "%s: %s to %s on event %s\n", __func__,
3e170ce0
A
1131 mptcp_state_to_str(old_state),
1132 mptcp_state_to_str(mp_tp->mpt_state),
1133 mptcp_event_to_str(event)),
1134 MPTCP_STATE_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
1135}
1136
39236c6e
A
1137/* If you change this function, match up mptcp_update_rcv_state_f */
1138void
1139mptcp_update_dss_rcv_state(struct mptcp_dsn_opt *dss_info, struct tcpcb *tp,
1140 uint16_t csum)
1141{
1142 struct mptcb *mp_tp = tptomptp(tp);
1143 u_int64_t full_dsn = 0;
1144
1145 NTOHL(dss_info->mdss_dsn);
1146 NTOHL(dss_info->mdss_subflow_seqn);
1147 NTOHS(dss_info->mdss_data_len);
1148
1149 /* XXX for autosndbuf grow sb here */
39236c6e 1150 MPTCP_EXTEND_DSN(mp_tp->mpt_rcvnxt, dss_info->mdss_dsn, full_dsn);
39236c6e
A
1151 mptcp_update_rcv_state_meat(mp_tp, tp,
1152 full_dsn, dss_info->mdss_subflow_seqn, dss_info->mdss_data_len,
1153 csum);
39236c6e
A
1154}
1155
1156void
1157mptcp_update_rcv_state_meat(struct mptcb *mp_tp, struct tcpcb *tp,
1158 u_int64_t full_dsn, u_int32_t seqn, u_int16_t mdss_data_len,
1159 uint16_t csum)
1160{
1161 if (mdss_data_len == 0) {
cb323159
A
1162 os_log_error(mptcp_log_handle, "%s - %lx: Infinite Mapping.\n",
1163 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte));
3e170ce0 1164
39236c6e 1165 if ((mp_tp->mpt_flags & MPTCPF_CHECKSUM) && (csum != 0)) {
cb323159
A
1166 os_log_error(mptcp_log_handle, "%s - %lx: Bad checksum %x \n",
1167 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mp_tp->mpt_mpte), csum);
39236c6e
A
1168 }
1169 mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1170 return;
1171 }
39236c6e 1172
39236c6e 1173 mptcp_notify_mpready(tp->t_inpcb->inp_socket);
5c9f4661 1174
39236c6e
A
1175 tp->t_rcv_map.mpt_dsn = full_dsn;
1176 tp->t_rcv_map.mpt_sseq = seqn;
1177 tp->t_rcv_map.mpt_len = mdss_data_len;
1178 tp->t_rcv_map.mpt_csum = csum;
1179 tp->t_mpflags |= TMPF_EMBED_DSN;
1180}
1181
1182
3e170ce0
A
1183static int
1184mptcp_validate_dss_map(struct socket *so, struct tcpcb *tp, struct mbuf *m,
1185 int hdrlen)
1186{
5ba3f43e 1187 u_int32_t datalen;
3e170ce0 1188
0a7de745 1189 if (!(m->m_pkthdr.pkt_flags & PKTF_MPTCP)) {
3e170ce0 1190 return 0;
0a7de745 1191 }
3e170ce0 1192
3e170ce0
A
1193 datalen = m->m_pkthdr.mp_rlen;
1194
3e170ce0
A
1195 /* unacceptable DSS option, fallback to TCP */
1196 if (m->m_pkthdr.len > ((int) datalen + hdrlen)) {
cb323159
A
1197 os_log_error(mptcp_log_handle, "%s - %lx: mbuf len %d, MPTCP expected %d",
1198 __func__, (unsigned long)VM_KERNEL_ADDRPERM(tptomptp(tp)->mpt_mpte), m->m_pkthdr.len, datalen);
3e170ce0
A
1199 } else {
1200 return 0;
1201 }
1202 tp->t_mpflags |= TMPF_SND_MPFAIL;
1203 mptcp_notify_mpfail(so);
1204 m_freem(m);
1205 return -1;
1206}
1207
1208int
5c9f4661 1209mptcp_input_preproc(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th,
0a7de745 1210 int drop_hdrlen)
3e170ce0 1211{
5c9f4661 1212 mptcp_insert_rmap(tp, m, th);
3e170ce0 1213 if (mptcp_validate_dss_map(tp->t_inpcb->inp_socket, tp, m,
0a7de745 1214 drop_hdrlen) != 0) {
3e170ce0 1215 return -1;
0a7de745 1216 }
3e170ce0
A
1217 return 0;
1218}
1219
3e170ce0 1220static uint16_t
5ba3f43e 1221mptcp_input_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn, uint32_t sseq,
f427ee49 1222 uint16_t dlen, uint16_t csum, int dfin)
39236c6e
A
1223{
1224 struct mptcb *mp_tp = tptomptp(tp);
f427ee49 1225 int real_len = dlen - dfin;
39236c6e 1226 uint32_t sum = 0;
39236c6e 1227
f427ee49
A
1228 VERIFY(real_len >= 0);
1229
0a7de745
A
1230 if (mp_tp == NULL) {
1231 return 0;
1232 }
39236c6e 1233
0a7de745
A
1234 if (!(mp_tp->mpt_flags & MPTCPF_CHECKSUM)) {
1235 return 0;
1236 }
39236c6e 1237
0a7de745
A
1238 if (tp->t_mpflags & TMPF_TCP_FALLBACK) {
1239 return 0;
1240 }
39236c6e 1241
39037602 1242 /*
39236c6e
A
1243 * The remote side may send a packet with fewer bytes than the
1244 * claimed DSS checksum length.
1245 */
5c9f4661 1246 if ((int)m_length2(m, NULL) < real_len) {
0a7de745 1247 return 0xffff;
5c9f4661 1248 }
39236c6e 1249
0a7de745 1250 if (real_len != 0) {
5c9f4661 1251 sum = m_sum16(m, 0, real_len);
0a7de745 1252 }
39236c6e 1253
5ba3f43e 1254 sum += in_pseudo64(htonll(dsn), htonl(sseq), htons(dlen) + csum);
39236c6e 1255 ADDCARRY(sum);
f427ee49 1256
39236c6e
A
1257 DTRACE_MPTCP3(checksum__result, struct tcpcb *, tp, struct mbuf *, m,
1258 uint32_t, sum);
5ba3f43e 1259
0a7de745 1260 return ~sum & 0xffff;
39236c6e
A
1261}
1262
f427ee49
A
1263/*
1264 * MPTCP Checksum support
1265 * The checksum is calculated whenever the MPTCP DSS option is included
1266 * in the TCP packet. The checksum includes the sum of the MPTCP psuedo
1267 * header and the actual data indicated by the length specified in the
1268 * DSS option.
1269 */
1270
1271int
1272mptcp_validate_csum(struct tcpcb *tp, struct mbuf *m, uint64_t dsn,
1273 uint32_t sseq, uint16_t dlen, uint16_t csum, int dfin)
1274{
1275 uint16_t mptcp_csum;
1276
1277 mptcp_csum = mptcp_input_csum(tp, m, dsn, sseq, dlen, csum, dfin);
1278 if (mptcp_csum) {
1279 tp->t_mpflags |= TMPF_SND_MPFAIL;
1280 mptcp_notify_mpfail(tp->t_inpcb->inp_socket);
1281 m_freem(m);
1282 tcpstat.tcps_mp_badcsum++;
1283 return -1;
1284 }
1285 return 0;
1286}
1287
1288uint16_t
5ba3f43e 1289mptcp_output_csum(struct mbuf *m, uint64_t dss_val, uint32_t sseq, uint16_t dlen)
39236c6e 1290{
a39ff7e2 1291 uint32_t sum = 0;
39236c6e 1292
0a7de745 1293 if (dlen) {
5ba3f43e 1294 sum = m_sum16(m, 0, dlen);
0a7de745 1295 }
39236c6e
A
1296
1297 dss_val = mptcp_hton64(dss_val);
5ba3f43e
A
1298 sseq = htonl(sseq);
1299 dlen = htons(dlen);
1300 sum += in_pseudo64(dss_val, sseq, dlen);
39236c6e
A
1301
1302 ADDCARRY(sum);
1303 sum = ~sum & 0xffff;
5ba3f43e
A
1304 DTRACE_MPTCP2(checksum__result, struct mbuf *, m, uint32_t, sum);
1305 mptcplog((LOG_DEBUG, "%s: sum = %x \n", __func__, sum),
0a7de745 1306 MPTCP_SENDER_DBG, MPTCP_LOGLVL_VERBOSE);
5ba3f43e 1307
f427ee49 1308 return (uint16_t)sum;
3e170ce0
A
1309}
1310
1311/*
1312 * When WiFi signal starts fading, there's more loss and RTT spikes.
1313 * Check if there has been a large spike by comparing against
1314 * a tolerable RTT spike threshold.
1315 */
1316boolean_t
1317mptcp_no_rto_spike(struct socket *so)
1318{
1319 struct tcpcb *tp = intotcpcb(sotoinpcb(so));
1320 int32_t spike = 0;
1321
5ba3f43e 1322 if (tp->t_rxtcur > mptcp_rtothresh) {
3e170ce0
A
1323 spike = tp->t_rxtcur - mptcp_rtothresh;
1324
5ba3f43e
A
1325 mptcplog((LOG_DEBUG, "%s: spike = %d rto = %d best = %d cur = %d\n",
1326 __func__, spike,
3e170ce0
A
1327 tp->t_rxtcur, tp->t_rttbest >> TCP_RTT_SHIFT,
1328 tp->t_rttcur),
0a7de745 1329 (MPTCP_SOCKET_DBG | MPTCP_SENDER_DBG), MPTCP_LOGLVL_LOG);
3e170ce0
A
1330 }
1331
0a7de745
A
1332 if (spike > 0) {
1333 return FALSE;
3e170ce0 1334 } else {
0a7de745 1335 return TRUE;
3e170ce0 1336 }
39236c6e 1337}
5ba3f43e
A
1338
1339void
1340mptcp_handle_deferred_upcalls(struct mppcb *mpp, uint32_t flag)
1341{
1342 VERIFY(mpp->mpp_flags & flag);
1343 mpp->mpp_flags &= ~flag;
1344
0a7de745 1345 if (mptcp_should_defer_upcall(mpp)) {
5ba3f43e 1346 return;
0a7de745 1347 }
5ba3f43e
A
1348
1349 if (mpp->mpp_flags & MPP_SHOULD_WORKLOOP) {
1350 mpp->mpp_flags &= ~MPP_SHOULD_WORKLOOP;
1351
1352 mptcp_subflow_workloop(mpp->mpp_pcbe);
1353 }
1354
1355 if (mpp->mpp_flags & MPP_SHOULD_RWAKEUP) {
1356 mpp->mpp_flags &= ~MPP_SHOULD_RWAKEUP;
1357
1358 sorwakeup(mpp->mpp_socket);
1359 }
1360
1361 if (mpp->mpp_flags & MPP_SHOULD_WWAKEUP) {
1362 mpp->mpp_flags &= ~MPP_SHOULD_WWAKEUP;
1363
1364 sowwakeup(mpp->mpp_socket);
1365 }
5ba3f43e
A
1366}
1367
5ba3f43e
A
1368static void
1369mptcp_reset_itfinfo(struct mpt_itf_info *info)
1370{
cb323159 1371 memset(info, 0, sizeof(*info));
5ba3f43e
A
1372}
1373
1374void
d9a64523 1375mptcp_session_necp_cb(void *handle, int action, uint32_t interface_index,
0a7de745 1376 uint32_t necp_flags, __unused bool *viable)
5ba3f43e 1377{
d9a64523
A
1378 boolean_t has_v4 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV4);
1379 boolean_t has_v6 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_IPV6);
1380 boolean_t has_nat64 = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_HAS_NAT64);
1381 boolean_t low_power = !!(necp_flags & NECP_CLIENT_RESULT_FLAG_INTERFACE_LOW_POWER);
5ba3f43e
A
1382 struct mppcb *mp = (struct mppcb *)handle;
1383 struct mptses *mpte = mptompte(mp);
1384 struct socket *mp_so;
1385 struct mptcb *mp_tp;
5ba3f43e 1386 uint32_t i, ifindex;
f427ee49
A
1387 struct ifnet *ifp;
1388 int locked = 0;
5ba3f43e 1389
d9a64523 1390 ifindex = interface_index;
5ba3f43e
A
1391 VERIFY(ifindex != IFSCOPE_NONE);
1392
5ba3f43e 1393 /* About to be garbage-collected (see note about MPTCP/NECP interactions) */
0a7de745 1394 if (mp->mpp_socket->so_usecount == 0) {
5ba3f43e 1395 return;
0a7de745 1396 }
5ba3f43e 1397
cb323159
A
1398 mp_so = mptetoso(mpte);
1399
5ba3f43e 1400 if (action != NECP_CLIENT_CBACTION_INITIAL) {
cb323159 1401 socket_lock(mp_so, 1);
5ba3f43e
A
1402 locked = 1;
1403
1404 /* Check again, because it might have changed while waiting */
0a7de745 1405 if (mp->mpp_socket->so_usecount == 0) {
5ba3f43e 1406 goto out;
0a7de745 1407 }
5ba3f43e
A
1408 }
1409
cb323159 1410 socket_lock_assert_owned(mp_so);
a39ff7e2 1411
5ba3f43e 1412 mp_tp = mpte->mpte_mptcb;
5ba3f43e 1413
f427ee49
A
1414 ifnet_head_lock_shared();
1415 ifp = ifindex2ifnet[ifindex];
1416 ifnet_head_done();
1417
1418 os_log(mptcp_log_handle, "%s - %lx: action: %u ifindex %u delegated to %u usecount %u mpt_flags %#x state %u v4 %u v6 %u nat64 %u power %u\n",
cb323159 1419 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), action, ifindex,
f427ee49 1420 ifp && ifp->if_delegated.ifp ? ifp->if_delegated.ifp->if_index : IFSCOPE_NONE,
cb323159 1421 mp->mpp_socket->so_usecount, mp_tp->mpt_flags, mp_tp->mpt_state,
0a7de745 1422 has_v4, has_v6, has_nat64, low_power);
5ba3f43e
A
1423
1424 /* No need on fallen back sockets */
0a7de745 1425 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
5ba3f43e 1426 goto out;
0a7de745 1427 }
5ba3f43e 1428
d9a64523
A
1429 /*
1430 * When the interface goes in low-power mode we don't want to establish
1431 * new subflows on it. Thus, mark it internally as non-viable.
1432 */
0a7de745 1433 if (low_power) {
d9a64523 1434 action = NECP_CLIENT_CBACTION_NONVIABLE;
0a7de745 1435 }
d9a64523 1436
5ba3f43e
A
1437 if (action == NECP_CLIENT_CBACTION_NONVIABLE) {
1438 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
0a7de745 1439 if (mpte->mpte_itfinfo[i].ifindex == IFSCOPE_NONE) {
d9a64523 1440 continue;
0a7de745 1441 }
d9a64523 1442
0a7de745 1443 if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
5ba3f43e 1444 mptcp_reset_itfinfo(&mpte->mpte_itfinfo[i]);
0a7de745 1445 }
5ba3f43e
A
1446 }
1447
1448 mptcp_sched_create_subflows(mpte);
1449 } else if (action == NECP_CLIENT_CBACTION_VIABLE ||
0a7de745 1450 action == NECP_CLIENT_CBACTION_INITIAL) {
a39ff7e2 1451 int found_slot = 0, slot_index = -1;
cb323159 1452 struct sockaddr *dst;
5ba3f43e 1453
0a7de745 1454 if (ifp == NULL) {
5ba3f43e 1455 goto out;
0a7de745 1456 }
5ba3f43e 1457
94ff46dc
A
1458 if (IFNET_IS_COMPANION_LINK(ifp)) {
1459 goto out;
1460 }
1461
5ba3f43e 1462 if (IFNET_IS_EXPENSIVE(ifp) &&
0a7de745 1463 (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
5ba3f43e 1464 goto out;
0a7de745 1465 }
5ba3f43e 1466
cb323159
A
1467 if (IFNET_IS_CONSTRAINED(ifp) &&
1468 (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
1469 goto out;
1470 }
1471
5ba3f43e 1472 if (IFNET_IS_CELLULAR(ifp) &&
0a7de745 1473 (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
5ba3f43e 1474 goto out;
0a7de745 1475 }
5ba3f43e 1476
0a7de745 1477 if (IS_INTF_CLAT46(ifp)) {
d9a64523 1478 has_v4 = FALSE;
0a7de745 1479 }
d9a64523 1480
a39ff7e2 1481 /* Look for the slot on where to store/update the interface-info. */
5ba3f43e 1482 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
a39ff7e2 1483 /* Found a potential empty slot where we can put it */
5ba3f43e 1484 if (mpte->mpte_itfinfo[i].ifindex == 0) {
a39ff7e2
A
1485 found_slot = 1;
1486 slot_index = i;
1487 }
1488
1489 /*
1490 * The interface is already in our array. Check if we
1491 * need to update it.
1492 */
1493 if (mpte->mpte_itfinfo[i].ifindex == ifindex &&
1494 (mpte->mpte_itfinfo[i].has_v4_conn != has_v4 ||
0a7de745
A
1495 mpte->mpte_itfinfo[i].has_v6_conn != has_v6 ||
1496 mpte->mpte_itfinfo[i].has_nat64_conn != has_nat64)) {
a39ff7e2
A
1497 found_slot = 1;
1498 slot_index = i;
1499 break;
5ba3f43e
A
1500 }
1501
1502 if (mpte->mpte_itfinfo[i].ifindex == ifindex) {
a39ff7e2
A
1503 /*
1504 * Ok, it's already there and we don't need
1505 * to update it
1506 */
5ba3f43e
A
1507 goto out;
1508 }
1509 }
1510
cb323159 1511 dst = mptcp_get_session_dst(mpte, has_v6, has_v4);
c3c9b80d 1512 if (dst && dst->sa_family == AF_INET &&
cb323159 1513 has_v6 && !has_nat64 && !has_v4) {
d9a64523 1514 if (found_slot) {
f427ee49 1515 mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
d9a64523
A
1516 mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
1517 mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
1518 mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
1519 }
5ba3f43e
A
1520 goto out;
1521 }
1522
a39ff7e2 1523 if (found_slot == 0) {
5ba3f43e
A
1524 int new_size = mpte->mpte_itfinfo_size * 2;
1525 struct mpt_itf_info *info = _MALLOC(sizeof(*info) * new_size, M_TEMP, M_ZERO);
1526
1527 if (info == NULL) {
cb323159
A
1528 os_log_error(mptcp_log_handle, "%s - %lx: malloc failed for %u\n",
1529 __func__, (unsigned long)VM_KERNEL_ADDRPERM(mpte), new_size);
5ba3f43e
A
1530 goto out;
1531 }
1532
1533 memcpy(info, mpte->mpte_itfinfo, mpte->mpte_itfinfo_size * sizeof(*info));
1534
0a7de745 1535 if (mpte->mpte_itfinfo_size > MPTE_ITFINFO_SIZE) {
5ba3f43e 1536 _FREE(mpte->mpte_itfinfo, M_TEMP);
0a7de745 1537 }
5ba3f43e
A
1538
1539 /* We allocated a new one, thus the first must be empty */
a39ff7e2 1540 slot_index = mpte->mpte_itfinfo_size;
5ba3f43e
A
1541
1542 mpte->mpte_itfinfo = info;
1543 mpte->mpte_itfinfo_size = new_size;
5ba3f43e
A
1544 }
1545
a39ff7e2
A
1546 VERIFY(slot_index >= 0 && slot_index < (int)mpte->mpte_itfinfo_size);
1547 mpte->mpte_itfinfo[slot_index].ifindex = ifindex;
1548 mpte->mpte_itfinfo[slot_index].has_v4_conn = has_v4;
1549 mpte->mpte_itfinfo[slot_index].has_v6_conn = has_v6;
d9a64523 1550 mpte->mpte_itfinfo[slot_index].has_nat64_conn = has_nat64;
5ba3f43e
A
1551
1552 mptcp_sched_create_subflows(mpte);
1553 }
1554
1555out:
0a7de745 1556 if (locked) {
cb323159 1557 socket_unlock(mp_so, 1);
0a7de745 1558 }
5ba3f43e
A
1559}
1560
1561void
1562mptcp_set_restrictions(struct socket *mp_so)
1563{
1564 struct mptses *mpte = mpsotompte(mp_so);
1565 uint32_t i;
1566
cb323159 1567 socket_lock_assert_owned(mp_so);
5ba3f43e
A
1568
1569 ifnet_head_lock_shared();
1570
1571 for (i = 0; i < mpte->mpte_itfinfo_size; i++) {
1572 struct mpt_itf_info *info = &mpte->mpte_itfinfo[i];
1573 uint32_t ifindex = info->ifindex;
1574 struct ifnet *ifp;
1575
0a7de745 1576 if (ifindex == IFSCOPE_NONE) {
5ba3f43e 1577 continue;
0a7de745 1578 }
5ba3f43e
A
1579
1580 ifp = ifindex2ifnet[ifindex];
0a7de745 1581 if (ifp == NULL) {
d9a64523 1582 continue;
0a7de745 1583 }
5ba3f43e
A
1584
1585 if (IFNET_IS_EXPENSIVE(ifp) &&
0a7de745 1586 (mp_so->so_restrictions & SO_RESTRICT_DENY_EXPENSIVE)) {
5ba3f43e 1587 info->ifindex = IFSCOPE_NONE;
0a7de745 1588 }
5ba3f43e 1589
cb323159
A
1590 if (IFNET_IS_CONSTRAINED(ifp) &&
1591 (mp_so->so_restrictions & SO_RESTRICT_DENY_CONSTRAINED)) {
1592 info->ifindex = IFSCOPE_NONE;
1593 }
1594
5ba3f43e 1595 if (IFNET_IS_CELLULAR(ifp) &&
0a7de745 1596 (mp_so->so_restrictions & SO_RESTRICT_DENY_CELLULAR)) {
5ba3f43e 1597 info->ifindex = IFSCOPE_NONE;
0a7de745 1598 }
5ba3f43e
A
1599 }
1600
1601 ifnet_head_done();
1602}