2 * Copyright (c) 2012-2020 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp_var.h>
52 #include <netinet/mptcp_timer.h>
56 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
57 static int mptcp_usr_detach(struct socket
*);
58 static int mptcp_attach(struct socket
*, struct proc
*);
59 static int mptcp_usr_connectx(struct socket
*, struct sockaddr
*,
60 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
61 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
62 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
63 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
65 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
66 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
67 uint32_t *, user_addr_t
, uint32_t *);
68 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
70 static int mptcp_disconnect(struct mptses
*);
71 static int mptcp_usr_disconnect(struct socket
*);
72 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
73 static struct mptses
*mptcp_usrclosed(struct mptses
*);
74 static int mptcp_usr_rcvd(struct socket
*, int);
75 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
76 struct sockaddr
*, struct mbuf
*, struct proc
*);
77 static int mptcp_usr_shutdown(struct socket
*);
78 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
79 struct mbuf
*, struct mbuf
*, int);
80 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
81 static int mptcp_usr_preconnect(struct socket
*so
);
83 struct pr_usrreqs mptcp_usrreqs
= {
84 .pru_attach
= mptcp_usr_attach
,
85 .pru_connectx
= mptcp_usr_connectx
,
86 .pru_control
= mptcp_usr_control
,
87 .pru_detach
= mptcp_usr_detach
,
88 .pru_disconnect
= mptcp_usr_disconnect
,
89 .pru_disconnectx
= mptcp_usr_disconnectx
,
90 .pru_peeraddr
= mp_getpeeraddr
,
91 .pru_rcvd
= mptcp_usr_rcvd
,
92 .pru_send
= mptcp_usr_send
,
93 .pru_shutdown
= mptcp_usr_shutdown
,
94 .pru_sockaddr
= mp_getsockaddr
,
95 .pru_sosend
= mptcp_usr_sosend
,
96 .pru_soreceive
= soreceive
,
97 .pru_socheckopt
= mptcp_usr_socheckopt
,
98 .pru_preconnect
= mptcp_usr_preconnect
,
102 #if (DEVELOPMENT || DEBUG)
103 static int mptcp_disable_entitlements
= 0;
104 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, disable_entitlements
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
105 &mptcp_disable_entitlements
, 0, "Disable Multipath TCP Entitlement Checking");
108 int mptcp_developer_mode
= 0;
109 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, allow_aggregate
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
110 &mptcp_developer_mode
, 0, "Allow the Multipath aggregation mode");
112 static unsigned long mptcp_expected_progress_headstart
= 5000;
113 SYSCTL_ULONG(_net_inet_mptcp
, OID_AUTO
, expected_progress_headstart
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
114 &mptcp_expected_progress_headstart
, "Headstart to give MPTCP before meeting the progress deadline");
118 * Attaches an MPTCP control block to a socket.
121 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
123 #pragma unused(proto)
126 VERIFY(mpsotomppcb(mp_so
) == NULL
);
128 error
= mptcp_attach(mp_so
, p
);
133 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0) {
134 mp_so
->so_linger
= (short)(TCP_LINGERTIME
* hz
);
141 * Detaches an MPTCP control block from a socket.
144 mptcp_usr_detach(struct socket
*mp_so
)
146 struct mptses
*mpte
= mpsotompte(mp_so
);
147 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
149 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
150 os_log_error(mptcp_log_handle
, "%s - %lx: state: %d\n",
151 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
152 mpp
? mpp
->mpp_state
: -1);
157 * We are done with this MPTCP socket (it has been closed);
158 * trigger all subflows to be disconnected, if not already,
159 * by initiating the PCB detach sequence (SOF_PCBCLEARING
164 mptcp_disconnect(mpte
);
170 * Attach MPTCP protocol to socket, allocating MP control block,
171 * MPTCP session, control block, buffer space, etc.
174 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
177 struct mptses
*mpte
= NULL
;
178 struct mptcb
*mp_tp
= NULL
;
179 struct mppcb
*mpp
= NULL
;
182 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
183 error
= soreserve(mp_so
, tcp_sendspace
, tcp_recvspace
);
189 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
190 soreserve_preconnect(mp_so
, 2048);
193 if ((mp_so
->so_rcv
.sb_flags
& SB_USRSIZE
) == 0) {
194 mp_so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
196 if ((mp_so
->so_snd
.sb_flags
& SB_USRSIZE
) == 0) {
197 mp_so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
201 * MPTCP send-socket buffers cannot be compressed, due to the
202 * fact that each mbuf chained via m_next is a M_PKTHDR
203 * which carries some MPTCP metadata.
205 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
207 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
211 mpp
= mpsotomppcb(mp_so
);
212 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
213 mp_tp
= mpte
->mpte_mptcb
;
215 VERIFY(mp_tp
!= NULL
);
221 mptcp_entitlement_check(struct socket
*mp_so
, uint8_t svctype
)
223 struct mptses
*mpte
= mpsotompte(mp_so
);
225 /* First, check for mptcp_extended without delegation */
226 if (soopt_cred_check(mp_so
, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED
, TRUE
, FALSE
) == 0) {
228 * This means the app has the extended entitlement. Thus,
229 * it's a first party app and can run without restrictions.
231 mpte
->mpte_flags
|= MPTE_FIRSTPARTY
;
235 /* Now with delegation */
236 if (mp_so
->so_flags
& SOF_DELEGATED
&&
237 soopt_cred_check(mp_so
, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED
, TRUE
, TRUE
) == 0) {
239 * This means the app has the extended entitlement. Thus,
240 * it's a first party app and can run without restrictions.
242 mpte
->mpte_flags
|= MPTE_FIRSTPARTY
;
246 if (svctype
== MPTCP_SVCTYPE_AGGREGATE
) {
247 if (mptcp_developer_mode
) {
251 os_log_error(mptcp_log_handle
, "%s - %lx: MPTCP prohibited on svc %u\n",
252 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mpte
->mpte_svctype
);
260 * Common subroutine to open a MPTCP connection to one of the remote hosts
261 * specified by dst_sl. This includes allocating and establishing a
262 * subflow TCP connection, either initially to establish MPTCP connection,
263 * or to join an existing one. Returns a connection handle upon success.
266 mptcp_connectx(struct mptses
*mpte
, struct sockaddr
*src
,
267 struct sockaddr
*dst
, uint32_t ifscope
, sae_connid_t
*pcid
)
272 VERIFY(pcid
!= NULL
);
274 error
= mptcp_subflow_add(mpte
, src
, dst
, ifscope
, pcid
);
280 * User-protocol pru_connectx callback.
283 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr
*src
,
284 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
285 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
286 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
288 #pragma unused(p, aid, flags, arg, arglen)
289 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
290 struct mptses
*mpte
= NULL
;
291 struct mptcb
*mp_tp
= NULL
;
292 user_ssize_t datalen
;
295 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
296 os_log_error(mptcp_log_handle
, "%s - %lx: state %d\n",
297 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
298 mpp
? mpp
->mpp_state
: -1);
302 mpte
= mptompte(mpp
);
303 mp_tp
= mpte
->mpte_mptcb
;
305 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
306 os_log_error(mptcp_log_handle
, "%s - %lx: fell back to TCP\n",
307 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
));
312 if (dst
->sa_family
!= AF_INET
&& dst
->sa_family
!= AF_INET6
) {
313 error
= EAFNOSUPPORT
;
317 if (dst
->sa_family
== AF_INET
&&
318 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v4
)) {
319 os_log_error(mptcp_log_handle
, "%s - %lx: IPv4 dst len %u\n",
320 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), dst
->sa_len
);
325 if (dst
->sa_family
== AF_INET6
&&
326 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v6
)) {
327 os_log_error(mptcp_log_handle
, "%s - %lx: IPv6 dst len %u\n",
328 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), dst
->sa_len
);
333 if (!(mpte
->mpte_flags
& MPTE_SVCTYPE_CHECKED
)) {
334 if (mptcp_entitlement_check(mp_so
, mpte
->mpte_svctype
) < 0) {
339 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
342 if ((mp_so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
)) == 0) {
343 memcpy(&mpte
->mpte_u_dst
, dst
, dst
->sa_len
);
347 if (src
->sa_family
!= AF_INET
&& src
->sa_family
!= AF_INET6
) {
348 error
= EAFNOSUPPORT
;
352 if (src
->sa_family
== AF_INET
&&
353 src
->sa_len
!= sizeof(mpte
->__mpte_src_v4
)) {
354 os_log_error(mptcp_log_handle
, "%s - %lx: IPv4 src len %u\n",
355 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), src
->sa_len
);
360 if (src
->sa_family
== AF_INET6
&&
361 src
->sa_len
!= sizeof(mpte
->__mpte_src_v6
)) {
362 os_log_error(mptcp_log_handle
, "%s - %lx: IPv6 src len %u\n",
363 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), src
->sa_len
);
368 if ((mp_so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
)) == 0) {
369 memcpy(&mpte
->mpte_u_src
, src
, src
->sa_len
);
373 error
= mptcp_connectx(mpte
, src
, dst
, ifscope
, pcid
);
375 /* If there is data, copy it */
377 datalen
= uio_resid(auio
);
378 socket_unlock(mp_so
, 0);
379 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
380 (uio_t
) auio
, NULL
, NULL
, 0);
382 if (error
== 0 || error
== EWOULDBLOCK
) {
383 *bytes_written
= datalen
- uio_resid(auio
);
386 if (error
== EWOULDBLOCK
) {
390 socket_lock(mp_so
, 0);
398 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
401 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
403 /* MPTCP has at most 1 association */
404 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
406 /* just asking how many there are? */
407 if (aidp
== USER_ADDR_NULL
) {
411 return copyout(&mpte
->mpte_associd
, aidp
,
412 sizeof(mpte
->mpte_associd
));
416 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
419 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
425 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
426 aid
!= mpte
->mpte_associd
) {
430 *cnt
= mpte
->mpte_numflows
;
432 /* just asking how many there are? */
433 if (cidp
== USER_ADDR_NULL
) {
437 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
438 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
439 sizeof(mpts
->mpts_connid
))) != 0) {
443 cidp
+= sizeof(mpts
->mpts_connid
);
450 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
453 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
454 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
455 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
456 user_addr_t aux_data
, uint32_t *aux_len
)
463 /* MPTCP-level global stats */
464 if (*cid
== SAE_CONNID_ALL
) {
465 struct socket
*mp_so
= mptetoso(mpte
);
466 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
467 struct conninfo_multipathtcp mptcp_ci
;
470 if (*aux_len
!= 0 && *aux_len
!= sizeof(mptcp_ci
)) {
474 if (mp_so
->so_state
& SS_ISCONNECTING
) {
475 *flags
|= CIF_CONNECTING
;
477 if (mp_so
->so_state
& SS_ISCONNECTED
) {
478 *flags
|= CIF_CONNECTED
;
480 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
481 *flags
|= CIF_DISCONNECTING
;
483 if (mp_so
->so_state
& SS_ISDISCONNECTED
) {
484 *flags
|= CIF_DISCONNECTED
;
486 if (!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
)) {
487 *flags
|= CIF_MP_CAPABLE
;
489 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
490 *flags
|= CIF_MP_DEGRADED
;
496 *aux_type
= CIAUX_MPTCP
;
497 *aux_len
= sizeof(mptcp_ci
);
499 if (aux_data
!= USER_ADDR_NULL
) {
500 const struct mptsub
*mpts
;
501 int initial_info_set
= 0;
504 bzero(&mptcp_ci
, sizeof(mptcp_ci
));
505 mptcp_ci
.mptcpci_subflow_count
= mpte
->mpte_numflows
;
506 mptcp_ci
.mptcpci_switch_count
= mpte
->mpte_subflow_switches
;
508 VERIFY(sizeof(mptcp_ci
.mptcpci_itfstats
) == sizeof(mpte
->mpte_itfstats
));
509 memcpy(mptcp_ci
.mptcpci_itfstats
, mpte
->mpte_itfstats
, sizeof(mptcp_ci
.mptcpci_itfstats
));
511 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
512 if (i
>= sizeof(mptcp_ci
.mptcpci_subflow_connids
) / sizeof(sae_connid_t
)) {
515 mptcp_ci
.mptcpci_subflow_connids
[i
] = mpts
->mpts_connid
;
517 if (mpts
->mpts_flags
& MPTSF_INITIAL_SUB
) {
518 const struct inpcb
*inp
;
520 inp
= sotoinpcb(mpts
->mpts_socket
);
522 mptcp_ci
.mptcpci_init_rxbytes
= inp
->inp_stat
->rxbytes
;
523 mptcp_ci
.mptcpci_init_txbytes
= inp
->inp_stat
->txbytes
;
524 initial_info_set
= 1;
527 mptcpstats_update(mptcp_ci
.mptcpci_itfstats
, mpts
);
532 if (initial_info_set
== 0) {
533 mptcp_ci
.mptcpci_init_rxbytes
= mpte
->mpte_init_rxbytes
;
534 mptcp_ci
.mptcpci_init_txbytes
= mpte
->mpte_init_txbytes
;
537 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
) {
538 mptcp_ci
.mptcpci_flags
|= MPTCPCI_FIRSTPARTY
;
541 error
= copyout(&mptcp_ci
, aux_data
, sizeof(mptcp_ci
));
543 os_log_error(mptcp_log_handle
, "%s - %lx: copyout failed: %d\n",
544 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
552 /* Any stats of any subflow */
553 if (*cid
== SAE_CONNID_ANY
) {
554 const struct mptsub
*mpts
;
556 const struct inpcb
*inp
;
559 mpts
= TAILQ_FIRST(&mpte
->mpte_subflows
);
564 so
= mpts
->mpts_socket
;
567 if (inp
->inp_vflag
& INP_IPV4
) {
568 error
= in_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
569 soerror
, src
, src_len
, dst
, dst_len
,
570 aux_type
, aux_data
, aux_len
);
572 error
= in6_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
573 soerror
, src
, src_len
, dst
, dst_len
,
574 aux_type
, aux_data
, aux_len
);
578 os_log_error(mptcp_log_handle
, "%s - %lx:error from in_getconninfo %d\n",
579 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
583 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
584 *flags
|= CIF_MP_CAPABLE
;
586 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
587 *flags
|= CIF_MP_DEGRADED
;
589 if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
590 *flags
|= CIF_MP_READY
;
592 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
593 *flags
|= CIF_MP_ACTIVE
;
598 /* Per-interface stats */
599 const struct mptsub
*mpts
, *orig_mpts
= NULL
;
600 struct conninfo_tcp tcp_ci
;
601 const struct inpcb
*inp
;
606 /* cid is thus an ifindex - range-check first! */
607 if (*cid
> USHRT_MAX
) {
611 bzero(&tcp_ci
, sizeof(tcp_ci
));
613 /* First, get a subflow to fill in the "regular" info. */
614 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
615 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
617 if (ifp
&& ifp
->if_index
== *cid
) {
623 /* No subflow there - well, let's just get the basic itf-info */
627 so
= mpts
->mpts_socket
;
630 /* Give it USER_ADDR_NULL, because we are doing this on our own */
631 if (inp
->inp_vflag
& INP_IPV4
) {
632 error
= in_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
633 soerror
, src
, src_len
, dst
, dst_len
,
634 aux_type
, USER_ADDR_NULL
, aux_len
);
636 error
= in6_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
637 soerror
, src
, src_len
, dst
, dst_len
,
638 aux_type
, USER_ADDR_NULL
, aux_len
);
642 os_log_error(mptcp_log_handle
, "%s - %lx:error from in_getconninfo %d\n",
643 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
647 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
648 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
649 *flags
|= CIF_MP_CAPABLE
;
651 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
652 *flags
|= CIF_MP_DEGRADED
;
654 if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
655 *flags
|= CIF_MP_READY
;
657 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
658 *flags
|= CIF_MP_ACTIVE
;
662 * Now, we gather the metrics (aka., tcp_info) and roll them in
663 * across all subflows of this interface to build an aggregated
666 * We take the TCP_INFO from the first subflow as the "master",
667 * feeding into those fields that we do not roll.
669 if (aux_data
!= USER_ADDR_NULL
) {
670 tcp_getconninfo(so
, &tcp_ci
);
673 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
674 const struct inpcb
*mptsinp
= sotoinpcb(mpts
->mpts_socket
);
675 const struct ifnet
*ifp
;
677 ifp
= mptsinp
->inp_last_outifp
;
679 if (ifp
== NULL
|| ifp
->if_index
!= *cid
|| mpts
== orig_mpts
) {
683 /* Roll the itf-stats into the tcp_info */
684 tcp_ci
.tcpci_tcp_info
.tcpi_txbytes
+=
685 mptsinp
->inp_stat
->txbytes
;
686 tcp_ci
.tcpci_tcp_info
.tcpi_rxbytes
+=
687 mptsinp
->inp_stat
->rxbytes
;
689 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_txbytes
+=
690 mptsinp
->inp_wstat
->txbytes
;
691 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_rxbytes
+=
692 mptsinp
->inp_wstat
->rxbytes
;
694 tcp_ci
.tcpci_tcp_info
.tcpi_wired_txbytes
+=
695 mptsinp
->inp_Wstat
->txbytes
;
696 tcp_ci
.tcpci_tcp_info
.tcpi_wired_rxbytes
+=
697 mptsinp
->inp_Wstat
->rxbytes
;
699 tcp_ci
.tcpci_tcp_info
.tcpi_cell_txbytes
+=
700 mptsinp
->inp_cstat
->txbytes
;
701 tcp_ci
.tcpci_tcp_info
.tcpi_cell_rxbytes
+=
702 mptsinp
->inp_cstat
->rxbytes
;
707 *aux_type
= CIAUX_TCP
;
709 *aux_len
= sizeof(tcp_ci
);
710 } else if (aux_data
!= USER_ADDR_NULL
) {
714 * Finally, old subflows might have been closed - we
715 * want this data as well, so grab it from the interface
718 create
= orig_mpts
!= NULL
;
721 * When we found a subflow, we are willing to create a stats-index
722 * because we have some data to return. If there isn't a subflow,
723 * nor anything in the stats, return EINVAL. Because the
724 * ifindex belongs to something that doesn't exist.
726 index
= mptcpstats_get_index_by_ifindex(mpte
->mpte_itfstats
, (u_short
)(*cid
), false);
728 os_log_error(mptcp_log_handle
,
729 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
730 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
731 *cid
, mpte
->mpte_numflows
,
732 orig_mpts
? "yes" : "no");
734 if (orig_mpts
== NULL
) {
738 struct mptcp_itf_stats
*stats
;
740 stats
= &mpte
->mpte_itfstats
[index
];
742 /* Roll the itf-stats into the tcp_info */
743 tcp_ci
.tcpci_tcp_info
.tcpi_last_outif
= *cid
;
744 tcp_ci
.tcpci_tcp_info
.tcpi_txbytes
+=
746 tcp_ci
.tcpci_tcp_info
.tcpi_rxbytes
+=
749 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_txbytes
+=
750 stats
->mpis_wifi_txbytes
;
751 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_rxbytes
+=
752 stats
->mpis_wifi_rxbytes
;
754 tcp_ci
.tcpci_tcp_info
.tcpi_wired_txbytes
+=
755 stats
->mpis_wired_txbytes
;
756 tcp_ci
.tcpci_tcp_info
.tcpi_wired_rxbytes
+=
757 stats
->mpis_wired_rxbytes
;
759 tcp_ci
.tcpci_tcp_info
.tcpi_cell_txbytes
+=
760 stats
->mpis_cell_txbytes
;
761 tcp_ci
.tcpci_tcp_info
.tcpi_cell_rxbytes
+=
762 stats
->mpis_cell_rxbytes
;
765 *aux_len
= min(*aux_len
, sizeof(tcp_ci
));
766 error
= copyout(&tcp_ci
, aux_data
, *aux_len
);
777 * User-protocol pru_control callback.
780 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
781 struct ifnet
*ifp
, struct proc
*p
)
783 #pragma unused(ifp, p)
784 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
788 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
792 mpte
= mptompte(mpp
);
795 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
796 struct so_aidreq32 aidr
;
797 bcopy(data
, &aidr
, sizeof(aidr
));
798 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
801 bcopy(&aidr
, data
, sizeof(aidr
));
806 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
807 struct so_aidreq64 aidr
;
808 bcopy(data
, &aidr
, sizeof(aidr
));
809 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
810 (user_addr_t
)aidr
.sar_aidp
);
812 bcopy(&aidr
, data
, sizeof(aidr
));
817 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
818 struct so_cidreq32 cidr
;
819 bcopy(data
, &cidr
, sizeof(cidr
));
820 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
823 bcopy(&cidr
, data
, sizeof(cidr
));
828 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
829 struct so_cidreq64 cidr
;
830 bcopy(data
, &cidr
, sizeof(cidr
));
831 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
832 (user_addr_t
)cidr
.scr_cidp
);
834 bcopy(&cidr
, data
, sizeof(cidr
));
839 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
840 struct so_cinforeq32 cifr
;
841 bcopy(data
, &cifr
, sizeof(cifr
));
842 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
843 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
844 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
845 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
848 bcopy(&cifr
, data
, sizeof(cifr
));
853 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
854 struct so_cinforeq64 cifr
;
855 bcopy(data
, &cifr
, sizeof(cifr
));
856 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
857 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
858 (user_addr_t
)cifr
.scir_src
, &cifr
.scir_src_len
,
859 (user_addr_t
)cifr
.scir_dst
, &cifr
.scir_dst_len
,
860 &cifr
.scir_aux_type
, (user_addr_t
)cifr
.scir_aux_data
,
863 bcopy(&cifr
, data
, sizeof(cifr
));
877 mptcp_disconnect(struct mptses
*mpte
)
879 struct socket
*mp_so
;
883 mp_so
= mptetoso(mpte
);
884 mp_tp
= mpte
->mpte_mptcb
;
886 DTRACE_MPTCP3(disconnectx
, struct mptses
*, mpte
,
887 struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
889 /* if we're not detached, go thru socket state checks */
890 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
891 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
896 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
902 mptcp_cancel_all_timers(mp_tp
);
903 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
904 mptcp_close(mpte
, mp_tp
);
905 } else if ((mp_so
->so_options
& SO_LINGER
) &&
906 mp_so
->so_linger
== 0) {
907 mptcp_drop(mpte
, mp_tp
, 0);
909 soisdisconnecting(mp_so
);
910 sbflush(&mp_so
->so_rcv
);
911 if (mptcp_usrclosed(mpte
) != NULL
) {
917 mptcp_subflow_workloop(mpte
);
/*
 * Wrapper function to support disconnect on socket
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	return mptcp_disconnect(mpsotompte(mp_so));
}
934 * User-protocol pru_disconnectx callback.
937 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
939 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
) {
943 if (cid
!= SAE_CONNID_ANY
&& cid
!= SAE_CONNID_ALL
) {
947 return mptcp_usr_disconnect(mp_so
);
951 mptcp_finish_usrclosed(struct mptses
*mpte
)
953 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
954 struct socket
*mp_so
= mptetoso(mpte
);
956 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
957 mpte
= mptcp_close(mpte
, mp_tp
);
958 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
959 soisdisconnected(mp_so
);
963 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
964 if ((mp_so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
965 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
966 mptcp_subflow_disconnect(mpte
, mpts
);
968 mptcp_subflow_shutdown(mpte
, mpts
);
975 * User issued close, and wish to trail thru shutdown states.
977 static struct mptses
*
978 mptcp_usrclosed(struct mptses
*mpte
)
980 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
982 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
984 /* Not everything has been acknowledged - don't close the subflows! */
985 if (mp_tp
->mpt_sndnxt
+ 1 != mp_tp
->mpt_sndmax
) {
989 mptcp_finish_usrclosed(mpte
);
995 * After a receive, possible send some update to peer.
998 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
1000 #pragma unused(flags)
1001 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1002 struct mptses
*mpte
;
1003 struct mptsub
*mpts
;
1006 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1011 mpte
= mptompte(mpp
);
1013 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1014 struct socket
*so
= mpts
->mpts_socket
;
1016 if (so
->so_proto
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
!= NULL
) {
1017 (*so
->so_proto
->pr_usrreqs
->pru_rcvd
)(so
, 0);
1021 error
= mptcp_output(mpte
);
1027 * Do a send by putting data in the output queue.
1030 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
1031 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
1033 #pragma unused(nam, p)
1034 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1035 struct mptses
*mpte
;
1038 if (prus_flags
& (PRUS_OOB
| PRUS_EOF
)) {
1048 if (control
!= NULL
&& control
->m_len
!= 0) {
1053 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1057 mpte
= mptompte(mpp
);
1058 VERIFY(mpte
!= NULL
);
1060 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
1061 !(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)) {
1066 mptcp_insert_dsn(mpp
, m
);
1067 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
1068 sbappendstream(&mp_so
->so_snd
, m
);
1071 error
= mptcp_output(mpte
);
1076 if (mp_so
->so_state
& SS_ISCONNECTING
) {
1077 if (mp_so
->so_state
& SS_NBIO
) {
1078 error
= EWOULDBLOCK
;
1080 error
= sbwait(&mp_so
->so_snd
);
1089 if (control
!= NULL
) {
1097 * Mark the MPTCP connection as being incapable of further output.
1100 mptcp_usr_shutdown(struct socket
*mp_so
)
1102 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1103 struct mptses
*mpte
;
1106 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1110 mpte
= mptompte(mpp
);
1111 VERIFY(mpte
!= NULL
);
1113 socantsendmore(mp_so
);
1115 mpte
= mptcp_usrclosed(mpte
);
1117 error
= mptcp_output(mpte
);
1124 * Copy the contents of uio into a properly sized mbuf chain.
1127 mptcp_uiotombuf(struct uio
*uio
, int how
, user_ssize_t space
, struct mbuf
**top
)
1129 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
1130 int progress
, len
, error
;
1131 user_ssize_t resid
, tot
;
1133 VERIFY(top
!= NULL
&& *top
== NULL
);
1136 * space can be zero or an arbitrary large value bound by
1137 * the total data supplied by the uio.
1139 resid
= uio_resid(uio
);
1141 tot
= MIN(resid
, space
);
1146 if (tot
< 0 || tot
> INT_MAX
) {
1155 /* Loop and append maximum sized mbufs to the chain tail. */
1157 uint32_t m_needed
= 1;
1159 if (njcl
> 0 && len
> MBIGCLBYTES
) {
1160 mb
= m_getpackets_internal(&m_needed
, 1,
1161 how
, 1, M16KCLBYTES
);
1162 } else if (len
> MCLBYTES
) {
1163 mb
= m_getpackets_internal(&m_needed
, 1,
1164 how
, 1, MBIGCLBYTES
);
1165 } else if (len
>= (signed)MINCLSIZE
) {
1166 mb
= m_getpackets_internal(&m_needed
, 1,
1169 mb
= m_gethdr(how
, MT_DATA
);
1172 /* Fail the whole operation if one mbuf can't be allocated. */
1181 VERIFY(mb
->m_flags
& M_PKTHDR
);
1182 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1183 if (mtail
!= NULL
) {
1194 /* Fill all mbufs with uio data and update header information. */
1195 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1196 /* tot >= 0 && tot <= INT_MAX (see above) */
1197 len
= MIN((int)M_TRAILINGSPACE(mb
), (int)(tot
- progress
));
1199 error
= uiomove(mtod(mb
, char *), len
, uio
);
1205 /* each mbuf is M_PKTHDR chained via m_next */
1207 mb
->m_pkthdr
.len
= len
;
1211 VERIFY(progress
== tot
);
1217 * MPTCP socket protocol-user socket send routine, derived from sosend().
1220 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1221 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1223 #pragma unused(addr)
1224 user_ssize_t resid
, space
;
1225 int error
, sendflags
;
1226 struct proc
*p
= current_proc();
1229 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1230 if (uio
== NULL
|| top
!= NULL
) {
1234 resid
= uio_resid(uio
);
1236 socket_lock(mp_so
, 1);
1237 so_update_last_owner_locked(mp_so
, p
);
1238 so_update_policy(mp_so
);
1240 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1241 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1243 if (flags
& (MSG_OOB
| MSG_DONTROUTE
)) {
1245 socket_unlock(mp_so
, 1);
1250 * In theory resid should be unsigned. However, space must be
1251 * signed, as it might be less than 0 if we over-committed, and we
1252 * must use a signed comparison of space and resid. On the other
1253 * hand, a negative resid causes us to loop sending 0-length
1254 * segments to the protocol.
1256 if (resid
< 0 || resid
> INT_MAX
||
1257 (flags
& MSG_EOR
) || control
!= NULL
) {
1259 socket_unlock(mp_so
, 1);
1263 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1266 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1272 space
= sbspace(&mp_so
->so_snd
);
1274 socket_unlock(mp_so
, 0);
1276 * Copy the data from userland into an mbuf chain.
1278 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, &top
);
1280 socket_lock(mp_so
, 0);
1283 VERIFY(top
!= NULL
);
1284 space
-= resid
- uio_resid(uio
);
1285 resid
= uio_resid(uio
);
1286 socket_lock(mp_so
, 0);
1289 * Compute flags here, for pru_send and NKEs.
1291 sendflags
= (resid
> 0 && space
> 0) ?
1292 PRUS_MORETOCOME
: 0;
1295 * Socket filter processing
1297 VERIFY(control
== NULL
);
1298 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1300 if (error
== EJUSTRETURN
) {
1303 /* always free control if any */
1307 if (control
!= NULL
) {
1313 * Pass data to protocol.
1315 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1316 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1322 } while (resid
!= 0 && space
> 0);
1323 } while (resid
!= 0);
1327 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1329 socket_unlock(mp_so
, 1);
1335 if (control
!= NULL
) {
1339 soclearfastopen(mp_so
);
1345 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1346 * This routine simply indicates to the caller whether or not to proceed
1347 * further with the given socket option. This is invoked by sosetoptlock()
1348 * and sogetoptlock().
1351 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1353 #pragma unused(mp_so)
1356 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1359 * We could check for sopt_dir (set/get) here, but we'll just
1360 * let the caller deal with it as appropriate; therefore the
1361 * following is a superset of the socket options which we
1362 * allow for set/get.
1364 * XXX: adi@apple.com
1366 * Need to consider the following cases:
1368 * a. Certain socket options don't have a clear definition
1369 * on the expected behavior post connect(2). At the time
1370 * those options are issued on the MP socket, there may
1371 * be existing subflow sockets that are already connected.
1373 switch (sopt
->sopt_name
) {
1374 case SO_LINGER
: /* MP */
1375 case SO_LINGER_SEC
: /* MP */
1376 case SO_TYPE
: /* MP */
1377 case SO_NREAD
: /* MP */
1378 case SO_NWRITE
: /* MP */
1379 case SO_ERROR
: /* MP */
1380 case SO_SNDBUF
: /* MP */
1381 case SO_RCVBUF
: /* MP */
1382 case SO_SNDLOWAT
: /* MP */
1383 case SO_RCVLOWAT
: /* MP */
1384 case SO_SNDTIMEO
: /* MP */
1385 case SO_RCVTIMEO
: /* MP */
1386 case SO_NKE
: /* MP */
1387 case SO_NOSIGPIPE
: /* MP */
1388 case SO_NOADDRERR
: /* MP */
1389 case SO_LABEL
: /* MP */
1390 case SO_PEERLABEL
: /* MP */
1391 case SO_DEFUNCTIT
: /* MP */
1392 case SO_DEFUNCTOK
: /* MP */
1393 case SO_ISDEFUNCT
: /* MP */
1394 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1395 case SO_DELEGATED
: /* MP */
1396 case SO_DELEGATED_UUID
: /* MP */
1398 case SO_NECP_ATTRIBUTES
:
1399 case SO_NECP_CLIENTUUID
:
1401 case SO_MPKL_SEND_INFO
:
1403 * Tell the caller that these options are to be processed.
1407 case SO_DEBUG
: /* MP + subflow */
1408 case SO_KEEPALIVE
: /* MP + subflow */
1409 case SO_USELOOPBACK
: /* MP + subflow */
1410 case SO_RANDOMPORT
: /* MP + subflow */
1411 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1412 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1413 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1414 case SO_RECV_ANYIF
: /* MP + subflow */
1415 case SO_RESTRICTIONS
: /* MP + subflow */
1416 case SO_FLUSH
: /* MP + subflow */
1417 case SO_NOWAKEFROMSLEEP
:
1418 case SO_NOAPNFALLBK
:
1419 case SO_MARK_CELLFALLBACK
:
1421 * Tell the caller that these options are to be processed;
1422 * these will also be recorded later by mptcp_setopt().
1424 * NOTE: Only support integer option value for now.
1426 if (sopt
->sopt_valsize
!= sizeof(int)) {
1433 * Tell the caller to stop immediately and return an error.
1435 error
= ENOPROTOOPT
;
1443 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1446 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1448 struct socket
*mp_so
;
1449 struct mptsub
*mpts
;
1453 /* just bail now if this isn't applicable to subflow sockets */
1454 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1455 error
= ENOPROTOOPT
;
1460 * Skip those that are handled internally; these options
1461 * should not have been recorded and marked with the
1462 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1464 if (mpo
->mpo_level
== SOL_SOCKET
&&
1465 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1466 error
= ENOPROTOOPT
;
1470 mp_so
= mptetoso(mpte
);
1473 * Don't bother going further if there's no subflow; mark the option
1474 * with MPOF_INTERIM so that we know whether or not to remove this
1475 * option upon encountering an error while issuing it during subflow
1478 if (mpte
->mpte_numflows
== 0) {
1479 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1480 mpo
->mpo_flags
|= MPOF_INTERIM
;
1481 /* return success */
1485 bzero(&smpo
, sizeof(smpo
));
1486 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1487 smpo
.mpo_level
= mpo
->mpo_level
;
1488 smpo
.mpo_name
= mpo
->mpo_name
;
1490 /* grab exisiting values in case we need to rollback */
1491 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1494 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
| MPTSF_SOPT_INPROG
);
1495 mpts
->mpts_oldintval
= 0;
1496 smpo
.mpo_intval
= 0;
1497 VERIFY(mpts
->mpts_socket
!= NULL
);
1498 so
= mpts
->mpts_socket
;
1499 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1500 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1501 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1505 /* apply socket option */
1506 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1509 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1510 VERIFY(mpts
->mpts_socket
!= NULL
);
1511 so
= mpts
->mpts_socket
;
1512 error
= mptcp_subflow_sosetopt(mpte
, mpts
, mpo
);
1518 /* cleanup, and rollback if needed */
1519 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1522 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1523 /* clear in case it's set */
1524 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1525 mpts
->mpts_oldintval
= 0;
1528 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1529 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1530 VERIFY(mpts
->mpts_oldintval
== 0);
1533 /* error during sosetopt, so roll it back */
1535 VERIFY(mpts
->mpts_socket
!= NULL
);
1536 so
= mpts
->mpts_socket
;
1537 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1538 mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
);
1540 mpts
->mpts_oldintval
= 0;
1541 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
| MPTSF_SOPT_INPROG
);
1549 * Handle SOPT_SET for socket options issued on MP socket.
1552 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1554 int error
= 0, optval
= 0, level
, optname
, rec
= 1;
1555 struct mptopt smpo
, *mpo
= NULL
;
1556 struct socket
*mp_so
;
1558 level
= sopt
->sopt_level
;
1559 optname
= sopt
->sopt_name
;
1561 mp_so
= mptetoso(mpte
);
1564 * Record socket options which are applicable to subflow sockets so
1565 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1566 * for the list of eligible socket-level options.
1568 if (level
== SOL_SOCKET
) {
1572 case SO_USELOOPBACK
:
1574 case SO_TRAFFIC_CLASS
:
1575 case SO_RECV_TRAFFIC_CLASS
:
1576 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1578 case SO_RESTRICTIONS
:
1579 case SO_NOWAKEFROMSLEEP
:
1580 case SO_NOAPNFALLBK
:
1581 case SO_MARK_CELLFALLBACK
:
1585 /* don't record it */
1589 /* Next ones, record at MPTCP-level */
1591 error
= sooptcopyin(sopt
, &mpte
->mpte_epid
,
1592 sizeof(int), sizeof(int));
1598 case SO_DELEGATED_UUID
:
1599 error
= sooptcopyin(sopt
, &mpte
->mpte_euuid
,
1600 sizeof(uuid_t
), sizeof(uuid_t
));
1607 case SO_NECP_CLIENTUUID
:
1608 if (!uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1613 error
= sooptcopyin(sopt
, &mpsotomppcb(mp_so
)->necp_client_uuid
,
1614 sizeof(uuid_t
), sizeof(uuid_t
));
1619 mpsotomppcb(mp_so
)->necp_cb
= mptcp_session_necp_cb
;
1620 error
= necp_client_register_multipath_cb(mp_so
->last_pid
,
1621 mpsotomppcb(mp_so
)->necp_client_uuid
,
1622 mpsotomppcb(mp_so
));
1627 if (uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1633 case SO_NECP_ATTRIBUTES
:
1636 /* nothing to do; just return */
1642 case TCP_RXT_FINDROP
:
1646 case TCP_CONNECTIONTIMEOUT
:
1647 case TCP_RXT_CONNDROPTIME
:
1648 case PERSIST_TIMEOUT
:
1649 case TCP_ADAPTIVE_READ_TIMEOUT
:
1650 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1651 /* eligible; record it */
1653 case TCP_NOTSENT_LOWAT
:
1654 /* record at MPTCP level */
1655 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1665 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1666 error
= mptcp_set_notsent_lowat(mpte
, 0);
1668 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1669 error
= mptcp_set_notsent_lowat(mpte
,
1678 case MPTCP_SERVICE_TYPE
:
1679 /* record at MPTCP level */
1680 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1685 if (optval
< 0 || optval
>= MPTCP_SVCTYPE_MAX
) {
1690 if (mptcp_entitlement_check(mp_so
, (uint8_t)optval
) < 0) {
1695 mpte
->mpte_svctype
= (uint8_t)optval
;
1696 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
1699 case MPTCP_ALTERNATE_PORT
:
1700 /* record at MPTCP level */
1701 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1707 if (optval
< 0 || optval
> UINT16_MAX
) {
1712 mpte
->mpte_alternate_port
= (uint16_t)optval
;
1715 case MPTCP_FORCE_ENABLE
:
1716 /* record at MPTCP level */
1717 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1723 if (optval
< 0 || optval
> 1) {
1729 mpte
->mpte_flags
|= MPTE_FORCE_ENABLE
;
1731 mpte
->mpte_flags
&= ~MPTE_FORCE_ENABLE
;
1735 case MPTCP_EXPECTED_PROGRESS_TARGET
:
1737 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1738 uint64_t mach_time_target
;
1739 uint64_t nanoseconds
;
1741 if (mpte
->mpte_svctype
!= MPTCP_SVCTYPE_TARGET_BASED
) {
1742 os_log(mptcp_log_handle
, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1743 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mpte
->mpte_svctype
);
1748 error
= sooptcopyin(sopt
, &mach_time_target
, sizeof(mach_time_target
), sizeof(mach_time_target
));
1753 if (!mptcp_ok_to_create_subflows(mp_tp
)) {
1754 os_log(mptcp_log_handle
, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1755 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mp_tp
->mpt_state
, mp_tp
->mpt_flags
);
1760 if (mach_time_target
) {
1761 uint64_t time_now
= 0;
1762 uint64_t time_now_nanoseconds
;
1764 absolutetime_to_nanoseconds(mach_time_target
, &nanoseconds
);
1765 nanoseconds
= nanoseconds
- (mptcp_expected_progress_headstart
* NSEC_PER_MSEC
);
1767 time_now
= mach_continuous_time();
1768 absolutetime_to_nanoseconds(time_now
, &time_now_nanoseconds
);
1770 nanoseconds_to_absolutetime(nanoseconds
, &mach_time_target
);
1771 /* If the timer is already running and it would
1772 * fire in less than mptcp_expected_progress_headstart
1773 * seconds, then it's not worth canceling it.
1775 if (mpte
->mpte_time_target
&&
1776 mpte
->mpte_time_target
< time_now
&&
1777 time_now_nanoseconds
> nanoseconds
- (mptcp_expected_progress_headstart
* NSEC_PER_MSEC
)) {
1778 os_log(mptcp_log_handle
, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1779 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1780 mpte
->mpte_time_target
,
1787 mpte
->mpte_time_target
= mach_time_target
;
1788 mptcp_set_urgency_timer(mpte
);
1794 error
= ENOPROTOOPT
;
1799 if ((error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1800 sizeof(optval
))) != 0) {
1805 /* search for an existing one; if not found, allocate */
1806 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
) {
1807 mpo
= mptcp_sopt_alloc(Z_WAITOK
);
1814 /* initialize or update, as needed */
1815 mpo
->mpo_intval
= optval
;
1816 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1817 mpo
->mpo_level
= level
;
1818 mpo
->mpo_name
= optname
;
1819 mptcp_sopt_insert(mpte
, mpo
);
1821 /* this can be issued on the subflow socket */
1822 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1825 bzero(&smpo
, sizeof(smpo
));
1827 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1828 mpo
->mpo_level
= level
;
1829 mpo
->mpo_name
= optname
;
1830 mpo
->mpo_intval
= optval
;
1833 /* issue this socket option on existing subflows */
1834 error
= mptcp_setopt_apply(mpte
, mpo
);
1835 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1836 VERIFY(mpo
!= &smpo
);
1837 mptcp_sopt_remove(mpte
, mpo
);
1838 mptcp_sopt_free(mpo
);
1841 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
1853 os_log_error(mptcp_log_handle
, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1854 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1855 mptcp_sopt2str(level
, optname
), level
, optname
, optval
, error
);
1860 mptcp_fill_info_bytestats(struct tcp_info
*ti
, struct mptses
*mpte
)
1862 struct mptsub
*mpts
;
1865 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1866 const struct inpcb
*inp
= sotoinpcb(mpts
->mpts_socket
);
1872 ti
->tcpi_txbytes
+= inp
->inp_stat
->txbytes
;
1873 ti
->tcpi_rxbytes
+= inp
->inp_stat
->rxbytes
;
1874 ti
->tcpi_cell_txbytes
+= inp
->inp_cstat
->txbytes
;
1875 ti
->tcpi_cell_rxbytes
+= inp
->inp_cstat
->rxbytes
;
1876 ti
->tcpi_wifi_txbytes
+= inp
->inp_wstat
->txbytes
;
1877 ti
->tcpi_wifi_rxbytes
+= inp
->inp_wstat
->rxbytes
;
1878 ti
->tcpi_wired_txbytes
+= inp
->inp_Wstat
->txbytes
;
1879 ti
->tcpi_wired_rxbytes
+= inp
->inp_Wstat
->rxbytes
;
1882 for (i
= 0; i
< MPTCP_ITFSTATS_SIZE
; i
++) {
1883 struct mptcp_itf_stats
*stats
= &mpte
->mpte_itfstats
[i
];
1885 ti
->tcpi_txbytes
+= stats
->mpis_txbytes
;
1886 ti
->tcpi_rxbytes
+= stats
->mpis_rxbytes
;
1888 ti
->tcpi_wifi_txbytes
+= stats
->mpis_wifi_txbytes
;
1889 ti
->tcpi_wifi_rxbytes
+= stats
->mpis_wifi_rxbytes
;
1891 ti
->tcpi_wired_txbytes
+= stats
->mpis_wired_txbytes
;
1892 ti
->tcpi_wired_rxbytes
+= stats
->mpis_wired_rxbytes
;
1894 ti
->tcpi_cell_txbytes
+= stats
->mpis_cell_txbytes
;
1895 ti
->tcpi_cell_rxbytes
+= stats
->mpis_cell_rxbytes
;
1900 mptcp_fill_info(struct mptses
*mpte
, struct tcp_info
*ti
)
1902 struct mptsub
*actsub
= mpte
->mpte_active_sub
;
1903 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1904 struct tcpcb
*acttp
= NULL
;
1907 acttp
= sototcpcb(actsub
->mpts_socket
);
1910 bzero(ti
, sizeof(*ti
));
1912 ti
->tcpi_state
= (uint8_t)mp_tp
->mpt_state
;
1914 /* tcpi_snd_wscale */
1915 /* tcpi_rcv_wscale */
1918 ti
->tcpi_rto
= acttp
->t_timer
[TCPT_REXMT
] ? acttp
->t_rxtcur
: 0;
1924 ti
->tcpi_rttcur
= acttp
->t_rttcur
;
1925 ti
->tcpi_srtt
= acttp
->t_srtt
>> TCP_RTT_SHIFT
;
1926 ti
->tcpi_rttvar
= acttp
->t_rttvar
>> TCP_RTTVAR_SHIFT
;
1927 ti
->tcpi_rttbest
= acttp
->t_rttbest
>> TCP_RTT_SHIFT
;
1929 /* tcpi_snd_ssthresh */
1931 /* tcpi_rcv_space */
1932 ti
->tcpi_snd_wnd
= mp_tp
->mpt_sndwnd
;
1933 ti
->tcpi_snd_nxt
= (uint32_t)mp_tp
->mpt_sndnxt
;
1934 ti
->tcpi_rcv_nxt
= (uint32_t)mp_tp
->mpt_rcvnxt
;
1936 ti
->tcpi_last_outif
= (acttp
->t_inpcb
->inp_last_outifp
== NULL
) ? 0 :
1937 acttp
->t_inpcb
->inp_last_outifp
->if_index
;
1940 mptcp_fill_info_bytestats(ti
, mpte
);
1941 /* tcpi_txpackets */
1943 /* tcpi_txretransmitbytes */
1944 /* tcpi_txunacked */
1945 /* tcpi_rxpackets */
1947 /* tcpi_rxduplicatebytes */
1948 /* tcpi_rxoutoforderbytes */
1950 /* tcpi_synrexmits */
1953 /* tcpi_cell_rxpackets */
1955 /* tcpi_cell_txpackets */
1957 /* tcpi_wifi_rxpackets */
1959 /* tcpi_wifi_txpackets */
1961 /* tcpi_wired_rxpackets */
1962 /* tcpi_wired_txpackets */
1963 /* tcpi_connstatus */
1966 /* tcpi_ecn_recv_ce */
1967 /* tcpi_ecn_recv_cwr */
1969 ti
->tcpi_rcvoopack
= acttp
->t_rcvoopack
;
1972 /* tcpi_sack_recovery_episode */
1973 /* tcpi_reordered_pkts */
1974 /* tcpi_dsack_sent */
1975 /* tcpi_dsack_recvd */
1978 ti
->tcpi_txretransmitpackets
= acttp
->t_stat
.rxmitpkts
;
1983 * Handle SOPT_GET for socket options issued on MP socket.
1986 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1988 int error
= 0, optval
= 0;
1991 * We only handle SOPT_GET for TCP level socket options; we should
1992 * not get here for socket level options since they are already
1993 * handled at the socket layer.
1995 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
1996 error
= ENOPROTOOPT
;
2000 switch (sopt
->sopt_name
) {
2001 case PERSIST_TIMEOUT
:
2002 /* Only case for which we have a non-zero default */
2003 optval
= tcp_max_persist_timeout
;
2006 case TCP_RXT_FINDROP
:
2010 case TCP_CONNECTIONTIMEOUT
:
2011 case TCP_RXT_CONNDROPTIME
:
2012 case TCP_ADAPTIVE_READ_TIMEOUT
:
2013 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
2015 struct mptopt
*mpo
= mptcp_sopt_find(mpte
, sopt
);
2018 optval
= mpo
->mpo_intval
;
2023 /* The next ones are stored at the MPTCP-level */
2024 case TCP_NOTSENT_LOWAT
:
2025 if (mptetoso(mpte
)->so_flags
& SOF_NOTSENT_LOWAT
) {
2026 optval
= mptcp_get_notsent_lowat(mpte
);
2035 mptcp_fill_info(mpte
, &ti
);
2036 error
= sooptcopyout(sopt
, &ti
, sizeof(struct tcp_info
));
2040 case MPTCP_SERVICE_TYPE
:
2041 optval
= mpte
->mpte_svctype
;
2043 case MPTCP_ALTERNATE_PORT
:
2044 optval
= mpte
->mpte_alternate_port
;
2046 case MPTCP_FORCE_ENABLE
:
2047 optval
= !!(mpte
->mpte_flags
& MPTE_FORCE_ENABLE
);
2049 case MPTCP_EXPECTED_PROGRESS_TARGET
:
2050 error
= sooptcopyout(sopt
, &mpte
->mpte_time_target
, sizeof(mpte
->mpte_time_target
));
2055 error
= ENOPROTOOPT
;
2060 error
= sooptcopyout(sopt
, &optval
, sizeof(int));
2068 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2069 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2070 * to those that are allowed by mptcp_usr_socheckopt().
2073 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
2075 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
2076 struct mptses
*mpte
;
2079 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
2083 mpte
= mptompte(mpp
);
2084 socket_lock_assert_owned(mp_so
);
2086 /* we only handle socket and TCP-level socket options for MPTCP */
2087 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
2092 switch (sopt
->sopt_dir
) {
2094 error
= mptcp_setopt(mpte
, sopt
);
2098 error
= mptcp_getopt(mpte
, sopt
);
2106 mptcp_sopt2str(int level
, int optname
)
2114 return "SO_LINGER_SEC";
2118 return "SO_KEEPALIVE";
2119 case SO_USELOOPBACK
:
2120 return "SO_USELOOPBACK";
2134 return "SO_SNDLOWAT";
2136 return "SO_RCVLOWAT";
2138 return "SO_SNDTIMEO";
2140 return "SO_RCVTIMEO";
2144 return "SO_NOSIGPIPE";
2146 return "SO_NOADDRERR";
2147 case SO_RESTRICTIONS
:
2148 return "SO_RESTRICTIONS";
2152 return "SO_PEERLABEL";
2154 return "SO_RANDOMPORT";
2155 case SO_TRAFFIC_CLASS
:
2156 return "SO_TRAFFIC_CLASS";
2157 case SO_RECV_TRAFFIC_CLASS
:
2158 return "SO_RECV_TRAFFIC_CLASS";
2159 case SO_TRAFFIC_CLASS_DBG
:
2160 return "SO_TRAFFIC_CLASS_DBG";
2161 case SO_PRIVILEGED_TRAFFIC_CLASS
:
2162 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2164 return "SO_DEFUNCTIT";
2166 return "SO_DEFUNCTOK";
2168 return "SO_ISDEFUNCT";
2169 case SO_OPPORTUNISTIC
:
2170 return "SO_OPPORTUNISTIC";
2174 return "SO_RECV_ANYIF";
2175 case SO_NOWAKEFROMSLEEP
:
2176 return "SO_NOWAKEFROMSLEEP";
2177 case SO_NOAPNFALLBK
:
2178 return "SO_NOAPNFALLBK";
2179 case SO_MARK_CELLFALLBACK
:
2180 return "SO_CELLFALLBACK";
2182 return "SO_DELEGATED";
2183 case SO_DELEGATED_UUID
:
2184 return "SO_DELEGATED_UUID";
2186 case SO_NECP_ATTRIBUTES
:
2187 return "SO_NECP_ATTRIBUTES";
2188 case SO_NECP_CLIENTUUID
:
2189 return "SO_NECP_CLIENTUUID";
2197 return "TCP_NODELAY";
2199 return "TCP_KEEPALIVE";
2201 return "TCP_KEEPINTVL";
2203 return "TCP_KEEPCNT";
2204 case TCP_CONNECTIONTIMEOUT
:
2205 return "TCP_CONNECTIONTIMEOUT";
2206 case TCP_RXT_CONNDROPTIME
:
2207 return "TCP_RXT_CONNDROPTIME";
2208 case PERSIST_TIMEOUT
:
2209 return "PERSIST_TIMEOUT";
2210 case TCP_NOTSENT_LOWAT
:
2211 return "NOTSENT_LOWAT";
2212 case TCP_ADAPTIVE_READ_TIMEOUT
:
2213 return "ADAPTIVE_READ_TIMEOUT";
2214 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
2215 return "ADAPTIVE_WRITE_TIMEOUT";
2216 case MPTCP_SERVICE_TYPE
:
2217 return "MPTCP_SERVICE_TYPE";
2218 case MPTCP_ALTERNATE_PORT
:
2219 return "MPTCP_ALTERNATE_PORT";
2220 case MPTCP_FORCE_ENABLE
:
2221 return "MPTCP_FORCE_ENABLE";
2222 case MPTCP_EXPECTED_PROGRESS_TARGET
:
2223 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2233 mptcp_usr_preconnect(struct socket
*mp_so
)
2235 struct mptsub
*mpts
= NULL
;
2236 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
2237 struct mptses
*mpte
;
2239 struct tcpcb
*tp
= NULL
;
2242 mpte
= mptompte(mpp
);
2244 mpts
= mptcp_get_subflow(mpte
, NULL
);
2246 os_log_error(mptcp_log_handle
, "%s - %lx: invalid preconnect ",
2247 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
));
2250 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
2251 so
= mpts
->mpts_socket
;
2252 tp
= intotcpcb(sotoinpcb(so
));
2253 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
2254 error
= tcp_output(sototcpcb(so
));
2256 soclearfastopen(mp_so
);