2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp_var.h>
52 #include <netinet/mptcp_timer.h>
56 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
57 static int mptcp_usr_detach(struct socket
*);
58 static int mptcp_attach(struct socket
*, struct proc
*);
59 static int mptcp_usr_connectx(struct socket
*, struct sockaddr
*,
60 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
61 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
62 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
63 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
65 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
66 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
67 uint32_t *, user_addr_t
, uint32_t *);
68 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
70 static int mptcp_disconnect(struct mptses
*);
71 static int mptcp_usr_disconnect(struct socket
*);
72 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
73 static struct mptses
*mptcp_usrclosed(struct mptses
*);
74 static int mptcp_usr_rcvd(struct socket
*, int);
75 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
76 struct sockaddr
*, struct mbuf
*, struct proc
*);
77 static int mptcp_usr_shutdown(struct socket
*);
78 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
79 struct mbuf
*, struct mbuf
*, int);
80 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
81 static int mptcp_usr_preconnect(struct socket
*so
);
83 struct pr_usrreqs mptcp_usrreqs
= {
84 .pru_attach
= mptcp_usr_attach
,
85 .pru_connectx
= mptcp_usr_connectx
,
86 .pru_control
= mptcp_usr_control
,
87 .pru_detach
= mptcp_usr_detach
,
88 .pru_disconnect
= mptcp_usr_disconnect
,
89 .pru_disconnectx
= mptcp_usr_disconnectx
,
90 .pru_peeraddr
= mp_getpeeraddr
,
91 .pru_rcvd
= mptcp_usr_rcvd
,
92 .pru_send
= mptcp_usr_send
,
93 .pru_shutdown
= mptcp_usr_shutdown
,
94 .pru_sockaddr
= mp_getsockaddr
,
95 .pru_sosend
= mptcp_usr_sosend
,
96 .pru_soreceive
= soreceive
,
97 .pru_socheckopt
= mptcp_usr_socheckopt
,
98 .pru_preconnect
= mptcp_usr_preconnect
,
102 #if (DEVELOPMENT || DEBUG)
103 static int mptcp_disable_entitlements
= 0;
104 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, disable_entitlements
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
105 &mptcp_disable_entitlements
, 0, "Disable Multipath TCP Entitlement Checking");
108 int mptcp_developer_mode
= 0;
109 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, allow_aggregate
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
110 &mptcp_developer_mode
, 0, "Allow the Multipath aggregation mode");
112 static unsigned long mptcp_expected_progress_headstart
= 5000;
113 SYSCTL_ULONG(_net_inet_mptcp
, OID_AUTO
, expected_progress_headstart
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
114 &mptcp_expected_progress_headstart
, "Headstart to give MPTCP before meeting the progress deadline");
118 * Attaches an MPTCP control block to a socket.
121 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
123 #pragma unused(proto)
126 VERIFY(mpsotomppcb(mp_so
) == NULL
);
128 error
= mptcp_attach(mp_so
, p
);
135 * Might want to use a different SO_LINGER timeout than TCP's?
137 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0) {
138 mp_so
->so_linger
= TCP_LINGERTIME
* hz
;
145 * Detaches an MPTCP control block from a socket.
148 mptcp_usr_detach(struct socket
*mp_so
)
150 struct mptses
*mpte
= mpsotompte(mp_so
);
151 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
153 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
154 os_log_error(mptcp_log_handle
, "%s - %lx: state: %d\n",
155 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
156 mpp
? mpp
->mpp_state
: -1);
161 * We are done with this MPTCP socket (it has been closed);
162 * trigger all subflows to be disconnected, if not already,
163 * by initiating the PCB detach sequence (SOF_PCBCLEARING
168 mptcp_disconnect(mpte
);
174 * Attach MPTCP protocol to socket, allocating MP control block,
175 * MPTCP session, control block, buffer space, etc.
178 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
181 struct mptses
*mpte
= NULL
;
182 struct mptcb
*mp_tp
= NULL
;
183 struct mppcb
*mpp
= NULL
;
186 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
187 error
= soreserve(mp_so
, tcp_sendspace
, tcp_recvspace
);
193 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
194 soreserve_preconnect(mp_so
, 2048);
197 if ((mp_so
->so_rcv
.sb_flags
& SB_USRSIZE
) == 0) {
198 mp_so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
200 if ((mp_so
->so_snd
.sb_flags
& SB_USRSIZE
) == 0) {
201 mp_so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
205 * MPTCP send-socket buffers cannot be compressed, due to the
206 * fact that each mbuf chained via m_next is a M_PKTHDR
207 * which carries some MPTCP metadata.
209 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
211 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
215 mpp
= mpsotomppcb(mp_so
);
216 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
217 mp_tp
= mpte
->mpte_mptcb
;
219 VERIFY(mp_tp
!= NULL
);
225 mptcp_entitlement_check(struct socket
*mp_so
)
227 struct mptses
*mpte
= mpsotompte(mp_so
);
229 /* First, check for mptcp_extended without delegation */
230 if (soopt_cred_check(mp_so
, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED
, TRUE
, FALSE
) == 0) {
232 * This means the app has the extended entitlement. Thus,
233 * it's a first party app and can run without restrictions.
235 mpte
->mpte_flags
|= MPTE_FIRSTPARTY
;
239 /* Now with delegation */
240 if (mp_so
->so_flags
& SOF_DELEGATED
&&
241 soopt_cred_check(mp_so
, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED
, TRUE
, TRUE
) == 0) {
243 * This means the app has the extended entitlement. Thus,
244 * it's a first party app and can run without restrictions.
246 mpte
->mpte_flags
|= MPTE_FIRSTPARTY
;
250 /* Now, take a look at exceptions configured through sysctl */
251 #if (DEVELOPMENT || DEBUG)
252 if (mptcp_disable_entitlements
) {
257 if (mpte
->mpte_svctype
== MPTCP_SVCTYPE_AGGREGATE
) {
258 if (mptcp_developer_mode
) {
265 /* Second, check for regular users that are within the data-limits */
266 if (soopt_cred_check(mp_so
, PRIV_NET_PRIVILEGED_MULTIPATH
, TRUE
, FALSE
) == 0) {
270 if (mp_so
->so_flags
& SOF_DELEGATED
&&
271 soopt_cred_check(mp_so
, PRIV_NET_PRIVILEGED_MULTIPATH
, TRUE
, TRUE
) == 0) {
276 os_log_error(mptcp_log_handle
, "%s - %lx: MPTCP prohibited on svc %u\n",
277 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mpte
->mpte_svctype
);
283 * Common subroutine to open a MPTCP connection to one of the remote hosts
284 * specified by dst_sl. This includes allocating and establishing a
285 * subflow TCP connection, either initially to establish MPTCP connection,
286 * or to join an existing one. Returns a connection handle upon success.
289 mptcp_connectx(struct mptses
*mpte
, struct sockaddr
*src
,
290 struct sockaddr
*dst
, uint32_t ifscope
, sae_connid_t
*pcid
)
295 VERIFY(pcid
!= NULL
);
297 error
= mptcp_subflow_add(mpte
, src
, dst
, ifscope
, pcid
);
303 * User-protocol pru_connectx callback.
306 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr
*src
,
307 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
308 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
309 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
311 #pragma unused(p, aid, flags, arg, arglen)
312 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
313 struct mptses
*mpte
= NULL
;
314 struct mptcb
*mp_tp
= NULL
;
315 user_ssize_t datalen
;
318 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
319 os_log_error(mptcp_log_handle
, "%s - %lx: state %d\n",
320 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
321 mpp
? mpp
->mpp_state
: -1);
325 mpte
= mptompte(mpp
);
326 mp_tp
= mpte
->mpte_mptcb
;
328 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
329 os_log_error(mptcp_log_handle
, "%s - %lx: fell back to TCP\n",
330 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
));
335 if (dst
->sa_family
!= AF_INET
&& dst
->sa_family
!= AF_INET6
) {
336 error
= EAFNOSUPPORT
;
340 if (dst
->sa_family
== AF_INET
&&
341 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v4
)) {
342 os_log_error(mptcp_log_handle
, "%s - %lx: IPv4 dst len %u\n",
343 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), dst
->sa_len
);
348 if (dst
->sa_family
== AF_INET6
&&
349 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v6
)) {
350 os_log_error(mptcp_log_handle
, "%s - %lx: IPv6 dst len %u\n",
351 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), dst
->sa_len
);
356 if (!(mpte
->mpte_flags
& MPTE_SVCTYPE_CHECKED
)) {
357 if (mptcp_entitlement_check(mp_so
) < 0) {
362 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
365 if ((mp_so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
)) == 0) {
366 memcpy(&mpte
->mpte_u_dst
, dst
, dst
->sa_len
);
370 if (src
->sa_family
!= AF_INET
&& src
->sa_family
!= AF_INET6
) {
371 error
= EAFNOSUPPORT
;
375 if (src
->sa_family
== AF_INET
&&
376 src
->sa_len
!= sizeof(mpte
->__mpte_src_v4
)) {
377 os_log_error(mptcp_log_handle
, "%s - %lx: IPv4 src len %u\n",
378 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), src
->sa_len
);
383 if (src
->sa_family
== AF_INET6
&&
384 src
->sa_len
!= sizeof(mpte
->__mpte_src_v6
)) {
385 os_log_error(mptcp_log_handle
, "%s - %lx: IPv6 src len %u\n",
386 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), src
->sa_len
);
391 if ((mp_so
->so_state
& (SS_ISCONNECTED
| SS_ISCONNECTING
)) == 0) {
392 memcpy(&mpte
->mpte_u_src
, src
, src
->sa_len
);
396 error
= mptcp_connectx(mpte
, src
, dst
, ifscope
, pcid
);
398 /* If there is data, copy it */
400 datalen
= uio_resid(auio
);
401 socket_unlock(mp_so
, 0);
402 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
403 (uio_t
) auio
, NULL
, NULL
, 0);
405 if (error
== 0 || error
== EWOULDBLOCK
) {
406 *bytes_written
= datalen
- uio_resid(auio
);
409 if (error
== EWOULDBLOCK
) {
413 socket_lock(mp_so
, 0);
421 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
424 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
426 /* MPTCP has at most 1 association */
427 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
429 /* just asking how many there are? */
430 if (aidp
== USER_ADDR_NULL
) {
434 return copyout(&mpte
->mpte_associd
, aidp
,
435 sizeof(mpte
->mpte_associd
));
439 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
442 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
448 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
449 aid
!= mpte
->mpte_associd
) {
453 *cnt
= mpte
->mpte_numflows
;
455 /* just asking how many there are? */
456 if (cidp
== USER_ADDR_NULL
) {
460 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
461 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
462 sizeof(mpts
->mpts_connid
))) != 0) {
466 cidp
+= sizeof(mpts
->mpts_connid
);
473 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
476 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
477 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
478 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
479 user_addr_t aux_data
, uint32_t *aux_len
)
486 /* MPTCP-level global stats */
487 if (*cid
== SAE_CONNID_ALL
) {
488 struct socket
*mp_so
= mptetoso(mpte
);
489 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
490 struct conninfo_multipathtcp mptcp_ci
;
493 if (*aux_len
!= 0 && *aux_len
!= sizeof(mptcp_ci
)) {
497 if (mp_so
->so_state
& SS_ISCONNECTING
) {
498 *flags
|= CIF_CONNECTING
;
500 if (mp_so
->so_state
& SS_ISCONNECTED
) {
501 *flags
|= CIF_CONNECTED
;
503 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
504 *flags
|= CIF_DISCONNECTING
;
506 if (mp_so
->so_state
& SS_ISDISCONNECTED
) {
507 *flags
|= CIF_DISCONNECTED
;
509 if (!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
)) {
510 *flags
|= CIF_MP_CAPABLE
;
512 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
513 *flags
|= CIF_MP_DEGRADED
;
519 *aux_type
= CIAUX_MPTCP
;
520 *aux_len
= sizeof(mptcp_ci
);
522 if (aux_data
!= USER_ADDR_NULL
) {
523 const struct mptsub
*mpts
;
524 int initial_info_set
= 0;
527 bzero(&mptcp_ci
, sizeof(mptcp_ci
));
528 mptcp_ci
.mptcpci_subflow_count
= mpte
->mpte_numflows
;
529 mptcp_ci
.mptcpci_switch_count
= mpte
->mpte_subflow_switches
;
531 VERIFY(sizeof(mptcp_ci
.mptcpci_itfstats
) == sizeof(mpte
->mpte_itfstats
));
532 memcpy(mptcp_ci
.mptcpci_itfstats
, mpte
->mpte_itfstats
, sizeof(mptcp_ci
.mptcpci_itfstats
));
534 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
535 if (i
>= sizeof(mptcp_ci
.mptcpci_subflow_connids
) / sizeof(sae_connid_t
)) {
538 mptcp_ci
.mptcpci_subflow_connids
[i
] = mpts
->mpts_connid
;
540 if (mpts
->mpts_flags
& MPTSF_INITIAL_SUB
) {
541 const struct inpcb
*inp
;
543 inp
= sotoinpcb(mpts
->mpts_socket
);
545 mptcp_ci
.mptcpci_init_rxbytes
= inp
->inp_stat
->rxbytes
;
546 mptcp_ci
.mptcpci_init_txbytes
= inp
->inp_stat
->txbytes
;
547 initial_info_set
= 1;
550 mptcpstats_update(mptcp_ci
.mptcpci_itfstats
, mpts
);
555 if (initial_info_set
== 0) {
556 mptcp_ci
.mptcpci_init_rxbytes
= mpte
->mpte_init_rxbytes
;
557 mptcp_ci
.mptcpci_init_txbytes
= mpte
->mpte_init_txbytes
;
560 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
) {
561 mptcp_ci
.mptcpci_flags
|= MPTCPCI_FIRSTPARTY
;
564 error
= copyout(&mptcp_ci
, aux_data
, sizeof(mptcp_ci
));
566 os_log_error(mptcp_log_handle
, "%s - %lx: copyout failed: %d\n",
567 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
575 /* Any stats of any subflow */
576 if (*cid
== SAE_CONNID_ANY
) {
577 const struct mptsub
*mpts
;
579 const struct inpcb
*inp
;
582 mpts
= TAILQ_FIRST(&mpte
->mpte_subflows
);
587 so
= mpts
->mpts_socket
;
590 if (inp
->inp_vflag
& INP_IPV4
) {
591 error
= in_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
592 soerror
, src
, src_len
, dst
, dst_len
,
593 aux_type
, aux_data
, aux_len
);
595 error
= in6_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
596 soerror
, src
, src_len
, dst
, dst_len
,
597 aux_type
, aux_data
, aux_len
);
601 os_log_error(mptcp_log_handle
, "%s - %lx:error from in_getconninfo %d\n",
602 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
606 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
607 *flags
|= CIF_MP_CAPABLE
;
609 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
610 *flags
|= CIF_MP_DEGRADED
;
612 if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
613 *flags
|= CIF_MP_READY
;
615 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
616 *flags
|= CIF_MP_ACTIVE
;
621 /* Per-interface stats */
622 const struct mptsub
*mpts
, *orig_mpts
;
623 struct conninfo_tcp tcp_ci
;
624 const struct inpcb
*inp
;
629 bzero(&tcp_ci
, sizeof(tcp_ci
));
631 /* First, get a subflow to fill in the "regular" info. */
632 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
633 const struct ifnet
*ifp
= sotoinpcb(mpts
->mpts_socket
)->inp_last_outifp
;
635 if (ifp
&& ifp
->if_index
== *cid
) {
641 /* No subflow there - well, let's just get the basic itf-info */
645 so
= mpts
->mpts_socket
;
648 /* Give it USER_ADDR_NULL, because we are doing this on our own */
649 if (inp
->inp_vflag
& INP_IPV4
) {
650 error
= in_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
651 soerror
, src
, src_len
, dst
, dst_len
,
652 aux_type
, USER_ADDR_NULL
, aux_len
);
654 error
= in6_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
655 soerror
, src
, src_len
, dst
, dst_len
,
656 aux_type
, USER_ADDR_NULL
, aux_len
);
660 os_log_error(mptcp_log_handle
, "%s - %lx:error from in_getconninfo %d\n",
661 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), error
);
665 /* ToDo: Nobody is reading these flags on subflows. Why bother ? */
666 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
667 *flags
|= CIF_MP_CAPABLE
;
669 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
) {
670 *flags
|= CIF_MP_DEGRADED
;
672 if (mpts
->mpts_flags
& MPTSF_MP_READY
) {
673 *flags
|= CIF_MP_READY
;
675 if (mpts
->mpts_flags
& MPTSF_ACTIVE
) {
676 *flags
|= CIF_MP_ACTIVE
;
680 * Now, we gather the metrics (aka., tcp_info) and roll them in
681 * across all subflows of this interface to build an aggregated
684 * We take the TCP_INFO from the first subflow as the "master",
685 * feeding into those fields that we do not roll.
687 if (aux_data
!= USER_ADDR_NULL
) {
688 tcp_getconninfo(so
, &tcp_ci
);
691 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
692 const struct inpcb
*mptsinp
= sotoinpcb(mpts
->mpts_socket
);
693 const struct ifnet
*ifp
;
695 ifp
= mptsinp
->inp_last_outifp
;
697 if (ifp
== NULL
|| ifp
->if_index
!= *cid
|| mpts
== orig_mpts
) {
701 /* Roll the itf-stats into the tcp_info */
702 tcp_ci
.tcpci_tcp_info
.tcpi_txbytes
+=
703 mptsinp
->inp_stat
->txbytes
;
704 tcp_ci
.tcpci_tcp_info
.tcpi_rxbytes
+=
705 mptsinp
->inp_stat
->rxbytes
;
707 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_txbytes
+=
708 mptsinp
->inp_wstat
->txbytes
;
709 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_rxbytes
+=
710 mptsinp
->inp_wstat
->rxbytes
;
712 tcp_ci
.tcpci_tcp_info
.tcpi_wired_txbytes
+=
713 mptsinp
->inp_Wstat
->txbytes
;
714 tcp_ci
.tcpci_tcp_info
.tcpi_wired_rxbytes
+=
715 mptsinp
->inp_Wstat
->rxbytes
;
717 tcp_ci
.tcpci_tcp_info
.tcpi_cell_txbytes
+=
718 mptsinp
->inp_cstat
->txbytes
;
719 tcp_ci
.tcpci_tcp_info
.tcpi_cell_rxbytes
+=
720 mptsinp
->inp_cstat
->rxbytes
;
725 *aux_type
= CIAUX_TCP
;
727 *aux_len
= sizeof(tcp_ci
);
728 } else if (aux_data
!= USER_ADDR_NULL
) {
732 * Finally, old subflows might have been closed - we
733 * want this data as well, so grab it from the interface
736 create
= orig_mpts
!= NULL
;
739 * When we found a subflow, we are willing to create a stats-index
740 * because we have some data to return. If there isn't a subflow,
741 * nor anything in the stats, return EINVAL. Because the
742 * ifindex belongs to something that doesn't exist.
744 index
= mptcpstats_get_index_by_ifindex(mpte
->mpte_itfstats
, *cid
, false);
746 os_log_error(mptcp_log_handle
,
747 "%s - %lx: Asking for too many ifindex: %u subcount %u, mpts? %s\n",
748 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
749 *cid
, mpte
->mpte_numflows
,
750 orig_mpts
? "yes" : "no");
752 if (orig_mpts
== NULL
) {
756 struct mptcp_itf_stats
*stats
;
758 stats
= &mpte
->mpte_itfstats
[index
];
760 /* Roll the itf-stats into the tcp_info */
761 tcp_ci
.tcpci_tcp_info
.tcpi_last_outif
= *cid
;
762 tcp_ci
.tcpci_tcp_info
.tcpi_txbytes
+=
764 tcp_ci
.tcpci_tcp_info
.tcpi_rxbytes
+=
767 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_txbytes
+=
768 stats
->mpis_wifi_txbytes
;
769 tcp_ci
.tcpci_tcp_info
.tcpi_wifi_rxbytes
+=
770 stats
->mpis_wifi_rxbytes
;
772 tcp_ci
.tcpci_tcp_info
.tcpi_wired_txbytes
+=
773 stats
->mpis_wired_txbytes
;
774 tcp_ci
.tcpci_tcp_info
.tcpi_wired_rxbytes
+=
775 stats
->mpis_wired_rxbytes
;
777 tcp_ci
.tcpci_tcp_info
.tcpi_cell_txbytes
+=
778 stats
->mpis_cell_txbytes
;
779 tcp_ci
.tcpci_tcp_info
.tcpi_cell_rxbytes
+=
780 stats
->mpis_cell_rxbytes
;
783 *aux_len
= min(*aux_len
, sizeof(tcp_ci
));
784 error
= copyout(&tcp_ci
, aux_data
, *aux_len
);
795 * User-protocol pru_control callback.
798 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
799 struct ifnet
*ifp
, struct proc
*p
)
801 #pragma unused(ifp, p)
802 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
806 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
810 mpte
= mptompte(mpp
);
813 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
814 struct so_aidreq32 aidr
;
815 bcopy(data
, &aidr
, sizeof(aidr
));
816 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
819 bcopy(&aidr
, data
, sizeof(aidr
));
824 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
825 struct so_aidreq64 aidr
;
826 bcopy(data
, &aidr
, sizeof(aidr
));
827 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
830 bcopy(&aidr
, data
, sizeof(aidr
));
835 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
836 struct so_cidreq32 cidr
;
837 bcopy(data
, &cidr
, sizeof(cidr
));
838 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
841 bcopy(&cidr
, data
, sizeof(cidr
));
846 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
847 struct so_cidreq64 cidr
;
848 bcopy(data
, &cidr
, sizeof(cidr
));
849 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
852 bcopy(&cidr
, data
, sizeof(cidr
));
857 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
858 struct so_cinforeq32 cifr
;
859 bcopy(data
, &cifr
, sizeof(cifr
));
860 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
861 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
862 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
863 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
866 bcopy(&cifr
, data
, sizeof(cifr
));
871 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
872 struct so_cinforeq64 cifr
;
873 bcopy(data
, &cifr
, sizeof(cifr
));
874 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
875 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
876 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
877 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
880 bcopy(&cifr
, data
, sizeof(cifr
));
894 mptcp_disconnect(struct mptses
*mpte
)
896 struct socket
*mp_so
;
900 mp_so
= mptetoso(mpte
);
901 mp_tp
= mpte
->mpte_mptcb
;
903 DTRACE_MPTCP3(disconnectx
, struct mptses
*, mpte
,
904 struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
906 /* if we're not detached, go thru socket state checks */
907 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
908 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
913 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
919 mptcp_cancel_all_timers(mp_tp
);
920 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
921 mptcp_close(mpte
, mp_tp
);
922 } else if ((mp_so
->so_options
& SO_LINGER
) &&
923 mp_so
->so_linger
== 0) {
924 mptcp_drop(mpte
, mp_tp
, 0);
926 soisdisconnecting(mp_so
);
927 sbflush(&mp_so
->so_rcv
);
928 if (mptcp_usrclosed(mpte
) != NULL
) {
934 mptcp_subflow_workloop(mpte
);
/*
 * Wrapper function to support disconnect on socket: resolves the MPTCP
 * session of mp_so and hands it to mptcp_disconnect().
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
951 * User-protocol pru_disconnectx callback.
954 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
956 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
) {
960 if (cid
!= SAE_CONNID_ANY
&& cid
!= SAE_CONNID_ALL
) {
964 return mptcp_usr_disconnect(mp_so
);
968 mptcp_finish_usrclosed(struct mptses
*mpte
)
970 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
971 struct socket
*mp_so
= mptetoso(mpte
);
973 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
974 mpte
= mptcp_close(mpte
, mp_tp
);
975 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
976 soisdisconnected(mp_so
);
980 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
981 if ((mp_so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
982 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
983 mptcp_subflow_disconnect(mpte
, mpts
);
985 mptcp_subflow_shutdown(mpte
, mpts
);
992 * User issued close, and wish to trail thru shutdown states.
994 static struct mptses
*
995 mptcp_usrclosed(struct mptses
*mpte
)
997 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
999 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
1001 /* Not everything has been acknowledged - don't close the subflows! */
1002 if (mp_tp
->mpt_sndnxt
+ 1 != mp_tp
->mpt_sndmax
) {
1006 mptcp_finish_usrclosed(mpte
);
1012 * After a receive, possible send some update to peer.
1015 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
1017 #pragma unused(flags)
1018 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1019 struct mptses
*mpte
;
1020 struct mptsub
*mpts
;
1023 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1028 mpte
= mptompte(mpp
);
1030 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1031 struct socket
*so
= mpts
->mpts_socket
;
1033 if (so
->so_proto
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
!= NULL
) {
1034 (*so
->so_proto
->pr_usrreqs
->pru_rcvd
)(so
, 0);
1038 error
= mptcp_output(mpte
);
1044 * Do a send by putting data in the output queue.
1047 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
1048 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
1050 #pragma unused(nam, p)
1051 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1052 struct mptses
*mpte
;
1055 if (prus_flags
& (PRUS_OOB
| PRUS_EOF
)) {
1065 if (control
!= NULL
&& control
->m_len
!= 0) {
1070 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1074 mpte
= mptompte(mpp
);
1075 VERIFY(mpte
!= NULL
);
1077 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
1078 !(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)) {
1083 mptcp_insert_dsn(mpp
, m
);
1084 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
1085 sbappendstream(&mp_so
->so_snd
, m
);
1088 error
= mptcp_output(mpte
);
1093 if (mp_so
->so_state
& SS_ISCONNECTING
) {
1094 if (mp_so
->so_state
& SS_NBIO
) {
1095 error
= EWOULDBLOCK
;
1097 error
= sbwait(&mp_so
->so_snd
);
1106 if (control
!= NULL
) {
1114 * Mark the MPTCP connection as being incapable of further output.
1117 mptcp_usr_shutdown(struct socket
*mp_so
)
1119 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1120 struct mptses
*mpte
;
1123 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1127 mpte
= mptompte(mpp
);
1128 VERIFY(mpte
!= NULL
);
1130 socantsendmore(mp_so
);
1132 mpte
= mptcp_usrclosed(mpte
);
1134 error
= mptcp_output(mpte
);
1141 * Copy the contents of uio into a properly sized mbuf chain.
1144 mptcp_uiotombuf(struct uio
*uio
, int how
, int space
, uint32_t align
,
1147 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
1148 user_ssize_t resid
, tot
, len
, progress
; /* must be user_ssize_t */
1151 VERIFY(top
!= NULL
&& *top
== NULL
);
1154 * space can be zero or an arbitrary large value bound by
1155 * the total data supplied by the uio.
1157 resid
= uio_resid(uio
);
1159 tot
= imin(resid
, space
);
1165 * The smallest unit is a single mbuf with pkthdr.
1166 * We can't align past it.
1168 if (align
>= MHLEN
) {
1173 * Give us the full allocation or nothing.
1174 * If space is zero return the smallest empty mbuf.
1176 if ((len
= tot
+ align
) == 0) {
1180 /* Loop and append maximum sized mbufs to the chain tail. */
1182 uint32_t m_needed
= 1;
1184 if (njcl
> 0 && len
> MBIGCLBYTES
) {
1185 mb
= m_getpackets_internal(&m_needed
, 1,
1186 how
, 1, M16KCLBYTES
);
1187 } else if (len
> MCLBYTES
) {
1188 mb
= m_getpackets_internal(&m_needed
, 1,
1189 how
, 1, MBIGCLBYTES
);
1190 } else if (len
>= (signed)MINCLSIZE
) {
1191 mb
= m_getpackets_internal(&m_needed
, 1,
1194 mb
= m_gethdr(how
, MT_DATA
);
1197 /* Fail the whole operation if one mbuf can't be allocated. */
1206 VERIFY(mb
->m_flags
& M_PKTHDR
);
1207 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1208 if (mtail
!= NULL
) {
1220 /* Fill all mbufs with uio data and update header information. */
1221 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1222 len
= imin(M_TRAILINGSPACE(mb
), tot
- progress
);
1224 error
= uiomove(mtod(mb
, char *), len
, uio
);
1230 /* each mbuf is M_PKTHDR chained via m_next */
1232 mb
->m_pkthdr
.len
= len
;
1236 VERIFY(progress
== tot
);
1242 * MPTCP socket protocol-user socket send routine, derived from sosend().
1245 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1246 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1248 #pragma unused(addr)
1251 int error
, sendflags
;
1252 struct proc
*p
= current_proc();
1255 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1256 if (uio
== NULL
|| top
!= NULL
) {
1260 resid
= uio_resid(uio
);
1262 socket_lock(mp_so
, 1);
1263 so_update_last_owner_locked(mp_so
, p
);
1264 so_update_policy(mp_so
);
1266 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1267 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1269 if ((flags
& (MSG_OOB
| MSG_DONTROUTE
)) ||
1270 (mp_so
->so_flags
& SOF_ENABLE_MSGS
)) {
1272 socket_unlock(mp_so
, 1);
1277 * In theory resid should be unsigned. However, space must be
1278 * signed, as it might be less than 0 if we over-committed, and we
1279 * must use a signed comparison of space and resid. On the other
1280 * hand, a negative resid causes us to loop sending 0-length
1281 * segments to the protocol.
1283 if (resid
< 0 || (flags
& MSG_EOR
) || control
!= NULL
) {
1285 socket_unlock(mp_so
, 1);
1289 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1292 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1298 space
= sbspace(&mp_so
->so_snd
);
1300 socket_unlock(mp_so
, 0);
1302 * Copy the data from userland into an mbuf chain.
1304 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, 0, &top
);
1306 socket_lock(mp_so
, 0);
1309 VERIFY(top
!= NULL
);
1310 space
-= resid
- uio_resid(uio
);
1311 resid
= uio_resid(uio
);
1312 socket_lock(mp_so
, 0);
1315 * Compute flags here, for pru_send and NKEs.
1317 sendflags
= (resid
> 0 && space
> 0) ?
1318 PRUS_MORETOCOME
: 0;
1321 * Socket filter processing
1323 VERIFY(control
== NULL
);
1324 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1326 if (error
== EJUSTRETURN
) {
1329 /* always free control if any */
1333 if (control
!= NULL
) {
1339 * Pass data to protocol.
1341 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1342 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1348 } while (resid
!= 0 && space
> 0);
1349 } while (resid
!= 0);
1353 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1355 socket_unlock(mp_so
, 1);
1361 if (control
!= NULL
) {
1365 soclearfastopen(mp_so
);
1371 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1372 * This routine simply indicates to the caller whether or not to proceed
1373 * further with the given socket option. This is invoked by sosetoptlock()
1374 * and sogetoptlock().
/*
 * mptcp_usr_socheckopt: classify a SOL_SOCKET-level option issued on the
 * MP socket.
 *
 * mp_so is unused (see the pragma below); sopt describes the option.
 * The switch on sopt->sopt_name sorts option names into a group tagged
 * "MP", a group tagged "MP + subflow" whose value size is compared with
 * sizeof(int), and a final path that assigns ENOPROTOOPT to error.
 * NOTE(review): presumably error is the return value; the return
 * statement is not visible in this excerpt -- confirm.
 */
1377 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1379 #pragma unused(mp_so)
/* This routine is only meant for socket-level options. */
1382 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1385 * We could check for sopt_dir (set/get) here, but we'll just
1386 * let the caller deal with it as appropriate; therefore the
1387 * following is a superset of the socket options which we
1388 * allow for set/get.
1390 * XXX: adi@apple.com
1392 * Need to consider the following cases:
1394 * a. Certain socket options don't have a clear definition
1395 * on the expected behavior post connect(2). At the time
1396 * those options are issued on the MP socket, there may
1397 * be existing subflow sockets that are already connected.
1399 switch (sopt
->sopt_name
) {
/* First group: option names tagged as applying to the MP socket. */
1400 case SO_LINGER
: /* MP */
1401 case SO_LINGER_SEC
: /* MP */
1402 case SO_TYPE
: /* MP */
1403 case SO_NREAD
: /* MP */
1404 case SO_NWRITE
: /* MP */
1405 case SO_ERROR
: /* MP */
1406 case SO_SNDBUF
: /* MP */
1407 case SO_RCVBUF
: /* MP */
1408 case SO_SNDLOWAT
: /* MP */
1409 case SO_RCVLOWAT
: /* MP */
1410 case SO_SNDTIMEO
: /* MP */
1411 case SO_RCVTIMEO
: /* MP */
1412 case SO_NKE
: /* MP */
1413 case SO_NOSIGPIPE
: /* MP */
1414 case SO_NOADDRERR
: /* MP */
1415 case SO_LABEL
: /* MP */
1416 case SO_PEERLABEL
: /* MP */
1417 case SO_DEFUNCTIT
: /* MP */
1418 case SO_DEFUNCTOK
: /* MP */
1419 case SO_ISDEFUNCT
: /* MP */
1420 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1421 case SO_DELEGATED
: /* MP */
1422 case SO_DELEGATED_UUID
: /* MP */
1424 case SO_NECP_ATTRIBUTES
:
1425 case SO_NECP_CLIENTUUID
:
1427 case SO_MPKL_SEND_INFO
:
1429 * Tell the caller that these options are to be processed.
/* Second group: option names tagged as applying to MP and subflows. */
1433 case SO_DEBUG
: /* MP + subflow */
1434 case SO_KEEPALIVE
: /* MP + subflow */
1435 case SO_USELOOPBACK
: /* MP + subflow */
1436 case SO_RANDOMPORT
: /* MP + subflow */
1437 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1438 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1439 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1440 case SO_RECV_ANYIF
: /* MP + subflow */
1441 case SO_RESTRICTIONS
: /* MP + subflow */
1442 case SO_FLUSH
: /* MP + subflow */
1443 case SO_NOWAKEFROMSLEEP
:
1444 case SO_NOAPNFALLBK
:
1445 case SO_MARK_CELLFALLBACK
:
1447 * Tell the caller that these options are to be processed;
1448 * these will also be recorded later by mptcp_setopt().
1450 * NOTE: Only support integer option value for now.
/*
 * Size check for the second group: the value must be exactly one int.
 * NOTE(review): the statement executed on a mismatch is not visible in
 * this excerpt.
 */
1452 if (sopt
->sopt_valsize
!= sizeof(int)) {
1459 * Tell the caller to stop immediately and return an error.
/* Final path: the option is refused with ENOPROTOOPT. */
1461 error
= ENOPROTOOPT
;
1469 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
/*
 * mptcp_setopt_apply: issue the option described by mpo on the subflow
 * sockets of the session mpte.
 *
 * Visible flow:
 *   - options lacking MPOF_SUBFLOW_OK, and the SOL_SOCKET options
 *     SO_NOSIGPIPE and SO_NOADDRERR, get error = ENOPROTOOPT;
 *   - with no subflows, the option is only flagged MPOF_INTERIM;
 *   - otherwise three passes run over mpte_subflows: save each current
 *     value, issue the new value, then clean up and roll back as needed.
 * NOTE(review): the jumps and returns between these steps are not visible
 * in this excerpt -- confirm the exact control flow and return value.
 */
1472 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1474 struct socket
*mp_so
;
1475 struct mptsub
*mpts
;
1479 /* just bail now if this isn't applicable to subflow sockets */
1480 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1481 error
= ENOPROTOOPT
;
1486 * Skip those that are handled internally; these options
1487 * should not have been recorded and marked with the
1488 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1490 if (mpo
->mpo_level
== SOL_SOCKET
&&
1491 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1492 error
= ENOPROTOOPT
;
1496 mp_so
= mptetoso(mpte
);
1499 * Don't bother going further if there's no subflow; mark the option
1500 * with MPOF_INTERIM so that we know whether or not to remove this
1501 * option upon encountering an error while issuing it during subflow
1504 if (mpte
->mpte_numflows
== 0) {
1505 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1506 mpo
->mpo_flags
|= MPOF_INTERIM
;
1507 /* return success */
/*
 * smpo is a scratch option with the same level and name as mpo; it
 * receives the value read from each subflow and later carries the saved
 * value back during rollback.
 */
1511 bzero(&smpo
, sizeof(smpo
));
1512 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1513 smpo
.mpo_level
= mpo
->mpo_level
;
1514 smpo
.mpo_name
= mpo
->mpo_name
;
1516 /* grab existing values in case we need to rollback */
1517 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
/* Start each subflow from a clean slate before reading its value. */
1520 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
| MPTSF_SOPT_INPROG
);
1521 mpts
->mpts_oldintval
= 0;
1522 smpo
.mpo_intval
= 0;
1523 VERIFY(mpts
->mpts_socket
!= NULL
);
1524 so
= mpts
->mpts_socket
;
/* MPTSF_SOPT_OLDVAL marks that mpts_oldintval holds a value read back. */
1525 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1526 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1527 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1531 /* apply socket option */
1532 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
/* MPTSF_SOPT_INPROG marks subflows on which the set was attempted. */
1535 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1536 VERIFY(mpts
->mpts_socket
!= NULL
);
1537 so
= mpts
->mpts_socket
;
1538 error
= mptcp_subflow_sosetopt(mpte
, mpts
, mpo
);
1544 /* cleanup, and rollback if needed */
1545 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
/* Never marked in-progress by the apply pass: just drop any saved value. */
1548 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1549 /* clear in case it's set */
1550 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1551 mpts
->mpts_oldintval
= 0;
/* No saved value to restore: only the in-progress mark is cleared. */
1554 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1555 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1556 VERIFY(mpts
->mpts_oldintval
== 0);
1559 /* error during sosetopt, so roll it back */
1561 VERIFY(mpts
->mpts_socket
!= NULL
);
1562 so
= mpts
->mpts_socket
;
/* The rollback result is not assigned to anything here. */
1563 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1564 mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
);
1566 mpts
->mpts_oldintval
= 0;
1567 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
| MPTSF_SOPT_INPROG
);
1575 * Handle SOPT_SET for socket options issued on MP socket.
/*
 * mptcp_setopt: SOPT_SET handler for options issued on the MP socket.
 *
 * mpte is the MPTCP session; sopt carries the level, name and value.
 * Some options are stored directly in the session or the MP socket
 * (mpte_epid, mpte_euuid, necp_client_uuid, the SOF_NOTSENT_LOWAT flag,
 * mpte_svctype, mpte_alternate_port, MPTE_FORCE_ENABLE and
 * mpte_time_target are all written below).  Others are kept as
 * struct mptopt records, found with mptcp_sopt_find() or created with
 * mptcp_sopt_alloc(), and issued on the subflows through
 * mptcp_setopt_apply().
 * NOTE(review): several case labels and branch exits are not visible in
 * this excerpt; which option takes which path should be confirmed
 * against the full source.
 */
1578 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1580 int error
= 0, optval
= 0, level
, optname
, rec
= 1;
1581 struct mptopt smpo
, *mpo
= NULL
;
1582 struct socket
*mp_so
;
1584 level
= sopt
->sopt_level
;
1585 optname
= sopt
->sopt_name
;
1587 mp_so
= mptetoso(mpte
);
1590 * Record socket options which are applicable to subflow sockets so
1591 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1592 * for the list of eligible socket-level options.
1594 if (level
== SOL_SOCKET
) {
1598 case SO_USELOOPBACK
:
1600 case SO_TRAFFIC_CLASS
:
1601 case SO_RECV_TRAFFIC_CLASS
:
1602 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1604 case SO_RESTRICTIONS
:
1605 case SO_NOWAKEFROMSLEEP
:
1606 case SO_NOAPNFALLBK
:
1607 case SO_MARK_CELLFALLBACK
:
1611 /* don't record it */
1615 /* Next ones, record at MPTCP-level */
/* An int-sized value is copied straight into mpte->mpte_epid. */
1617 error
= sooptcopyin(sopt
, &mpte
->mpte_epid
,
1618 sizeof(int), sizeof(int));
1624 case SO_DELEGATED_UUID
:
1625 error
= sooptcopyin(sopt
, &mpte
->mpte_euuid
,
1626 sizeof(uuid_t
), sizeof(uuid_t
));
1633 case SO_NECP_CLIENTUUID
:
/*
 * Branch for a client UUID that is already non-null.
 * NOTE(review): the branch body is not visible in this excerpt.
 */
1634 if (!uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1639 error
= sooptcopyin(sopt
, &mpsotomppcb(mp_so
)->necp_client_uuid
,
1640 sizeof(uuid_t
), sizeof(uuid_t
));
/*
 * Install mptcp_session_necp_cb as the pcb callback, then call
 * necp_client_register_multipath_cb() with the last pid, the client
 * UUID and the pcb.
 */
1645 mpsotomppcb(mp_so
)->necp_cb
= mptcp_session_necp_cb
;
1646 error
= necp_client_register_multipath_cb(mp_so
->last_pid
,
1647 mpsotomppcb(mp_so
)->necp_client_uuid
,
1648 mpsotomppcb(mp_so
));
1653 if (uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1659 case SO_NECP_ATTRIBUTES
:
1662 /* nothing to do; just return */
1668 case TCP_RXT_FINDROP
:
1672 case TCP_CONNECTIONTIMEOUT
:
1673 case TCP_RXT_CONNDROPTIME
:
1674 case PERSIST_TIMEOUT
:
1675 case TCP_ADAPTIVE_READ_TIMEOUT
:
1676 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1677 /* eligible; record it */
1679 case TCP_NOTSENT_LOWAT
:
1680 /* record at MPTCP level */
1681 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/*
 * Two outcomes are visible: SOF_NOTSENT_LOWAT is cleared together with a
 * zero low-water mark, or the flag is set and a low-water mark is applied.
 * NOTE(review): the selecting condition and the second argument of the
 * latter call are not visible in this excerpt.
 */
1691 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1692 error
= mptcp_set_notsent_lowat(mpte
, 0);
1694 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1695 error
= mptcp_set_notsent_lowat(mpte
,
1704 case MPTCP_SERVICE_TYPE
:
1705 /* record at MPTCP level */
1706 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/*
 * Range test: this branch is entered for values outside
 * 0 .. MPTCP_SVCTYPE_MAX - 1 (branch body not visible; presumably an
 * error -- confirm).
 */
1711 if (optval
< 0 || optval
>= MPTCP_SVCTYPE_MAX
) {
1716 mpte
->mpte_svctype
= optval
;
1718 if (mptcp_entitlement_check(mp_so
) < 0) {
1723 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
1726 case MPTCP_ALTERNATE_PORT
:
1727 /* record at MPTCP level */
1728 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/* Range test: this branch is entered for values outside 0 .. UINT16_MAX. */
1734 if (optval
< 0 || optval
> UINT16_MAX
) {
1739 mpte
->mpte_alternate_port
= optval
;
1742 case MPTCP_FORCE_ENABLE
:
1743 /* record at MPTCP level */
1744 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/* Range test: this branch is entered for any value other than 0 or 1. */
1750 if (optval
< 0 || optval
> 1) {
1756 mpte
->mpte_flags
|= MPTE_FORCE_ENABLE
;
1758 mpte
->mpte_flags
&= ~MPTE_FORCE_ENABLE
;
1762 case MPTCP_EXPECTED_PROGRESS_TARGET
:
1764 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1765 uint64_t mach_time_target
;
1766 uint64_t nanoseconds
;
/* A service type other than MPTCP_SVCTYPE_TARGET_BASED is logged here. */
1768 if (mpte
->mpte_svctype
!= MPTCP_SVCTYPE_TARGET_BASED
) {
1769 os_log(mptcp_log_handle
, "%s - %lx: Can't set urgent activity when svctype is %u\n",
1770 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mpte
->mpte_svctype
);
1775 error
= sooptcopyin(sopt
, &mach_time_target
, sizeof(mach_time_target
), sizeof(mach_time_target
));
1780 if (!mptcp_ok_to_create_subflows(mp_tp
)) {
1781 os_log(mptcp_log_handle
, "%s - %lx: Not ok to create subflows, state %u flags %#x\n",
1782 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
), mp_tp
->mpt_state
, mp_tp
->mpt_flags
);
/*
 * For a non-zero target: convert it to nanoseconds, subtract
 * mptcp_expected_progress_headstart * NSEC_PER_MSEC, and convert the
 * result back into mach_time_target.  The subtraction is on uint64_t
 * values with no visible lower-bound check.
 */
1787 if (mach_time_target
) {
1788 uint64_t time_now
= 0;
1789 uint64_t time_now_nanoseconds
;
1791 absolutetime_to_nanoseconds(mach_time_target
, &nanoseconds
);
1792 nanoseconds
= nanoseconds
- (mptcp_expected_progress_headstart
* NSEC_PER_MSEC
);
1794 time_now
= mach_continuous_time();
1795 absolutetime_to_nanoseconds(time_now
, &time_now_nanoseconds
);
1797 nanoseconds_to_absolutetime(nanoseconds
, &mach_time_target
);
1798 /* If the timer is already running and it would
1799 * fire in less than mptcp_expected_progress_headstart
1800 * seconds, then it's not worth canceling it.
1802 if (mpte
->mpte_time_target
&&
1803 mpte
->mpte_time_target
< time_now
&&
1804 time_now_nanoseconds
> nanoseconds
- (mptcp_expected_progress_headstart
* NSEC_PER_MSEC
)) {
1805 os_log(mptcp_log_handle
, "%s - %lx: Not rescheduling timer %llu now %llu target %llu\n",
1806 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1807 mpte
->mpte_time_target
,
1814 mpte
->mpte_time_target
= mach_time_target
;
1815 mptcp_set_urgency_timer(mpte
);
1821 error
= ENOPROTOOPT
;
1826 if ((error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1827 sizeof(optval
))) != 0) {
1832 /* search for an existing one; if not found, allocate */
1833 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
) {
1834 mpo
= mptcp_sopt_alloc(M_WAITOK
);
1841 /* initialize or update, as needed */
1842 mpo
->mpo_intval
= optval
;
1843 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1844 mpo
->mpo_level
= level
;
1845 mpo
->mpo_name
= optname
;
1846 mptcp_sopt_insert(mpte
, mpo
);
1848 /* this can be issued on the subflow socket */
1849 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
/*
 * Alternative path: smpo is zeroed and the option is described through
 * mpo without touching the recorded list.
 * NOTE(review): presumably mpo points at smpo here (see the VERIFY
 * further down); the assignment is not visible in this excerpt.
 */
1852 bzero(&smpo
, sizeof(smpo
));
1854 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1855 mpo
->mpo_level
= level
;
1856 mpo
->mpo_name
= optname
;
1857 mpo
->mpo_intval
= optval
;
1860 /* issue this socket option on existing subflows */
1861 error
= mptcp_setopt_apply(mpte
, mpo
);
/* On failure, a record attached to the session is unlinked and freed. */
1862 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1863 VERIFY(mpo
!= &smpo
);
1864 mptcp_sopt_remove(mpte
, mpo
);
1865 mptcp_sopt_free(mpo
);
/* MPOF_INTERIM is set by mptcp_setopt_apply() when there are no subflows. */
1868 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
1880 os_log_error(mptcp_log_handle
, "%s - %lx: sopt %s (%d, %d) val %d can't be issued error %d\n",
1881 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
),
1882 mptcp_sopt2str(level
, optname
), level
, optname
, optval
, error
);
/*
 * mptcp_fill_info_bytestats: accumulate byte counters into ti.
 *
 * The first loop adds the tx/rx byte counts of every subflow socket on
 * mpte->mpte_subflows (overall, cellular, wifi and wired counters taken
 * from inp_stat, inp_cstat, inp_wstat and inp_Wstat).  The second loop
 * adds the counters of each of the MPTCP_ITFSTATS_SIZE entries in
 * mpte->mpte_itfstats.  All fields are updated with +=, so the totals
 * build on whatever ti already holds.
 * NOTE(review): any guard on inp between its lookup and first use is not
 * visible in this excerpt.
 */
1887 mptcp_fill_info_bytestats(struct tcp_info
*ti
, struct mptses
*mpte
)
1889 struct mptsub
*mpts
;
/* Pass 1: counters of the subflow sockets. */
1892 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1893 const struct inpcb
*inp
= sotoinpcb(mpts
->mpts_socket
);
1899 ti
->tcpi_txbytes
+= inp
->inp_stat
->txbytes
;
1900 ti
->tcpi_rxbytes
+= inp
->inp_stat
->rxbytes
;
1901 ti
->tcpi_cell_txbytes
+= inp
->inp_cstat
->txbytes
;
1902 ti
->tcpi_cell_rxbytes
+= inp
->inp_cstat
->rxbytes
;
1903 ti
->tcpi_wifi_txbytes
+= inp
->inp_wstat
->txbytes
;
1904 ti
->tcpi_wifi_rxbytes
+= inp
->inp_wstat
->rxbytes
;
1905 ti
->tcpi_wired_txbytes
+= inp
->inp_Wstat
->txbytes
;
1906 ti
->tcpi_wired_rxbytes
+= inp
->inp_Wstat
->rxbytes
;
/* Pass 2: counters kept in the session's interface statistics table. */
1909 for (i
= 0; i
< MPTCP_ITFSTATS_SIZE
; i
++) {
1910 struct mptcp_itf_stats
*stats
= &mpte
->mpte_itfstats
[i
];
1912 ti
->tcpi_txbytes
+= stats
->mpis_txbytes
;
1913 ti
->tcpi_rxbytes
+= stats
->mpis_rxbytes
;
1915 ti
->tcpi_wifi_txbytes
+= stats
->mpis_wifi_txbytes
;
1916 ti
->tcpi_wifi_rxbytes
+= stats
->mpis_wifi_rxbytes
;
1918 ti
->tcpi_wired_txbytes
+= stats
->mpis_wired_txbytes
;
1919 ti
->tcpi_wired_rxbytes
+= stats
->mpis_wired_rxbytes
;
1921 ti
->tcpi_cell_txbytes
+= stats
->mpis_cell_txbytes
;
1922 ti
->tcpi_cell_rxbytes
+= stats
->mpis_cell_rxbytes
;
/*
 * mptcp_fill_info: populate *ti for the MPTCP connection of mpte.
 *
 * ti is zeroed first.  The state, send window and next send/receive
 * sequence values come from the MPTCP control block (mp_tp).  RTO, RTT
 * figures, the last output interface index, the out-of-order packet count
 * and the retransmitted packet count come from the TCP control block of
 * the active subflow (acttp).  Byte counters are added by
 * mptcp_fill_info_bytestats().  The bare comments naming tcpi_* fields
 * mark fields for which no assignment is visible here.
 * NOTE(review): acttp is initialized to NULL and dereferenced below; the
 * conditions guarding those uses are not visible in this excerpt.
 */
1927 mptcp_fill_info(struct mptses
*mpte
, struct tcp_info
*ti
)
1929 struct mptsub
*actsub
= mpte
->mpte_active_sub
;
1930 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
1931 struct tcpcb
*acttp
= NULL
;
1934 acttp
= sototcpcb(actsub
->mpts_socket
);
1937 bzero(ti
, sizeof(*ti
));
1939 ti
->tcpi_state
= mp_tp
->mpt_state
;
1941 /* tcpi_snd_wscale */
1942 /* tcpi_rcv_wscale */
/* t_rxtcur is reported only while the TCPT_REXMT timer slot is non-zero. */
1945 ti
->tcpi_rto
= acttp
->t_timer
[TCPT_REXMT
] ? acttp
->t_rxtcur
: 0;
1951 ti
->tcpi_rttcur
= acttp
->t_rttcur
;
/*
 * srtt and rttbest are shifted right by TCP_RTT_SHIFT, and rttvar by
 * TCP_RTTVAR_SHIFT, before being reported.
 */
1952 ti
->tcpi_srtt
= acttp
->t_srtt
>> TCP_RTT_SHIFT
;
1953 ti
->tcpi_rttvar
= acttp
->t_rttvar
>> TCP_RTTVAR_SHIFT
;
1954 ti
->tcpi_rttbest
= acttp
->t_rttbest
>> TCP_RTT_SHIFT
;
1956 /* tcpi_snd_ssthresh */
1958 /* tcpi_rcv_space */
1959 ti
->tcpi_snd_wnd
= mp_tp
->mpt_sndwnd
;
1960 ti
->tcpi_snd_nxt
= mp_tp
->mpt_sndnxt
;
1961 ti
->tcpi_rcv_nxt
= mp_tp
->mpt_rcvnxt
;
/* Index of the last output interface, or 0 when none is recorded. */
1963 ti
->tcpi_last_outif
= (acttp
->t_inpcb
->inp_last_outifp
== NULL
) ? 0 :
1964 acttp
->t_inpcb
->inp_last_outifp
->if_index
;
1967 mptcp_fill_info_bytestats(ti
, mpte
);
1968 /* tcpi_txpackets */
1970 /* tcpi_txretransmitbytes */
1971 /* tcpi_txunacked */
1972 /* tcpi_rxpackets */
1974 /* tcpi_rxduplicatebytes */
1975 /* tcpi_rxoutoforderbytes */
1977 /* tcpi_synrexmits */
1980 /* tcpi_cell_rxpackets */
1982 /* tcpi_cell_txpackets */
1984 /* tcpi_wifi_rxpackets */
1986 /* tcpi_wifi_txpackets */
1988 /* tcpi_wired_rxpackets */
1989 /* tcpi_wired_txpackets */
1990 /* tcpi_connstatus */
1993 /* tcpi_ecn_recv_ce */
1994 /* tcpi_ecn_recv_cwr */
1996 ti
->tcpi_rcvoopack
= acttp
->t_rcvoopack
;
1999 /* tcpi_sack_recovery_episode */
2000 /* tcpi_reordered_pkts */
2001 /* tcpi_dsack_sent */
2002 /* tcpi_dsack_recvd */
2005 ti
->tcpi_txretransmitpackets
= acttp
->t_stat
.rxmitpkts
;
2010 * Handle SOPT_GET for socket options issued on MP socket.
/*
 * mptcp_getopt: SOPT_GET handler for options issued on the MP socket.
 *
 * mpte is the MPTCP session and sopt the request.  Any level other than
 * IPPROTO_TCP gets error = ENOPROTOOPT.  Integer results are staged in
 * optval and copied out with sooptcopyout() as an int; a struct tcp_info
 * built by mptcp_fill_info() and mpte->mpte_time_target are copied out
 * directly by their own cases.
 * NOTE(review): some case labels and the exits of each case are not
 * visible in this excerpt -- confirm which options reach the final
 * copy-out and what is returned.
 */
2013 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
2015 int error
= 0, optval
= 0;
2018 * We only handle SOPT_GET for TCP level socket options; we should
2019 * not get here for socket level options since they are already
2020 * handled at the socket layer.
2022 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
2023 error
= ENOPROTOOPT
;
2027 switch (sopt
->sopt_name
) {
2028 case PERSIST_TIMEOUT
:
2029 /* Only case for which we have a non-zero default */
2030 optval
= tcp_max_persist_timeout
;
2032 case TCP_RXT_FINDROP
:
2036 case TCP_CONNECTIONTIMEOUT
:
2037 case TCP_RXT_CONNDROPTIME
:
2038 case TCP_ADAPTIVE_READ_TIMEOUT
:
2039 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
/*
 * Look up the recorded option; its stored integer becomes the result.
 * NOTE(review): the handling of a failed lookup is not visible here.
 */
2041 struct mptopt
*mpo
= mptcp_sopt_find(mpte
, sopt
);
2044 optval
= mpo
->mpo_intval
;
2049 /* The next ones are stored at the MPTCP-level */
2050 case TCP_NOTSENT_LOWAT
:
/* The low-water mark is fetched when SOF_NOTSENT_LOWAT is set on the MP socket. */
2051 if (mptetoso(mpte
)->so_flags
& SOF_NOTSENT_LOWAT
) {
2052 optval
= mptcp_get_notsent_lowat(mpte
);
/* A struct tcp_info is filled in and copied out whole. */
2061 mptcp_fill_info(mpte
, &ti
);
2062 error
= sooptcopyout(sopt
, &ti
, sizeof(struct tcp_info
));
2066 case MPTCP_SERVICE_TYPE
:
2067 optval
= mpte
->mpte_svctype
;
2069 case MPTCP_ALTERNATE_PORT
:
2070 optval
= mpte
->mpte_alternate_port
;
2072 case MPTCP_FORCE_ENABLE
:
/* The double negation reduces the flag test to 0 or 1. */
2073 optval
= !!(mpte
->mpte_flags
& MPTE_FORCE_ENABLE
);
2075 case MPTCP_EXPECTED_PROGRESS_TARGET
:
2076 error
= sooptcopyout(sopt
, &mpte
->mpte_time_target
, sizeof(mpte
->mpte_time_target
));
2081 error
= ENOPROTOOPT
;
/* Integer-valued results are returned to the caller as a single int. */
2086 error
= sooptcopyout(sopt
, &optval
, sizeof(int));
2094 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
2095 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
2096 * to those that are allowed by mptcp_usr_socheckopt().
/*
 * mptcp_ctloutput: entry point for socket options issued on the MP socket.
 *
 * Tests whether the protocol control block is missing or in
 * MPPCB_STATE_DEAD, asserts that the socket lock is owned, tests whether
 * the level is something other than SOL_SOCKET or IPPROTO_TCP, and then
 * dispatches on sopt->sopt_dir to mptcp_setopt() or mptcp_getopt().
 * NOTE(review): the bodies of the two early tests (presumably error
 * exits) and the return statement are not visible in this excerpt.
 */
2099 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
2101 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
2102 struct mptses
*mpte
;
2105 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
/* The session is derived from the pcb only after the test above. */
2109 mpte
= mptompte(mpp
);
2110 socket_lock_assert_owned(mp_so
);
2112 /* we only handle socket and TCP-level socket options for MPTCP */
2113 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
/* Direction selects the set handler or the get handler. */
2118 switch (sopt
->sopt_dir
) {
2120 error
= mptcp_setopt(mpte
, sopt
);
2124 error
= mptcp_getopt(mpte
, sopt
);
/*
 * mptcp_sopt2str: return a printable name for a socket option.
 *
 * level and optname identify the option; every visible case returns a
 * string literal.  Most strings repeat the constant name, but a few do
 * not: SO_MARK_CELLFALLBACK yields SO_CELLFALLBACK, and
 * TCP_NOTSENT_LOWAT, TCP_ADAPTIVE_READ_TIMEOUT and
 * TCP_ADAPTIVE_WRITE_TIMEOUT are reported without the TCP_ prefix.
 * mptcp_setopt() passes the result to a %s log format.
 * NOTE(review): how level is used, and the result for an unrecognized
 * option, are not visible in this excerpt.
 */
2132 mptcp_sopt2str(int level
, int optname
)
/* Socket-level (SO_*) option names. */
2140 return "SO_LINGER_SEC";
2144 return "SO_KEEPALIVE";
2145 case SO_USELOOPBACK
:
2146 return "SO_USELOOPBACK";
2160 return "SO_SNDLOWAT";
2162 return "SO_RCVLOWAT";
2164 return "SO_SNDTIMEO";
2166 return "SO_RCVTIMEO";
2170 return "SO_NOSIGPIPE";
2172 return "SO_NOADDRERR";
2173 case SO_RESTRICTIONS
:
2174 return "SO_RESTRICTIONS";
2178 return "SO_PEERLABEL";
2180 return "SO_RANDOMPORT";
2181 case SO_TRAFFIC_CLASS
:
2182 return "SO_TRAFFIC_CLASS";
2183 case SO_RECV_TRAFFIC_CLASS
:
2184 return "SO_RECV_TRAFFIC_CLASS";
2185 case SO_TRAFFIC_CLASS_DBG
:
2186 return "SO_TRAFFIC_CLASS_DBG";
2187 case SO_PRIVILEGED_TRAFFIC_CLASS
:
2188 return "SO_PRIVILEGED_TRAFFIC_CLASS";
2190 return "SO_DEFUNCTIT";
2192 return "SO_DEFUNCTOK";
2194 return "SO_ISDEFUNCT";
2195 case SO_OPPORTUNISTIC
:
2196 return "SO_OPPORTUNISTIC";
2200 return "SO_RECV_ANYIF";
2201 case SO_NOWAKEFROMSLEEP
:
2202 return "SO_NOWAKEFROMSLEEP";
2203 case SO_NOAPNFALLBK
:
2204 return "SO_NOAPNFALLBK";
/* The string below omits MARK_ from the constant name. */
2205 case SO_MARK_CELLFALLBACK
:
2206 return "SO_CELLFALLBACK";
2208 return "SO_DELEGATED";
2209 case SO_DELEGATED_UUID
:
2210 return "SO_DELEGATED_UUID";
2212 case SO_NECP_ATTRIBUTES
:
2213 return "SO_NECP_ATTRIBUTES";
2214 case SO_NECP_CLIENTUUID
:
2215 return "SO_NECP_CLIENTUUID";
/* TCP-level and MPTCP-specific option names. */
2223 return "TCP_NODELAY";
2225 return "TCP_KEEPALIVE";
2227 return "TCP_KEEPINTVL";
2229 return "TCP_KEEPCNT";
2230 case TCP_CONNECTIONTIMEOUT
:
2231 return "TCP_CONNECTIONTIMEOUT";
2232 case TCP_RXT_CONNDROPTIME
:
2233 return "TCP_RXT_CONNDROPTIME";
2234 case PERSIST_TIMEOUT
:
2235 return "PERSIST_TIMEOUT";
/* The next three strings drop the TCP_ prefix of their constants. */
2236 case TCP_NOTSENT_LOWAT
:
2237 return "NOTSENT_LOWAT";
2238 case TCP_ADAPTIVE_READ_TIMEOUT
:
2239 return "ADAPTIVE_READ_TIMEOUT";
2240 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
2241 return "ADAPTIVE_WRITE_TIMEOUT";
2242 case MPTCP_SERVICE_TYPE
:
2243 return "MPTCP_SERVICE_TYPE";
2244 case MPTCP_ALTERNATE_PORT
:
2245 return "MPTCP_ALTERNATE_PORT";
2246 case MPTCP_FORCE_ENABLE
:
2247 return "MPTCP_FORCE_ENABLE";
2248 case MPTCP_EXPECTED_PROGRESS_TARGET
:
2249 return "MPTCP_EXPECTED_PROGRESS_TARGET";
2259 mptcp_usr_preconnect(struct socket
*mp_so
)
2261 struct mptsub
*mpts
= NULL
;
2262 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
2263 struct mptses
*mpte
;
2265 struct tcpcb
*tp
= NULL
;
2268 mpte
= mptompte(mpp
);
2270 mpts
= mptcp_get_subflow(mpte
, NULL
);
2272 os_log_error(mptcp_log_handle
, "%s - %lx: invalid preconnect ",
2273 __func__
, (unsigned long)VM_KERNEL_ADDRPERM(mpte
));
2276 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
2277 so
= mpts
->mpts_socket
;
2278 tp
= intotcpcb(sotoinpcb(so
));
2279 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
2280 error
= tcp_output(sototcpcb(so
));
2282 soclearfastopen(mp_so
);