2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp_var.h>
52 #include <netinet/mptcp_timer.h>
/*
 * Forward declarations for the file-local (static) MPTCP user-request
 * handlers and their helpers.
 *
 * NOTE(review): the mptcp_getconnids and mptcp_usr_control declarations
 * below stop at a trailing comma; their last parameter lines are absent
 * from this listing.
 */
56 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
57 static int mptcp_usr_detach(struct socket
*);
58 static int mptcp_attach(struct socket
*, struct proc
*);
59 static int mptcp_usr_connectx(struct socket
*, struct sockaddr
*,
60 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
61 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
62 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
63 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
65 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
66 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
67 uint32_t *, user_addr_t
, uint32_t *);
68 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
70 static int mptcp_disconnect(struct mptses
*);
71 static int mptcp_usr_disconnect(struct socket
*);
72 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
73 static struct mptses
*mptcp_usrclosed(struct mptses
*);
74 static int mptcp_usr_rcvd(struct socket
*, int);
75 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
76 struct sockaddr
*, struct mbuf
*, struct proc
*);
77 static int mptcp_usr_shutdown(struct socket
*);
78 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
79 struct mbuf
*, struct mbuf
*, int);
80 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
81 static int mptcp_setopt(struct mptses
*, struct sockopt
*);
82 static int mptcp_getopt(struct mptses
*, struct sockopt
*);
83 static int mptcp_default_tcp_optval(struct mptses
*, struct sockopt
*, int *);
84 static int mptcp_usr_preconnect(struct socket
*so
);
/*
 * User-request dispatch table for MPTCP sockets.  Each pru_* slot is
 * bound to the matching mptcp_usr_* handler of this file, except that
 * pru_peeraddr/pru_sockaddr use mp_getpeeraddr/mp_getsockaddr and
 * pru_soreceive uses soreceive.
 *
 * NOTE(review): the closing brace of the initializer is absent from this
 * listing.
 */
86 struct pr_usrreqs mptcp_usrreqs
= {
87 .pru_attach
= mptcp_usr_attach
,
88 .pru_connectx
= mptcp_usr_connectx
,
89 .pru_control
= mptcp_usr_control
,
90 .pru_detach
= mptcp_usr_detach
,
91 .pru_disconnect
= mptcp_usr_disconnect
,
92 .pru_disconnectx
= mptcp_usr_disconnectx
,
93 .pru_peeraddr
= mp_getpeeraddr
,
94 .pru_rcvd
= mptcp_usr_rcvd
,
95 .pru_send
= mptcp_usr_send
,
96 .pru_shutdown
= mptcp_usr_shutdown
,
97 .pru_sockaddr
= mp_getsockaddr
,
98 .pru_sosend
= mptcp_usr_sosend
,
99 .pru_soreceive
= soreceive
,
100 .pru_socheckopt
= mptcp_usr_socheckopt
,
101 .pru_preconnect
= mptcp_usr_preconnect
,
/*
 * Tunables registered under the _net_inet_mptcp sysctl node, both
 * read-write integers defaulting to 0:
 *   disable_entitlements - backed by mptcp_disable_entitlements; declared
 *                          under DEVELOPMENT || DEBUG.
 *   allow_aggregate      - backed by mptcp_developer_mode, which
 *                          mptcp_entitlement_check() consults for service
 *                          types above MPTCP_SVCTYPE_INTERACTIVE.
 *
 * NOTE(review): the #endif matching the #if below is not visible in this
 * listing, so where the conditional region ends cannot be confirmed here.
 */
105 #if (DEVELOPMENT || DEBUG)
106 static int mptcp_disable_entitlements
= 0;
107 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, disable_entitlements
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
108 &mptcp_disable_entitlements
, 0, "Disable Multipath TCP Entitlement Checking");
111 int mptcp_developer_mode
= 0;
112 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, allow_aggregate
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
113 &mptcp_developer_mode
, 0, "Allow the Multipath aggregation mode");
/*
 * Attaches an MPTCP control block to a socket.
 */
/*
 * pru_attach handler (see mptcp_usrreqs).
 *
 * mp_so  socket being attached; must not already carry an MP PCB.
 * proto  unused.
 * p      forwarded to mptcp_attach().
 *
 * NOTE(review): the return type, braces, the declaration of `error` and
 * the error-exit/return statements are absent from this listing.
 */
120 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
122 #pragma unused(proto)
/* A socket can only be attached once: no MP PCB may exist yet. */
125 VERIFY(mpsotomppcb(mp_so
) == NULL
);
127 error
= mptcp_attach(mp_so
, p
);
133 * Might want to use a different SO_LINGER timeout than TCP's?
/*
 * SO_LINGER requested without a linger time: fall back to
 * TCP_LINGERTIME scaled by hz.
 */
135 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0)
136 mp_so
->so_linger
= TCP_LINGERTIME
* hz
;
/*
 * Detaches an MPTCP control block from a socket.
 */
/*
 * pru_detach handler (see mptcp_usrreqs).
 *
 * Looks up the session and MP PCB of mp_so.  A missing PCB, or one in
 * MPPCB_STATE_DEAD, is logged at error level (the state is printed as -1
 * when there is no PCB).  The visible code then hands the session to
 * mptcp_disconnect().
 *
 * NOTE(review): the lines that follow the log call are absent from this
 * listing; presumably the error path returns before reaching
 * mptcp_disconnect() -- confirm against the full file.
 */
145 mptcp_usr_detach(struct socket
*mp_so
)
147 struct mptses
*mpte
= mpsotompte(mp_so
);
148 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
150 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
151 mptcplog((LOG_ERR
, "%s state: %d\n", __func__
,
152 mpp
? mpp
->mpp_state
: -1),
153 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
158 * We are done with this MPTCP socket (it has been closed);
159 * trigger all subflows to be disconnected, if not already,
160 * by initiating the PCB detach sequence (SOF_PCBCLEARING
165 mptcp_disconnect(mpte
);
/*
 * Attach MPTCP protocol to socket, allocating MP control block,
 * MPTCP session, control block, buffer space, etc.
 */
/*
 * Prepares mp_so for MPTCP use: sizes its socket buffers, sets the
 * buffer flags MPTCP relies on, and allocates the MP PCB via
 * mp_pcballoc() against mtcbinfo.
 *
 * NOTE(review): the return type, braces, the declaration of `error`, the
 * error exits and the final return are absent from this listing, as is
 * any use of `p`.
 */
175 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
178 struct mptses
*mpte
= NULL
;
179 struct mptcb
*mp_tp
= NULL
;
180 struct mppcb
*mpp
= NULL
;
/* Reserve default TCP buffer space if either high-water mark is unset. */
183 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
184 error
= soreserve(mp_so
, tcp_sendspace
, tcp_recvspace
);
/* Reserve 2048 bytes for pre-connect data when nothing is reserved yet. */
189 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
190 soreserve_preconnect(mp_so
, 2048);
/* Enable SB_AUTOSIZE on each buffer that does not have SB_USRSIZE set. */
193 if ((mp_so
->so_rcv
.sb_flags
& SB_USRSIZE
) == 0)
194 mp_so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
195 if ((mp_so
->so_snd
.sb_flags
& SB_USRSIZE
) == 0)
196 mp_so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
199 * MPTCP socket buffers cannot be compressed, due to the
200 * fact that each mbuf chained via m_next is a M_PKTHDR
201 * which carries some MPTCP metadata.
203 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
204 mp_so
->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
/* Allocate the MP PCB for this socket. */
206 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
/*
 * After allocation the PCB must carry a session (mpp_pcbe) and the
 * session must carry an MPTCP control block.
 */
210 mpp
= mpsotomppcb(mp_so
);
212 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
213 VERIFY(mpte
!= NULL
);
214 mp_tp
= mpte
->mpte_mptcb
;
215 VERIFY(mp_tp
!= NULL
);
/*
 * Decides whether the owner of mp_so is entitled to use MPTCP with the
 * session's service type.  mptcp_usr_connectx() treats a negative return
 * value as a refusal.
 *
 * Visible checks, in order:
 *  1. PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED credential: if
 *     soopt_cred_check() returns 0 the session is flagged
 *     MPTE_FIRSTPARTY.
 *  2. On DEVELOPMENT/DEBUG builds, the mptcp_disable_entitlements knob
 *     is consulted.
 *  3. PRIV_NET_PRIVILEGED_MULTIPATH credential: a non-zero result is
 *     logged as "Multipath Capability needed".
 *  4. Service types above MPTCP_SVCTYPE_INTERACTIVE additionally need
 *     mptcp_developer_mode (the allow_aggregate sysctl) to be non-zero.
 *  5. Otherwise "entitlement granted" is logged.
 *
 * NOTE(review): the return type, braces, labels and every return/goto
 * statement are absent from this listing, so the exact values returned
 * by each path cannot be confirmed here.
 */
221 mptcp_entitlement_check(struct socket
*mp_so
)
223 struct mptses
*mpte
= mpsotompte(mp_so
);
225 if (soopt_cred_check(mp_so
, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED
, TRUE
) == 0) {
227 * This means the app has the extended entitlement. Thus,
228 * it's a first party app and can run without restrictions.
230 mpte
->mpte_flags
|= MPTE_FIRSTPARTY
;
234 #if (DEVELOPMENT || DEBUG)
235 if (mptcp_disable_entitlements
)
239 if (soopt_cred_check(mp_so
, PRIV_NET_PRIVILEGED_MULTIPATH
, TRUE
)) {
240 mptcplog((LOG_NOTICE
, "%s Multipath Capability needed\n", __func__
),
241 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
245 if (mpte
->mpte_svctype
> MPTCP_SVCTYPE_INTERACTIVE
&&
246 mptcp_developer_mode
== 0) {
247 mptcplog((LOG_NOTICE
, "%s need to set allow_aggregate sysctl\n",
248 __func__
), MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
253 mptcplog((LOG_NOTICE
, "%s entitlement granted for %u\n", __func__
, mpte
->mpte_svctype
),
254 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
/*
 * Common subroutine to open an MPTCP connection to the remote host
 * specified by dst. This includes allocating and establishing a
 * subflow TCP connection, either initially to establish the MPTCP
 * connection, or to join an existing one. Returns a connection handle
 * (through pcid) upon success.
 */
/*
 * Thin wrapper around mptcp_subflow_add(): logs the (address-permuted)
 * MP socket pointer, fires the connectx DTrace probe, and then asks for
 * a new subflow from src to dst on ifscope.
 *
 * pcid must not be NULL; it is passed straight to mptcp_subflow_add().
 *
 * NOTE(review): the return type, braces, the declaration of `error` and
 * the return statement are absent from this listing.
 */
266 mptcp_connectx(struct mptses
*mpte
, struct sockaddr
*src
,
267 struct sockaddr
*dst
, uint32_t ifscope
, sae_connid_t
*pcid
)
269 struct socket
*mp_so
= mptetoso(mpte
);
273 VERIFY(pcid
!= NULL
);
275 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx\n", __func__
,
276 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
277 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
278 DTRACE_MPTCP2(connectx
, struct mptses
*, mpte
, struct socket
*, mp_so
);
280 error
= mptcp_subflow_add(mpte
, src
, dst
, ifscope
, pcid
);
/*
 * User-protocol pru_connectx callback.
 */
/*
 * Validates the request, records the endpoint addresses in the session,
 * adds a subflow through mptcp_connectx(), and sends any data supplied
 * in auio.
 *
 * mp_so          MP socket; its lock is asserted to be held.
 * src, dst       source and destination addresses; only AF_INET and
 *                AF_INET6 are accepted, and sa_len must match the size
 *                of the corresponding session field.
 * ifscope, pcid  forwarded to mptcp_connectx().
 * auio           data to send with the connect request.
 * bytes_written  receives the number of bytes consumed from auio.
 * p, aid, flags, arg, arglen are unused.
 *
 * NOTE(review): the return type, braces, the declaration of `error`,
 * the goto/label/return lines and some log arguments are absent from
 * this listing.  In particular, any NULL guard around the `src` and
 * `auio` sections is not visible here -- confirm against the full file.
 */
289 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr
*src
,
290 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
291 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
292 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
294 #pragma unused(p, aid, flags, arg, arglen)
295 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
296 struct mptses
*mpte
= NULL
;
297 struct mptcb
*mp_tp
= NULL
;
298 user_ssize_t datalen
;
/* A missing or dead MP PCB is logged (state -1 when there is no PCB). */
301 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
302 mptcplog((LOG_ERR
, "%s state %d\n", __func__
,
303 mpp
? mpp
->mpp_state
: -1),
304 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
308 mpte
= mptompte(mpp
);
309 VERIFY(mpte
!= NULL
);
310 mpte_lock_assert_held(mpte
);
312 mp_tp
= mpte
->mpte_mptcb
;
313 VERIFY(mp_tp
!= NULL
);
/* A connection that already fell back to plain TCP is logged as an error. */
315 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
316 mptcplog((LOG_ERR
, "%s fell back to TCP\n", __func__
),
317 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/* Destination must be IPv4 or IPv6, otherwise EAFNOSUPPORT. */
322 if (dst
->sa_family
!= AF_INET
&& dst
->sa_family
!= AF_INET6
) {
323 error
= EAFNOSUPPORT
;
/*
 * The destination length must equal the size of the per-family session
 * field; this is what bounds the memcpy() into mpte_dst further down.
 */
327 if (dst
->sa_family
== AF_INET
&&
328 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v4
)) {
329 mptcplog((LOG_ERR
, "%s IPv4 dst len %u\n", __func__
,
331 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
336 if (dst
->sa_family
== AF_INET6
&&
337 dst
->sa_len
!= sizeof(mpte
->__mpte_dst_v6
)) {
338 mptcplog((LOG_ERR
, "%s IPv6 dst len %u\n", __func__
,
340 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/*
 * Run the entitlement check once per session; MPTE_SVCTYPE_CHECKED
 * records that it has been done.
 */
345 if (!(mpte
->mpte_flags
& MPTE_SVCTYPE_CHECKED
)) {
346 if (mptcp_entitlement_check(mp_so
) < 0) {
351 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
/* Remember the destination only while neither connected nor connecting. */
354 if ((mp_so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0) {
355 memcpy(&mpte
->mpte_dst
, dst
, dst
->sa_len
);
/* Same family and length validation for the source address. */
359 if (src
->sa_family
!= AF_INET
&& src
->sa_family
!= AF_INET6
) {
360 error
= EAFNOSUPPORT
;
364 if (src
->sa_family
== AF_INET
&&
365 src
->sa_len
!= sizeof(mpte
->__mpte_src_v4
)) {
366 mptcplog((LOG_ERR
, "%s IPv4 src len %u\n", __func__
,
368 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
373 if (src
->sa_family
== AF_INET6
&&
374 src
->sa_len
!= sizeof(mpte
->__mpte_src_v6
)) {
375 mptcplog((LOG_ERR
, "%s IPv6 src len %u\n", __func__
,
377 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/* Remember the source only while neither connected nor connecting. */
382 if ((mp_so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0) {
383 memcpy(&mpte
->mpte_src
, src
, src
->sa_len
);
/* Add the subflow. */
387 error
= mptcp_connectx(mpte
, src
, dst
, ifscope
, pcid
);
389 /* If there is data, copy it */
/*
 * The socket is unlocked around the pru_sosend call and re-locked
 * afterwards.  bytes_written is the amount consumed from auio, reported
 * when the send succeeded or returned EWOULDBLOCK.
 */
391 datalen
= uio_resid(auio
);
392 socket_unlock(mp_so
, 0);
393 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
394 (uio_t
) auio
, NULL
, NULL
, 0);
396 if (error
== 0 || error
== EWOULDBLOCK
)
397 *bytes_written
= datalen
- uio_resid(auio
);
399 if (error
== EWOULDBLOCK
)
402 socket_lock(mp_so
, 0);
/*
 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
 */
/*
 * Reports the association of an MPTCP session.
 *
 * mpte  session; its lock is asserted to be held.
 * cnt   out: 1 if mpte_associd differs from SAE_ASSOCID_ANY, else 0.
 * aidp  user address that receives mpte_associd via copyout(); may be
 *       USER_ADDR_NULL when only the count is wanted.
 *
 * Returns the copyout() result on the path that copies the id.
 *
 * NOTE(review): the return type, braces and the statement executed when
 * aidp is USER_ADDR_NULL are absent from this listing.
 */
413 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
415 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
417 /* MPTCP has at most 1 association */
418 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
420 /* just asking how many there are? */
421 if (aidp
== USER_ADDR_NULL
)
424 return (copyout(&mpte
->mpte_associd
, aidp
,
425 sizeof (mpte
->mpte_associd
)));
/*
 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
 */
/*
 * Reports the connection ids of the subflows of an MPTCP session.
 *
 * mpte  session; its lock is asserted to be held.
 * aid   association id; accepted values are SAE_ASSOCID_ANY,
 *       SAE_ASSOCID_ALL or the session's own mpte_associd.
 * cnt   out: mpte_numflows.
 * cidp  user address of the output array; one mpts_connid is copied out
 *       per subflow and cidp advances by sizeof(mpts_connid) each time.
 *
 * NOTE(review): the return type, the final parameter line (which should
 * declare cidp), the locals `mpts` and `error`, braces and all return
 * statements are absent from this listing.
 */
432 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
438 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
/* Reject association ids that are neither wildcards nor this session's. */
440 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
441 aid
!= mpte
->mpte_associd
)
444 *cnt
= mpte
->mpte_numflows
;
446 /* just asking how many there are? */
447 if (cidp
== USER_ADDR_NULL
)
/* Copy each subflow's connection id to consecutive user slots. */
450 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
451 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
452 sizeof (mpts
->mpts_connid
))) != 0)
455 cidp
+= sizeof (mpts
->mpts_connid
);
/*
 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
 */
/*
 * Fills in connection information for either the MPTCP connection as a
 * whole or for a single subflow.
 *
 * *cid == SAE_CONNID_ALL:
 *   flags are derived from the MP socket state and from
 *   MPTCPF_FALLBACK_TO_TCP; aux_type/aux_len describe a
 *   struct conninfo_multipathtcp, which is built and copied out when
 *   aux_data is not USER_ADDR_NULL.  *aux_len must be 0 or
 *   sizeof(struct conninfo_multipathtcp) on entry.
 *
 * otherwise:
 *   the subflow whose mpts_connid equals *cid (or the first subflow for
 *   SAE_CONNID_ANY) is looked up; in_getconninfo() or in6_getconninfo()
 *   supplies the base information and the MPTSF_* subflow flags are
 *   translated to CIF_MP_* bits.  When the lookup fails, ENXIO is
 *   returned for SAE_CONNID_ANY and EINVAL otherwise.
 *
 * NOTE(review): the return type, local declarations (mpts, so, inp,
 * error, i), braces, several return/goto/break lines and the
 * initialisation of *flags are absent from this listing.
 */
465 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
466 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
467 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
468 user_addr_t aux_data
, uint32_t *aux_len
)
/* Whole-connection query. */
480 if (*cid
== SAE_CONNID_ALL
) {
481 struct socket
*mp_so
= mptetoso(mpte
);
482 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
483 struct conninfo_multipathtcp mptcp_ci
;
/* The caller's aux buffer length must be zero or exactly the struct size. */
485 if (*aux_len
!= 0 && *aux_len
!= sizeof(mptcp_ci
))
/* Translate MP socket state bits into CIF_* flags. */
488 if (mp_so
->so_state
& SS_ISCONNECTING
)
489 *flags
|= CIF_CONNECTING
;
490 if (mp_so
->so_state
& SS_ISCONNECTED
)
491 *flags
|= CIF_CONNECTED
;
492 if (mp_so
->so_state
& SS_ISDISCONNECTING
)
493 *flags
|= CIF_DISCONNECTING
;
494 if (mp_so
->so_state
& SS_ISDISCONNECTED
)
495 *flags
|= CIF_DISCONNECTED
;
/* MP-capable unless the connection fell back to TCP (then degraded). */
496 if (!(mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
))
497 *flags
|= CIF_MP_CAPABLE
;
498 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
)
499 *flags
|= CIF_MP_DEGRADED
;
504 *aux_type
= CIAUX_MPTCP
;
505 *aux_len
= sizeof(mptcp_ci
);
/* Build and copy out the auxiliary MPTCP record only when requested. */
507 if (aux_data
!= USER_ADDR_NULL
) {
509 int initial_info_set
= 0;
511 bzero(&mptcp_ci
, sizeof (mptcp_ci
));
512 mptcp_ci
.mptcpci_subflow_count
= mpte
->mpte_numflows
;
513 mptcp_ci
.mptcpci_switch_count
= mpte
->mpte_subflow_switches
;
515 VERIFY(sizeof(mptcp_ci
.mptcpci_itfstats
) == sizeof(mpte
->mpte_itfstats
));
516 memcpy(mptcp_ci
.mptcpci_itfstats
, mpte
->mpte_itfstats
, sizeof(mptcp_ci
.mptcpci_itfstats
));
/*
 * Record each subflow's connection id, bounded by the capacity of
 * mptcpci_subflow_connids, and fold its statistics into the record.
 */
518 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
519 if (i
>= sizeof(mptcp_ci
.mptcpci_subflow_connids
) / sizeof(sae_connid_t
))
521 mptcp_ci
.mptcpci_subflow_connids
[i
] = mpts
->mpts_connid
;
/* Byte counters of the initial subflow come from its inpcb stats. */
523 if (mpts
->mpts_flags
& MPTSF_INITIAL_SUB
) {
524 inp
= sotoinpcb(mpts
->mpts_socket
);
526 mptcp_ci
.mptcpci_init_rxbytes
= inp
->inp_stat
->rxbytes
;
527 mptcp_ci
.mptcpci_init_txbytes
= inp
->inp_stat
->txbytes
;
528 initial_info_set
= 1;
531 mptcpstats_update(mptcp_ci
.mptcpci_itfstats
, mpts
);
/* No initial subflow in the list: use the counters saved in the session. */
536 if (initial_info_set
== 0) {
537 mptcp_ci
.mptcpci_init_rxbytes
= mpte
->mpte_init_rxbytes
;
538 mptcp_ci
.mptcpci_init_txbytes
= mpte
->mpte_init_txbytes
;
541 if (mpte
->mpte_flags
& MPTE_FIRSTPARTY
)
542 mptcp_ci
.mptcpci_flags
|= MPTCPCI_FIRSTPARTY
;
544 error
= copyout(&mptcp_ci
, aux_data
, sizeof(mptcp_ci
));
546 mptcplog((LOG_ERR
, "%s copyout failed: %d\n",
548 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/* Single-subflow query: find the subflow matching *cid (or any). */
556 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
557 if (mpts
->mpts_connid
== *cid
|| *cid
== SAE_CONNID_ANY
)
561 return ((*cid
== SAE_CONNID_ANY
) ? ENXIO
: EINVAL
);
563 so
= mpts
->mpts_socket
;
/* Delegate to the IPv4 or IPv6 helper depending on the subflow's inpcb. */
566 if (inp
->inp_vflag
& INP_IPV4
)
567 error
= in_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
568 soerror
, src
, src_len
, dst
, dst_len
,
569 aux_type
, aux_data
, aux_len
);
571 error
= in6_getconninfo(so
, SAE_CONNID_ANY
, flags
, ifindex
,
572 soerror
, src
, src_len
, dst
, dst_len
,
573 aux_type
, aux_data
, aux_len
);
576 mptcplog((LOG_ERR
, "%s error from in_getconninfo %d\n",
578 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/* Add the MPTCP-specific subflow state to the reported flags. */
582 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
)
583 *flags
|= CIF_MP_CAPABLE
;
584 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
585 *flags
|= CIF_MP_DEGRADED
;
586 if (mpts
->mpts_flags
& MPTSF_MP_READY
)
587 *flags
|= CIF_MP_READY
;
588 if (mpts
->mpts_flags
& MPTSF_ACTIVE
)
589 *flags
|= CIF_MP_ACTIVE
;
591 mptcplog((LOG_DEBUG
, "%s: cid %d flags %x \n", __func__
,
592 mpts
->mpts_connid
, mpts
->mpts_flags
),
593 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
/*
 * User-protocol pru_control callback.
 */
/*
 * ioctl dispatcher for MP sockets.  Each visible case copies the request
 * structure out of `data` into a local of the matching 32- or 64-bit
 * layout, calls the corresponding mptcp_get*() helper, and copies the
 * local back into `data`:
 *
 *   SIOCGASSOCIDS32/64 -> mptcp_getassocids()
 *   SIOCGCONNIDS32/64  -> mptcp_getconnids()
 *   SIOCGCONNINFO32/64 -> mptcp_getconninfo()
 *
 * ifp and p are unused.  The session lock is asserted to be held.
 *
 * NOTE(review): the return type, the locals `mpte` and `error`, the
 * switch statement itself, the last argument line of each helper call,
 * the conditions guarding the copy-back, the break/default lines and the
 * return are absent from this listing.
 */
602 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
603 struct ifnet
*ifp
, struct proc
*p
)
605 #pragma unused(ifp, p)
606 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
/* Sockets without a live MP PCB are handled separately. */
610 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
614 mpte
= mptompte(mpp
);
615 VERIFY(mpte
!= NULL
);
617 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
620 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
621 struct so_aidreq32 aidr
;
622 bcopy(data
, &aidr
, sizeof (aidr
));
623 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
626 bcopy(&aidr
, data
, sizeof (aidr
));
630 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
631 struct so_aidreq64 aidr
;
632 bcopy(data
, &aidr
, sizeof (aidr
));
633 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
636 bcopy(&aidr
, data
, sizeof (aidr
));
640 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
641 struct so_cidreq32 cidr
;
642 bcopy(data
, &cidr
, sizeof (cidr
));
643 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
646 bcopy(&cidr
, data
, sizeof (cidr
));
650 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
651 struct so_cidreq64 cidr
;
652 bcopy(data
, &cidr
, sizeof (cidr
));
653 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
656 bcopy(&cidr
, data
, sizeof (cidr
));
660 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
661 struct so_cinforeq32 cifr
;
662 bcopy(data
, &cifr
, sizeof (cifr
));
663 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
664 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
665 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
666 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
669 bcopy(&cifr
, data
, sizeof (cifr
));
673 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
674 struct so_cinforeq64 cifr
;
675 bcopy(data
, &cifr
, sizeof (cifr
));
676 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
677 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
678 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
679 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
682 bcopy(&cifr
, data
, sizeof (cifr
));
/*
 * Starts tearing down the MPTCP connection of a session.
 *
 * With the session lock held, the function logs the MP socket and its
 * so_error, fires the disconnectx DTrace probe and, unless the socket is
 * already in SOF_PCBCLEARING, inspects the socket state first.  It then
 * cancels all MPTCP timers and picks one of three paths:
 *   - state below MPTCPS_ESTABLISHED: mptcp_close();
 *   - SO_LINGER set with a zero linger time: mptcp_drop();
 *   - otherwise: mark the socket disconnecting, flush the receive
 *     buffer and run mptcp_usrclosed().
 * The subflow work loop is run near the end of the visible code.
 *
 * NOTE(review): the return type, the declaration of `mp_tp`/`error`,
 * braces, the bodies of the state checks, the statement guarded by the
 * mptcp_usrclosed() test and the return are absent from this listing.
 */
695 mptcp_disconnect(struct mptses
*mpte
)
697 struct socket
*mp_so
;
701 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
703 mp_so
= mptetoso(mpte
);
704 mp_tp
= mpte
->mpte_mptcb
;
706 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx %d\n", __func__
,
707 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mp_so
->so_error
),
708 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
710 DTRACE_MPTCP3(disconnectx
, struct mptses
*, mpte
,
711 struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
713 /* if we're not detached, go thru socket state checks */
714 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
715 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
720 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
726 mptcp_cancel_all_timers(mp_tp
);
727 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
728 mptcp_close(mpte
, mp_tp
);
729 } else if ((mp_so
->so_options
& SO_LINGER
) &&
730 mp_so
->so_linger
== 0) {
731 mptcp_drop(mpte
, mp_tp
, 0);
733 soisdisconnecting(mp_so
);
734 sbflush(&mp_so
->so_rcv
);
735 if (mptcp_usrclosed(mpte
) != NULL
)
740 mptcp_subflow_workloop(mpte
);
/*
 * Wrapper function to support disconnect on socket.
 */
/*
 * pru_disconnect handler (see mptcp_usrreqs): resolves the session of
 * mp_so with mpsotompte() and returns whatever mptcp_disconnect()
 * returns for it.
 */
750 mptcp_usr_disconnect(struct socket
*mp_so
)
752 return (mptcp_disconnect(mpsotompte(mp_so
)));
/*
 * User-protocol pru_disconnectx callback.
 */
/*
 * Accepts only the wildcard ids: aid must be SAE_ASSOCID_ANY or
 * SAE_ASSOCID_ALL and cid must be SAE_CONNID_ANY or SAE_CONNID_ALL.
 * When both checks pass the request is handled exactly like a plain
 * disconnect via mptcp_usr_disconnect().
 *
 * NOTE(review): the statements executed when either check fails are
 * absent from this listing (presumably an error return -- confirm).
 */
759 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
761 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
)
764 if (cid
!= SAE_CONNID_ANY
&& cid
!= SAE_CONNID_ALL
)
767 return (mptcp_usr_disconnect(mp_so
));
/*
 * Second half of a user-initiated close, called from mptcp_usrclosed().
 *
 *   - MPTCPS_CLOSED: the session is closed with mptcp_close(), whose
 *     return value replaces mpte.
 *   - MPTCPS_FIN_WAIT_2 or later: the MP socket is marked disconnected
 *     and every subflow is visited; mptcp_subflow_disconnect() is used
 *     when the MP socket can neither send nor receive any more, and
 *     mptcp_subflow_shutdown() is the other visible call.
 *
 * NOTE(review): the return type, the declaration of `mpts`, braces and
 * the `else` that presumably separates the two subflow calls are absent
 * from this listing.
 */
771 mptcp_finish_usrclosed(struct mptses
*mpte
)
773 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
774 struct socket
*mp_so
= mptetoso(mpte
);
776 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
777 mpte
= mptcp_close(mpte
, mp_tp
);
778 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
779 soisdisconnected(mp_so
);
783 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
784 if ((mp_so
->so_state
& (SS_CANTRCVMORE
|SS_CANTSENDMORE
)) ==
785 (SS_CANTRCVMORE
| SS_CANTSENDMORE
))
786 mptcp_subflow_disconnect(mpte
, mpts
);
788 mptcp_subflow_shutdown(mpte
, mpts
);
/*
 * User issued close, and wishes to trail through the shutdown states.
 */
/*
 * Feeds an MPCE_CLOSE event into the MPTCP close state machine and,
 * once everything sent has been acknowledged, continues with
 * mptcp_finish_usrclosed().  Returns a session pointer; callers in this
 * file compare it against NULL.
 *
 * NOTE(review): braces, the statement taken when data is still
 * unacknowledged and the final return are absent from this listing.
 */
796 static struct mptses
*
797 mptcp_usrclosed(struct mptses
*mpte
)
799 struct mptcb
*mp_tp
= mpte
->mpte_mptcb
;
801 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
803 /* Not everything has been acknowledged - don't close the subflows! */
804 if (mp_tp
->mpt_sndnxt
+ 1 != mp_tp
->mpt_sndmax
)
807 mptcp_finish_usrclosed(mpte
);
/*
 * After a receive, possibly send some update to peer.
 */
/*
 * pru_rcvd handler (see mptcp_usrreqs).  For a socket with a live MP
 * PCB it resolves the session and calls mptcp_output() on it; `flags`
 * is unused.
 *
 * NOTE(review): the return type, the locals `mpte` and `error`, the
 * body of the dead-PCB check and the return are absent from this
 * listing.
 */
816 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
818 #pragma unused(flags)
819 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
823 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
827 mpte
= mptompte(mpp
);
828 VERIFY(mpte
!= NULL
);
830 error
= mptcp_output(mpte
);
/*
 * Do a send by putting data in the output queue.
 */
/*
 * pru_send handler (see mptcp_usrreqs).
 *
 * mp_so       MP socket.
 * prus_flags  PRUS_* flags; PRUS_OOB and PRUS_EOF are singled out by the
 *             first check.
 * m           data to queue.
 * control     control mbuf; a non-empty one is singled out by the second
 *             check.
 * nam, p      unused.
 *
 * After the checks the data is stamped by mptcp_insert_dsn(), appended
 * to the send buffer (which must have SB_NOCOMPRESS set) and
 * mptcp_output() is invoked.  While the socket is still connecting, a
 * socket without SS_NBIO waits on the send buffer with sbwait().
 *
 * NOTE(review): the return type, the locals `mpte` and `error`, the
 * bodies of every check shown below (error codes, gotos), mbuf freeing
 * and the return are absent from this listing.
 */
839 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
840 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
842 #pragma unused(nam, p)
843 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
847 if (prus_flags
& (PRUS_OOB
|PRUS_EOF
)) {
857 if (control
!= NULL
&& control
->m_len
!= 0) {
862 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
866 mpte
= mptompte(mpp
);
867 VERIFY(mpte
!= NULL
);
/* Neither connected nor carrying pre-connect data. */
869 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
870 !(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)) {
875 mptcp_insert_dsn(mpp
, m
);
876 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
877 sbappendstream(&mp_so
->so_snd
, m
);
880 error
= mptcp_output(mpte
);
884 if (mp_so
->so_state
& SS_ISCONNECTING
) {
885 if (mp_so
->so_state
& SS_NBIO
)
888 error
= sbwait(&mp_so
->so_snd
);
/*
 * Mark the MPTCP connection as being incapable of further output.
 */
/*
 * pru_shutdown handler (see mptcp_usrreqs).  For a socket with a live MP
 * PCB it marks the socket as unable to send more, runs the user-close
 * sequence (mptcp_usrclosed() may return a different session pointer,
 * which replaces mpte) and then calls mptcp_output().
 *
 * NOTE(review): the return type, the locals `mpte` and `error`, the body
 * of the dead-PCB check, any guard between mptcp_usrclosed() and
 * mptcp_output() (e.g. a NULL test on mpte), and the return are absent
 * from this listing.
 */
905 mptcp_usr_shutdown(struct socket
*mp_so
)
907 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
911 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
915 mpte
= mptompte(mpp
);
916 VERIFY(mpte
!= NULL
);
918 socantsendmore(mp_so
);
920 mpte
= mptcp_usrclosed(mpte
);
922 error
= mptcp_output(mpte
);
/*
 * Copy the contents of uio into a properly sized mbuf chain.
 */
/*
 * Builds an mbuf chain holding up to `space` bytes taken from `uio`.
 *
 * uio    source of the data.
 * how    allocation behaviour passed to the mbuf allocators.
 * space  upper bound on the bytes to copy; the amount actually copied is
 *        imin(uio_resid(uio), space).
 * align  added to the byte count when sizing the first allocation.
 * top    out: head of the resulting chain; must point at a NULL pointer
 *        on entry (the parameter line declaring it is absent from this
 *        listing).
 *
 * The allocation loop picks the buffer size from the bytes still
 * needed: 16K clusters when njcl > 0 and more than MBIGCLBYTES remain,
 * big clusters above MCLBYTES, a third m_getpackets_internal() call from
 * MINCLSIZE upwards, and a plain header mbuf otherwise.  Every mbuf in
 * the chain is a packet header (M_PKTHDR), and m_pkthdr.len is set per
 * mbuf to the bytes copied into it.
 *
 * NOTE(review): the return type, the locals `error`, braces, the loop
 * header, the chain-linking code, the allocation-failure path, the
 * progress accounting and the return are absent from this listing.
 */
931 mptcp_uiotombuf(struct uio
*uio
, int how
, int space
, uint32_t align
,
934 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
935 user_ssize_t resid
, tot
, len
, progress
; /* must be user_ssize_t */
938 VERIFY(top
!= NULL
&& *top
== NULL
);
941 * space can be zero or an arbitrary large value bound by
942 * the total data supplied by the uio.
944 resid
= uio_resid(uio
);
946 tot
= imin(resid
, space
);
951 * The smallest unit is a single mbuf with pkthdr.
952 * We can't align past it.
958 * Give us the full allocation or nothing.
959 * If space is zero return the smallest empty mbuf.
961 if ((len
= tot
+ align
) == 0)
964 /* Loop and append maximum sized mbufs to the chain tail. */
966 uint32_t m_needed
= 1;
968 if (njcl
> 0 && len
> MBIGCLBYTES
)
969 mb
= m_getpackets_internal(&m_needed
, 1,
970 how
, 1, M16KCLBYTES
);
971 else if (len
> MCLBYTES
)
972 mb
= m_getpackets_internal(&m_needed
, 1,
973 how
, 1, MBIGCLBYTES
);
974 else if (len
>= (signed)MINCLSIZE
)
975 mb
= m_getpackets_internal(&m_needed
, 1,
978 mb
= m_gethdr(how
, MT_DATA
);
980 /* Fail the whole operation if one mbuf can't be allocated. */
988 VERIFY(mb
->m_flags
& M_PKTHDR
);
/* Deduct this mbuf's capacity: external buffer size, or MHLEN. */
989 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1001 /* Fill all mbufs with uio data and update header information. */
1002 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1003 len
= imin(M_TRAILINGSPACE(mb
), tot
- progress
);
1005 error
= uiomove(mtod(mb
, char *), len
, uio
);
1011 /* each mbuf is M_PKTHDR chained via m_next */
1013 mb
->m_pkthdr
.len
= len
;
/* All requested bytes must have been copied. */
1017 VERIFY(progress
== tot
);
/*
 * MPTCP socket protocol-user socket send routine, derived from sosend().
 */
/*
 * pru_sosend handler (see mptcp_usrreqs).
 *
 * mp_so    MP socket; must be SOCK_STREAM and not itself a subflow.
 * addr     unused.
 * uio      data source; required, and `top` must be NULL.
 * control  must be NULL (checked together with resid and MSG_EOR).
 * flags    MSG_* flags; MSG_OOB, MSG_DONTROUTE, MSG_HOLD, MSG_SEND and
 *          MSG_FLUSH are singled out, as is a socket with
 *          SOF_ENABLE_MSGS.
 *
 * Outline of the visible code: take the socket lock, update last owner
 * and policy, validate the arguments, bump the process's ru_msgsnd
 * counter, then loop.  Each pass runs sosendcheck(), reads the free
 * send-buffer space, copies user data into an mbuf chain with
 * mptcp_uiotombuf() (the socket lock is dropped around that copy), runs
 * the socket filters via sflt_data_out() and hands the chain to
 * pru_send.  PRUS_MORETOCOME is passed while both data and buffer space
 * remain.  The loops repeat until resid reaches 0.  Fast-open state is
 * cleared with soclearfastopen() at the end.
 *
 * NOTE(review): the return type, the locals `resid` and `space`, the
 * opening `do` lines, labels, error codes, gotos, the tail of the
 * sosendcheck() call, the conditions on the unlock calls near the end,
 * mbuf freeing and the return are absent from this listing.  Both
 * `socket_lock(mp_so, 0)` calls after the copy are visible; presumably
 * they sit on different branches -- confirm against the full file.
 */
1026 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1027 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1029 #pragma unused(addr)
1032 int error
, sendflags
;
1033 struct proc
*p
= current_proc();
1036 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1037 if (uio
== NULL
|| top
!= NULL
) {
1041 resid
= uio_resid(uio
);
1043 socket_lock(mp_so
, 1);
1044 so_update_last_owner_locked(mp_so
, p
);
1045 so_update_policy(mp_so
);
1047 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1048 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1050 if ((flags
& (MSG_OOB
|MSG_DONTROUTE
|MSG_HOLD
|MSG_SEND
|MSG_FLUSH
)) ||
1051 (mp_so
->so_flags
& SOF_ENABLE_MSGS
)) {
1053 socket_unlock(mp_so
, 1);
1058 * In theory resid should be unsigned. However, space must be
1059 * signed, as it might be less than 0 if we over-committed, and we
1060 * must use a signed comparison of space and resid. On the other
1061 * hand, a negative resid causes us to loop sending 0-length
1062 * segments to the protocol.
1064 if (resid
< 0 || (flags
& MSG_EOR
) || control
!= NULL
) {
1066 socket_unlock(mp_so
, 1);
1070 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1073 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1078 space
= sbspace(&mp_so
->so_snd
);
1080 socket_unlock(mp_so
, 0);
1082 * Copy the data from userland into an mbuf chain.
1084 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, 0, &top
);
1086 socket_lock(mp_so
, 0);
1089 VERIFY(top
!= NULL
);
/* Account for the bytes just moved out of the uio. */
1090 space
-= resid
- uio_resid(uio
);
1091 resid
= uio_resid(uio
);
1092 socket_lock(mp_so
, 0);
1095 * Compute flags here, for pru_send and NKEs.
1097 sendflags
= (resid
> 0 && space
> 0) ?
1098 PRUS_MORETOCOME
: 0;
1101 * Socket filter processing
1103 VERIFY(control
== NULL
);
1104 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1106 if (error
== EJUSTRETURN
) {
1109 /* always free control if any */
1113 if (control
!= NULL
) {
1119 * Pass data to protocol.
1121 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1122 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1127 } while (resid
!= 0 && space
> 0);
1128 } while (resid
!= 0);
1132 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1134 socket_unlock(mp_so
, 1);
1138 if (control
!= NULL
)
1141 soclearfastopen(mp_so
);
/*
 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
 * This routine simply indicates to the caller whether or not to proceed
 * further with the given socket option. This is invoked by sosetoptlock()
 * and sogetoptlock().
 */
/*
 * pru_socheckopt handler (see mptcp_usrreqs).  Only SOL_SOCKET options
 * reach this function (asserted); mp_so is unused.
 *
 * The switch sorts option names into three groups:
 *   1. options handled on the MP socket only (the "MP" cases plus
 *      SO_NECP_ATTRIBUTES and SO_NECP_CLIENTUUID);
 *   2. options that also concern the subflows ("MP + subflow" and the
 *      three cases after SO_FLUSH); for these sopt_valsize is compared
 *      with sizeof (int);
 *   3. everything else, for which error is set to ENOPROTOOPT.
 *
 * NOTE(review): the return type, the declaration of `error`, the
 * `break`/`default` lines, the statement executed when the size check
 * fails and the return are absent from this listing.
 */
1153 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1155 #pragma unused(mp_so)
1158 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1161 * We could check for sopt_dir (set/get) here, but we'll just
1162 * let the caller deal with it as appropriate; therefore the
1163 * following is a superset of the socket options which we
1164 * allow for set/get.
1166 * XXX: adi@apple.com
1168 * Need to consider the following cases:
1170 * a. Certain socket options don't have a clear definition
1171 * on the expected behavior post connect(2). At the time
1172 * those options are issued on the MP socket, there may
1173 * be existing subflow sockets that are already connected.
1175 switch (sopt
->sopt_name
) {
1176 case SO_LINGER
: /* MP */
1177 case SO_LINGER_SEC
: /* MP */
1178 case SO_TYPE
: /* MP */
1179 case SO_NREAD
: /* MP */
1180 case SO_NWRITE
: /* MP */
1181 case SO_ERROR
: /* MP */
1182 case SO_SNDBUF
: /* MP */
1183 case SO_RCVBUF
: /* MP */
1184 case SO_SNDLOWAT
: /* MP */
1185 case SO_RCVLOWAT
: /* MP */
1186 case SO_SNDTIMEO
: /* MP */
1187 case SO_RCVTIMEO
: /* MP */
1188 case SO_NKE
: /* MP */
1189 case SO_NOSIGPIPE
: /* MP */
1190 case SO_NOADDRERR
: /* MP */
1191 case SO_LABEL
: /* MP */
1192 case SO_PEERLABEL
: /* MP */
1193 case SO_DEFUNCTOK
: /* MP */
1194 case SO_ISDEFUNCT
: /* MP */
1195 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1196 case SO_DELEGATED
: /* MP */
1197 case SO_DELEGATED_UUID
: /* MP */
1199 case SO_NECP_ATTRIBUTES
:
1200 case SO_NECP_CLIENTUUID
:
1203 * Tell the caller that these options are to be processed.
1207 case SO_DEBUG
: /* MP + subflow */
1208 case SO_KEEPALIVE
: /* MP + subflow */
1209 case SO_USELOOPBACK
: /* MP + subflow */
1210 case SO_RANDOMPORT
: /* MP + subflow */
1211 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1212 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1213 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1214 case SO_RECV_ANYIF
: /* MP + subflow */
1215 case SO_RESTRICTIONS
: /* MP + subflow */
1216 case SO_FLUSH
: /* MP + subflow */
1217 case SO_NOWAKEFROMSLEEP
:
1218 case SO_NOAPNFALLBK
:
1219 case SO_MARK_CELLFALLBACK
:
1221 * Tell the caller that these options are to be processed;
1222 * these will also be recorded later by mptcp_setopt().
1224 * NOTE: Only support integer option value for now.
1226 if (sopt
->sopt_valsize
!= sizeof (int))
1232 * Tell the caller to stop immediately and return an error.
1234 error
= ENOPROTOOPT
;
/*
 * Issue SOPT_SET for all MPTCP subflows (for integer option values).
 */
/*
 * Applies a recorded integer socket option (mpo) to every subflow of a
 * session, rolling the change back if it cannot be applied everywhere.
 *
 * Early exits set error to ENOPROTOOPT when the option is not marked
 * MPOF_SUBFLOW_OK, or when it is SO_NOSIGPIPE / SO_NOADDRERR at
 * SOL_SOCKET level.  With no subflows the option is only flagged
 * MPOF_INTERIM.
 *
 * Otherwise three passes are made over mpte_subflows, using a scratch
 * option `smpo` that shares mpo's level and name:
 *   1. read the current value from each subflow with
 *      mptcp_subflow_sogetopt(); on success remember it in
 *      mpts_oldintval and set MPTSF_SOPT_OLDVAL;
 *   2. set MPTSF_SOPT_INPROG and apply mpo with
 *      mptcp_subflow_sosetopt();
 *   3. clean up: subflows not marked INPROG, or without a saved old
 *      value, just have their bookkeeping cleared; the remaining ones
 *      get the saved value written back through smpo, after which the
 *      bookkeeping is cleared.
 *
 * NOTE(review): the return type, the locals `smpo`, `so` and `error`,
 * braces, gotos/labels, the loop-exit on a failed sosetopt, the
 * `continue` statements and the success/failure distinction in pass 3
 * are absent from this listing.
 */
1245 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1247 struct socket
*mp_so
;
1248 struct mptsub
*mpts
;
1252 /* just bail now if this isn't applicable to subflow sockets */
1253 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1254 error
= ENOPROTOOPT
;
1259 * Skip those that are handled internally; these options
1260 * should not have been recorded and marked with the
1261 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1263 if (mpo
->mpo_level
== SOL_SOCKET
&&
1264 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1265 error
= ENOPROTOOPT
;
1269 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1270 mp_so
= mptetoso(mpte
);
1273 * Don't bother going further if there's no subflow; mark the option
1274 * with MPOF_INTERIM so that we know whether or not to remove this
1275 * option upon encountering an error while issuing it during subflow
1278 if (mpte
->mpte_numflows
== 0) {
1279 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1280 mpo
->mpo_flags
|= MPOF_INTERIM
;
1281 /* return success */
/* Scratch option used to read, and later restore, per-subflow values. */
1285 bzero(&smpo
, sizeof (smpo
));
1286 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1287 smpo
.mpo_level
= mpo
->mpo_level
;
1288 smpo
.mpo_name
= mpo
->mpo_name
;
1290 /* grab exisiting values in case we need to rollback */
1291 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1294 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1295 mpts
->mpts_oldintval
= 0;
1296 smpo
.mpo_intval
= 0;
1297 VERIFY(mpts
->mpts_socket
!= NULL
);
1298 so
= mpts
->mpts_socket
;
1299 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1300 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1301 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1305 /* apply socket option */
1306 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1309 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1310 VERIFY(mpts
->mpts_socket
!= NULL
);
1311 so
= mpts
->mpts_socket
;
1312 error
= mptcp_subflow_sosetopt(mpte
, mpts
, mpo
);
1317 /* cleanup, and rollback if needed */
1318 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1321 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1322 /* clear in case it's set */
1323 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1324 mpts
->mpts_oldintval
= 0;
1327 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1328 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1329 VERIFY(mpts
->mpts_oldintval
== 0);
1332 /* error during sosetopt, so roll it back */
1334 VERIFY(mpts
->mpts_socket
!= NULL
);
1335 so
= mpts
->mpts_socket
;
1336 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1337 mptcp_subflow_sosetopt(mpte
, mpts
, &smpo
);
1339 mpts
->mpts_oldintval
= 0;
1340 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
/*
 * Handle SOPT_SET for socket options issued on MP socket.
 */
/*
 * mptcp_setopt: SOPT_SET handler for socket options issued on the MP socket.
 *
 * mpte: MPTCP session; asserted to be locked via mpte_lock_assert_held().
 * sopt: the request; sopt_dir is VERIFY'd to be SOPT_SET and sopt_level to
 *       be SOL_SOCKET or IPPROTO_TCP.
 *
 * What the visible code does:
 *  - SO_NECP_CLIENTUUID, TCP_NOTSENT_LOWAT, MPTCP_SERVICE_TYPE and
 *    MPTCP_ALTERNATE_PORT are handled directly against the MP socket /
 *    session state.
 *  - For the other listed options the value is copied in as an int, stored
 *    in a struct mptopt (looked up with mptcp_sopt_find(), otherwise
 *    allocated with mptcp_sopt_alloc()) or in the stack-local smpo, and
 *    then handed to mptcp_setopt_apply().
 *  - The outcome is logged through mptcplog().
 *
 * NOTE(review): a number of lines of this function (braces, switch headers,
 * gotos/returns, some case labels) are not visible in this excerpt; the
 * comments here describe only what is shown. The result is presumably the
 * value of `error` -- confirm against the full file.
 */
1351 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1353 int error
= 0, optval
= 0, level
, optname
, rec
= 1;
1354 struct mptopt smpo
, *mpo
= NULL
;
1355 struct socket
*mp_so
;
1357 level
= sopt
->sopt_level
;
1358 optname
= sopt
->sopt_name
;
1360 VERIFY(sopt
->sopt_dir
== SOPT_SET
);
1361 VERIFY(level
== SOL_SOCKET
|| level
== IPPROTO_TCP
);
1362 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1363 mp_so
= mptetoso(mpte
);
1366 * Record socket options which are applicable to subflow sockets so
1367 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1368 * for the list of eligible socket-level options.
1370 if (level
== SOL_SOCKET
) {
1374 case SO_USELOOPBACK
:
1376 case SO_TRAFFIC_CLASS
:
1377 case SO_RECV_TRAFFIC_CLASS
:
1378 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1380 case SO_RESTRICTIONS
:
1381 case SO_NOWAKEFROMSLEEP
:
1382 case SO_NOAPNFALLBK
:
1383 case SO_MARK_CELLFALLBACK
:
1387 /* don't record it */
1391 /* Next ones, record at MPTCP-level */
/*
 * Copies a uuid_t from the caller into the MP pcb's necp_client_uuid,
 * installs mptcp_session_necp_cb as the pcb's necp_cb and registers the
 * pcb with NECP for mp_so->last_pid.
 */
1393 case SO_NECP_CLIENTUUID
:
1394 if (!uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1399 error
= sooptcopyin(sopt
, &mpsotomppcb(mp_so
)->necp_client_uuid
,
1400 sizeof(uuid_t
), sizeof(uuid_t
));
1405 mpsotomppcb(mp_so
)->necp_cb
= mptcp_session_necp_cb
;
1406 error
= necp_client_register_multipath_cb(mp_so
->last_pid
,
1407 mpsotomppcb(mp_so
)->necp_client_uuid
,
1408 mpsotomppcb(mp_so
));
1412 if (uuid_is_null(mpsotomppcb(mp_so
)->necp_client_uuid
)) {
1418 case SO_NECP_ATTRIBUTES
:
1421 /* nothing to do; just return */
1427 case TCP_RXT_FINDROP
:
1431 case TCP_CONNECTIONTIMEOUT
:
1432 case TCP_RXT_CONNDROPTIME
:
1433 case PERSIST_TIMEOUT
:
1434 case TCP_ADAPTIVE_READ_TIMEOUT
:
1435 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1436 /* eligible; record it */
1438 case TCP_NOTSENT_LOWAT
:
1439 /* record at MPTCP level */
1440 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/*
 * Two visible outcomes: clear SOF_NOTSENT_LOWAT and set the low-water
 * mark to 0, or set the flag and pass a value to
 * mptcp_set_notsent_lowat(). The condition choosing between them is not
 * visible in this excerpt.
 */
1449 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1450 error
= mptcp_set_notsent_lowat(mpte
,0);
1452 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1453 error
= mptcp_set_notsent_lowat(mpte
,
1458 case MPTCP_SERVICE_TYPE
:
1459 /* record at MPTCP level */
1460 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1464 if (optval
< 0 || optval
>= MPTCP_SVCTYPE_MAX
) {
/*
 * Note the ordering: mpte_svctype is written before
 * mptcp_entitlement_check() is consulted. MPTE_SVCTYPE_CHECKED is set
 * after the check (the failure branch body is not visible here).
 */
1469 mpte
->mpte_svctype
= optval
;
1471 if (mptcp_entitlement_check(mp_so
) < 0) {
1476 mpte
->mpte_flags
|= MPTE_SVCTYPE_CHECKED
;
1479 case MPTCP_ALTERNATE_PORT
:
1480 /* record at MPTCP level */
1481 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/* accepted range is 0..UINT16_MAX inclusive */
1486 if (optval
< 0 || optval
> UINT16_MAX
) {
1491 mpte
->mpte_alternate_port
= optval
;
1496 error
= ENOPROTOOPT
;
/* copy the option value in as an int */
1501 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
1502 sizeof (optval
))) != 0)
1506 /* search for an existing one; if not found, allocate */
1507 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
)
1508 mpo
= mptcp_sopt_alloc(M_WAITOK
);
1513 mptcplog((LOG_DEBUG
, "%s: mp_so 0x%llx sopt %s val %d %s\n",
1514 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1515 mptcp_sopt2str(level
, optname
), optval
,
1516 (mpo
->mpo_flags
& MPOF_ATTACHED
) ?
1517 "updated" : "recorded"),
1518 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1520 /* initialize or update, as needed */
1521 mpo
->mpo_intval
= optval
;
1522 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1523 mpo
->mpo_level
= level
;
1524 mpo
->mpo_name
= optname
;
1525 mptcp_sopt_insert(mpte
, mpo
);
1527 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
1528 /* this can be issued on the subflow socket */
1529 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
/*
 * Alternative path: zero the stack-local smpo and fill a descriptor with
 * MPOF_SUBFLOW_OK plus level/name/value. The line pointing mpo at smpo and
 * the condition selecting this path (presumably `rec`) are not visible in
 * this excerpt.
 */
1532 bzero(&smpo
, sizeof (smpo
));
1534 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1535 mpo
->mpo_level
= level
;
1536 mpo
->mpo_name
= optname
;
1537 mpo
->mpo_intval
= optval
;
1539 VERIFY(mpo
== NULL
|| error
== 0);
1541 /* issue this socket option on existing subflows */
1543 error
= mptcp_setopt_apply(mpte
, mpo
);
/*
 * If applying failed for an option that is attached to the session's
 * option list, detach it and free it; the VERIFY documents that the
 * stack-local smpo is never in that state.
 */
1544 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1545 VERIFY(mpo
!= &smpo
);
1546 mptcp_sopt_remove(mpte
, mpo
);
1547 mptcp_sopt_free(mpo
);
1550 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
/*
 * Final logging: on success report "pending" or "successful" depending on
 * MPOF_INTERIM; on failure report level, name, value and the error code.
 */
1553 if (error
== 0 && mpo
!= NULL
) {
1554 mptcplog((LOG_INFO
, "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1555 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1556 mptcp_sopt2str(level
, optname
), optval
,
1557 (mpo
->mpo_flags
& MPOF_INTERIM
) ?
1558 "pending" : "successful"),
1559 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1560 } else if (error
!= 0) {
1561 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx sopt %s (%d, %d) val %d can't be issued error %d\n",
1562 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1563 mptcp_sopt2str(level
, optname
), level
, optname
, optval
, error
),
1564 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1570 * Handle SOPT_GET for socket options issued on MP socket.
/*
 * mptcp_getopt: SOPT_GET handler for socket options issued on the MP socket.
 *
 * mpte: MPTCP session; asserted to be locked via mpte_lock_assert_held().
 * sopt: the request; sopt_dir is VERIFY'd to be SOPT_GET.
 *
 * What the visible code does:
 *  - Levels other than IPPROTO_TCP produce ENOPROTOOPT.
 *  - For the listed option names a default is fetched with
 *    mptcp_default_tcp_optval(); an ENOPROTOOPT assignment is also present
 *    (presumably the default case for unlisted names).
 *  - TCP_NOTSENT_LOWAT, MPTCP_SERVICE_TYPE and MPTCP_ALTERNATE_PORT read
 *    their value from the MP socket / session state.
 *  - A previously recorded option found by mptcp_sopt_find() supplies its
 *    mpo_intval.
 *  - The resulting int is copied out with sooptcopyout().
 *
 * NOTE(review): braces, break/goto statements and the declaration of `mpo`
 * are not visible in this excerpt, so the exact control flow between these
 * steps should be confirmed against the full file.
 */
1573 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1575 int error
= 0, optval
= 0;
1577 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1578 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1581 * We only handle SOPT_GET for TCP level socket options; we should
1582 * not get here for socket level options since they are already
1583 * handled at the socket layer.
1585 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
1586 error
= ENOPROTOOPT
;
1590 switch (sopt
->sopt_name
) {
1592 case TCP_RXT_FINDROP
:
1596 case TCP_CONNECTIONTIMEOUT
:
1597 case TCP_RXT_CONNDROPTIME
:
1598 case PERSIST_TIMEOUT
:
1599 case TCP_ADAPTIVE_READ_TIMEOUT
:
1600 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1601 case TCP_NOTSENT_LOWAT
:
1602 case MPTCP_SERVICE_TYPE
:
1603 case MPTCP_ALTERNATE_PORT
:
1604 /* eligible; get the default value just in case */
1605 error
= mptcp_default_tcp_optval(mpte
, sopt
, &optval
);
1609 error
= ENOPROTOOPT
;
/* options whose current value lives in the MP socket / session itself */
1613 switch (sopt
->sopt_name
) {
1614 case TCP_NOTSENT_LOWAT
:
/* only queried when SOF_NOTSENT_LOWAT is set on the MP socket */
1615 if (mptetoso(mpte
)->so_flags
& SOF_NOTSENT_LOWAT
)
1616 optval
= mptcp_get_notsent_lowat(mpte
);
1620 case MPTCP_SERVICE_TYPE
:
1621 optval
= mpte
->mpte_svctype
;
1623 case MPTCP_ALTERNATE_PORT
:
1624 optval
= mpte
->mpte_alternate_port
;
1629 * Search for a previously-issued TCP level socket option and
1630 * return the recorded option value. This assumes that the
1631 * value did not get modified by the lower layer after it was
1632 * issued at setsockopt(2) time. If not found, we'll return
1633 * the default value obtained ealier.
1638 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) != NULL
)
1639 optval
= mpo
->mpo_intval
;
/* hand the int result back to the caller's buffer */
1641 error
= sooptcopyout(sopt
, &optval
, sizeof (int));
1648 * Return default values for TCP socket options. Ideally we would query the
1649 * subflow TCP socket, but that requires creating a subflow socket before
1650 * connectx(2) time. To simplify things, just return the default values
/*
 * mptcp_default_tcp_optval: supply a default value for a TCP-level option
 * queried on the MP socket.
 *
 * mpte:   MPTCP session; asserted to be locked via mpte_lock_assert_held().
 * sopt:   the request; VERIFY'd to be IPPROTO_TCP level and SOPT_GET.
 * optval: out-parameter that receives the default.
 *
 * Visible assignments: *optval = mptcp_subflow_keeptime (its case label is
 * not visible in this excerpt) and, for PERSIST_TIMEOUT,
 * *optval = tcp_max_persist_timeout. An ENOPROTOOPT assignment is present,
 * presumably for unlisted option names.
 *
 * NOTE(review): the body for the first group of case labels, the remaining
 * case labels and the return statement are not visible here -- confirm
 * against the full file.
 */
1654 mptcp_default_tcp_optval(struct mptses
*mpte
, struct sockopt
*sopt
, int *optval
)
1658 VERIFY(sopt
->sopt_level
== IPPROTO_TCP
);
1659 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1660 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1662 /* try to do what tcp_newtcpcb() does */
1663 switch (sopt
->sopt_name
) {
1665 case TCP_RXT_FINDROP
:
1668 case TCP_CONNECTIONTIMEOUT
:
1669 case TCP_RXT_CONNDROPTIME
:
1670 case TCP_NOTSENT_LOWAT
:
1671 case TCP_ADAPTIVE_READ_TIMEOUT
:
1672 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1673 case MPTCP_SERVICE_TYPE
:
1674 case MPTCP_ALTERNATE_PORT
:
1679 *optval
= mptcp_subflow_keeptime
;
1682 case PERSIST_TIMEOUT
:
1683 *optval
= tcp_max_persist_timeout
;
1687 error
= ENOPROTOOPT
;
1694 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1695 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1696 * to those that are allowed by mptcp_usr_socheckopt().
/*
 * mptcp_ctloutput: entry point for socket options issued on the MP socket.
 *
 * mp_so: the MP socket; its mppcb is obtained with mpsotomppcb().
 * sopt:  the request; sopt_dir selects the handler.
 *
 * What the visible code does:
 *  - Checks for a missing mppcb or one in MPPCB_STATE_DEAD (the branch body
 *    is not visible in this excerpt).
 *  - Derives the session with mptompte() and asserts its lock is held.
 *  - Logs and takes a separate branch for levels other than SOL_SOCKET and
 *    IPPROTO_TCP (the branch's result is not visible here).
 *  - Dispatches on sopt_dir to mptcp_setopt() or mptcp_getopt() and keeps
 *    their result in `error`.
 */
1699 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
1701 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1702 struct mptses
*mpte
;
1705 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1709 mpte
= mptompte(mpp
);
1710 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1712 /* we only handle socket and TCP-level socket options for MPTCP */
1713 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
1714 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1715 "%s: mp_so 0x%llx sopt %s level not "
1716 "handled\n", __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1717 mptcp_sopt2str(sopt
->sopt_level
, sopt
->sopt_name
)),
1718 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
/* hand off to the direction-specific handler (case labels not visible here) */
1723 switch (sopt
->sopt_dir
) {
1725 error
= mptcp_setopt(mpte
, sopt
);
1729 error
= mptcp_getopt(mpte
, sopt
);
/*
 * mptcp_sopt2str: map a (level, optname) pair to a string literal naming
 * the option. The handlers in this file pass the result to mptcplog() as
 * a "%s" argument.
 *
 * level:   socket option level.
 * optname: socket option name within that level.
 *
 * NOTE(review): the switch headers, many case labels and the fallback
 * return are not visible in this excerpt; only the visible label/string
 * pairs are described here.
 */
1737 mptcp_sopt2str(int level
, int optname
)
1743 return ("SO_LINGER");
1745 return ("SO_LINGER_SEC");
1747 return ("SO_DEBUG");
1749 return ("SO_KEEPALIVE");
1750 case SO_USELOOPBACK
:
1751 return ("SO_USELOOPBACK");
1755 return ("SO_NREAD");
1757 return ("SO_NWRITE");
1759 return ("SO_ERROR");
1761 return ("SO_SNDBUF");
1763 return ("SO_RCVBUF");
1765 return ("SO_SNDLOWAT");
1767 return ("SO_RCVLOWAT");
1769 return ("SO_SNDTIMEO");
1771 return ("SO_RCVTIMEO");
1775 return ("SO_NOSIGPIPE");
1777 return ("SO_NOADDRERR");
1778 case SO_RESTRICTIONS
:
1779 return ("SO_RESTRICTIONS");
1781 return ("SO_LABEL");
1783 return ("SO_PEERLABEL");
1785 return ("SO_RANDOMPORT");
1786 case SO_TRAFFIC_CLASS
:
1787 return ("SO_TRAFFIC_CLASS");
1788 case SO_RECV_TRAFFIC_CLASS
:
1789 return ("SO_RECV_TRAFFIC_CLASS");
1790 case SO_TRAFFIC_CLASS_DBG
:
1791 return ("SO_TRAFFIC_CLASS_DBG");
1792 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1793 return ("SO_PRIVILEGED_TRAFFIC_CLASS");
1795 return ("SO_DEFUNCTOK");
1797 return ("SO_ISDEFUNCT");
1798 case SO_OPPORTUNISTIC
:
1799 return ("SO_OPPORTUNISTIC");
1801 return ("SO_FLUSH");
1803 return ("SO_RECV_ANYIF");
1804 case SO_NOWAKEFROMSLEEP
:
1805 return ("SO_NOWAKEFROMSLEEP");
1806 case SO_NOAPNFALLBK
:
1807 return ("SO_NOAPNFALLBK");
/*
 * NOTE(review): the returned label drops "MARK_" from the constant's
 * name -- confirm whether that is intentional before relying on it when
 * searching logs.
 */
1808 case SO_MARK_CELLFALLBACK
:
1809 return ("SO_CELLFALLBACK");
1811 return ("SO_DELEGATED");
1812 case SO_DELEGATED_UUID
:
1813 return ("SO_DELEGATED_UUID");
1815 case SO_NECP_ATTRIBUTES
:
1816 return ("SO_NECP_ATTRIBUTES");
1817 case SO_NECP_CLIENTUUID
:
1818 return ("SO_NECP_CLIENTUUID");
1826 return ("TCP_NODELAY");
1828 return ("TCP_KEEPALIVE");
1830 return ("TCP_KEEPINTVL");
1832 return ("TCP_KEEPCNT");
1833 case TCP_CONNECTIONTIMEOUT
:
1834 return ("TCP_CONNECTIONTIMEOUT");
1835 case TCP_RXT_CONNDROPTIME
:
1836 return ("TCP_RXT_CONNDROPTIME");
1837 case PERSIST_TIMEOUT
:
1838 return ("PERSIST_TIMEOUT");
/*
 * NOTE(review): the next three labels omit the "TCP_" prefix that their
 * constants (and the other TCP-level labels above) carry -- confirm
 * whether that is intentional.
 */
1839 case TCP_NOTSENT_LOWAT
:
1840 return ("NOTSENT_LOWAT");
1841 case TCP_ADAPTIVE_READ_TIMEOUT
:
1842 return ("ADAPTIVE_READ_TIMEOUT");
1843 case TCP_ADAPTIVE_WRITE_TIMEOUT
:
1844 return ("ADAPTIVE_WRITE_TIMEOUT");
1845 case MPTCP_SERVICE_TYPE
:
1846 return ("MPTCP_SERVICE_TYPE");
1847 case MPTCP_ALTERNATE_PORT
:
1848 return ("MPTCP_ALTERNATE_PORT");
/*
 * mptcp_usr_preconnect: preconnect handler for the MP socket.
 *
 * mp_so: the MP socket; its session is obtained via mpsotomppcb() and
 *        mptompte(), VERIFY'd non-NULL and asserted locked.
 *
 * What the visible code does:
 *  - Picks a subflow with mptcp_get_subflow(mpte, NULL, NULL) and logs
 *    "invalid preconnect" (presumably when none is returned -- the
 *    condition is not visible in this excerpt).
 *  - Clears MPTSF_TFO_REQD on the subflow and TMPF_TFO_REQUEST on the
 *    subflow socket's tcpcb.
 *  - Calls tcp_output() on the subflow socket and keeps its result in
 *    `error`.
 *  - Calls soclearfastopen() on the MP socket.
 *
 * NOTE(review): the end of this function (and the declarations of `so` and
 * `error`) are not visible in this excerpt.
 */
1858 mptcp_usr_preconnect(struct socket
*mp_so
)
1860 struct mptsub
*mpts
= NULL
;
1861 struct mppcb
*mpp
= mpsotomppcb(mp_so
);
1862 struct mptses
*mpte
;
1864 struct tcpcb
*tp
= NULL
;
1867 mpte
= mptompte(mpp
);
1868 VERIFY(mpte
!= NULL
);
1869 mpte_lock_assert_held(mpte
); /* same as MP socket lock */
1871 mpts
= mptcp_get_subflow(mpte
, NULL
, NULL
);
/*
 * NOTE(review): unlike the other mptcplog() format strings in this part
 * of the file, this one has no trailing "\n".
 */
1873 mptcplog((LOG_ERR
, "%s: mp_so 0x%llx invalid preconnect ",
1874 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
1875 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
/* drop the TFO markers on both the subflow and its tcpcb before sending */
1878 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
1879 so
= mpts
->mpts_socket
;
1880 tp
= intotcpcb(sotoinpcb(so
));
1881 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
1882 error
= tcp_output(sototcpcb(so
));
1884 soclearfastopen(mp_so
);