2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
42 #include <netinet/in.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/tcp_seq.h>
47 #include <netinet/tcp_var.h>
48 #include <netinet/tcp_timer.h>
49 #include <netinet/mptcp_var.h>
50 #include <netinet/mptcp_timer.h>
/*
 * Forward declarations of the file-local (static) routines: the
 * mptcp_usr_* protocol-user handlers and the helpers that back them
 * (attach/detach, connectx/disconnectx, ioctl helpers, peeloff, send
 * path and socket-option helpers).
 */
54 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
55 static int mptcp_usr_detach(struct socket
*);
56 static int mptcp_attach(struct socket
*, struct proc
*);
57 static int mptcp_detach(struct socket
*, struct mppcb
*);
58 static int mptcp_connectx(struct mptses
*, struct sockaddr_list
**,
59 struct sockaddr_list
**, struct proc
*, uint32_t, sae_associd_t
,
60 sae_connid_t
*, uint32_t, void *, uint32_t);
61 static int mptcp_usr_connectx(struct socket
*, struct sockaddr_list
**,
62 struct sockaddr_list
**, struct proc
*, uint32_t, sae_associd_t
,
63 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
64 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
65 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
67 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
69 uint32_t *, user_addr_t
, uint32_t *);
70 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
72 static int mptcp_disconnectx(struct mptses
*, sae_associd_t
, sae_connid_t
);
73 static int mptcp_usr_disconnect(struct socket
*);
74 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
75 static struct mptses
*mptcp_usrclosed(struct mptses
*);
76 static int mptcp_usr_peeloff(struct socket
*, sae_associd_t
, struct socket
**);
77 static int mptcp_peeloff(struct mptses
*, sae_associd_t
, struct socket
**);
78 static int mptcp_usr_rcvd(struct socket
*, int);
79 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
80 struct sockaddr
*, struct mbuf
*, struct proc
*);
81 static int mptcp_usr_shutdown(struct socket
*);
82 static int mptcp_uiotombuf(struct uio
*, int, int, uint32_t, struct mbuf
**);
83 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
84 struct mbuf
*, struct mbuf
*, int);
85 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
86 static int mptcp_setopt_apply(struct mptses
*, struct mptopt
*);
87 static int mptcp_setopt(struct mptses
*, struct sockopt
*);
88 static int mptcp_getopt(struct mptses
*, struct sockopt
*);
89 static int mptcp_default_tcp_optval(struct mptses
*, struct sockopt
*, int *);
90 static void mptcp_connorder_helper(struct mptsub
*mpts
);
91 static int mptcp_usr_preconnect(struct socket
*so
);
/*
 * Protocol user-request switch for MPTCP sockets.  Each .pru_* slot is
 * bound to the corresponding mptcp_usr_* handler declared above;
 * .pru_soreceive is bound to soreceive rather than to an mptcp_usr_*
 * routine.
 */
93 struct pr_usrreqs mptcp_usrreqs
= {
94 .pru_attach
= mptcp_usr_attach
,
95 .pru_connectx
= mptcp_usr_connectx
,
96 .pru_control
= mptcp_usr_control
,
97 .pru_detach
= mptcp_usr_detach
,
98 .pru_disconnect
= mptcp_usr_disconnect
,
99 .pru_disconnectx
= mptcp_usr_disconnectx
,
100 .pru_peeloff
= mptcp_usr_peeloff
,
101 .pru_rcvd
= mptcp_usr_rcvd
,
102 .pru_send
= mptcp_usr_send
,
103 .pru_shutdown
= mptcp_usr_shutdown
,
104 .pru_sosend
= mptcp_usr_sosend
,
105 .pru_soreceive
= soreceive
,
106 .pru_socheckopt
= mptcp_usr_socheckopt
,
107 .pru_preconnect
= mptcp_usr_preconnect
,
111 * Sysctl for testing and tuning mptcp connectx with data api.
112 * Mirrors tcp_preconnect_sbspace for now.
/*
 * Preconnect send-buffer sizing.  MPTCP_PRECONNECT_SBSZ_MIN and
 * MPTCP_PRECONNECT_SBSZ_MAX bound the value; MPTCP_PRECONNECT_SBSZ_DEF is
 * the initial setting of mptcp_preconnect_sbspace.  The variable is
 * exported read-write (CTLFLAG_RW) as the mp_preconn_sbsz sysctl, and
 * mptcp_attach() clamps it to [MIN, MAX] before reserving the space.
 */
114 #define MPTCP_PRECONNECT_SBSZ_MAX 1460
115 #define MPTCP_PRECONNECT_SBSZ_MIN (TCP_MSS)
116 #define MPTCP_PRECONNECT_SBSZ_DEF (TCP6_MSS)
117 static int mptcp_preconnect_sbspace
= MPTCP_PRECONNECT_SBSZ_DEF
;
118 SYSCTL_INT(_net_inet_mptcp
, OID_AUTO
, mp_preconn_sbsz
, CTLFLAG_RW
| CTLFLAG_LOCKED
,
119 &mptcp_preconnect_sbspace
, 0, "Maximum preconnect space");
123 * Attaches an MPTCP control block to a socket.
/*
 * pru_attach handler.
 *
 * mp_so: MPTCP socket being attached; must not yet have an MP PCB
 *        (asserted through sotomppcb()).
 * proto: unused.
 * p:     process handed on to mptcp_attach().
 *
 * Calls mptcp_attach() and keeps its result in error.  A socket that has
 * SO_LINGER set with a zero so_linger is given TCP_LINGERTIME * hz as its
 * linger time.
 * NOTE(review): the error check between the two steps and the return
 * statement are not visible in this listing.
 */
126 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
128 #pragma unused(proto)
131 VERIFY(sotomppcb(mp_so
) == NULL
);
133 error
= mptcp_attach(mp_so
, p
);
139 * Might want to use a different SO_LINGER timeout than TCP's?
141 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0)
142 mp_so
->so_linger
= TCP_LINGERTIME
* hz
;
148 * Detaches an MPTCP control block from a socket.
/*
 * pru_detach handler.
 *
 * Looks up the MP PCB of mp_so with sotomppcb(), asserts that the PCB
 * still points at a socket, and delegates to mptcp_detach(); error holds
 * the result of that call.
 */
151 mptcp_usr_detach(struct socket
*mp_so
)
153 struct mppcb
*mpp
= sotomppcb(mp_so
);
157 VERIFY(mpp
->mpp_socket
!= NULL
);
159 error
= mptcp_detach(mp_so
, mpp
);
164 * Attach MPTCP protocol to socket, allocating MP control block,
165 * MPTCP session, control block, buffer space, etc.
/*
 * Set up the MPTCP state behind a new MP socket.
 *
 * Steps visible here, in order:
 *  - if either socket buffer has a zero high-water mark, reserve buffer
 *    space with soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX);
 *  - if no preconnect space is reserved yet, reserve
 *    mptcp_preconnect_sbspace clamped to
 *    [MPTCP_PRECONNECT_SBSZ_MIN, MPTCP_PRECONNECT_SBSZ_MAX];
 *  - set SB_NOCOMPRESS and clear SB_AUTOSIZE on both socket buffers;
 *  - allocate the MP PCB with mp_pcballoc() against mtcbinfo;
 *  - fetch the session (mpp_pcbe) and its MPTCP control block and assert
 *    that both exist.
 * NOTE(review): the failure branches and the return statement are not
 * visible in this listing.
 */
168 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
171 struct mptses
*mpte
= NULL
;
172 struct mptcb
*mp_tp
= NULL
;
173 struct mppcb
*mpp
= NULL
;
176 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
177 error
= soreserve(mp_so
, tcp_sendspace
, MPTCP_RWIN_MAX
);
182 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
183 soreserve_preconnect(mp_so
, imin(MPTCP_PRECONNECT_SBSZ_MAX
,
184 imax(mptcp_preconnect_sbspace
, MPTCP_PRECONNECT_SBSZ_MIN
)));
188 * MPTCP socket buffers cannot be compressed, due to the
189 * fact that each mbuf chained via m_next is a M_PKTHDR
190 * which carries some MPTCP metadata.
192 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
193 mp_so
->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
195 /* Disable socket buffer auto-tuning. */
196 mp_so
->so_rcv
.sb_flags
&= ~SB_AUTOSIZE
;
197 mp_so
->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
199 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
203 mpp
= sotomppcb(mp_so
);
205 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
206 VERIFY(mpte
!= NULL
);
207 mp_tp
= mpte
->mpte_mptcb
;
208 VERIFY(mp_tp
!= NULL
);
214 * Called when the socket layer loses its final reference to the socket;
215 * at this point, there is only one case in which we will keep things
/*
 * Tear-down counterpart of mptcp_attach().
 *
 * Asserts that mp_so and mpp point at each other and that the PCB has a
 * pcbinfo, takes the session as the mpp_ses member of the struct mpp_mtp
 * that mpp is cast to, asserts the session lock is held, and then asks
 * for every subflow to be disconnected through
 * mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL).  The result of
 * that call is deliberately discarded.
 */
219 mptcp_detach(struct socket
*mp_so
, struct mppcb
*mpp
)
222 struct mppcbinfo
*mppi
;
224 VERIFY(mp_so
->so_pcb
== mpp
);
225 VERIFY(mpp
->mpp_socket
== mp_so
);
227 mppi
= mpp
->mpp_pcbinfo
;
228 VERIFY(mppi
!= NULL
);
230 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
231 VERIFY(mpte
->mpte_mppcb
== mpp
);
233 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
236 * We are done with this MPTCP socket (it has been closed);
237 * trigger all subflows to be disconnected, if not already,
238 * by initiating the PCB detach sequence (SOF_PCBCLEARING
243 (void) mptcp_disconnectx(mpte
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
248 * Here, we would want to handle time wait state.
255 * Common subroutine to open a MPTCP connection to one of the remote hosts
256 * specified by dst_sl. This includes allocating and establishing a
257 * subflow TCP connection, either initially to establish MPTCP connection,
258 * or to join an existing one. Returns a connection handle upon success.
/*
 * Create one subflow for the session and start connecting it.
 *
 * mpte:    session; its lock must be held (asserted).
 * src_sl:  optional source address list; *src_sl is stored on the subflow
 *          when src_sl is non-NULL.
 * dst_sl:  destination address list; must be non-NULL and non-empty
 *          (asserted); *dst_sl is stored on the subflow.
 * p, ifscope: passed on to mptcp_subflow_add().
 * pcid:    out; receives the connection ID of the subflow on success.
 *
 * The subflow comes from mptcp_subflow_alloc(M_WAITOK) and is referenced
 * for the duration of this routine.  When mptcp_subflow_add() fails with
 * anything other than EWOULDBLOCK and the subflow is already marked
 * MPTSF_ATTACHED, it is removed again with mptcp_subflow_del().
 * NOTE(review): the pragma lists p and aid as unused although both are
 * referenced below; the allocation-failure path, the reference release
 * and the return statement are not visible in this listing.
 */
261 mptcp_connectx(struct mptses
*mpte
, struct sockaddr_list
**src_sl
,
262 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
263 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
266 #pragma unused(p, aid, flags, arg, arglen)
268 struct socket
*mp_so
;
271 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
272 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
274 VERIFY(dst_sl
!= NULL
&& *dst_sl
!= NULL
);
275 VERIFY(pcid
!= NULL
);
277 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
278 "%s: mp_so 0x%llx\n", __func__
,
279 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
280 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
282 DTRACE_MPTCP3(connectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
283 struct socket
*, mp_so
);
285 mpts
= mptcp_subflow_alloc(M_WAITOK
);
290 MPTS_ADDREF(mpts
); /* for this routine */
292 if (src_sl
!= NULL
) {
293 mpts
->mpts_src_sl
= *src_sl
;
296 mpts
->mpts_dst_sl
= *dst_sl
;
299 error
= mptcp_subflow_add(mpte
, mpts
, p
, ifscope
);
300 if (error
== 0 && pcid
!= NULL
)
301 *pcid
= mpts
->mpts_connid
;
305 if ((error
!= 0) && (error
!= EWOULDBLOCK
)) {
307 if (mpts
->mpts_flags
& MPTSF_ATTACHED
) {
310 mptcp_subflow_del(mpte
, mpts
, TRUE
);
322 * User-protocol pru_connectx callback.
/*
 * pru_connectx handler.
 *
 * Checks that the socket still has a live MP PCB (a NULL PCB or one in
 * MPPCB_STATE_DEAD takes the early-exit branch), fetches the session and
 * MPTCP control block, tests MPTCPF_FALLBACK_TO_TCP, and then opens a
 * subflow through mptcp_connectx() with the caller's arguments.
 *
 * Data supplied with the connect request (auio) is sent through the
 * socket's pru_sosend with the socket lock dropped around the call and
 * re-taken afterwards.  When the send returns 0 or EWOULDBLOCK,
 * *bytes_written is set to the amount consumed from auio
 * (datalen - uio_resid(auio)).  With MPTCPF_PEEL_OFF set on the control
 * block, *bytes_written is computed the same way.
 * NOTE(review): the error codes chosen on the early exits, the guard
 * around the data-copy section and the return statement are not visible
 * in this listing.
 */
325 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr_list
**src_sl
,
326 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
327 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
328 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
330 struct mppcb
*mpp
= sotomppcb(mp_so
);
331 struct mptses
*mpte
= NULL
;
332 struct mptcb
*mp_tp
= NULL
;
333 user_ssize_t datalen
;
337 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
341 mpte
= mptompte(mpp
);
342 VERIFY(mpte
!= NULL
);
344 mp_tp
= mpte
->mpte_mptcb
;
345 VERIFY(mp_tp
!= NULL
);
347 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
352 error
= mptcp_connectx(mpte
, src_sl
, dst_sl
, p
, ifscope
,
353 aid
, pcid
, flags
, arg
, arglen
);
355 /* If there is data, copy it */
357 datalen
= uio_resid(auio
);
358 socket_unlock(mp_so
, 0);
359 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
360 (uio_t
) auio
, NULL
, NULL
, 0);
361 /* check if this can be supported with fast Join also. XXX */
362 if (error
== 0 || error
== EWOULDBLOCK
)
363 *bytes_written
= datalen
- uio_resid(auio
);
365 if (error
== EWOULDBLOCK
)
368 socket_lock(mp_so
, 0);
370 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
371 *bytes_written
= datalen
- uio_resid(auio
);
373 * Override errors like EPIPE that occur as
374 * a result of doing TFO during TCP fallback.
386 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
/*
 * SIOCGASSOCIDS helper.
 *
 * mpte: session; its lock must be held (asserted).
 * cnt:  out; set to 1 when the session has an association ID other than
 *       SAE_ASSOCID_ANY, otherwise 0.
 * aidp: user address that receives the association ID; USER_ADDR_NULL
 *       means the caller only wants the count.
 *
 * With a non-NULL aidp the result of copyout() of mpte_associd is
 * returned.
 */
389 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
391 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
393 /* MPTCP has at most 1 association */
394 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
396 /* just asking how many there are? */
397 if (aidp
== USER_ADDR_NULL
)
400 return (copyout(&mpte
->mpte_associd
, aidp
,
401 sizeof (mpte
->mpte_associd
)));
405 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
/*
 * SIOCGCONNIDS helper.
 *
 * mpte: session; its lock must be held (asserted).
 * aid:  association to query; values other than SAE_ASSOCID_ANY,
 *       SAE_ASSOCID_ALL or the session's own association ID take the
 *       rejection branch.
 * cnt:  out; set to the number of subflows (mpte_numflows).
 * cidp: user address of the output array; USER_ADDR_NULL means the caller
 *       only wants the count.
 *
 * Otherwise the connection ID of every subflow is copied out in list
 * order, advancing cidp by sizeof (mpts_connid) after each successful
 * copyout().
 * NOTE(review): the tail of the parameter list, the bodies of the
 * early-exit branches and the return statement are not visible in this
 * listing.
 */
408 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
414 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
416 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
417 aid
!= mpte
->mpte_associd
)
420 *cnt
= mpte
->mpte_numflows
;
422 /* just asking how many there are? */
423 if (cidp
== USER_ADDR_NULL
)
426 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
427 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
428 sizeof (mpts
->mpts_connid
))) != 0)
431 cidp
+= sizeof (mpts
->mpts_connid
);
438 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
/*
 * SIOCGCONNINFO helper: report the state of one subflow.
 *
 * mpte:     session; its lock must be held (asserted).
 * cid:      in/out; connection to look up.  SAE_CONNID_ALL is tested
 *           before the search; SAE_CONNID_ANY matches the first subflow
 *           in the list.  On success *cid is rewritten with the ID of
 *           the subflow that was found.
 * flags:    out; CIF_* bits OR-ed in, one for each corresponding MPTSF_*
 *           bit set on the subflow.
 * ifindex:  out; if_index of the outbound interface, or 0 when the
 *           subflow has none.
 * soerror:  out; mpts_soerror of the subflow.
 * src/dst:  user addresses for the first entry of the source and
 *           destination address lists; copied only when not
 *           USER_ADDR_NULL.  *src_len and *dst_len are always set to the
 *           sa_len of the address.
 * aux_type, aux_data, aux_len: for a subflow that has a socket, the type
 *           is CIAUX_TCP and the length is sizeof (struct conninfo_tcp);
 *           with a non-NULL aux_data the output of tcp_getconninfo() is
 *           copied out.
 *
 * When no subflow matches, ENXIO is returned for SAE_CONNID_ANY and
 * EINVAL otherwise.
 * NOTE(review): the pragma lists aux_data as unused although it is
 * referenced below.  The initialisation of *flags, the handling of
 * copyout() errors, the lock taken before the visible socket_unlock(so)
 * and the final return are not visible in this listing.
 */
441 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
442 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
443 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
444 user_addr_t aux_data
, uint32_t *aux_len
)
446 #pragma unused(aux_data)
447 struct sockaddr_entry
*se
;
448 struct ifnet
*ifp
= NULL
;
452 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
454 if (*cid
== SAE_CONNID_ALL
)
457 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
458 if (mpts
->mpts_connid
== *cid
|| *cid
== SAE_CONNID_ANY
)
462 return ((*cid
== SAE_CONNID_ANY
) ? ENXIO
: EINVAL
);
465 ifp
= mpts
->mpts_outif
;
466 *cid
= mpts
->mpts_connid
;
467 *ifindex
= ((ifp
!= NULL
) ? ifp
->if_index
: 0);
468 *soerror
= mpts
->mpts_soerror
;
470 if (mpts
->mpts_flags
& MPTSF_CONNECTING
)
471 *flags
|= CIF_CONNECTING
;
472 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
473 *flags
|= CIF_CONNECTED
;
474 if (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)
475 *flags
|= CIF_DISCONNECTING
;
476 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
477 *flags
|= CIF_DISCONNECTED
;
478 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
)
479 *flags
|= CIF_BOUND_IF
;
480 if (mpts
->mpts_flags
& MPTSF_BOUND_IP
)
481 *flags
|= CIF_BOUND_IP
;
482 if (mpts
->mpts_flags
& MPTSF_BOUND_PORT
)
483 *flags
|= CIF_BOUND_PORT
;
484 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
485 *flags
|= CIF_PREFERRED
;
486 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
)
487 *flags
|= CIF_MP_CAPABLE
;
488 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
489 *flags
|= CIF_MP_DEGRADED
;
490 if (mpts
->mpts_flags
& MPTSF_MP_READY
)
491 *flags
|= CIF_MP_READY
;
492 if (mpts
->mpts_flags
& MPTSF_ACTIVE
)
493 *flags
|= CIF_MP_ACTIVE
;
495 VERIFY(mpts
->mpts_src_sl
!= NULL
);
496 se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
497 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
);
498 *src_len
= se
->se_addr
->sa_len
;
499 if (src
!= USER_ADDR_NULL
) {
500 error
= copyout(se
->se_addr
, src
, se
->se_addr
->sa_len
);
505 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
506 se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
507 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
);
508 *dst_len
= se
->se_addr
->sa_len
;
509 if (dst
!= USER_ADDR_NULL
) {
510 error
= copyout(se
->se_addr
, dst
, se
->se_addr
->sa_len
);
517 if (mpts
->mpts_socket
!= NULL
) {
518 struct conninfo_tcp tcp_ci
;
520 *aux_type
= CIAUX_TCP
;
521 *aux_len
= sizeof (tcp_ci
);
523 if (aux_data
!= USER_ADDR_NULL
) {
524 struct socket
*so
= mpts
->mpts_socket
;
526 VERIFY(SOCK_PROTO(so
) == IPPROTO_TCP
);
527 bzero(&tcp_ci
, sizeof (tcp_ci
));
529 tcp_getconninfo(so
, &tcp_ci
);
530 socket_unlock(so
, 0);
531 error
= copyout(&tcp_ci
, aux_data
, sizeof (tcp_ci
));
536 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
537 "%s: cid %d flags %x \n",
538 __func__
, mpts
->mpts_connid
, mpts
->mpts_flags
),
539 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
547 * Handle SIOCSCONNORDER
/*
 * SIOCSCONNORDER helper: change the rank of the subflow identified by
 * cid.
 *
 * mpte: session; its lock must be held (asserted).
 * cid:  subflow to re-rank; SAE_CONNID_ANY and SAE_CONNID_ALL take the
 *       rejection branch.
 * rank: requested rank.
 *
 * rank == 0 or rank > 1: the subflow list is searched for an entry
 * flagged MPTSF_PREFERRED (mpts1).  The target loses MPTSF_PREFERRED and
 * is given the requested rank; when a different preferred subflow was
 * found its rank becomes that subflow's rank plus one, and when none was
 * found and rank is 0 the target is promoted instead.
 *
 * rank == 1: the list is walked; entries carrying MPTSF_PREFERRED have the
 * flag cleared, and the target is given rank 1 and, if it is
 * MPTSF_MP_CAPABLE, the MPTSF_PREFERRED flag.  mptcp_connorder_helper() is
 * called for each changed subflow when the session has more than one
 * MP-capable flow.
 * NOTE(review): part of the condition guarding the first rank == 1 branch,
 * the promotion body for rank == 0 and all return statements are not
 * visible in this listing.
 */
550 mptcp_setconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t rank
)
552 struct mptsub
*mpts
, *mpts1
;
555 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
556 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
557 "%s: cid %d rank %d \n", __func__
, cid
, rank
),
558 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
560 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
565 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
566 if (mpts
->mpts_connid
== cid
)
574 if (rank
== 0 || rank
> 1) {
576 * If rank is 0, determine whether this should be the
577 * primary or backup subflow, depending on what we have.
579 * Otherwise, if greater than 0, make it a backup flow.
581 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
583 if (mpts1
->mpts_flags
& MPTSF_PREFERRED
) {
591 mpts
->mpts_flags
&= ~MPTSF_PREFERRED
;
592 mpts
->mpts_rank
= rank
;
593 if (mpts1
!= NULL
&& mpts
!= mpts1
) {
594 /* preferred subflow found; set rank as necessary */
596 mpts
->mpts_rank
= (mpts1
->mpts_rank
+ 1);
597 } else if (rank
== 0) {
598 /* no preferred one found; promote this */
606 * If rank is 1, promote this subflow to be preferred.
608 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
611 (mpts1
->mpts_flags
& MPTSF_PREFERRED
)) {
612 mpts1
->mpts_flags
&= ~MPTSF_PREFERRED
;
613 if (mpte
->mpte_nummpcapflows
> 1)
614 mptcp_connorder_helper(mpts1
);
615 } else if (mpts1
== mpts
) {
616 mpts1
->mpts_rank
= 1;
617 if (mpts1
->mpts_flags
& MPTSF_MP_CAPABLE
) {
618 mpts1
->mpts_flags
|= MPTSF_PREFERRED
;
619 if (mpte
->mpte_nummpcapflows
> 1)
620 mptcp_connorder_helper(mpts1
);
/*
 * Push a priority change down to the TCP control block of a subflow.
 *
 * Sets TMPF_SND_MPPRIO on the tcpcb of the subflow socket, then clears
 * TMPF_BACKUP_PATH when the subflow is flagged MPTSF_PREFERRED and sets it
 * otherwise.  The subflow socket is unlocked at the end.
 * NOTE(review): the matching lock acquisition on so is not visible in this
 * listing.
 */
632 mptcp_connorder_helper(struct mptsub
*mpts
)
634 struct socket
*so
= mpts
->mpts_socket
;
635 struct tcpcb
*tp
= NULL
;
639 tp
= intotcpcb(sotoinpcb(so
));
640 tp
->t_mpflags
|= TMPF_SND_MPPRIO
;
641 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
642 tp
->t_mpflags
&= ~TMPF_BACKUP_PATH
;
644 tp
->t_mpflags
|= TMPF_BACKUP_PATH
;
646 socket_unlock(so
, 0);
651 * Handle SIOCGCONNORDER
/*
 * SIOCGCONNORDER helper: report the rank of the subflow identified by
 * cid.
 *
 * mpte: session; its lock must be held (asserted).
 * cid:  subflow to query; SAE_CONNID_ANY and SAE_CONNID_ALL take the
 *       rejection branch.
 * rank: out, must be non-NULL (asserted); receives mpts_rank of the
 *       matching subflow.
 * NOTE(review): the error codes and the not-found handling are not
 * visible in this listing.
 */
654 mptcp_getconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t *rank
)
659 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
660 VERIFY(rank
!= NULL
);
663 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
668 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
669 if (mpts
->mpts_connid
== cid
)
678 *rank
= mpts
->mpts_rank
;
685 * User-protocol pru_control callback.
/*
 * pru_control (ioctl) handler.
 *
 * mp_so: MPTCP socket; a NULL PCB or one in MPPCB_STATE_DEAD takes the
 *        early-exit branch.
 * cmd:   ioctl command selecting one of the cases below.
 * data:  kernel copy of the request structure; read with bcopy() into a
 *        local of the matching type and written back with bcopy() after
 *        the helper has run.
 * ifp, p: unused.
 *
 * Dispatch:
 *   SIOCGASSOCIDS32/64  -> mptcp_getassocids()
 *   SIOCGCONNIDS32/64   -> mptcp_getconnids()
 *   SIOCGCONNINFO32/64  -> mptcp_getconninfo()
 *   SIOCSCONNORDER      -> mptcp_setconnorder()
 *   SIOCGCONNORDER      -> mptcp_getconnorder()
 * NOTE(review): the final arguments of several helper calls, any
 * condition on the copy-back, the default case and the return statement
 * are not visible in this listing.
 */
688 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
689 struct ifnet
*ifp
, struct proc
*p
)
691 #pragma unused(ifp, p)
692 struct mppcb
*mpp
= sotomppcb(mp_so
);
696 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
700 mpte
= mptompte(mpp
);
701 VERIFY(mpte
!= NULL
);
703 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
706 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
707 struct so_aidreq32 aidr
;
708 bcopy(data
, &aidr
, sizeof (aidr
));
709 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
712 bcopy(&aidr
, data
, sizeof (aidr
));
716 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
717 struct so_aidreq64 aidr
;
718 bcopy(data
, &aidr
, sizeof (aidr
));
719 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
722 bcopy(&aidr
, data
, sizeof (aidr
));
726 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
727 struct so_cidreq32 cidr
;
728 bcopy(data
, &cidr
, sizeof (cidr
));
729 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
732 bcopy(&cidr
, data
, sizeof (cidr
));
736 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
737 struct so_cidreq64 cidr
;
738 bcopy(data
, &cidr
, sizeof (cidr
));
739 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
742 bcopy(&cidr
, data
, sizeof (cidr
));
746 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
747 struct so_cinforeq32 cifr
;
748 bcopy(data
, &cifr
, sizeof (cifr
));
749 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
750 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
751 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
752 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
755 bcopy(&cifr
, data
, sizeof (cifr
));
759 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
760 struct so_cinforeq64 cifr
;
761 bcopy(data
, &cifr
, sizeof (cifr
));
762 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
763 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
764 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
765 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
768 bcopy(&cifr
, data
, sizeof (cifr
));
772 case SIOCSCONNORDER
: { /* struct so_cordreq */
773 struct so_cordreq cor
;
774 bcopy(data
, &cor
, sizeof (cor
));
775 error
= mptcp_setconnorder(mpte
, cor
.sco_cid
, cor
.sco_rank
);
777 bcopy(&cor
, data
, sizeof (cor
));
781 case SIOCGCONNORDER
: { /* struct so_cordreq */
782 struct so_cordreq cor
;
783 bcopy(data
, &cor
, sizeof (cor
));
784 error
= mptcp_getconnorder(mpte
, cor
.sco_cid
, &cor
.sco_rank
);
786 bcopy(&cor
, data
, sizeof (cor
));
799 * Initiate a disconnect. MPTCP-level disconnection is specified by
800 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
801 * connection while keeping the MPTCP-level connection (association).
/*
 * Disconnect either the whole MPTCP association or a single subflow.
 *
 * mpte: session; its lock must be held (asserted).
 * aid:  must be SAE_ASSOCID_ANY, SAE_ASSOCID_ALL or the session's own
 *       association ID (asserted).
 * cid:  SAE_CONNID_ANY or SAE_CONNID_ALL terminates the association; any
 *       other value selects one subflow.
 *
 * Association teardown: unless the socket is already SOF_PCBCLEARING, the
 * connected and disconnecting socket states are checked first.  All MPTCP
 * timers are cancelled; then
 *   - below MPTCPS_ESTABLISHED the connection is closed with
 *     mptcp_close();
 *   - with SO_LINGER set and a zero linger time it is dropped with
 *     mptcp_drop();
 *   - otherwise the socket is marked disconnecting, the receive buffer is
 *     flushed and mptcp_usrclosed() is called, followed by mptcp_output()
 *     when a session is returned.
 *
 * Single subflow: the subflow with a matching connection ID is flagged
 * MPTSF_USER_DISCONNECT and disconnected.  If the connection is not yet
 * MPTCPF_JOIN_READY and that subflow socket is not a secondary subflow
 * (SOF_MP_SEC_SUBFLOW clear), every other subflow is disconnected as well.
 *
 * mptcp_thread_signal() is called, and queued socket options are flushed
 * once the socket can neither send nor receive.
 * NOTE(review): the error codes of the state checks, the not-found
 * handling and the return statement are not visible in this listing.
 */
804 mptcp_disconnectx(struct mptses
*mpte
, sae_associd_t aid
, sae_connid_t cid
)
807 struct socket
*mp_so
;
811 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
813 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
814 mp_tp
= mpte
->mpte_mptcb
;
816 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
817 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__
,
818 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), aid
, cid
, mp_so
->so_error
),
819 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
821 DTRACE_MPTCP5(disconnectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
822 sae_connid_t
, cid
, struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
824 VERIFY(aid
== SAE_ASSOCID_ANY
|| aid
== SAE_ASSOCID_ALL
||
825 aid
== mpte
->mpte_associd
);
827 /* terminate the association? */
828 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
829 /* if we're not detached, go thru socket state checks */
830 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
831 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
836 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
842 mptcp_cancel_all_timers(mp_tp
);
843 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
844 (void) mptcp_close(mpte
, mp_tp
);
846 } else if ((mp_so
->so_options
& SO_LINGER
) &&
847 mp_so
->so_linger
== 0) {
848 (void) mptcp_drop(mpte
, mp_tp
, 0);
852 soisdisconnecting(mp_so
);
853 sbflush(&mp_so
->so_rcv
);
854 if (mptcp_usrclosed(mpte
) != NULL
)
855 (void) mptcp_output(mpte
);
858 bool disconnect_embryonic_subflows
= false;
859 struct socket
*so
= NULL
;
861 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
862 if (mpts
->mpts_connid
!= cid
)
867 * Check if disconnected subflow is the one used
868 * to initiate MPTCP connection.
869 * If it is and the connection is not yet join ready
870 * disconnect all other subflows.
872 so
= mpts
->mpts_socket
;
873 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
) &&
874 so
&& !(so
->so_flags
& SOF_MP_SEC_SUBFLOW
)) {
875 disconnect_embryonic_subflows
= true;
878 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
879 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
889 if (disconnect_embryonic_subflows
) {
890 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
891 if (mpts
->mpts_connid
== cid
)
894 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
901 mptcp_thread_signal(mpte
);
903 if ((mp_so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
904 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
905 /* the socket has been shutdown, no more sockopt's */
906 mptcp_flush_sopts(mpte
);
914 * Wrapper function to support disconnect on socket
/*
 * pru_disconnect handler: a plain disconnect is expressed as a
 * disconnectx of every association and every connection, i.e.
 * mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL).
 * error holds the result of that call.
 */
917 mptcp_usr_disconnect(struct socket
*mp_so
)
921 error
= mptcp_usr_disconnectx(mp_so
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
926 * User-protocol pru_disconnectx callback.
/*
 * pru_disconnectx handler.
 *
 * Requires a live MP PCB (a NULL PCB or one in MPPCB_STATE_DEAD takes the
 * early-exit branch) and the session lock (asserted).  An aid that is
 * neither SAE_ASSOCID_ANY, SAE_ASSOCID_ALL nor the session's association
 * ID takes the rejection branch; otherwise the request is passed to
 * mptcp_disconnectx() and error holds its result.
 * NOTE(review): the error codes of the rejection branches and the return
 * statement are not visible in this listing.
 */
929 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
931 struct mppcb
*mpp
= sotomppcb(mp_so
);
935 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
939 mpte
= mptompte(mpp
);
940 VERIFY(mpte
!= NULL
);
941 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
943 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
944 aid
!= mpte
->mpte_associd
) {
949 error
= mptcp_disconnectx(mpte
, aid
, cid
);
955 * User issued close, and wishes to trail through the shutdown states.
/*
 * Advance the MPTCP connection through its close state machine after a
 * user-initiated close.
 *
 * mpte: session; its lock must be held (asserted).
 *
 * Feeds MPCE_CLOSE to mptcp_close_fsm(), then acts on the resulting
 * state:
 *   - MPTCPS_CLOSED: the connection is closed with mptcp_close() and mpte
 *     is replaced by the value that call returns;
 *   - MPTCPS_FIN_WAIT_2 or later: the socket is marked disconnected and
 *     every subflow is flagged MPTSF_USER_DISCONNECT.
 * A further loop flags every subflow MPTSF_USER_DISCONNECT and calls
 * mptcp_subflow_disconnect() on it.
 *
 * Returns a struct mptses pointer; callers in this file compare it with
 * NULL before using the session again.
 * NOTE(review): the condition guarding the final loop and the return
 * statement are not visible in this listing.
 */
957 static struct mptses
*
958 mptcp_usrclosed(struct mptses
*mpte
)
960 struct socket
*mp_so
;
964 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
965 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
966 mp_tp
= mpte
->mpte_mptcb
;
969 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
971 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
972 mpte
= mptcp_close(mpte
, mp_tp
);
974 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
976 soisdisconnected(mp_so
);
977 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
979 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
985 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
987 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
988 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
997 * User-protocol pru_peeloff callback.
/*
 * pru_peeloff handler.
 *
 * mp_so: MPTCP socket; a NULL PCB or one in MPPCB_STATE_DEAD takes the
 *        early-exit branch.
 * aid:   association ID passed on to mptcp_peeloff().
 * psop:  out, must be non-NULL (asserted); handed to mptcp_peeloff().
 *
 * error holds the result of mptcp_peeloff().
 */
1000 mptcp_usr_peeloff(struct socket
*mp_so
, sae_associd_t aid
, struct socket
**psop
)
1002 struct mppcb
*mpp
= sotomppcb(mp_so
);
1003 struct mptses
*mpte
;
1006 VERIFY(psop
!= NULL
);
1008 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1012 mpte
= mptompte(mpp
);
1013 VERIFY(mpte
!= NULL
);
1015 error
= mptcp_peeloff(mpte
, aid
, psop
);
1021 * Transform a previously connected TCP subflow connection which has
1022 * failed to negotiate MPTCP to its own socket which can be externalized
1023 * with a file descriptor. Valid only when the MPTCP socket is not
1024 * yet associated (MPTCP-level connection has not been established.)
/*
 * Detach a subflow socket from the session.
 *
 * mpte: session; its lock must be held (asserted).
 * aid:  must be SAE_ASSOCID_ANY or SAE_ASSOCID_ALL; other values take the
 *       rejection branch.
 * psop: out, must be non-NULL (asserted).
 *
 * Peeloff is refused once the session has an association ID other than
 * SAE_ASSOCID_ANY.  While walking the subflow list, finding one flagged
 * MPTSF_MP_CAPABLE is treated as fatal (panic).  The chosen subflow is
 * referenced, its socket is adjusted with mptcp_subflow_sopeeloff(), and
 * the subflow is then removed from the session with mptcp_subflow_del()
 * before the reference is dropped.
 * NOTE(review): the error codes, the assignment to *psop, the
 * flow-control clearing mentioned in the comment text and the return
 * statement are not visible in this listing.
 */
1027 mptcp_peeloff(struct mptses
*mpte
, sae_associd_t aid
, struct socket
**psop
)
1029 struct socket
*so
= NULL
, *mp_so
;
1030 struct mptsub
*mpts
;
1033 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1034 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1036 VERIFY(psop
!= NULL
);
1039 DTRACE_MPTCP3(peeloff
, struct mptses
*, mpte
, sae_associd_t
, aid
,
1040 struct socket
*, mp_so
);
1042 /* peeloff cannot happen after an association is established */
1043 if (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) {
1048 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
) {
1053 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1055 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
1056 panic("%s: so %p is MPTCP capable but mp_so %p "
1057 "aid is %d\n", __func__
, so
, mp_so
,
1058 mpte
->mpte_associd
);
1061 MPTS_ADDREF_LOCKED(mpts
); /* for us */
1062 so
= mpts
->mpts_socket
;
1065 * This subflow socket is about to be externalized; make it
1066 * appear as if it has the same properties as the MPTCP socket,
1067 * undo what's done earlier in mptcp_subflow_add().
1069 mptcp_subflow_sopeeloff(mpte
, mpts
, so
);
1072 mptcp_subflow_del(mpte
, mpts
, FALSE
);
1073 MPTS_REMREF(mpts
); /* ours */
1077 * Here we need to make sure the subflow socket is not
1078 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1079 * and INP_FLOW_SUSPENDED on the subflow socket, since
1080 * we will no longer be monitoring its events.
1091 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1092 "%s: mp_so 0x%llx\n", __func__
,
1093 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
1094 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1101 * After a receive, possibly send some update to peer.
/*
 * pru_rcvd handler.
 *
 * mp_so: MPTCP socket; a NULL PCB or one in MPPCB_STATE_DEAD takes the
 *        early-exit branch.
 * flags: unused.
 *
 * Runs mptcp_output() on the session and keeps its result in error.
 */
1104 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
1106 #pragma unused(flags)
1107 struct mppcb
*mpp
= sotomppcb(mp_so
);
1108 struct mptses
*mpte
;
1111 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1115 mpte
= mptompte(mpp
);
1116 VERIFY(mpte
!= NULL
);
1118 error
= mptcp_output(mpte
);
1124 * Do a send by putting data in the output queue.
/*
 * pru_send handler: queue one mbuf chain on the MPTCP send buffer and
 * kick output.
 *
 * mp_so:      MPTCP socket.
 * prus_flags: PRUS_OOB and PRUS_EOF are singled out by the first check.
 * m:          data to send.
 * nam, p:     unused.
 * control:    control mbuf; a non-empty one (m_len != 0) is singled out
 *             by the second check.
 *
 * Further checks: the PCB must be live, and the socket must either be
 * connected or have SOF1_PRECONNECT_DATA set.  The data is then tagged
 * with mptcp_insert_dsn(), appended to so_snd with sbappendstream() (the
 * buffer must be SB_NOCOMPRESS, asserted) and mptcp_output() is called.
 * While the socket is still connecting, a non-blocking socket (SS_NBIO)
 * gets EWOULDBLOCK; otherwise the caller waits on so_snd through sbwait().
 * NOTE(review): the error codes of the rejection branches, the cleanup of
 * m and control, and the return statement are not visible in this
 * listing.
 */
1127 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
1128 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
1130 #pragma unused(nam, p)
1131 struct mppcb
*mpp
= sotomppcb(mp_so
);
1132 struct mptses
*mpte
;
1135 if (prus_flags
& (PRUS_OOB
|PRUS_EOF
)) {
1145 if (control
!= NULL
&& control
->m_len
!= 0) {
1150 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1154 mpte
= mptompte(mpp
);
1155 VERIFY(mpte
!= NULL
);
1157 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
1158 (!(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
))) {
1163 mptcp_insert_dsn(mpp
, m
);
1164 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
1165 (void) sbappendstream(&mp_so
->so_snd
, m
);
1169 * XXX: adi@apple.com
1171 * PRUS_MORETOCOME could be set, but we don't check it now.
1173 error
= mptcp_output(mpte
);
1177 if (mp_so
->so_state
& SS_ISCONNECTING
) {
1178 if (mp_so
->so_state
& SS_NBIO
)
1179 error
= EWOULDBLOCK
;
1181 error
= sbwait(&mp_so
->so_snd
);
1188 if (control
!= NULL
)
1195 * Mark the MPTCP connection as being incapable of further output.
/*
 * pru_shutdown handler.
 *
 * Requires a live MP PCB (a NULL PCB or one in MPPCB_STATE_DEAD takes the
 * early-exit branch).  Marks the socket as unable to send more data with
 * socantsendmore(), runs mptcp_usrclosed() and replaces mpte with its
 * return value, then calls mptcp_output() and keeps the result in error.
 * NOTE(review): elsewhere in this file the return value of
 * mptcp_usrclosed() is compared with NULL before the session is used
 * again; confirm that the call to mptcp_output() here is guarded the same
 * way, since any such guard is not visible in this listing.
 */
1198 mptcp_usr_shutdown(struct socket
*mp_so
)
1200 struct mppcb
*mpp
= sotomppcb(mp_so
);
1201 struct mptses
*mpte
;
1204 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1208 mpte
= mptompte(mpp
);
1209 VERIFY(mpte
!= NULL
);
1211 socantsendmore(mp_so
);
1213 mpte
= mptcp_usrclosed(mpte
);
1215 error
= mptcp_output(mpte
);
1221 * Copy the contents of uio into a properly sized mbuf chain.
/*
 * Copy user data described by uio into a freshly allocated chain of
 * packet-header mbufs.
 *
 * uio:   source of the data.
 * how:   allocation flag passed to the mbuf allocators.
 * space: upper bound on the number of bytes to copy; tot is computed as
 *        imin(resid, space).
 * align: added to tot to obtain the number of bytes to allocate for.
 * top:   out (name taken from the assertion below); must be non-NULL and
 *        point at a NULL chain on entry.
 *
 * Allocation picks the largest suitable buffer for the remaining length:
 * a 16 KB cluster when jumbo clusters exist (njcl > 0) and more than
 * MBIGCLBYTES remain, an MBIGCLBYTES cluster when more than MCLBYTES
 * remain, a further cluster size when at least MINCLSIZE remain, and a
 * plain header mbuf otherwise.  Every mbuf must carry M_PKTHDR
 * (asserted).  The fill loop copies into each mbuf with uiomove(), sets
 * m_pkthdr.len to the amount copied, and finally asserts that exactly tot
 * bytes were moved.
 * NOTE(review): imin() is used on a user_ssize_t value; confirm that
 * large residuals cannot be truncated.  The last parameter, the
 * allocation-failure cleanup, the chain linking and the return statement
 * are not visible in this listing.
 */
1224 mptcp_uiotombuf(struct uio
*uio
, int how
, int space
, uint32_t align
,
1227 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
1228 user_ssize_t resid
, tot
, len
, progress
; /* must be user_ssize_t */
1231 VERIFY(top
!= NULL
&& *top
== NULL
);
1234 * space can be zero or an arbitrary large value bound by
1235 * the total data supplied by the uio.
1237 resid
= uio_resid(uio
);
1239 tot
= imin(resid
, space
);
1244 * The smallest unit is a single mbuf with pkthdr.
1245 * We can't align past it.
1251 * Give us the full allocation or nothing.
1252 * If space is zero return the smallest empty mbuf.
1254 if ((len
= tot
+ align
) == 0)
1257 /* Loop and append maximum sized mbufs to the chain tail. */
1259 uint32_t m_needed
= 1;
1261 if (njcl
> 0 && len
> MBIGCLBYTES
)
1262 mb
= m_getpackets_internal(&m_needed
, 1,
1263 how
, 1, M16KCLBYTES
);
1264 else if (len
> MCLBYTES
)
1265 mb
= m_getpackets_internal(&m_needed
, 1,
1266 how
, 1, MBIGCLBYTES
);
1267 else if (len
>= (signed)MINCLSIZE
)
1268 mb
= m_getpackets_internal(&m_needed
, 1,
1271 mb
= m_gethdr(how
, MT_DATA
);
1273 /* Fail the whole operation if one mbuf can't be allocated. */
1281 VERIFY(mb
->m_flags
& M_PKTHDR
);
1282 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1294 /* Fill all mbufs with uio data and update header information. */
1295 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1296 len
= imin(M_TRAILINGSPACE(mb
), tot
- progress
);
1298 error
= uiomove(mtod(mb
, char *), len
, uio
);
1304 /* each mbuf is M_PKTHDR chained via m_next */
1306 mb
->m_pkthdr
.len
= len
;
1310 VERIFY(progress
== tot
);
1316 * MPTCP socket protocol-user socket send routine, derived from sosend().
/*
 * pru_sosend handler for MPTCP sockets.
 *
 * mp_so:   MPTCP socket; must be SOCK_STREAM and not itself a subflow
 *          (both asserted).
 * addr:    unused.
 * uio:     data to send; required.
 * top:     must be NULL; a caller-supplied mbuf chain takes the rejection
 *          branch together with a NULL uio.
 * control: must be NULL; a non-NULL value takes a rejection branch, as do
 *          a negative residual and MSG_EOR.
 * flags:   MSG_OOB, MSG_DONTROUTE, MSG_HOLD, MSG_SEND and MSG_FLUSH are
 *          rejected, as is a socket with SOF_ENABLE_MSGS.
 *
 * After taking the socket lock and updating the last owner and policy,
 * the routine loops while data remains: sosendcheck() validates the
 * send, the free space in so_snd is sampled, the socket lock is dropped
 * while mptcp_uiotombuf() copies user data into an mbuf chain, and the
 * lock is re-taken.  PRUS_MORETOCOME is set in the send flags when both
 * data and space remain.  The chain passes through the socket filters
 * (sflt_data_out) and is then handed to the protocol through pru_send.
 * On the way out the send buffer and socket are unlocked and
 * SOF1_PRECONNECT_DATA is cleared, so the flag covers one write only.
 * NOTE(review): the error codes, the cleanup of top and control, the
 * conditions selecting between the two unlock calls, and the return
 * statement are not visible in this listing.
 */
1319 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1320 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1322 #pragma unused(addr)
1325 int error
, sendflags
;
1326 struct proc
*p
= current_proc();
1329 /* UIO is required for now, due to per-mbuf M_PKTHDR constraints */
1330 if (uio
== NULL
|| top
!= NULL
) {
1334 resid
= uio_resid(uio
);
1336 socket_lock(mp_so
, 1);
1337 so_update_last_owner_locked(mp_so
, p
);
1338 so_update_policy(mp_so
);
1340 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1341 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1343 if ((flags
& (MSG_OOB
|MSG_DONTROUTE
|MSG_HOLD
|MSG_SEND
|MSG_FLUSH
)) ||
1344 (mp_so
->so_flags
& SOF_ENABLE_MSGS
)) {
1346 socket_unlock(mp_so
, 1);
1351 * In theory resid should be unsigned. However, space must be
1352 * signed, as it might be less than 0 if we over-committed, and we
1353 * must use a signed comparison of space and resid. On the other
1354 * hand, a negative resid causes us to loop sending 0-length
1355 * segments to the protocol.
1357 if (resid
< 0 || (flags
& MSG_EOR
) || control
!= NULL
) {
1359 socket_unlock(mp_so
, 1);
1363 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1366 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1371 space
= sbspace(&mp_so
->so_snd
);
1373 socket_unlock(mp_so
, 0);
1375 * Copy the data from userland into an mbuf chain.
1377 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, 0, &top
);
1379 socket_lock(mp_so
, 0);
1382 VERIFY(top
!= NULL
);
1383 space
-= resid
- uio_resid(uio
);
1384 resid
= uio_resid(uio
);
1385 socket_lock(mp_so
, 0);
1388 * Compute flags here, for pru_send and NKEs.
1390 sendflags
= (resid
> 0 && space
> 0) ?
1391 PRUS_MORETOCOME
: 0;
1394 * Socket filter processing
1396 VERIFY(control
== NULL
);
1397 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1399 if (error
== EJUSTRETURN
) {
1402 /* always free control if any */
1406 if (control
!= NULL
) {
1412 * Pass data to protocol.
1414 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1415 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1420 } while (resid
!= 0 && space
> 0);
1421 } while (resid
!= 0);
1425 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1427 socket_unlock(mp_so
, 1);
1431 if (control
!= NULL
)
1434 /* clear SOF1_PRECONNECT_DATA after one write */
1435 if (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)
1436 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;
1442 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1443 * This routine simply indicates to the caller whether or not to proceed
1444 * further with the given socket option. This is invoked by sosetoptlock()
1445 * and sogetoptlock().
/*
 * Classify a SOL_SOCKET option issued on the MP socket (the level is
 * asserted).  The switch on sopt_name has three groups:
 *
 *   - options tagged "MP": handled at the MP socket only;
 *   - options tagged "MP + subflow": also recorded later by
 *     mptcp_setopt(); for these sopt_valsize is compared against
 *     sizeof (int);
 *   - everything else: error is set to ENOPROTOOPT.
 *
 *   mp_so  unused.
 *   sopt   the option being checked; only sopt_level, sopt_name and
 *          sopt_valsize are read here.
 */
1448 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1450 #pragma unused(mp_so)
1453 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1456 * We could check for sopt_dir (set/get) here, but we'll just
1457 * let the caller deal with it as appropriate; therefore the
1458 * following is a superset of the socket options which we
1459 * allow for set/get.
1461 * XXX: adi@apple.com
1463 * Need to consider the following cases:
1465 * a. In the event peeloff(2) occurs on the subflow socket,
1466 * we may want to issue those options which are now
1467 * handled at the MP socket. In that case, we will need
1468 * to record them in mptcp_setopt() so that they can
1469 * be replayed during peeloff.
1471 * b. Certain socket options don't have a clear definition
1472 * on the expected behavior post connect(2). At the time
1473 * those options are issued on the MP socket, there may
1474 * be existing subflow sockets that are already connected.
1476 switch (sopt
->sopt_name
) {
1477 case SO_LINGER
: /* MP */
1478 case SO_LINGER_SEC
: /* MP */
1479 case SO_TYPE
: /* MP */
1480 case SO_NREAD
: /* MP */
1481 case SO_NWRITE
: /* MP */
1482 case SO_ERROR
: /* MP */
1483 case SO_SNDBUF
: /* MP */
1484 case SO_RCVBUF
: /* MP */
1485 case SO_SNDLOWAT
: /* MP */
1486 case SO_RCVLOWAT
: /* MP */
1487 case SO_SNDTIMEO
: /* MP */
1488 case SO_RCVTIMEO
: /* MP */
1489 case SO_NKE
: /* MP */
1490 case SO_NOSIGPIPE
: /* MP */
1491 case SO_NOADDRERR
: /* MP */
1492 case SO_LABEL
: /* MP */
1493 case SO_PEERLABEL
: /* MP */
1494 case SO_DEFUNCTOK
: /* MP */
1495 case SO_ISDEFUNCT
: /* MP */
1496 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1498 * Tell the caller that these options are to be processed.
1502 case SO_DEBUG
: /* MP + subflow */
1503 case SO_KEEPALIVE
: /* MP + subflow */
1504 case SO_USELOOPBACK
: /* MP + subflow */
1505 case SO_RANDOMPORT
: /* MP + subflow */
1506 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1507 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1508 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1509 case SO_RECV_ANYIF
: /* MP + subflow */
1510 case SO_RESTRICTIONS
: /* MP + subflow */
1511 case SO_FLUSH
: /* MP + subflow */
1512 case SO_MPTCP_FASTJOIN
: /* MP + subflow */
1513 case SO_NOWAKEFROMSLEEP
: /* MP + subflow */
1515 * Tell the caller that these options are to be processed;
1516 * these will also be recorded later by mptcp_setopt().
1518 * NOTE: Only support integer option value for now.
1520 if (sopt
->sopt_valsize
!= sizeof (int))
1526 * Tell the caller to stop immediately and return an error.
1528 error
= ENOPROTOOPT
;
1536 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
/*
 * Issue the integer-valued option described by `mpo' on every subflow of
 * `mpte'.  The work is done in three passes over mpte_subflows:
 *
 *   1. read each subflow's current value with mptcp_subflow_sogetopt()
 *      and, on success, save it in mpts_oldintval and set
 *      MPTSF_SOPT_OLDVAL;
 *   2. set MPTSF_SOPT_INPROG and apply `mpo' to each subflow with
 *      mptcp_subflow_sosetopt();
 *   3. clear the per-subflow bookkeeping; subflows that were in progress
 *      and have a saved value get that value written back (the in-line
 *      comment ties this to an error during sosetopt).
 *
 * error is set to ENOPROTOOPT when `mpo' lacks MPOF_SUBFLOW_OK, and for
 * SO_NOSIGPIPE / SO_NOADDRERR at SOL_SOCKET.  When there are no subflows
 * the option is only marked MPOF_INTERIM.
 *
 * The MPTE lock must be held by the caller (asserted).
 */
1539 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1541 struct socket
*mp_so
;
1542 struct mptsub
*mpts
;
1546 /* just bail now if this isn't applicable to subflow sockets */
1547 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1548 error
= ENOPROTOOPT
;
1553 * Skip those that are handled internally; these options
1554 * should not have been recorded and marked with the
1555 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1557 if (mpo
->mpo_level
== SOL_SOCKET
&&
1558 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1559 error
= ENOPROTOOPT
;
1563 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1564 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1567 * Don't bother going further if there's no subflow; mark the option
1568 * with MPOF_INTERIM so that we know whether or not to remove this
1569 * option upon encountering an error while issuing it during subflow
1572 if (mpte
->mpte_numflows
== 0) {
1573 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1574 mpo
->mpo_flags
|= MPOF_INTERIM
;
1575 /* return success */
/*
 * smpo is a scratch copy carrying only the level and name; it is used
 * to query the current value and, later, to write the old value back.
 */
1579 bzero(&smpo
, sizeof (smpo
));
1580 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1581 smpo
.mpo_level
= mpo
->mpo_level
;
1582 smpo
.mpo_name
= mpo
->mpo_name
;
1584 /* grab existing values in case we need to rollback */
1585 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1589 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1590 mpts
->mpts_oldintval
= 0;
1591 smpo
.mpo_intval
= 0;
1592 VERIFY(mpts
->mpts_socket
!= NULL
);
1593 so
= mpts
->mpts_socket
;
/* only subflows whose current value could be read are marked OLDVAL */
1595 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1596 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1597 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1599 socket_unlock(so
, 0);
1603 /* apply socket option */
1604 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1608 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1609 VERIFY(mpts
->mpts_socket
!= NULL
);
1610 so
= mpts
->mpts_socket
;
1612 error
= mptcp_subflow_sosetopt(mpte
, so
, mpo
);
1613 socket_unlock(so
, 0);
1619 /* cleanup, and rollback if needed */
1620 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
/* subflow was never reached by the apply pass: just reset bookkeeping */
1624 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1625 /* clear in case it's set */
1626 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1627 mpts
->mpts_oldintval
= 0;
/* no saved value for this subflow, so there is nothing to restore */
1631 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1632 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1633 VERIFY(mpts
->mpts_oldintval
== 0);
1637 /* error during sosetopt, so roll it back */
1639 VERIFY(mpts
->mpts_socket
!= NULL
);
1640 so
= mpts
->mpts_socket
;
/* best effort: the result of writing the old value back is ignored */
1642 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1643 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1644 socket_unlock(so
, 0);
1646 mpts
->mpts_oldintval
= 0;
1647 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1656 * Handle SOPT_SET for socket options issued on MP socket.
/*
 * SOPT_SET handler for options issued on the MP socket, at SOL_SOCKET or
 * IPPROTO_TCP level (both the direction and the level are asserted).
 *
 * Visible steps:
 *   - a per-level switch on the option name decides whether the option
 *     is recorded; TCP_NOTSENT_LOWAT is handled at the MPTCP level by
 *     toggling SOF_NOTSENT_LOWAT on the MP socket and calling
 *     mptcp_set_notsent_lowat(); unsupported names set ENOPROTOOPT;
 *   - the integer value is copied in with sooptcopyin();
 *   - an existing record is looked up with mptcp_sopt_find(), otherwise
 *     one is allocated with mptcp_sopt_alloc(); the record is filled in,
 *     inserted if not yet attached, and flagged MPOF_SUBFLOW_OK;
 *   - the option is issued on existing subflows via mptcp_setopt_apply();
 *     if that fails and the record is attached, it is removed and freed;
 *   - the outcome is logged.
 *
 * The MPTE lock must be held by the caller (asserted).
 */
1659 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1661 int error
= 0, optval
, level
, optname
, rec
= 1;
1662 struct mptopt smpo
, *mpo
= NULL
;
1663 struct socket
*mp_so
;
1666 level
= sopt
->sopt_level
;
1667 optname
= sopt
->sopt_name
;
1669 VERIFY(sopt
->sopt_dir
== SOPT_SET
);
1670 VERIFY(level
== SOL_SOCKET
|| level
== IPPROTO_TCP
);
1671 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1672 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1675 * Record socket options which are applicable to subflow sockets so
1676 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1677 * for the list of eligible socket-level options.
1679 if (level
== SOL_SOCKET
) {
1683 case SO_USELOOPBACK
:
1685 case SO_TRAFFIC_CLASS
:
1686 case SO_RECV_TRAFFIC_CLASS
:
1687 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1689 case SO_RESTRICTIONS
:
1690 case SO_NOWAKEFROMSLEEP
:
1691 case SO_MPTCP_FASTJOIN
:
1695 /* don't record it */
1699 /* nothing to do; just return success */
1705 case TCP_RXT_FINDROP
:
1709 case TCP_CONNECTIONTIMEOUT
:
1710 case TCP_RXT_CONNDROPTIME
:
1711 case PERSIST_TIMEOUT
:
1712 /* eligible; record it */
1714 case TCP_NOTSENT_LOWAT
:
1715 /* record at MPTCP level */
1716 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1725 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1726 error
= mptcp_set_notsent_lowat(mpte
,0);
1728 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1729 error
= mptcp_set_notsent_lowat(mpte
,
1736 error
= ENOPROTOOPT
;
1741 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
1742 sizeof (optval
))) != 0)
1746 /* search for an existing one; if not found, allocate */
1747 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
)
1748 mpo
= mptcp_sopt_alloc(M_WAITOK
);
1753 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1754 "%s: mp_so 0x%llx sopt %s "
1755 "val %d %s\n", __func__
,
1756 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1757 mptcp_sopt2str(level
, optname
, buf
,
1758 sizeof (buf
)), optval
,
1759 (mpo
->mpo_flags
& MPOF_ATTACHED
) ?
1760 "updated" : "recorded"),
1761 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1763 /* initialize or update, as needed */
1764 mpo
->mpo_intval
= optval
;
1765 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1766 mpo
->mpo_level
= level
;
1767 mpo
->mpo_name
= optname
;
1768 mptcp_sopt_insert(mpte
, mpo
);
1770 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
1771 /* this can be issued on the subflow socket */
1772 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1775 bzero(&smpo
, sizeof (smpo
));
1777 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1778 mpo
->mpo_level
= level
;
1779 mpo
->mpo_name
= optname
;
1780 mpo
->mpo_intval
= optval
;
1782 VERIFY(mpo
== NULL
|| error
== 0);
1784 /* issue this socket option on existing subflows */
1786 error
= mptcp_setopt_apply(mpte
, mpo
);
/* a failed apply drops the recorded option; smpo must never get here */
1787 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1788 VERIFY(mpo
!= &smpo
);
1789 mptcp_sopt_remove(mpte
, mpo
);
1790 mptcp_sopt_free(mpo
);
1793 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
/*
 * NOTE(review): the success message below is emitted with LOG_ERR and
 * MPTCP_LOGLVL_ERR, the same levels as the failure message -- confirm
 * that this is intended.
 */
1796 if (error
== 0 && mpo
!= NULL
) {
1797 mptcplog((LOG_ERR
, "MPTCP Socket: "
1798 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1799 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1800 mptcp_sopt2str(level
, optname
, buf
,
1801 sizeof (buf
)), optval
, (mpo
->mpo_flags
& MPOF_INTERIM
) ?
1802 "pending" : "successful"),
1803 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1804 } else if (error
!= 0) {
1805 mptcplog((LOG_ERR
, "MPTCP Socket: "
1806 "%s: mp_so 0x%llx sopt %s can't be issued "
1807 "error %d\n", __func__
,
1808 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mptcp_sopt2str(level
,
1809 optname
, buf
, sizeof (buf
)), error
),
1810 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1816 * Handle SOPT_GET for socket options issued on MP socket.
/*
 * SOPT_GET handler for options issued on the MP socket (the direction is
 * asserted).  Only IPPROTO_TCP options are served; any other level sets
 * error to ENOPROTOOPT.
 *
 * For the listed TCP options a default is first obtained from
 * mptcp_default_tcp_optval().  TCP_NOTSENT_LOWAT reads the live value
 * through mptcp_get_notsent_lowat() when SOF_NOTSENT_LOWAT is set on the
 * MP socket.  A value recorded earlier and found by mptcp_sopt_find()
 * replaces the default.  The result is copied out as an int with
 * sooptcopyout().
 *
 * The MPTE lock must be held by the caller (asserted).
 */
1819 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1821 int error
= 0, optval
;
1823 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1824 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1827 * We only handle SOPT_GET for TCP level socket options; we should
1828 * not get here for socket level options since they are already
1829 * handled at the socket layer.
1831 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
1832 error
= ENOPROTOOPT
;
1836 switch (sopt
->sopt_name
) {
1838 case TCP_RXT_FINDROP
:
1842 case TCP_CONNECTIONTIMEOUT
:
1843 case TCP_RXT_CONNDROPTIME
:
1844 case PERSIST_TIMEOUT
:
1845 case TCP_NOTSENT_LOWAT
:
1846 /* eligible; get the default value just in case */
1847 error
= mptcp_default_tcp_optval(mpte
, sopt
, &optval
);
1851 error
= ENOPROTOOPT
;
1855 switch (sopt
->sopt_name
) {
1856 case TCP_NOTSENT_LOWAT
:
1857 if (mpte
->mpte_mppcb
->mpp_socket
->so_flags
& SOF_NOTSENT_LOWAT
)
1858 optval
= mptcp_get_notsent_lowat(mpte
);
1865 * Search for a previously-issued TCP level socket option and
1866 * return the recorded option value. This assumes that the
1867 * value did not get modified by the lower layer after it was
1868 * issued at setsockopt(2) time. If not found, we'll return
1869 * the default value obtained ealier.
1874 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) != NULL
)
1875 optval
= mpo
->mpo_intval
;
1877 error
= sooptcopyout(sopt
, &optval
, sizeof (int));
1884 * Return default values for TCP socket options. Ideally we would query the
1885 * subflow TCP socket, but that requires creating a subflow socket before
1886 * connectx(2) time. To simplify things, just return the default values
/*
 * Store the default value of an IPPROTO_TCP option in *optval without
 * consulting any subflow socket.  Level and direction (SOPT_GET) are
 * asserted, and the MPTE lock must be held (asserted).
 *
 * Visible assignments: mptcp_subflow_keeptime for one case, and
 * tcp_max_persist_timeout for PERSIST_TIMEOUT.  Unhandled option names
 * set error to ENOPROTOOPT.
 *
 *   mpte    MPTCP session; only used for the lock assertion here.
 *   sopt    option being queried; sopt_name selects the default.
 *   optval  out parameter receiving the default value.
 */
1890 mptcp_default_tcp_optval(struct mptses
*mpte
, struct sockopt
*sopt
, int *optval
)
1894 VERIFY(sopt
->sopt_level
== IPPROTO_TCP
);
1895 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1896 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1898 /* try to do what tcp_newtcpcb() does */
1899 switch (sopt
->sopt_name
) {
1901 case TCP_RXT_FINDROP
:
1904 case TCP_CONNECTIONTIMEOUT
:
1905 case TCP_RXT_CONNDROPTIME
:
1906 case TCP_NOTSENT_LOWAT
:
1911 *optval
= mptcp_subflow_keeptime
;
1914 case PERSIST_TIMEOUT
:
1915 *optval
= tcp_max_persist_timeout
;
1919 error
= ENOPROTOOPT
;
1926 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1927 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1928 * to those that are allowed by mptcp_usr_socheckopt().
/*
 * Socket option entry point for the MP socket.
 *
 * The MP protocol control block is looked up from mp_so and tested for
 * NULL / MPPCB_STATE_DEAD.  Only SOL_SOCKET and IPPROTO_TCP levels are
 * handled; other levels are logged as "level not handled".  The request
 * is then dispatched on sopt_dir to mptcp_setopt() or mptcp_getopt().
 *
 * The MPTE lock must be held by the caller (asserted).
 */
1931 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
1933 struct mppcb
*mpp
= sotomppcb(mp_so
);
1934 struct mptses
*mpte
;
1937 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1941 mpte
= mptompte(mpp
);
1942 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1944 /* we only handle socket and TCP-level socket options for MPTCP */
1945 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
1947 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1948 "%s: mp_so 0x%llx sopt %s level not "
1949 "handled\n", __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1950 mptcp_sopt2str(sopt
->sopt_level
,
1951 sopt
->sopt_name
, buf
, sizeof (buf
))),
1952 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
/* hand off to the set or get handler according to the direction */
1957 switch (sopt
->sopt_dir
) {
1959 error
= mptcp_setopt(mpte
, sopt
);
1963 error
= mptcp_getopt(mpte
, sopt
);
1971 * Return a string representation of <sopt_level,sopt_name>
/*
 * Format a <level,optname> pair into `dst' (at most `size' bytes, via
 * snprintf) for logging.
 *
 * Both parts default to their hexadecimal rendering ("0x%x"), built in
 * the local buffers lbuf/obuf; for option names known to the switch the
 * name pointer `o' is redirected to a string literal instead.  Callers
 * in this file pass the result to a "%s" conversion.
 *
 *   level    socket option level.
 *   optname  socket option name.
 *   dst      destination buffer.
 *   size     size of dst in bytes.
 */
1974 mptcp_sopt2str(int level
, int optname
, char *dst
, int size
)
1976 char lbuf
[32], obuf
[32];
1977 const char *l
= lbuf
, *o
= obuf
;
/* numeric fallbacks, used when no symbolic name is known */
1979 (void) snprintf(lbuf
, sizeof (lbuf
), "0x%x", level
);
1980 (void) snprintf(obuf
, sizeof (obuf
), "0x%x", optname
);
1990 o
= "SO_LINGER_SEC";
1998 case SO_USELOOPBACK
:
1999 o
= "SO_USELOOPBACK";
2040 case SO_RESTRICTIONS
:
2041 o
= "SO_RESTRICTIONS";
2050 o
= "SO_RANDOMPORT";
2052 case SO_TRAFFIC_CLASS
:
2053 o
= "SO_TRAFFIC_CLASS";
2055 case SO_RECV_TRAFFIC_CLASS
:
2056 o
= "SO_RECV_TRAFFIC_CLASS";
2058 case SO_TRAFFIC_CLASS_DBG
:
2059 o
= "SO_TRAFFIC_CLASS_DBG";
2061 case SO_PRIVILEGED_TRAFFIC_CLASS
:
2062 o
= "SO_PRIVILEGED_TRAFFIC_CLASS";
2070 case SO_OPPORTUNISTIC
:
2071 o
= "SO_OPPORTUNISTIC";
2077 o
= "SO_RECV_ANYIF";
2079 case SO_NOWAKEFROMSLEEP
:
2080 o
= "SO_NOWAKEFROMSLEEP";
2082 case SO_MPTCP_FASTJOIN
:
2083 o
= "SO_MPTCP_FASTJOIN";
2091 o
= "TCP_KEEPALIVE";
2094 o
= "TCP_KEEPINTVL";
2099 case TCP_CONNECTIONTIMEOUT
:
2100 o
= "TCP_CONNECTIONTIMEOUT";
2102 case TCP_RXT_CONNDROPTIME
:
2103 o
= "TCP_RXT_CONNDROPTIME";
2105 case PERSIST_TIMEOUT
:
2106 o
= "PERSIST_TIMEOUT";
2112 (void) snprintf(dst
, size
, "<%s,%s>", l
, o
);
/*
 * Preconnect handler for the MP socket.
 *
 * A subflow is obtained with mptcp_get_subflow(mpte, NULL, NULL); an
 * "invalid preconnect" message is logged at error level on the failure
 * path.  On the chosen subflow, MPTSF_TFO_REQD is cleared, as is
 * TMPF_TFO_REQUEST on its tcpcb, and tcp_output() is called on the
 * subflow socket.  Finally SOF1_PRECONNECT_DATA is cleared on the MP
 * socket.
 *
 * The MPTE lock must be held by the caller (asserted).
 */
2117 mptcp_usr_preconnect(struct socket
*mp_so
)
2119 struct mptsub
*mpts
= NULL
;
2120 struct mppcb
*mpp
= sotomppcb(mp_so
);
2121 struct mptses
*mpte
;
2123 struct tcpcb
*tp
= NULL
;
2125 mpte
= mptompte(mpp
);
2126 VERIFY(mpte
!= NULL
);
2127 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2129 mpts
= mptcp_get_subflow(mpte
, NULL
, NULL
);
2131 mptcplog((LOG_ERR
, "MPTCP Socket: "
2132 "%s: mp_so 0x%llx invalid preconnect ", __func__
,
2133 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2134 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
2138 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
2139 so
= mpts
->mpts_socket
;
2141 tp
= intotcpcb(sotoinpcb(so
));
2142 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
/* the result of tcp_output() on the subflow is kept in error */
2143 int error
= tcp_output(sototcpcb(so
));
2144 socket_unlock(so
, 0);
2146 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;