2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
42 #include <netinet/in.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/tcp_seq.h>
47 #include <netinet/tcp_var.h>
48 #include <netinet/tcp_timer.h>
49 #include <netinet/mptcp_var.h>
50 #include <netinet/mptcp_timer.h>
54 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
55 static int mptcp_usr_detach(struct socket
*);
56 static int mptcp_attach(struct socket
*, struct proc
*);
57 static int mptcp_detach(struct socket
*, struct mppcb
*);
58 static int mptcp_connectx(struct mptses
*, struct sockaddr
*,
59 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
60 sae_connid_t
*, uint32_t, void *, uint32_t);
61 static int mptcp_usr_connectx(struct socket
*, struct sockaddr
*,
62 struct sockaddr
*, struct proc
*, uint32_t, sae_associd_t
,
63 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
64 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
65 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
67 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
69 uint32_t *, user_addr_t
, uint32_t *);
70 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
72 static int mptcp_disconnectx(struct mptses
*, sae_associd_t
, sae_connid_t
);
73 static int mptcp_usr_disconnect(struct socket
*);
74 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
75 static struct mptses
*mptcp_usrclosed(struct mptses
*);
76 static int mptcp_usr_peeloff(struct socket
*, sae_associd_t
, struct socket
**);
77 static int mptcp_peeloff(struct mptses
*, sae_associd_t
, struct socket
**);
78 static int mptcp_usr_rcvd(struct socket
*, int);
79 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
80 struct sockaddr
*, struct mbuf
*, struct proc
*);
81 static int mptcp_usr_shutdown(struct socket
*);
82 static int mptcp_uiotombuf(struct uio
*, int, int, uint32_t, struct mbuf
**);
83 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
84 struct mbuf
*, struct mbuf
*, int);
85 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
86 static int mptcp_setopt_apply(struct mptses
*, struct mptopt
*);
87 static int mptcp_setopt(struct mptses
*, struct sockopt
*);
88 static int mptcp_getopt(struct mptses
*, struct sockopt
*);
89 static int mptcp_default_tcp_optval(struct mptses
*, struct sockopt
*, int *);
90 static void mptcp_connorder_helper(struct mptsub
*mpts
);
91 static int mptcp_usr_preconnect(struct socket
*so
);
93 struct pr_usrreqs mptcp_usrreqs
= {
94 .pru_attach
= mptcp_usr_attach
,
95 .pru_connectx
= mptcp_usr_connectx
,
96 .pru_control
= mptcp_usr_control
,
97 .pru_detach
= mptcp_usr_detach
,
98 .pru_disconnect
= mptcp_usr_disconnect
,
99 .pru_disconnectx
= mptcp_usr_disconnectx
,
100 .pru_peeloff
= mptcp_usr_peeloff
,
101 .pru_rcvd
= mptcp_usr_rcvd
,
102 .pru_send
= mptcp_usr_send
,
103 .pru_shutdown
= mptcp_usr_shutdown
,
104 .pru_sosend
= mptcp_usr_sosend
,
105 .pru_soreceive
= soreceive
,
106 .pru_socheckopt
= mptcp_usr_socheckopt
,
107 .pru_preconnect
= mptcp_usr_preconnect
,
111 * Attaches an MPTCP control block to a socket.
114 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
116 #pragma unused(proto)
119 VERIFY(sotomppcb(mp_so
) == NULL
);
121 error
= mptcp_attach(mp_so
, p
);
127 * Might want to use a different SO_LINGER timeout than TCP's?
129 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0)
130 mp_so
->so_linger
= TCP_LINGERTIME
* hz
;
136 * Detaches an MPTCP control block from a socket.
139 mptcp_usr_detach(struct socket
*mp_so
)
141 struct mppcb
*mpp
= sotomppcb(mp_so
);
145 VERIFY(mpp
->mpp_socket
!= NULL
);
147 error
= mptcp_detach(mp_so
, mpp
);
152 * Attach MPTCP protocol to socket, allocating MP control block,
153 * MPTCP session, control block, buffer space, etc.
156 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
159 struct mptses
*mpte
= NULL
;
160 struct mptcb
*mp_tp
= NULL
;
161 struct mppcb
*mpp
= NULL
;
164 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
165 error
= soreserve(mp_so
, tcp_sendspace
, MPTCP_RWIN_MAX
);
170 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
171 soreserve_preconnect(mp_so
, 2048);
175 * MPTCP socket buffers cannot be compressed, due to the
176 * fact that each mbuf chained via m_next is a M_PKTHDR
177 * which carries some MPTCP metadata.
179 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
180 mp_so
->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
182 /* Disable socket buffer auto-tuning. */
183 mp_so
->so_rcv
.sb_flags
&= ~SB_AUTOSIZE
;
184 mp_so
->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
186 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
190 mpp
= sotomppcb(mp_so
);
192 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
193 VERIFY(mpte
!= NULL
);
194 mp_tp
= mpte
->mpte_mptcb
;
195 VERIFY(mp_tp
!= NULL
);
201 * Called when the socket layer loses its final reference to the socket;
202 * at this point, there is only one case in which we will keep things
206 mptcp_detach(struct socket
*mp_so
, struct mppcb
*mpp
)
209 struct mppcbinfo
*mppi
;
211 VERIFY(mp_so
->so_pcb
== mpp
);
212 VERIFY(mpp
->mpp_socket
== mp_so
);
214 mppi
= mpp
->mpp_pcbinfo
;
215 VERIFY(mppi
!= NULL
);
217 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
218 VERIFY(mpte
->mpte_mppcb
== mpp
);
220 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
223 * We are done with this MPTCP socket (it has been closed);
224 * trigger all subflows to be disconnected, if not already,
225 * by initiating the PCB detach sequence (SOF_PCBCLEARING
230 (void) mptcp_disconnectx(mpte
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
235 * Here, we would want to handle time wait state.
242 * Common subroutine to open a MPTCP connection to one of the remote hosts
243 * specified by dst_sl. This includes allocating and establishing a
244 * subflow TCP connection, either initially to establish MPTCP connection,
245 * or to join an existing one. Returns a connection handle upon success.
248 mptcp_connectx(struct mptses
*mpte
, struct sockaddr
*src
,
249 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
250 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
253 #pragma unused(p, aid, flags, arg, arglen)
255 struct socket
*mp_so
;
258 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
259 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
262 VERIFY(pcid
!= NULL
);
264 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
265 "%s: mp_so 0x%llx\n", __func__
,
266 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
267 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
269 DTRACE_MPTCP3(connectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
270 struct socket
*, mp_so
);
272 mpts
= mptcp_subflow_alloc(M_WAITOK
);
277 MPTS_ADDREF(mpts
); /* for this routine */
280 int len
= src
->sa_len
;
282 MALLOC(mpts
->mpts_src
, struct sockaddr
*, len
, M_SONAME
,
284 if (mpts
->mpts_src
== NULL
) {
288 bcopy(src
, mpts
->mpts_src
, len
);
291 MALLOC(mpts
->mpts_dst
, struct sockaddr
*, dst
->sa_len
, M_SONAME
,
293 if (mpts
->mpts_dst
== NULL
) {
297 bcopy(dst
, mpts
->mpts_dst
, dst
->sa_len
);
299 error
= mptcp_subflow_add(mpte
, mpts
, p
, ifscope
);
300 if (error
== 0 && pcid
!= NULL
)
301 *pcid
= mpts
->mpts_connid
;
305 if ((error
!= 0) && (error
!= EWOULDBLOCK
)) {
307 if (mpts
->mpts_flags
& MPTSF_ATTACHED
) {
310 mptcp_subflow_del(mpte
, mpts
, TRUE
);
322 * User-protocol pru_connectx callback.
325 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr
*src
,
326 struct sockaddr
*dst
, struct proc
*p
, uint32_t ifscope
,
327 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
328 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
330 struct mppcb
*mpp
= sotomppcb(mp_so
);
331 struct mptses
*mpte
= NULL
;
332 struct mptcb
*mp_tp
= NULL
;
333 user_ssize_t datalen
;
337 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
341 mpte
= mptompte(mpp
);
342 VERIFY(mpte
!= NULL
);
344 mp_tp
= mpte
->mpte_mptcb
;
345 VERIFY(mp_tp
!= NULL
);
347 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
352 error
= mptcp_connectx(mpte
, src
, dst
, p
, ifscope
,
353 aid
, pcid
, flags
, arg
, arglen
);
355 /* If there is data, copy it */
357 datalen
= uio_resid(auio
);
358 socket_unlock(mp_so
, 0);
359 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
360 (uio_t
) auio
, NULL
, NULL
, 0);
361 /* check if this can be supported with fast Join also. XXX */
362 if (error
== 0 || error
== EWOULDBLOCK
)
363 *bytes_written
= datalen
- uio_resid(auio
);
365 if (error
== EWOULDBLOCK
)
368 socket_lock(mp_so
, 0);
370 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
371 *bytes_written
= datalen
- uio_resid(auio
);
373 * Override errors like EPIPE that occur as
374 * a result of doing TFO during TCP fallback.
386 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
389 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
391 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
393 /* MPTCP has at most 1 association */
394 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
396 /* just asking how many there are? */
397 if (aidp
== USER_ADDR_NULL
)
400 return (copyout(&mpte
->mpte_associd
, aidp
,
401 sizeof (mpte
->mpte_associd
)));
405 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
408 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
414 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
416 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
417 aid
!= mpte
->mpte_associd
)
420 *cnt
= mpte
->mpte_numflows
;
422 /* just asking how many there are? */
423 if (cidp
== USER_ADDR_NULL
)
426 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
427 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
428 sizeof (mpts
->mpts_connid
))) != 0)
431 cidp
+= sizeof (mpts
->mpts_connid
);
438 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
441 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
442 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
443 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
444 user_addr_t aux_data
, uint32_t *aux_len
)
446 #pragma unused(aux_data)
447 struct ifnet
*ifp
= NULL
;
451 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
453 if (*cid
== SAE_CONNID_ALL
)
456 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
457 if (mpts
->mpts_connid
== *cid
|| *cid
== SAE_CONNID_ANY
)
461 return ((*cid
== SAE_CONNID_ANY
) ? ENXIO
: EINVAL
);
464 ifp
= mpts
->mpts_outif
;
465 *cid
= mpts
->mpts_connid
;
466 *ifindex
= ((ifp
!= NULL
) ? ifp
->if_index
: 0);
467 *soerror
= mpts
->mpts_soerror
;
469 if (mpts
->mpts_flags
& MPTSF_CONNECTING
)
470 *flags
|= CIF_CONNECTING
;
471 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
472 *flags
|= CIF_CONNECTED
;
473 if (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)
474 *flags
|= CIF_DISCONNECTING
;
475 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
476 *flags
|= CIF_DISCONNECTED
;
477 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
)
478 *flags
|= CIF_BOUND_IF
;
479 if (mpts
->mpts_flags
& MPTSF_BOUND_IP
)
480 *flags
|= CIF_BOUND_IP
;
481 if (mpts
->mpts_flags
& MPTSF_BOUND_PORT
)
482 *flags
|= CIF_BOUND_PORT
;
483 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
484 *flags
|= CIF_PREFERRED
;
485 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
)
486 *flags
|= CIF_MP_CAPABLE
;
487 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
488 *flags
|= CIF_MP_DEGRADED
;
489 if (mpts
->mpts_flags
& MPTSF_MP_READY
)
490 *flags
|= CIF_MP_READY
;
491 if (mpts
->mpts_flags
& MPTSF_ACTIVE
)
492 *flags
|= CIF_MP_ACTIVE
;
494 VERIFY(mpts
->mpts_src
!= NULL
);
495 *src_len
= mpts
->mpts_src
->sa_len
;
496 if (src
!= USER_ADDR_NULL
) {
497 error
= copyout(mpts
->mpts_src
, src
, mpts
->mpts_src
->sa_len
);
502 VERIFY(mpts
->mpts_dst
!= NULL
);
503 *dst_len
= mpts
->mpts_dst
->sa_len
;
504 if (dst
!= USER_ADDR_NULL
) {
505 error
= copyout(mpts
->mpts_dst
, dst
, mpts
->mpts_dst
->sa_len
);
512 if (mpts
->mpts_socket
!= NULL
) {
513 struct conninfo_tcp tcp_ci
;
515 *aux_type
= CIAUX_TCP
;
516 *aux_len
= sizeof (tcp_ci
);
518 if (aux_data
!= USER_ADDR_NULL
) {
519 struct socket
*so
= mpts
->mpts_socket
;
521 VERIFY(SOCK_PROTO(so
) == IPPROTO_TCP
);
522 bzero(&tcp_ci
, sizeof (tcp_ci
));
524 tcp_getconninfo(so
, &tcp_ci
);
525 socket_unlock(so
, 0);
526 error
= copyout(&tcp_ci
, aux_data
, sizeof (tcp_ci
));
531 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
532 "%s: cid %d flags %x \n",
533 __func__
, mpts
->mpts_connid
, mpts
->mpts_flags
),
534 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
542 * Handle SIOCSCONNORDER
545 mptcp_setconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t rank
)
547 struct mptsub
*mpts
, *mpts1
;
550 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
551 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
552 "%s: cid %d rank %d \n", __func__
, cid
, rank
),
553 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
555 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
560 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
561 if (mpts
->mpts_connid
== cid
)
569 if (rank
== 0 || rank
> 1) {
571 * If rank is 0, determine whether this should be the
572 * primary or backup subflow, depending on what we have.
574 * Otherwise, if greater than 0, make it a backup flow.
576 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
578 if (mpts1
->mpts_flags
& MPTSF_PREFERRED
) {
586 mpts
->mpts_flags
&= ~MPTSF_PREFERRED
;
587 mpts
->mpts_rank
= rank
;
588 if (mpts1
!= NULL
&& mpts
!= mpts1
) {
589 /* preferred subflow found; set rank as necessary */
591 mpts
->mpts_rank
= (mpts1
->mpts_rank
+ 1);
592 } else if (rank
== 0) {
593 /* no preferred one found; promote this */
601 * If rank is 1, promote this subflow to be preferred.
603 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
606 (mpts1
->mpts_flags
& MPTSF_PREFERRED
)) {
607 mpts1
->mpts_flags
&= ~MPTSF_PREFERRED
;
608 if (mpte
->mpte_nummpcapflows
> 1)
609 mptcp_connorder_helper(mpts1
);
610 } else if (mpts1
== mpts
) {
611 mpts1
->mpts_rank
= 1;
612 if (mpts1
->mpts_flags
& MPTSF_MP_CAPABLE
) {
613 mpts1
->mpts_flags
|= MPTSF_PREFERRED
;
614 if (mpte
->mpte_nummpcapflows
> 1)
615 mptcp_connorder_helper(mpts1
);
627 mptcp_connorder_helper(struct mptsub
*mpts
)
629 struct socket
*so
= mpts
->mpts_socket
;
630 struct tcpcb
*tp
= NULL
;
634 tp
= intotcpcb(sotoinpcb(so
));
635 tp
->t_mpflags
|= TMPF_SND_MPPRIO
;
636 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
637 tp
->t_mpflags
&= ~TMPF_BACKUP_PATH
;
639 tp
->t_mpflags
|= TMPF_BACKUP_PATH
;
641 socket_unlock(so
, 0);
646 * Handle SIOCGCONNORDER
649 mptcp_getconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t *rank
)
654 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
655 VERIFY(rank
!= NULL
);
658 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
663 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
664 if (mpts
->mpts_connid
== cid
)
673 *rank
= mpts
->mpts_rank
;
680 * User-protocol pru_control callback.
683 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
684 struct ifnet
*ifp
, struct proc
*p
)
686 #pragma unused(ifp, p)
687 struct mppcb
*mpp
= sotomppcb(mp_so
);
691 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
695 mpte
= mptompte(mpp
);
696 VERIFY(mpte
!= NULL
);
698 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
701 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
702 struct so_aidreq32 aidr
;
703 bcopy(data
, &aidr
, sizeof (aidr
));
704 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
707 bcopy(&aidr
, data
, sizeof (aidr
));
711 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
712 struct so_aidreq64 aidr
;
713 bcopy(data
, &aidr
, sizeof (aidr
));
714 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
717 bcopy(&aidr
, data
, sizeof (aidr
));
721 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
722 struct so_cidreq32 cidr
;
723 bcopy(data
, &cidr
, sizeof (cidr
));
724 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
727 bcopy(&cidr
, data
, sizeof (cidr
));
731 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
732 struct so_cidreq64 cidr
;
733 bcopy(data
, &cidr
, sizeof (cidr
));
734 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
737 bcopy(&cidr
, data
, sizeof (cidr
));
741 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
742 struct so_cinforeq32 cifr
;
743 bcopy(data
, &cifr
, sizeof (cifr
));
744 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
745 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
746 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
747 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
750 bcopy(&cifr
, data
, sizeof (cifr
));
754 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
755 struct so_cinforeq64 cifr
;
756 bcopy(data
, &cifr
, sizeof (cifr
));
757 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
758 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
759 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
760 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
763 bcopy(&cifr
, data
, sizeof (cifr
));
767 case SIOCSCONNORDER
: { /* struct so_cordreq */
768 struct so_cordreq cor
;
769 bcopy(data
, &cor
, sizeof (cor
));
770 error
= mptcp_setconnorder(mpte
, cor
.sco_cid
, cor
.sco_rank
);
772 bcopy(&cor
, data
, sizeof (cor
));
776 case SIOCGCONNORDER
: { /* struct so_cordreq */
777 struct so_cordreq cor
;
778 bcopy(data
, &cor
, sizeof (cor
));
779 error
= mptcp_getconnorder(mpte
, cor
.sco_cid
, &cor
.sco_rank
);
781 bcopy(&cor
, data
, sizeof (cor
));
794 * Initiate a disconnect. MPTCP-level disconnection is specified by
795 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
796 * connection while keeping the MPTCP-level connection (association).
799 mptcp_disconnectx(struct mptses
*mpte
, sae_associd_t aid
, sae_connid_t cid
)
802 struct socket
*mp_so
;
806 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
808 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
809 mp_tp
= mpte
->mpte_mptcb
;
811 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
812 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__
,
813 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), aid
, cid
, mp_so
->so_error
),
814 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
816 DTRACE_MPTCP5(disconnectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
817 sae_connid_t
, cid
, struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
819 VERIFY(aid
== SAE_ASSOCID_ANY
|| aid
== SAE_ASSOCID_ALL
||
820 aid
== mpte
->mpte_associd
);
822 /* terminate the association? */
823 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
824 /* if we're not detached, go thru socket state checks */
825 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
826 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
831 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
837 mptcp_cancel_all_timers(mp_tp
);
838 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
839 (void) mptcp_close(mpte
, mp_tp
);
841 } else if ((mp_so
->so_options
& SO_LINGER
) &&
842 mp_so
->so_linger
== 0) {
843 (void) mptcp_drop(mpte
, mp_tp
, 0);
847 soisdisconnecting(mp_so
);
848 sbflush(&mp_so
->so_rcv
);
849 if (mptcp_usrclosed(mpte
) != NULL
)
850 (void) mptcp_output(mpte
);
853 bool disconnect_embryonic_subflows
= false;
854 struct socket
*so
= NULL
;
856 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
857 if (mpts
->mpts_connid
!= cid
)
862 * Check if disconnected subflow is the one used
863 * to initiate MPTCP connection.
864 * If it is and the connection is not yet join ready
865 * disconnect all other subflows.
867 so
= mpts
->mpts_socket
;
868 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
) &&
869 so
&& !(so
->so_flags
& SOF_MP_SEC_SUBFLOW
)) {
870 disconnect_embryonic_subflows
= true;
873 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
874 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
884 if (disconnect_embryonic_subflows
) {
885 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
886 if (mpts
->mpts_connid
== cid
)
889 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
896 mptcp_thread_signal(mpte
);
898 if ((mp_so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
899 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
900 /* the socket has been shutdown, no more sockopt's */
901 mptcp_flush_sopts(mpte
);
909 * Wrapper function to support disconnect on socket
912 mptcp_usr_disconnect(struct socket
*mp_so
)
916 error
= mptcp_usr_disconnectx(mp_so
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
921 * User-protocol pru_disconnectx callback.
924 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
926 struct mppcb
*mpp
= sotomppcb(mp_so
);
930 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
934 mpte
= mptompte(mpp
);
935 VERIFY(mpte
!= NULL
);
936 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
938 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
939 aid
!= mpte
->mpte_associd
) {
944 error
= mptcp_disconnectx(mpte
, aid
, cid
);
950 * User issued close, and wish to trail thru shutdown states.
952 static struct mptses
*
953 mptcp_usrclosed(struct mptses
*mpte
)
955 struct socket
*mp_so
;
959 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
960 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
961 mp_tp
= mpte
->mpte_mptcb
;
964 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
966 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
967 mpte
= mptcp_close(mpte
, mp_tp
);
969 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
971 soisdisconnected(mp_so
);
972 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
974 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
980 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
982 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
983 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
992 * User-protocol pru_peeloff callback.
995 mptcp_usr_peeloff(struct socket
*mp_so
, sae_associd_t aid
, struct socket
**psop
)
997 struct mppcb
*mpp
= sotomppcb(mp_so
);
1001 VERIFY(psop
!= NULL
);
1003 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1007 mpte
= mptompte(mpp
);
1008 VERIFY(mpte
!= NULL
);
1010 error
= mptcp_peeloff(mpte
, aid
, psop
);
1016 * Transform a previously connected TCP subflow connection which has
1017 * failed to negotiate MPTCP to its own socket which can be externalized
1018 * with a file descriptor. Valid only when the MPTCP socket is not
1019 * yet associated (MPTCP-level connection has not been established.)
1022 mptcp_peeloff(struct mptses
*mpte
, sae_associd_t aid
, struct socket
**psop
)
1024 struct socket
*so
= NULL
, *mp_so
;
1025 struct mptsub
*mpts
;
1028 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1029 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1031 VERIFY(psop
!= NULL
);
1034 DTRACE_MPTCP3(peeloff
, struct mptses
*, mpte
, sae_associd_t
, aid
,
1035 struct socket
*, mp_so
);
1037 /* peeloff cannot happen after an association is established */
1038 if (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) {
1043 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
) {
1048 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1050 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
1051 panic("%s: so %p is MPTCP capable but mp_so %p "
1052 "aid is %d\n", __func__
, so
, mp_so
,
1053 mpte
->mpte_associd
);
1056 MPTS_ADDREF_LOCKED(mpts
); /* for us */
1057 so
= mpts
->mpts_socket
;
1060 * This subflow socket is about to be externalized; make it
1061 * appear as if it has the same properties as the MPTCP socket,
1062 * undo what's done earlier in mptcp_subflow_add().
1064 mptcp_subflow_sopeeloff(mpte
, mpts
, so
);
1067 mptcp_subflow_del(mpte
, mpts
, FALSE
);
1068 MPTS_REMREF(mpts
); /* ours */
1072 * Here we need to make sure the subflow socket is not
1073 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1074 * and INP_FLOW_SUSPENDED on the subflow socket, since
1075 * we will no longer be monitoring its events.
1086 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1087 "%s: mp_so 0x%llx\n", __func__
,
1088 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
1089 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1096 * After a receive, possibly send some update to peer.
1099 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
1101 #pragma unused(flags)
1102 struct mppcb
*mpp
= sotomppcb(mp_so
);
1103 struct mptses
*mpte
;
1106 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1110 mpte
= mptompte(mpp
);
1111 VERIFY(mpte
!= NULL
);
1113 error
= mptcp_output(mpte
);
1119 * Do a send by putting data in the output queue.
1122 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
1123 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
1125 #pragma unused(nam, p)
1126 struct mppcb
*mpp
= sotomppcb(mp_so
);
1127 struct mptses
*mpte
;
1130 if (prus_flags
& (PRUS_OOB
|PRUS_EOF
)) {
1140 if (control
!= NULL
&& control
->m_len
!= 0) {
1145 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1149 mpte
= mptompte(mpp
);
1150 VERIFY(mpte
!= NULL
);
1152 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
1153 (!(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
))) {
1158 mptcp_insert_dsn(mpp
, m
);
1159 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
1160 (void) sbappendstream(&mp_so
->so_snd
, m
);
1164 * XXX: adi@apple.com
1166 * PRUS_MORETOCOME could be set, but we don't check it now.
1168 error
= mptcp_output(mpte
);
1172 if (mp_so
->so_state
& SS_ISCONNECTING
) {
1173 if (mp_so
->so_state
& SS_NBIO
)
1174 error
= EWOULDBLOCK
;
1176 error
= sbwait(&mp_so
->so_snd
);
1183 if (control
!= NULL
)
1190 * Mark the MPTCP connection as being incapable of further output.
1193 mptcp_usr_shutdown(struct socket
*mp_so
)
1195 struct mppcb
*mpp
= sotomppcb(mp_so
);
1196 struct mptses
*mpte
;
1199 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1203 mpte
= mptompte(mpp
);
1204 VERIFY(mpte
!= NULL
);
1206 socantsendmore(mp_so
);
1208 mpte
= mptcp_usrclosed(mpte
);
1210 error
= mptcp_output(mpte
);
1216 * Copy the contents of uio into a properly sized mbuf chain.
1219 mptcp_uiotombuf(struct uio
*uio
, int how
, int space
, uint32_t align
,
1222 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
1223 user_ssize_t resid
, tot
, len
, progress
; /* must be user_ssize_t */
1226 VERIFY(top
!= NULL
&& *top
== NULL
);
1229 * space can be zero or an arbitrary large value bound by
1230 * the total data supplied by the uio.
1232 resid
= uio_resid(uio
);
1234 tot
= imin(resid
, space
);
1239 * The smallest unit is a single mbuf with pkthdr.
1240 * We can't align past it.
1246 * Give us the full allocation or nothing.
1247 * If space is zero return the smallest empty mbuf.
1249 if ((len
= tot
+ align
) == 0)
1252 /* Loop and append maximum sized mbufs to the chain tail. */
1254 uint32_t m_needed
= 1;
1256 if (njcl
> 0 && len
> MBIGCLBYTES
)
1257 mb
= m_getpackets_internal(&m_needed
, 1,
1258 how
, 1, M16KCLBYTES
);
1259 else if (len
> MCLBYTES
)
1260 mb
= m_getpackets_internal(&m_needed
, 1,
1261 how
, 1, MBIGCLBYTES
);
1262 else if (len
>= (signed)MINCLSIZE
)
1263 mb
= m_getpackets_internal(&m_needed
, 1,
1266 mb
= m_gethdr(how
, MT_DATA
);
1268 /* Fail the whole operation if one mbuf can't be allocated. */
1276 VERIFY(mb
->m_flags
& M_PKTHDR
);
1277 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1289 /* Fill all mbufs with uio data and update header information. */
1290 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1291 len
= imin(M_TRAILINGSPACE(mb
), tot
- progress
);
1293 error
= uiomove(mtod(mb
, char *), len
, uio
);
1299 /* each mbuf is M_PKTHDR chained via m_next */
1301 mb
->m_pkthdr
.len
= len
;
1305 VERIFY(progress
== tot
);
1311 * MPTCP socket protocol-user socket send routine, derived from sosend().
1314 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1315 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1317 #pragma unused(addr)
1320 int error
, sendflags
;
1321 struct proc
*p
= current_proc();
1324 /* UIO is required for now, due to per-mbuf M_PKTHDR constraints */
1325 if (uio
== NULL
|| top
!= NULL
) {
1329 resid
= uio_resid(uio
);
1331 socket_lock(mp_so
, 1);
1332 so_update_last_owner_locked(mp_so
, p
);
1333 so_update_policy(mp_so
);
1335 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1336 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1338 if ((flags
& (MSG_OOB
|MSG_DONTROUTE
|MSG_HOLD
|MSG_SEND
|MSG_FLUSH
)) ||
1339 (mp_so
->so_flags
& SOF_ENABLE_MSGS
)) {
1341 socket_unlock(mp_so
, 1);
1346 * In theory resid should be unsigned. However, space must be
1347 * signed, as it might be less than 0 if we over-committed, and we
1348 * must use a signed comparison of space and resid. On the other
1349 * hand, a negative resid causes us to loop sending 0-length
1350 * segments to the protocol.
1352 if (resid
< 0 || (flags
& MSG_EOR
) || control
!= NULL
) {
1354 socket_unlock(mp_so
, 1);
1358 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1361 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1366 space
= sbspace(&mp_so
->so_snd
);
1368 socket_unlock(mp_so
, 0);
1370 * Copy the data from userland into an mbuf chain.
1372 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, 0, &top
);
1374 socket_lock(mp_so
, 0);
1377 VERIFY(top
!= NULL
);
1378 space
-= resid
- uio_resid(uio
);
1379 resid
= uio_resid(uio
);
1380 socket_lock(mp_so
, 0);
1383 * Compute flags here, for pru_send and NKEs.
1385 sendflags
= (resid
> 0 && space
> 0) ?
1386 PRUS_MORETOCOME
: 0;
1389 * Socket filter processing
1391 VERIFY(control
== NULL
);
1392 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1394 if (error
== EJUSTRETURN
) {
1397 /* always free control if any */
1401 if (control
!= NULL
) {
1407 * Pass data to protocol.
1409 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1410 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1415 } while (resid
!= 0 && space
> 0);
1416 } while (resid
!= 0);
1420 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1422 socket_unlock(mp_so
, 1);
1426 if (control
!= NULL
)
1429 /* clear SOF1_PRECONNECT_DATA after one write */
1430 if (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)
1431 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;
1437 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1438 * This routine simply indicates to the caller whether or not to proceed
1439 * further with the given socket option. This is invoked by sosetoptlock()
1440 * and sogetoptlock().
/*
 * mptcp_usr_socheckopt() decides whether a SOL_SOCKET option issued on the
 * MP socket may proceed.
 *
 *   mp_so - MP socket; unused here (see the pragma below).
 *   sopt  - option descriptor; sopt_level must be SOL_SOCKET (VERIFY).
 *
 * The switch on sopt_name has three visible groups: options tagged MP,
 * options tagged MP + subflow (for which sopt_valsize is compared against
 * sizeof (int)), and a final branch that sets error to ENOPROTOOPT.
 *
 * NOTE(review): the embedded line numbers skip (1443 -> 1445 -> 1448 ...),
 * so the return type, braces, break/return statements and the statement
 * executed on a size mismatch are not visible in this listing.
 */
1443 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1445 #pragma unused(mp_so)
1448 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1451 * We could check for sopt_dir (set/get) here, but we'll just
1452 * let the caller deal with it as appropriate; therefore the
1453 * following is a superset of the socket options which we
1454 * allow for set/get.
1456 * XXX: adi@apple.com
1458 * Need to consider the following cases:
1460 * a. In the event peeloff(2) occurs on the subflow socket,
1461 * we may want to issue those options which are now
1462 * handled at the MP socket. In that case, we will need
1463 * to record them in mptcp_setopt() so that they can
1464 * be replayed during peeloff.
1466 * b. Certain socket options don't have a clear definition
1467 * on the expected behavior post connect(2). At the time
1468 * those options are issued on the MP socket, there may
1469 * be existing subflow sockets that are already connected.
1471 switch (sopt
->sopt_name
) {
1472 case SO_LINGER
: /* MP */
1473 case SO_LINGER_SEC
: /* MP */
1474 case SO_TYPE
: /* MP */
1475 case SO_NREAD
: /* MP */
1476 case SO_NWRITE
: /* MP */
1477 case SO_ERROR
: /* MP */
1478 case SO_SNDBUF
: /* MP */
1479 case SO_RCVBUF
: /* MP */
1480 case SO_SNDLOWAT
: /* MP */
1481 case SO_RCVLOWAT
: /* MP */
1482 case SO_SNDTIMEO
: /* MP */
1483 case SO_RCVTIMEO
: /* MP */
1484 case SO_NKE
: /* MP */
1485 case SO_NOSIGPIPE
: /* MP */
1486 case SO_NOADDRERR
: /* MP */
1487 case SO_LABEL
: /* MP */
1488 case SO_PEERLABEL
: /* MP */
1489 case SO_DEFUNCTOK
: /* MP */
1490 case SO_ISDEFUNCT
: /* MP */
1491 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1493 * Tell the caller that these options are to be processed.
/*
 * Second group: each case is tagged MP + subflow, except the last two
 * (SO_NOWAKEFROMSLEEP, SO_NOAPNFALLBK) which carry no tag but share the
 * same branch.
 */
1497 case SO_DEBUG
: /* MP + subflow */
1498 case SO_KEEPALIVE
: /* MP + subflow */
1499 case SO_USELOOPBACK
: /* MP + subflow */
1500 case SO_RANDOMPORT
: /* MP + subflow */
1501 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1502 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1503 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1504 case SO_RECV_ANYIF
: /* MP + subflow */
1505 case SO_RESTRICTIONS
: /* MP + subflow */
1506 case SO_FLUSH
: /* MP + subflow */
1507 case SO_MPTCP_FASTJOIN
: /* MP + subflow */
1508 case SO_NOWAKEFROMSLEEP
:
1509 case SO_NOAPNFALLBK
:
1511 * Tell the caller that these options are to be processed;
1512 * these will also be recorded later by mptcp_setopt().
1514 * NOTE: Only support integer option value for now.
1516 if (sopt
->sopt_valsize
!= sizeof (int))
1522 * Tell the caller to stop immediately and return an error.
1524 error
= ENOPROTOOPT
;
1532 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
/*
 * mptcp_setopt_apply() issues the integer option recorded in mpo on the
 * subflow sockets of mpte.
 *
 *   mpte - MPTCP session; MPTE_LOCK_ASSERT_HELD() is applied to it.
 *   mpo  - recorded option (mpo_level, mpo_name, mpo_intval, mpo_flags).
 *
 * error is set to ENOPROTOOPT when mpo lacks MPOF_SUBFLOW_OK, and also when
 * it names SO_NOSIGPIPE or SO_NOADDRERR at SOL_SOCKET.  When mpte_numflows
 * is 0 the option is only flagged MPOF_INTERIM.  Otherwise three passes run
 * over mpte_subflows:
 *   1. read the current value of each subflow into mpts_oldintval and mark
 *      it with MPTSF_SOPT_OLDVAL when the read succeeds;
 *   2. mark each subflow MPTSF_SOPT_INPROG and call
 *      mptcp_subflow_sosetopt() with mpo;
 *   3. clear the per-subflow marks, re-issuing the saved value through the
 *      scratch option smpo on the rollback path.
 *
 * NOTE(review): the embedded line numbers skip, so local declarations such
 * as smpo/so/error, the socket lock calls paired with the visible
 * socket_unlock() calls, and the loop exits / error tests are not visible
 * in this listing.
 */
1535 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1537 struct socket
*mp_so
;
1538 struct mptsub
*mpts
;
1542 /* just bail now if this isn't applicable to subflow sockets */
1543 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1544 error
= ENOPROTOOPT
;
1549 * Skip those that are handled internally; these options
1550 * should not have been recorded and marked with the
1551 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1553 if (mpo
->mpo_level
== SOL_SOCKET
&&
1554 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1555 error
= ENOPROTOOPT
;
1559 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1560 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1563 * Don't bother going further if there's no subflow; mark the option
1564 * with MPOF_INTERIM so that we know whether or not to remove this
1565 * option upon encountering an error while issuing it during subflow
1568 if (mpte
->mpte_numflows
== 0) {
1569 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1570 mpo
->mpo_flags
|= MPOF_INTERIM
;
1571 /* return success */
/*
 * smpo is a scratch option with the same level and name as mpo; it is
 * used below to read each subflow's current value and to write it back.
 */
1575 bzero(&smpo
, sizeof (smpo
));
1576 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1577 smpo
.mpo_level
= mpo
->mpo_level
;
1578 smpo
.mpo_name
= mpo
->mpo_name
;
1580 /* grab existing values in case we need to rollback */
1581 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1585 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1586 mpts
->mpts_oldintval
= 0;
1587 smpo
.mpo_intval
= 0;
1588 VERIFY(mpts
->mpts_socket
!= NULL
);
1589 so
= mpts
->mpts_socket
;
1591 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1592 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1593 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1595 socket_unlock(so
, 0);
1599 /* apply socket option */
1600 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1604 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1605 VERIFY(mpts
->mpts_socket
!= NULL
);
1606 so
= mpts
->mpts_socket
;
1608 error
= mptcp_subflow_sosetopt(mpte
, so
, mpo
);
1609 socket_unlock(so
, 0);
1615 /* cleanup, and rollback if needed */
1616 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1620 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1621 /* clear in case it's set */
1622 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1623 mpts
->mpts_oldintval
= 0;
/* no saved value for this subflow: only the in-progress mark is dropped */
1627 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1628 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1629 VERIFY(mpts
->mpts_oldintval
== 0);
1633 /* error during sosetopt, so roll it back */
1635 VERIFY(mpts
->mpts_socket
!= NULL
);
1636 so
= mpts
->mpts_socket
;
1638 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1639 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1640 socket_unlock(so
, 0);
1642 mpts
->mpts_oldintval
= 0;
1643 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1652 * Handle SOPT_SET for socket options issued on MP socket.
/*
 * mptcp_setopt() handles SOPT_SET for an option issued on the MP socket.
 *
 *   mpte - MPTCP session; MPTE_LOCK_ASSERT_HELD() is applied to it.
 *   sopt - option descriptor; sopt_dir must be SOPT_SET and sopt_level must
 *          be SOL_SOCKET or IPPROTO_TCP (both enforced with VERIFY).
 *
 * Visible flow: classify the option by level and name, copy an int value in
 * with sooptcopyin(), find an existing record with mptcp_sopt_find() or
 * allocate one with mptcp_sopt_alloc(M_WAITOK), store level/name/value in
 * it, insert it with mptcp_sopt_insert() when it is not yet MPOF_ATTACHED,
 * then issue it on existing subflows through mptcp_setopt_apply().  When
 * that call fails for an attached record, the record is removed and freed.
 * The outcome is reported through mptcplog().
 *
 * NOTE(review): the embedded line numbers skip, so several case labels,
 * the conditions selecting between the recorded path and the smpo scratch
 * path (see the rec local), and the return statement are not visible in
 * this listing.
 */
1655 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1657 int error
= 0, optval
, level
, optname
, rec
= 1;
1658 struct mptopt smpo
, *mpo
= NULL
;
1659 struct socket
*mp_so
;
1662 level
= sopt
->sopt_level
;
1663 optname
= sopt
->sopt_name
;
1665 VERIFY(sopt
->sopt_dir
== SOPT_SET
);
1666 VERIFY(level
== SOL_SOCKET
|| level
== IPPROTO_TCP
);
1667 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1668 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1671 * Record socket options which are applicable to subflow sockets so
1672 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1673 * for the list of eligible socket-level options.
1675 if (level
== SOL_SOCKET
) {
1679 case SO_USELOOPBACK
:
1681 case SO_TRAFFIC_CLASS
:
1682 case SO_RECV_TRAFFIC_CLASS
:
1683 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1685 case SO_RESTRICTIONS
:
1686 case SO_NOWAKEFROMSLEEP
:
1687 case SO_MPTCP_FASTJOIN
:
1688 case SO_NOAPNFALLBK
:
1692 /* don't record it */
1696 /* nothing to do; just return success */
1702 case TCP_RXT_FINDROP
:
1706 case TCP_CONNECTIONTIMEOUT
:
1707 case TCP_RXT_CONNDROPTIME
:
1708 case PERSIST_TIMEOUT
:
1709 /* eligible; record it */
1711 case TCP_NOTSENT_LOWAT
:
1712 /* record at MPTCP level */
1713 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
/*
 * Two outcomes are visible for TCP_NOTSENT_LOWAT: SOF_NOTSENT_LOWAT is
 * cleared and a low-water mark of 0 is set, or the flag is set and
 * mptcp_set_notsent_lowat() receives a value.  The condition choosing
 * between them is not visible in this listing.
 */
1722 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1723 error
= mptcp_set_notsent_lowat(mpte
,0);
1725 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1726 error
= mptcp_set_notsent_lowat(mpte
,
1733 error
= ENOPROTOOPT
;
1738 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
1739 sizeof (optval
))) != 0)
1743 /* search for an existing one; if not found, allocate */
1744 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
)
1745 mpo
= mptcp_sopt_alloc(M_WAITOK
);
1750 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1751 "%s: mp_so 0x%llx sopt %s "
1752 "val %d %s\n", __func__
,
1753 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1754 mptcp_sopt2str(level
, optname
, buf
,
1755 sizeof (buf
)), optval
,
1756 (mpo
->mpo_flags
& MPOF_ATTACHED
) ?
1757 "updated" : "recorded"),
1758 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1760 /* initialize or update, as needed */
1761 mpo
->mpo_intval
= optval
;
1762 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1763 mpo
->mpo_level
= level
;
1764 mpo
->mpo_name
= optname
;
1765 mptcp_sopt_insert(mpte
, mpo
);
1767 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
1768 /* this can be issued on the subflow socket */
1769 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1772 bzero(&smpo
, sizeof (smpo
));
1774 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1775 mpo
->mpo_level
= level
;
1776 mpo
->mpo_name
= optname
;
1777 mpo
->mpo_intval
= optval
;
1779 VERIFY(mpo
== NULL
|| error
== 0);
1781 /* issue this socket option on existing subflows */
1783 error
= mptcp_setopt_apply(mpte
, mpo
);
1784 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1785 VERIFY(mpo
!= &smpo
);
1786 mptcp_sopt_remove(mpte
, mpo
);
1787 mptcp_sopt_free(mpo
);
1790 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
/*
 * NOTE(review): the success message below is emitted with LOG_ERR and
 * MPTCP_LOGLVL_ERR, the same levels as the failure message that follows;
 * confirm that this is intended.
 */
1793 if (error
== 0 && mpo
!= NULL
) {
1794 mptcplog((LOG_ERR
, "MPTCP Socket: "
1795 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1796 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1797 mptcp_sopt2str(level
, optname
, buf
,
1798 sizeof (buf
)), optval
, (mpo
->mpo_flags
& MPOF_INTERIM
) ?
1799 "pending" : "successful"),
1800 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1801 } else if (error
!= 0) {
1802 mptcplog((LOG_ERR
, "MPTCP Socket: "
1803 "%s: mp_so 0x%llx sopt %s can't be issued "
1804 "error %d\n", __func__
,
1805 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mptcp_sopt2str(level
,
1806 optname
, buf
, sizeof (buf
)), error
),
1807 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1813 * Handle SOPT_GET for socket options issued on MP socket.
/*
 * mptcp_getopt() handles SOPT_GET for an option issued on the MP socket.
 *
 *   mpte - MPTCP session; MPTE_LOCK_ASSERT_HELD() is applied to it.
 *   sopt - option descriptor; sopt_dir must be SOPT_GET (VERIFY).
 *
 * A level other than IPPROTO_TCP sets error to ENOPROTOOPT.  For the
 * listed TCP option names a default is first obtained from
 * mptcp_default_tcp_optval().  For TCP_NOTSENT_LOWAT, when the MP socket
 * has SOF_NOTSENT_LOWAT set, optval is taken from
 * mptcp_get_notsent_lowat().  A record found by mptcp_sopt_find() supplies
 * its mpo_intval instead.  The result is copied out with sooptcopyout() as
 * an int.
 *
 * NOTE(review): the embedded line numbers skip, so some case labels, the
 * declaration of mpo, the exits taken on error and the return statement
 * are not visible in this listing.
 */
1816 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1818 int error
= 0, optval
;
1820 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1821 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1824 * We only handle SOPT_GET for TCP level socket options; we should
1825 * not get here for socket level options since they are already
1826 * handled at the socket layer.
1828 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
1829 error
= ENOPROTOOPT
;
1833 switch (sopt
->sopt_name
) {
1835 case TCP_RXT_FINDROP
:
1839 case TCP_CONNECTIONTIMEOUT
:
1840 case TCP_RXT_CONNDROPTIME
:
1841 case PERSIST_TIMEOUT
:
1842 case TCP_NOTSENT_LOWAT
:
1843 /* eligible; get the default value just in case */
1844 error
= mptcp_default_tcp_optval(mpte
, sopt
, &optval
);
1848 error
= ENOPROTOOPT
;
1852 switch (sopt
->sopt_name
) {
1853 case TCP_NOTSENT_LOWAT
:
1854 if (mpte
->mpte_mppcb
->mpp_socket
->so_flags
& SOF_NOTSENT_LOWAT
)
1855 optval
= mptcp_get_notsent_lowat(mpte
);
1862 * Search for a previously-issued TCP level socket option and
1863 * return the recorded option value. This assumes that the
1864 * value did not get modified by the lower layer after it was
1865 * issued at setsockopt(2) time. If not found, we'll return
1866 * the default value obtained ealier.
1871 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) != NULL
)
1872 optval
= mpo
->mpo_intval
;
1874 error
= sooptcopyout(sopt
, &optval
, sizeof (int));
1881 * Return default values for TCP socket options. Ideally we would query the
1882 * subflow TCP socket, but that requires creating a subflow socket before
1883 * connectx(2) time. To simplify things, just return the default values
/*
 * mptcp_default_tcp_optval() stores a default value for a TCP-level option
 * into *optval.
 *
 *   mpte   - MPTCP session; MPTE_LOCK_ASSERT_HELD() is applied to it.
 *   sopt   - option descriptor; sopt_level must be IPPROTO_TCP and sopt_dir
 *            must be SOPT_GET (both enforced with VERIFY).
 *   optval - output location for the default.
 *
 * Visible assignments: mptcp_subflow_keeptime, and tcp_max_persist_timeout
 * under PERSIST_TIMEOUT.  A further branch sets error to ENOPROTOOPT.
 *
 * NOTE(review): the embedded line numbers skip (for example 1903 -> 1908),
 * so it cannot be determined from this listing which case labels lead to
 * the mptcp_subflow_keeptime assignment, nor what the other listed options
 * receive.
 */
1887 mptcp_default_tcp_optval(struct mptses
*mpte
, struct sockopt
*sopt
, int *optval
)
1891 VERIFY(sopt
->sopt_level
== IPPROTO_TCP
);
1892 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1893 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1895 /* try to do what tcp_newtcpcb() does */
1896 switch (sopt
->sopt_name
) {
1898 case TCP_RXT_FINDROP
:
1901 case TCP_CONNECTIONTIMEOUT
:
1902 case TCP_RXT_CONNDROPTIME
:
1903 case TCP_NOTSENT_LOWAT
:
1908 *optval
= mptcp_subflow_keeptime
;
1911 case PERSIST_TIMEOUT
:
1912 *optval
= tcp_max_persist_timeout
;
1916 error
= ENOPROTOOPT
;
1923 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1924 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1925 * to those that are allowed by mptcp_usr_socheckopt().
/*
 * mptcp_ctloutput() is the SOPT_SET / SOPT_GET entry point for options
 * issued on the MP socket.
 *
 *   mp_so - MP socket; its mppcb is obtained with sotomppcb().
 *   sopt  - option descriptor.
 *
 * A NULL mppcb or one in MPPCB_STATE_DEAD takes an early branch.  Levels
 * other than SOL_SOCKET and IPPROTO_TCP are logged as not handled.  The
 * request is then dispatched on sopt_dir to mptcp_setopt() or
 * mptcp_getopt().
 *
 * NOTE(review): the embedded line numbers skip, so the error values
 * assigned in the early branches, the case labels of the sopt_dir switch
 * and the return statement are not visible in this listing.
 */
1928 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
1930 struct mppcb
*mpp
= sotomppcb(mp_so
);
1931 struct mptses
*mpte
;
1934 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1938 mpte
= mptompte(mpp
);
1939 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1941 /* we only handle socket and TCP-level socket options for MPTCP */
1942 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
1944 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1945 "%s: mp_so 0x%llx sopt %s level not "
1946 "handled\n", __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1947 mptcp_sopt2str(sopt
->sopt_level
,
1948 sopt
->sopt_name
, buf
, sizeof (buf
))),
1949 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1954 switch (sopt
->sopt_dir
) {
1956 error
= mptcp_setopt(mpte
, sopt
);
1960 error
= mptcp_getopt(mpte
, sopt
);
1968 * Return a string representation of <sopt_level,sopt_name>
/*
 * mptcp_sopt2str() formats the pair level/optname into dst.
 *
 *   level, optname - socket option level and name to describe.
 *   dst, size      - output buffer and its size, passed to snprintf().
 *
 * lbuf and obuf are first filled with the hexadecimal forms of level and
 * optname; l and o initially point at them.  For recognised option names o
 * is redirected to a string literal holding the symbolic name.  The final
 * snprintf() writes the two strings into dst, separated by a comma and
 * wrapped in angle brackets.
 *
 * NOTE(review): the embedded line numbers skip, so the enclosing switch
 * statements, many case labels, any assignment to l, and the return
 * statement are not visible in this listing.
 */
1971 mptcp_sopt2str(int level
, int optname
, char *dst
, int size
)
1973 char lbuf
[32], obuf
[32];
1974 const char *l
= lbuf
, *o
= obuf
;
1976 (void) snprintf(lbuf
, sizeof (lbuf
), "0x%x", level
);
1977 (void) snprintf(obuf
, sizeof (obuf
), "0x%x", optname
);
1987 o
= "SO_LINGER_SEC";
1995 case SO_USELOOPBACK
:
1996 o
= "SO_USELOOPBACK";
2037 case SO_RESTRICTIONS
:
2038 o
= "SO_RESTRICTIONS";
2047 o
= "SO_RANDOMPORT";
2049 case SO_TRAFFIC_CLASS
:
2050 o
= "SO_TRAFFIC_CLASS";
2052 case SO_RECV_TRAFFIC_CLASS
:
2053 o
= "SO_RECV_TRAFFIC_CLASS";
2055 case SO_TRAFFIC_CLASS_DBG
:
2056 o
= "SO_TRAFFIC_CLASS_DBG";
2058 case SO_PRIVILEGED_TRAFFIC_CLASS
:
2059 o
= "SO_PRIVILEGED_TRAFFIC_CLASS";
2067 case SO_OPPORTUNISTIC
:
2068 o
= "SO_OPPORTUNISTIC";
2074 o
= "SO_RECV_ANYIF";
2076 case SO_NOWAKEFROMSLEEP
:
2077 o
= "SO_NOWAKEFROMSLEEP";
2079 case SO_MPTCP_FASTJOIN
:
2080 o
= "SO_MPTCP_FASTJOIN";
2082 case SO_NOAPNFALLBK
:
2083 o
= "SO_NOAPNFALLBK";
2091 o
= "TCP_KEEPALIVE";
2094 o
= "TCP_KEEPINTVL";
2099 case TCP_CONNECTIONTIMEOUT
:
2100 o
= "TCP_CONNECTIONTIMEOUT";
2102 case TCP_RXT_CONNDROPTIME
:
2103 o
= "TCP_RXT_CONNDROPTIME";
2105 case PERSIST_TIMEOUT
:
2106 o
= "PERSIST_TIMEOUT";
2112 (void) snprintf(dst
, size
, "<%s,%s>", l
, o
);
/*
 * mptcp_usr_preconnect() operates on a subflow of the MP socket mp_so.
 *
 *   mp_so - MP socket; its session is obtained with sotomppcb() and
 *           mptompte(), must be non-NULL (VERIFY), and
 *           MPTE_LOCK_ASSERT_HELD() is applied to it.
 *
 * Visible flow: pick a subflow with mptcp_get_subflow(mpte, NULL, NULL),
 * clear MPTSF_TFO_REQD on it, clear TMPF_TFO_REQUEST on the tcpcb of its
 * socket, call tcp_output() on that socket, unlock the subflow socket and
 * finally clear SOF1_PRECONNECT_DATA on mp_so.
 *
 * NOTE(review): the embedded line numbers skip, so the condition guarding
 * the invalid preconnect log message, the declaration of so, the lock call
 * paired with socket_unlock() and the return statement are not visible in
 * this listing; the end of the function may also lie past the visible text.
 */
2117 mptcp_usr_preconnect(struct socket
*mp_so
)
2119 struct mptsub
*mpts
= NULL
;
2120 struct mppcb
*mpp
= sotomppcb(mp_so
);
2121 struct mptses
*mpte
;
2123 struct tcpcb
*tp
= NULL
;
2125 mpte
= mptompte(mpp
);
2126 VERIFY(mpte
!= NULL
);
2127 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2129 mpts
= mptcp_get_subflow(mpte
, NULL
, NULL
);
2131 mptcplog((LOG_ERR
, "MPTCP Socket: "
2132 "%s: mp_so 0x%llx invalid preconnect ", __func__
,
2133 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2134 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
2138 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
2139 so
= mpts
->mpts_socket
;
2141 tp
= intotcpcb(sotoinpcb(so
));
2142 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
2143 int error
= tcp_output(sototcpcb(so
));
2144 socket_unlock(so
, 0);
2146 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;