2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
42 #include <netinet/in.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/tcp_seq.h>
47 #include <netinet/tcp_var.h>
48 #include <netinet/tcp_timer.h>
49 #include <netinet/mptcp_var.h>
50 #include <netinet/mptcp_timer.h>
54 static int mptcp_usr_attach(struct socket
*, int, struct proc
*);
55 static int mptcp_usr_detach(struct socket
*);
56 static int mptcp_attach(struct socket
*, struct proc
*);
57 static int mptcp_detach(struct socket
*, struct mppcb
*);
58 static int mptcp_connectx(struct mptses
*, struct sockaddr_list
**,
59 struct sockaddr_list
**, struct proc
*, uint32_t, sae_associd_t
,
60 sae_connid_t
*, uint32_t, void *, uint32_t);
61 static int mptcp_usr_connectx(struct socket
*, struct sockaddr_list
**,
62 struct sockaddr_list
**, struct proc
*, uint32_t, sae_associd_t
,
63 sae_connid_t
*, uint32_t, void *, uint32_t, struct uio
*, user_ssize_t
*);
64 static int mptcp_getassocids(struct mptses
*, uint32_t *, user_addr_t
);
65 static int mptcp_getconnids(struct mptses
*, sae_associd_t
, uint32_t *,
67 static int mptcp_getconninfo(struct mptses
*, sae_connid_t
*, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t
, socklen_t
*, user_addr_t
, socklen_t
*,
69 uint32_t *, user_addr_t
, uint32_t *);
70 static int mptcp_usr_control(struct socket
*, u_long
, caddr_t
, struct ifnet
*,
72 static int mptcp_disconnectx(struct mptses
*, sae_associd_t
, sae_connid_t
);
73 static int mptcp_usr_disconnect(struct socket
*);
74 static int mptcp_usr_disconnectx(struct socket
*, sae_associd_t
, sae_connid_t
);
75 static struct mptses
*mptcp_usrclosed(struct mptses
*);
76 static int mptcp_usr_peeloff(struct socket
*, sae_associd_t
, struct socket
**);
77 static int mptcp_peeloff(struct mptses
*, sae_associd_t
, struct socket
**);
78 static int mptcp_usr_rcvd(struct socket
*, int);
79 static int mptcp_usr_send(struct socket
*, int, struct mbuf
*,
80 struct sockaddr
*, struct mbuf
*, struct proc
*);
81 static int mptcp_usr_shutdown(struct socket
*);
82 static int mptcp_uiotombuf(struct uio
*, int, int, uint32_t, struct mbuf
**);
83 static int mptcp_usr_sosend(struct socket
*, struct sockaddr
*, struct uio
*,
84 struct mbuf
*, struct mbuf
*, int);
85 static int mptcp_usr_socheckopt(struct socket
*, struct sockopt
*);
86 static int mptcp_setopt_apply(struct mptses
*, struct mptopt
*);
87 static int mptcp_setopt(struct mptses
*, struct sockopt
*);
88 static int mptcp_getopt(struct mptses
*, struct sockopt
*);
89 static int mptcp_default_tcp_optval(struct mptses
*, struct sockopt
*, int *);
90 static void mptcp_connorder_helper(struct mptsub
*mpts
);
91 static int mptcp_usr_preconnect(struct socket
*so
);
93 struct pr_usrreqs mptcp_usrreqs
= {
94 .pru_attach
= mptcp_usr_attach
,
95 .pru_connectx
= mptcp_usr_connectx
,
96 .pru_control
= mptcp_usr_control
,
97 .pru_detach
= mptcp_usr_detach
,
98 .pru_disconnect
= mptcp_usr_disconnect
,
99 .pru_disconnectx
= mptcp_usr_disconnectx
,
100 .pru_peeloff
= mptcp_usr_peeloff
,
101 .pru_rcvd
= mptcp_usr_rcvd
,
102 .pru_send
= mptcp_usr_send
,
103 .pru_shutdown
= mptcp_usr_shutdown
,
104 .pru_sosend
= mptcp_usr_sosend
,
105 .pru_soreceive
= soreceive
,
106 .pru_socheckopt
= mptcp_usr_socheckopt
,
107 .pru_preconnect
= mptcp_usr_preconnect
,
111 * Attaches an MPTCP control block to a socket.
114 mptcp_usr_attach(struct socket
*mp_so
, int proto
, struct proc
*p
)
116 #pragma unused(proto)
119 VERIFY(sotomppcb(mp_so
) == NULL
);
121 error
= mptcp_attach(mp_so
, p
);
127 * Might want to use a different SO_LINGER timeout than TCP's?
129 if ((mp_so
->so_options
& SO_LINGER
) && mp_so
->so_linger
== 0)
130 mp_so
->so_linger
= TCP_LINGERTIME
* hz
;
136 * Detaches an MPTCP control block from a socket.
139 mptcp_usr_detach(struct socket
*mp_so
)
141 struct mppcb
*mpp
= sotomppcb(mp_so
);
145 VERIFY(mpp
->mpp_socket
!= NULL
);
147 error
= mptcp_detach(mp_so
, mpp
);
152 * Attach MPTCP protocol to socket, allocating MP control block,
153 * MPTCP session, control block, buffer space, etc.
156 mptcp_attach(struct socket
*mp_so
, struct proc
*p
)
159 struct mptses
*mpte
= NULL
;
160 struct mptcb
*mp_tp
= NULL
;
161 struct mppcb
*mpp
= NULL
;
164 if (mp_so
->so_snd
.sb_hiwat
== 0 || mp_so
->so_rcv
.sb_hiwat
== 0) {
165 error
= soreserve(mp_so
, tcp_sendspace
, MPTCP_RWIN_MAX
);
170 if (mp_so
->so_snd
.sb_preconn_hiwat
== 0) {
171 soreserve_preconnect(mp_so
, 2048);
175 * MPTCP socket buffers cannot be compressed, due to the
176 * fact that each mbuf chained via m_next is an M_PKTHDR
177 * which carries some MPTCP metadata.
179 mp_so
->so_snd
.sb_flags
|= SB_NOCOMPRESS
;
180 mp_so
->so_rcv
.sb_flags
|= SB_NOCOMPRESS
;
182 /* Disable socket buffer auto-tuning. */
183 mp_so
->so_rcv
.sb_flags
&= ~SB_AUTOSIZE
;
184 mp_so
->so_snd
.sb_flags
&= ~SB_AUTOSIZE
;
186 if ((error
= mp_pcballoc(mp_so
, &mtcbinfo
)) != 0) {
190 mpp
= sotomppcb(mp_so
);
192 mpte
= (struct mptses
*)mpp
->mpp_pcbe
;
193 VERIFY(mpte
!= NULL
);
194 mp_tp
= mpte
->mpte_mptcb
;
195 VERIFY(mp_tp
!= NULL
);
201 * Called when the socket layer loses its final reference to the socket;
202 * at this point, there is only one case in which we will keep things
206 mptcp_detach(struct socket
*mp_so
, struct mppcb
*mpp
)
209 struct mppcbinfo
*mppi
;
211 VERIFY(mp_so
->so_pcb
== mpp
);
212 VERIFY(mpp
->mpp_socket
== mp_so
);
214 mppi
= mpp
->mpp_pcbinfo
;
215 VERIFY(mppi
!= NULL
);
217 __IGNORE_WCASTALIGN(mpte
= &((struct mpp_mtp
*)mpp
)->mpp_ses
);
218 VERIFY(mpte
->mpte_mppcb
== mpp
);
220 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
223 * We are done with this MPTCP socket (it has been closed);
224 * trigger all subflows to be disconnected, if not already,
225 * by initiating the PCB detach sequence (SOF_PCBCLEARING
230 (void) mptcp_disconnectx(mpte
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
235 * Here, we would want to handle time wait state.
242 * Common subroutine to open an MPTCP connection to one of the remote hosts
243 * specified by dst_sl. This includes allocating and establishing a
244 * subflow TCP connection, either initially to establish MPTCP connection,
245 * or to join an existing one. Returns a connection handle upon success.
248 mptcp_connectx(struct mptses
*mpte
, struct sockaddr_list
**src_sl
,
249 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
250 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
253 #pragma unused(p, aid, flags, arg, arglen)
255 struct socket
*mp_so
;
258 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
259 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
261 VERIFY(dst_sl
!= NULL
&& *dst_sl
!= NULL
);
262 VERIFY(pcid
!= NULL
);
264 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
265 "%s: mp_so 0x%llx\n", __func__
,
266 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
267 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
269 DTRACE_MPTCP3(connectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
270 struct socket
*, mp_so
);
272 mpts
= mptcp_subflow_alloc(M_WAITOK
);
277 MPTS_ADDREF(mpts
); /* for this routine */
279 if (src_sl
!= NULL
) {
280 mpts
->mpts_src_sl
= *src_sl
;
283 mpts
->mpts_dst_sl
= *dst_sl
;
286 error
= mptcp_subflow_add(mpte
, mpts
, p
, ifscope
);
287 if (error
== 0 && pcid
!= NULL
)
288 *pcid
= mpts
->mpts_connid
;
292 if ((error
!= 0) && (error
!= EWOULDBLOCK
)) {
294 if (mpts
->mpts_flags
& MPTSF_ATTACHED
) {
297 mptcp_subflow_del(mpte
, mpts
, TRUE
);
309 * User-protocol pru_connectx callback.
312 mptcp_usr_connectx(struct socket
*mp_so
, struct sockaddr_list
**src_sl
,
313 struct sockaddr_list
**dst_sl
, struct proc
*p
, uint32_t ifscope
,
314 sae_associd_t aid
, sae_connid_t
*pcid
, uint32_t flags
, void *arg
,
315 uint32_t arglen
, struct uio
*auio
, user_ssize_t
*bytes_written
)
317 struct mppcb
*mpp
= sotomppcb(mp_so
);
318 struct mptses
*mpte
= NULL
;
319 struct mptcb
*mp_tp
= NULL
;
320 user_ssize_t datalen
;
324 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
328 mpte
= mptompte(mpp
);
329 VERIFY(mpte
!= NULL
);
331 mp_tp
= mpte
->mpte_mptcb
;
332 VERIFY(mp_tp
!= NULL
);
334 if (mp_tp
->mpt_flags
& MPTCPF_FALLBACK_TO_TCP
) {
339 error
= mptcp_connectx(mpte
, src_sl
, dst_sl
, p
, ifscope
,
340 aid
, pcid
, flags
, arg
, arglen
);
342 /* If there is data, copy it */
344 datalen
= uio_resid(auio
);
345 socket_unlock(mp_so
, 0);
346 error
= mp_so
->so_proto
->pr_usrreqs
->pru_sosend(mp_so
, NULL
,
347 (uio_t
) auio
, NULL
, NULL
, 0);
348 /* check if this can be supported with fast Join also. XXX */
349 if (error
== 0 || error
== EWOULDBLOCK
)
350 *bytes_written
= datalen
- uio_resid(auio
);
352 if (error
== EWOULDBLOCK
)
355 socket_lock(mp_so
, 0);
357 if (mp_tp
->mpt_flags
& MPTCPF_PEEL_OFF
) {
358 *bytes_written
= datalen
- uio_resid(auio
);
360 * Override errors like EPIPE that occur as
361 * a result of doing TFO during TCP fallback.
373 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
376 mptcp_getassocids(struct mptses
*mpte
, uint32_t *cnt
, user_addr_t aidp
)
378 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
380 /* MPTCP has at most 1 association */
381 *cnt
= (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) ? 1 : 0;
383 /* just asking how many there are? */
384 if (aidp
== USER_ADDR_NULL
)
387 return (copyout(&mpte
->mpte_associd
, aidp
,
388 sizeof (mpte
->mpte_associd
)));
392 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
395 mptcp_getconnids(struct mptses
*mpte
, sae_associd_t aid
, uint32_t *cnt
,
401 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
403 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
404 aid
!= mpte
->mpte_associd
)
407 *cnt
= mpte
->mpte_numflows
;
409 /* just asking how many there are? */
410 if (cidp
== USER_ADDR_NULL
)
413 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
414 if ((error
= copyout(&mpts
->mpts_connid
, cidp
,
415 sizeof (mpts
->mpts_connid
))) != 0)
418 cidp
+= sizeof (mpts
->mpts_connid
);
425 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
428 mptcp_getconninfo(struct mptses
*mpte
, sae_connid_t
*cid
, uint32_t *flags
,
429 uint32_t *ifindex
, int32_t *soerror
, user_addr_t src
, socklen_t
*src_len
,
430 user_addr_t dst
, socklen_t
*dst_len
, uint32_t *aux_type
,
431 user_addr_t aux_data
, uint32_t *aux_len
)
433 #pragma unused(aux_data)
434 struct sockaddr_entry
*se
;
435 struct ifnet
*ifp
= NULL
;
439 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
441 if (*cid
== SAE_CONNID_ALL
)
444 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
445 if (mpts
->mpts_connid
== *cid
|| *cid
== SAE_CONNID_ANY
)
449 return ((*cid
== SAE_CONNID_ANY
) ? ENXIO
: EINVAL
);
452 ifp
= mpts
->mpts_outif
;
453 *cid
= mpts
->mpts_connid
;
454 *ifindex
= ((ifp
!= NULL
) ? ifp
->if_index
: 0);
455 *soerror
= mpts
->mpts_soerror
;
457 if (mpts
->mpts_flags
& MPTSF_CONNECTING
)
458 *flags
|= CIF_CONNECTING
;
459 if (mpts
->mpts_flags
& MPTSF_CONNECTED
)
460 *flags
|= CIF_CONNECTED
;
461 if (mpts
->mpts_flags
& MPTSF_DISCONNECTING
)
462 *flags
|= CIF_DISCONNECTING
;
463 if (mpts
->mpts_flags
& MPTSF_DISCONNECTED
)
464 *flags
|= CIF_DISCONNECTED
;
465 if (mpts
->mpts_flags
& MPTSF_BOUND_IF
)
466 *flags
|= CIF_BOUND_IF
;
467 if (mpts
->mpts_flags
& MPTSF_BOUND_IP
)
468 *flags
|= CIF_BOUND_IP
;
469 if (mpts
->mpts_flags
& MPTSF_BOUND_PORT
)
470 *flags
|= CIF_BOUND_PORT
;
471 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
472 *flags
|= CIF_PREFERRED
;
473 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
)
474 *flags
|= CIF_MP_CAPABLE
;
475 if (mpts
->mpts_flags
& MPTSF_MP_DEGRADED
)
476 *flags
|= CIF_MP_DEGRADED
;
477 if (mpts
->mpts_flags
& MPTSF_MP_READY
)
478 *flags
|= CIF_MP_READY
;
479 if (mpts
->mpts_flags
& MPTSF_ACTIVE
)
480 *flags
|= CIF_MP_ACTIVE
;
482 VERIFY(mpts
->mpts_src_sl
!= NULL
);
483 se
= TAILQ_FIRST(&mpts
->mpts_src_sl
->sl_head
);
484 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
);
485 *src_len
= se
->se_addr
->sa_len
;
486 if (src
!= USER_ADDR_NULL
) {
487 error
= copyout(se
->se_addr
, src
, se
->se_addr
->sa_len
);
492 VERIFY(mpts
->mpts_dst_sl
!= NULL
);
493 se
= TAILQ_FIRST(&mpts
->mpts_dst_sl
->sl_head
);
494 VERIFY(se
!= NULL
&& se
->se_addr
!= NULL
);
495 *dst_len
= se
->se_addr
->sa_len
;
496 if (dst
!= USER_ADDR_NULL
) {
497 error
= copyout(se
->se_addr
, dst
, se
->se_addr
->sa_len
);
504 if (mpts
->mpts_socket
!= NULL
) {
505 struct conninfo_tcp tcp_ci
;
507 *aux_type
= CIAUX_TCP
;
508 *aux_len
= sizeof (tcp_ci
);
510 if (aux_data
!= USER_ADDR_NULL
) {
511 struct socket
*so
= mpts
->mpts_socket
;
513 VERIFY(SOCK_PROTO(so
) == IPPROTO_TCP
);
514 bzero(&tcp_ci
, sizeof (tcp_ci
));
516 tcp_getconninfo(so
, &tcp_ci
);
517 socket_unlock(so
, 0);
518 error
= copyout(&tcp_ci
, aux_data
, sizeof (tcp_ci
));
523 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
524 "%s: cid %d flags %x \n",
525 __func__
, mpts
->mpts_connid
, mpts
->mpts_flags
),
526 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
534 * Handle SIOCSCONNORDER
537 mptcp_setconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t rank
)
539 struct mptsub
*mpts
, *mpts1
;
542 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
543 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
544 "%s: cid %d rank %d \n", __func__
, cid
, rank
),
545 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_VERBOSE
);
547 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
552 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
553 if (mpts
->mpts_connid
== cid
)
561 if (rank
== 0 || rank
> 1) {
563 * If rank is 0, determine whether this should be the
564 * primary or backup subflow, depending on what we have.
566 * Otherwise, if greater than 0, make it a backup flow.
568 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
570 if (mpts1
->mpts_flags
& MPTSF_PREFERRED
) {
578 mpts
->mpts_flags
&= ~MPTSF_PREFERRED
;
579 mpts
->mpts_rank
= rank
;
580 if (mpts1
!= NULL
&& mpts
!= mpts1
) {
581 /* preferred subflow found; set rank as necessary */
583 mpts
->mpts_rank
= (mpts1
->mpts_rank
+ 1);
584 } else if (rank
== 0) {
585 /* no preferred one found; promote this */
593 * If rank is 1, promote this subflow to be preferred.
595 TAILQ_FOREACH(mpts1
, &mpte
->mpte_subflows
, mpts_entry
) {
598 (mpts1
->mpts_flags
& MPTSF_PREFERRED
)) {
599 mpts1
->mpts_flags
&= ~MPTSF_PREFERRED
;
600 if (mpte
->mpte_nummpcapflows
> 1)
601 mptcp_connorder_helper(mpts1
);
602 } else if (mpts1
== mpts
) {
603 mpts1
->mpts_rank
= 1;
604 if (mpts1
->mpts_flags
& MPTSF_MP_CAPABLE
) {
605 mpts1
->mpts_flags
|= MPTSF_PREFERRED
;
606 if (mpte
->mpte_nummpcapflows
> 1)
607 mptcp_connorder_helper(mpts1
);
619 mptcp_connorder_helper(struct mptsub
*mpts
)
621 struct socket
*so
= mpts
->mpts_socket
;
622 struct tcpcb
*tp
= NULL
;
626 tp
= intotcpcb(sotoinpcb(so
));
627 tp
->t_mpflags
|= TMPF_SND_MPPRIO
;
628 if (mpts
->mpts_flags
& MPTSF_PREFERRED
)
629 tp
->t_mpflags
&= ~TMPF_BACKUP_PATH
;
631 tp
->t_mpflags
|= TMPF_BACKUP_PATH
;
633 socket_unlock(so
, 0);
638 * Handle SIOCGCONNORDER
641 mptcp_getconnorder(struct mptses
*mpte
, sae_connid_t cid
, uint32_t *rank
)
646 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
647 VERIFY(rank
!= NULL
);
650 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
655 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
656 if (mpts
->mpts_connid
== cid
)
665 *rank
= mpts
->mpts_rank
;
672 * User-protocol pru_control callback.
675 mptcp_usr_control(struct socket
*mp_so
, u_long cmd
, caddr_t data
,
676 struct ifnet
*ifp
, struct proc
*p
)
678 #pragma unused(ifp, p)
679 struct mppcb
*mpp
= sotomppcb(mp_so
);
683 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
687 mpte
= mptompte(mpp
);
688 VERIFY(mpte
!= NULL
);
690 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
693 case SIOCGASSOCIDS32
: { /* struct so_aidreq32 */
694 struct so_aidreq32 aidr
;
695 bcopy(data
, &aidr
, sizeof (aidr
));
696 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
699 bcopy(&aidr
, data
, sizeof (aidr
));
703 case SIOCGASSOCIDS64
: { /* struct so_aidreq64 */
704 struct so_aidreq64 aidr
;
705 bcopy(data
, &aidr
, sizeof (aidr
));
706 error
= mptcp_getassocids(mpte
, &aidr
.sar_cnt
,
709 bcopy(&aidr
, data
, sizeof (aidr
));
713 case SIOCGCONNIDS32
: { /* struct so_cidreq32 */
714 struct so_cidreq32 cidr
;
715 bcopy(data
, &cidr
, sizeof (cidr
));
716 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
719 bcopy(&cidr
, data
, sizeof (cidr
));
723 case SIOCGCONNIDS64
: { /* struct so_cidreq64 */
724 struct so_cidreq64 cidr
;
725 bcopy(data
, &cidr
, sizeof (cidr
));
726 error
= mptcp_getconnids(mpte
, cidr
.scr_aid
, &cidr
.scr_cnt
,
729 bcopy(&cidr
, data
, sizeof (cidr
));
733 case SIOCGCONNINFO32
: { /* struct so_cinforeq32 */
734 struct so_cinforeq32 cifr
;
735 bcopy(data
, &cifr
, sizeof (cifr
));
736 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
737 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
738 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
739 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
742 bcopy(&cifr
, data
, sizeof (cifr
));
746 case SIOCGCONNINFO64
: { /* struct so_cinforeq64 */
747 struct so_cinforeq64 cifr
;
748 bcopy(data
, &cifr
, sizeof (cifr
));
749 error
= mptcp_getconninfo(mpte
, &cifr
.scir_cid
,
750 &cifr
.scir_flags
, &cifr
.scir_ifindex
, &cifr
.scir_error
,
751 cifr
.scir_src
, &cifr
.scir_src_len
, cifr
.scir_dst
,
752 &cifr
.scir_dst_len
, &cifr
.scir_aux_type
, cifr
.scir_aux_data
,
755 bcopy(&cifr
, data
, sizeof (cifr
));
759 case SIOCSCONNORDER
: { /* struct so_cordreq */
760 struct so_cordreq cor
;
761 bcopy(data
, &cor
, sizeof (cor
));
762 error
= mptcp_setconnorder(mpte
, cor
.sco_cid
, cor
.sco_rank
);
764 bcopy(&cor
, data
, sizeof (cor
));
768 case SIOCGCONNORDER
: { /* struct so_cordreq */
769 struct so_cordreq cor
;
770 bcopy(data
, &cor
, sizeof (cor
));
771 error
= mptcp_getconnorder(mpte
, cor
.sco_cid
, &cor
.sco_rank
);
773 bcopy(&cor
, data
, sizeof (cor
));
786 * Initiate a disconnect. MPTCP-level disconnection is specified by
787 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
788 * connection while keeping the MPTCP-level connection (association).
791 mptcp_disconnectx(struct mptses
*mpte
, sae_associd_t aid
, sae_connid_t cid
)
794 struct socket
*mp_so
;
798 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
800 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
801 mp_tp
= mpte
->mpte_mptcb
;
803 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
804 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__
,
805 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), aid
, cid
, mp_so
->so_error
),
806 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
808 DTRACE_MPTCP5(disconnectx
, struct mptses
*, mpte
, sae_associd_t
, aid
,
809 sae_connid_t
, cid
, struct socket
*, mp_so
, struct mptcb
*, mp_tp
);
811 VERIFY(aid
== SAE_ASSOCID_ANY
|| aid
== SAE_ASSOCID_ALL
||
812 aid
== mpte
->mpte_associd
);
814 /* terminate the association? */
815 if (cid
== SAE_CONNID_ANY
|| cid
== SAE_CONNID_ALL
) {
816 /* if we're not detached, go thru socket state checks */
817 if (!(mp_so
->so_flags
& SOF_PCBCLEARING
)) {
818 if (!(mp_so
->so_state
& (SS_ISCONNECTED
|
823 if (mp_so
->so_state
& SS_ISDISCONNECTING
) {
829 mptcp_cancel_all_timers(mp_tp
);
830 if (mp_tp
->mpt_state
< MPTCPS_ESTABLISHED
) {
831 (void) mptcp_close(mpte
, mp_tp
);
833 } else if ((mp_so
->so_options
& SO_LINGER
) &&
834 mp_so
->so_linger
== 0) {
835 (void) mptcp_drop(mpte
, mp_tp
, 0);
839 soisdisconnecting(mp_so
);
840 sbflush(&mp_so
->so_rcv
);
841 if (mptcp_usrclosed(mpte
) != NULL
)
842 (void) mptcp_output(mpte
);
845 bool disconnect_embryonic_subflows
= false;
846 struct socket
*so
= NULL
;
848 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
849 if (mpts
->mpts_connid
!= cid
)
854 * Check if disconnected subflow is the one used
855 * to initiate MPTCP connection.
856 * If it is, and the connection is not yet join ready,
857 * disconnect all other subflows.
859 so
= mpts
->mpts_socket
;
860 if (!(mp_tp
->mpt_flags
& MPTCPF_JOIN_READY
) &&
861 so
&& !(so
->so_flags
& SOF_MP_SEC_SUBFLOW
)) {
862 disconnect_embryonic_subflows
= true;
865 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
866 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
876 if (disconnect_embryonic_subflows
) {
877 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
878 if (mpts
->mpts_connid
== cid
)
881 mptcp_subflow_disconnect(mpte
, mpts
, TRUE
);
888 mptcp_thread_signal(mpte
);
890 if ((mp_so
->so_state
& (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) ==
891 (SS_CANTRCVMORE
| SS_CANTSENDMORE
)) {
892 /* the socket has been shutdown, no more sockopt's */
893 mptcp_flush_sopts(mpte
);
901 * Wrapper function to support disconnect on socket
904 mptcp_usr_disconnect(struct socket
*mp_so
)
908 error
= mptcp_usr_disconnectx(mp_so
, SAE_ASSOCID_ALL
, SAE_CONNID_ALL
);
913 * User-protocol pru_disconnectx callback.
916 mptcp_usr_disconnectx(struct socket
*mp_so
, sae_associd_t aid
, sae_connid_t cid
)
918 struct mppcb
*mpp
= sotomppcb(mp_so
);
922 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
926 mpte
= mptompte(mpp
);
927 VERIFY(mpte
!= NULL
);
928 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
930 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
&&
931 aid
!= mpte
->mpte_associd
) {
936 error
= mptcp_disconnectx(mpte
, aid
, cid
);
942 * User issued close, and wishes to trail through shutdown states.
944 static struct mptses
*
945 mptcp_usrclosed(struct mptses
*mpte
)
947 struct socket
*mp_so
;
951 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
952 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
953 mp_tp
= mpte
->mpte_mptcb
;
956 mptcp_close_fsm(mp_tp
, MPCE_CLOSE
);
958 if (mp_tp
->mpt_state
== MPTCPS_CLOSED
) {
959 mpte
= mptcp_close(mpte
, mp_tp
);
961 } else if (mp_tp
->mpt_state
>= MPTCPS_FIN_WAIT_2
) {
963 soisdisconnected(mp_so
);
964 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
966 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
972 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
974 mpts
->mpts_flags
|= MPTSF_USER_DISCONNECT
;
975 mptcp_subflow_disconnect(mpte
, mpts
, FALSE
);
984 * User-protocol pru_peeloff callback.
987 mptcp_usr_peeloff(struct socket
*mp_so
, sae_associd_t aid
, struct socket
**psop
)
989 struct mppcb
*mpp
= sotomppcb(mp_so
);
993 VERIFY(psop
!= NULL
);
995 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
999 mpte
= mptompte(mpp
);
1000 VERIFY(mpte
!= NULL
);
1002 error
= mptcp_peeloff(mpte
, aid
, psop
);
1008 * Transform a previously connected TCP subflow connection which has
1009 * failed to negotiate MPTCP to its own socket which can be externalized
1010 * with a file descriptor. Valid only when the MPTCP socket is not
1011 * yet associated (MPTCP-level connection has not been established.)
1014 mptcp_peeloff(struct mptses
*mpte
, sae_associd_t aid
, struct socket
**psop
)
1016 struct socket
*so
= NULL
, *mp_so
;
1017 struct mptsub
*mpts
;
1020 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1021 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1023 VERIFY(psop
!= NULL
);
1026 DTRACE_MPTCP3(peeloff
, struct mptses
*, mpte
, sae_associd_t
, aid
,
1027 struct socket
*, mp_so
);
1029 /* peeloff cannot happen after an association is established */
1030 if (mpte
->mpte_associd
!= SAE_ASSOCID_ANY
) {
1035 if (aid
!= SAE_ASSOCID_ANY
&& aid
!= SAE_ASSOCID_ALL
) {
1040 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1042 if (mpts
->mpts_flags
& MPTSF_MP_CAPABLE
) {
1043 panic("%s: so %p is MPTCP capable but mp_so %p "
1044 "aid is %d\n", __func__
, so
, mp_so
,
1045 mpte
->mpte_associd
);
1048 MPTS_ADDREF_LOCKED(mpts
); /* for us */
1049 so
= mpts
->mpts_socket
;
1052 * This subflow socket is about to be externalized; make it
1053 * appear as if it has the same properties as the MPTCP socket,
1054 * undo what's done earlier in mptcp_subflow_add().
1056 mptcp_subflow_sopeeloff(mpte
, mpts
, so
);
1059 mptcp_subflow_del(mpte
, mpts
, FALSE
);
1060 MPTS_REMREF(mpts
); /* ours */
1064 * Here we need to make sure the subflow socket is not
1065 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1066 * and INP_FLOW_SUSPENDED on the subflow socket, since
1067 * we will no longer be monitoring its events.
1078 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1079 "%s: mp_so 0x%llx\n", __func__
,
1080 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
1081 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1088 * After a receive, possibly send some update to peer.
1091 mptcp_usr_rcvd(struct socket
*mp_so
, int flags
)
1093 #pragma unused(flags)
1094 struct mppcb
*mpp
= sotomppcb(mp_so
);
1095 struct mptses
*mpte
;
1098 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1102 mpte
= mptompte(mpp
);
1103 VERIFY(mpte
!= NULL
);
1105 error
= mptcp_output(mpte
);
1111 * Do a send by putting data in the output queue.
1114 mptcp_usr_send(struct socket
*mp_so
, int prus_flags
, struct mbuf
*m
,
1115 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
1117 #pragma unused(nam, p)
1118 struct mppcb
*mpp
= sotomppcb(mp_so
);
1119 struct mptses
*mpte
;
1122 if (prus_flags
& (PRUS_OOB
|PRUS_EOF
)) {
1132 if (control
!= NULL
&& control
->m_len
!= 0) {
1137 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1141 mpte
= mptompte(mpp
);
1142 VERIFY(mpte
!= NULL
);
1144 if (!(mp_so
->so_state
& SS_ISCONNECTED
) &&
1145 (!(mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
))) {
1150 mptcp_insert_dsn(mpp
, m
);
1151 VERIFY(mp_so
->so_snd
.sb_flags
& SB_NOCOMPRESS
);
1152 (void) sbappendstream(&mp_so
->so_snd
, m
);
1156 * XXX: adi@apple.com
1158 * PRUS_MORETOCOME could be set, but we don't check it now.
1160 error
= mptcp_output(mpte
);
1164 if (mp_so
->so_state
& SS_ISCONNECTING
) {
1165 if (mp_so
->so_state
& SS_NBIO
)
1166 error
= EWOULDBLOCK
;
1168 error
= sbwait(&mp_so
->so_snd
);
1175 if (control
!= NULL
)
1182 * Mark the MPTCP connection as being incapable of further output.
1185 mptcp_usr_shutdown(struct socket
*mp_so
)
1187 struct mppcb
*mpp
= sotomppcb(mp_so
);
1188 struct mptses
*mpte
;
1191 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1195 mpte
= mptompte(mpp
);
1196 VERIFY(mpte
!= NULL
);
1198 socantsendmore(mp_so
);
1200 mpte
= mptcp_usrclosed(mpte
);
1202 error
= mptcp_output(mpte
);
1208 * Copy the contents of uio into a properly sized mbuf chain.
1211 mptcp_uiotombuf(struct uio
*uio
, int how
, int space
, uint32_t align
,
1214 struct mbuf
*m
, *mb
, *nm
= NULL
, *mtail
= NULL
;
1215 user_ssize_t resid
, tot
, len
, progress
; /* must be user_ssize_t */
1218 VERIFY(top
!= NULL
&& *top
== NULL
);
1221 * space can be zero or an arbitrary large value bound by
1222 * the total data supplied by the uio.
1224 resid
= uio_resid(uio
);
1226 tot
= imin(resid
, space
);
1231 * The smallest unit is a single mbuf with pkthdr.
1232 * We can't align past it.
1238 * Give us the full allocation or nothing.
1239 * If space is zero return the smallest empty mbuf.
1241 if ((len
= tot
+ align
) == 0)
1244 /* Loop and append maximum sized mbufs to the chain tail. */
1246 uint32_t m_needed
= 1;
1248 if (njcl
> 0 && len
> MBIGCLBYTES
)
1249 mb
= m_getpackets_internal(&m_needed
, 1,
1250 how
, 1, M16KCLBYTES
);
1251 else if (len
> MCLBYTES
)
1252 mb
= m_getpackets_internal(&m_needed
, 1,
1253 how
, 1, MBIGCLBYTES
);
1254 else if (len
>= (signed)MINCLSIZE
)
1255 mb
= m_getpackets_internal(&m_needed
, 1,
1258 mb
= m_gethdr(how
, MT_DATA
);
1260 /* Fail the whole operation if one mbuf can't be allocated. */
1268 VERIFY(mb
->m_flags
& M_PKTHDR
);
1269 len
-= ((mb
->m_flags
& M_EXT
) ? mb
->m_ext
.ext_size
: MHLEN
);
1281 /* Fill all mbufs with uio data and update header information. */
1282 for (mb
= m
; mb
!= NULL
; mb
= mb
->m_next
) {
1283 len
= imin(M_TRAILINGSPACE(mb
), tot
- progress
);
1285 error
= uiomove(mtod(mb
, char *), len
, uio
);
1291 /* each mbuf is M_PKTHDR chained via m_next */
1293 mb
->m_pkthdr
.len
= len
;
1297 VERIFY(progress
== tot
);
1303 * MPTCP socket protocol-user socket send routine, derived from sosend().
1306 mptcp_usr_sosend(struct socket
*mp_so
, struct sockaddr
*addr
, struct uio
*uio
,
1307 struct mbuf
*top
, struct mbuf
*control
, int flags
)
1309 #pragma unused(addr)
1312 int error
, sendflags
;
1313 struct proc
*p
= current_proc();
1316 /* UIO is required for now, due to per-mbuf M_PKTHDR constraints */
1317 if (uio
== NULL
|| top
!= NULL
) {
1321 resid
= uio_resid(uio
);
1323 socket_lock(mp_so
, 1);
1324 so_update_last_owner_locked(mp_so
, p
);
1325 so_update_policy(mp_so
);
1327 VERIFY(mp_so
->so_type
== SOCK_STREAM
);
1328 VERIFY(!(mp_so
->so_flags
& SOF_MP_SUBFLOW
));
1330 if ((flags
& (MSG_OOB
|MSG_DONTROUTE
|MSG_HOLD
|MSG_SEND
|MSG_FLUSH
)) ||
1331 (mp_so
->so_flags
& SOF_ENABLE_MSGS
)) {
1333 socket_unlock(mp_so
, 1);
1338 * In theory resid should be unsigned. However, space must be
1339 * signed, as it might be less than 0 if we over-committed, and we
1340 * must use a signed comparison of space and resid. On the other
1341 * hand, a negative resid causes us to loop sending 0-length
1342 * segments to the protocol.
1344 if (resid
< 0 || (flags
& MSG_EOR
) || control
!= NULL
) {
1346 socket_unlock(mp_so
, 1);
1350 OSIncrementAtomicLong(&p
->p_stats
->p_ru
.ru_msgsnd
);
1353 error
= sosendcheck(mp_so
, NULL
, resid
, 0, 0, flags
,
1358 space
= sbspace(&mp_so
->so_snd
);
1360 socket_unlock(mp_so
, 0);
1362 * Copy the data from userland into an mbuf chain.
1364 error
= mptcp_uiotombuf(uio
, M_WAITOK
, space
, 0, &top
);
1366 socket_lock(mp_so
, 0);
1369 VERIFY(top
!= NULL
);
1370 space
-= resid
- uio_resid(uio
);
1371 resid
= uio_resid(uio
);
1372 socket_lock(mp_so
, 0);
1375 * Compute flags here, for pru_send and NKEs.
1377 sendflags
= (resid
> 0 && space
> 0) ?
1378 PRUS_MORETOCOME
: 0;
1381 * Socket filter processing
1383 VERIFY(control
== NULL
);
1384 error
= sflt_data_out(mp_so
, NULL
, &top
, &control
, 0);
1386 if (error
== EJUSTRETURN
) {
1389 /* always free control if any */
1393 if (control
!= NULL
) {
1399 * Pass data to protocol.
1401 error
= (*mp_so
->so_proto
->pr_usrreqs
->pru_send
)
1402 (mp_so
, sendflags
, top
, NULL
, NULL
, p
);
1407 } while (resid
!= 0 && space
> 0);
1408 } while (resid
!= 0);
1412 sbunlock(&mp_so
->so_snd
, FALSE
); /* will unlock socket */
1414 socket_unlock(mp_so
, 1);
1418 if (control
!= NULL
)
1421 /* clear SOF1_PRECONNECT_DATA after one write */
1422 if (mp_so
->so_flags1
& SOF1_PRECONNECT_DATA
)
1423 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;
1429 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1430 * This routine simply indicates to the caller whether or not to proceed
1431 * further with the given socket option. This is invoked by sosetoptlock()
1432 * and sogetoptlock().
1435 mptcp_usr_socheckopt(struct socket
*mp_so
, struct sockopt
*sopt
)
1437 #pragma unused(mp_so)
1440 VERIFY(sopt
->sopt_level
== SOL_SOCKET
);
1443 * We could check for sopt_dir (set/get) here, but we'll just
1444 * let the caller deal with it as appropriate; therefore the
1445 * following is a superset of the socket options which we
1446 * allow for set/get.
1448 * XXX: adi@apple.com
1450 * Need to consider the following cases:
1452 * a. In the event peeloff(2) occurs on the subflow socket,
1453 * we may want to issue those options which are now
1454 * handled at the MP socket. In that case, we will need
1455 * to record them in mptcp_setopt() so that they can
1456 * be replayed during peeloff.
1458 * b. Certain socket options don't have a clear definition
1459 * on the expected behavior post connect(2). At the time
1460 * those options are issued on the MP socket, there may
1461 * be existing subflow sockets that are already connected.
1463 switch (sopt
->sopt_name
) {
1464 case SO_LINGER
: /* MP */
1465 case SO_LINGER_SEC
: /* MP */
1466 case SO_TYPE
: /* MP */
1467 case SO_NREAD
: /* MP */
1468 case SO_NWRITE
: /* MP */
1469 case SO_ERROR
: /* MP */
1470 case SO_SNDBUF
: /* MP */
1471 case SO_RCVBUF
: /* MP */
1472 case SO_SNDLOWAT
: /* MP */
1473 case SO_RCVLOWAT
: /* MP */
1474 case SO_SNDTIMEO
: /* MP */
1475 case SO_RCVTIMEO
: /* MP */
1476 case SO_NKE
: /* MP */
1477 case SO_NOSIGPIPE
: /* MP */
1478 case SO_NOADDRERR
: /* MP */
1479 case SO_LABEL
: /* MP */
1480 case SO_PEERLABEL
: /* MP */
1481 case SO_DEFUNCTOK
: /* MP */
1482 case SO_ISDEFUNCT
: /* MP */
1483 case SO_TRAFFIC_CLASS_DBG
: /* MP */
1485 * Tell the caller that these options are to be processed.
1489 case SO_DEBUG
: /* MP + subflow */
1490 case SO_KEEPALIVE
: /* MP + subflow */
1491 case SO_USELOOPBACK
: /* MP + subflow */
1492 case SO_RANDOMPORT
: /* MP + subflow */
1493 case SO_TRAFFIC_CLASS
: /* MP + subflow */
1494 case SO_RECV_TRAFFIC_CLASS
: /* MP + subflow */
1495 case SO_PRIVILEGED_TRAFFIC_CLASS
: /* MP + subflow */
1496 case SO_RECV_ANYIF
: /* MP + subflow */
1497 case SO_RESTRICTIONS
: /* MP + subflow */
1498 case SO_FLUSH
: /* MP + subflow */
1499 case SO_MPTCP_FASTJOIN
: /* MP + subflow */
1500 case SO_NOWAKEFROMSLEEP
:
1501 case SO_NOAPNFALLBK
:
1503 * Tell the caller that these options are to be processed;
1504 * these will also be recorded later by mptcp_setopt().
1506 * NOTE: Only support integer option value for now.
1508 if (sopt
->sopt_valsize
!= sizeof (int))
1514 * Tell the caller to stop immediately and return an error.
1516 error
= ENOPROTOOPT
;
1524 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1527 mptcp_setopt_apply(struct mptses
*mpte
, struct mptopt
*mpo
)
1529 struct socket
*mp_so
;
1530 struct mptsub
*mpts
;
1534 /* just bail now if this isn't applicable to subflow sockets */
1535 if (!(mpo
->mpo_flags
& MPOF_SUBFLOW_OK
)) {
1536 error
= ENOPROTOOPT
;
1541 * Skip those that are handled internally; these options
1542 * should not have been recorded and marked with the
1543 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1545 if (mpo
->mpo_level
== SOL_SOCKET
&&
1546 (mpo
->mpo_name
== SO_NOSIGPIPE
|| mpo
->mpo_name
== SO_NOADDRERR
)) {
1547 error
= ENOPROTOOPT
;
1551 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1552 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1555 * Don't bother going further if there's no subflow; mark the option
1556 * with MPOF_INTERIM so that we know whether or not to remove this
1557 * option upon encountering an error while issuing it during subflow
1560 if (mpte
->mpte_numflows
== 0) {
1561 VERIFY(TAILQ_EMPTY(&mpte
->mpte_subflows
));
1562 mpo
->mpo_flags
|= MPOF_INTERIM
;
1563 /* return success */
1567 bzero(&smpo
, sizeof (smpo
));
1568 smpo
.mpo_flags
|= MPOF_SUBFLOW_OK
;
1569 smpo
.mpo_level
= mpo
->mpo_level
;
1570 smpo
.mpo_name
= mpo
->mpo_name
;
1572 /* grab exisiting values in case we need to rollback */
1573 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1577 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1578 mpts
->mpts_oldintval
= 0;
1579 smpo
.mpo_intval
= 0;
1580 VERIFY(mpts
->mpts_socket
!= NULL
);
1581 so
= mpts
->mpts_socket
;
1583 if (mptcp_subflow_sogetopt(mpte
, so
, &smpo
) == 0) {
1584 mpts
->mpts_flags
|= MPTSF_SOPT_OLDVAL
;
1585 mpts
->mpts_oldintval
= smpo
.mpo_intval
;
1587 socket_unlock(so
, 0);
1591 /* apply socket option */
1592 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1596 mpts
->mpts_flags
|= MPTSF_SOPT_INPROG
;
1597 VERIFY(mpts
->mpts_socket
!= NULL
);
1598 so
= mpts
->mpts_socket
;
1600 error
= mptcp_subflow_sosetopt(mpte
, so
, mpo
);
1601 socket_unlock(so
, 0);
1607 /* cleanup, and rollback if needed */
1608 TAILQ_FOREACH(mpts
, &mpte
->mpte_subflows
, mpts_entry
) {
1612 if (!(mpts
->mpts_flags
& MPTSF_SOPT_INPROG
)) {
1613 /* clear in case it's set */
1614 mpts
->mpts_flags
&= ~MPTSF_SOPT_OLDVAL
;
1615 mpts
->mpts_oldintval
= 0;
1619 if (!(mpts
->mpts_flags
& MPTSF_SOPT_OLDVAL
)) {
1620 mpts
->mpts_flags
&= ~MPTSF_SOPT_INPROG
;
1621 VERIFY(mpts
->mpts_oldintval
== 0);
1625 /* error during sosetopt, so roll it back */
1627 VERIFY(mpts
->mpts_socket
!= NULL
);
1628 so
= mpts
->mpts_socket
;
1630 smpo
.mpo_intval
= mpts
->mpts_oldintval
;
1631 (void) mptcp_subflow_sosetopt(mpte
, so
, &smpo
);
1632 socket_unlock(so
, 0);
1634 mpts
->mpts_oldintval
= 0;
1635 mpts
->mpts_flags
&= ~(MPTSF_SOPT_OLDVAL
|MPTSF_SOPT_INPROG
);
1644 * Handle SOPT_SET for socket options issued on MP socket.
1647 mptcp_setopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1649 int error
= 0, optval
, level
, optname
, rec
= 1;
1650 struct mptopt smpo
, *mpo
= NULL
;
1651 struct socket
*mp_so
;
1654 level
= sopt
->sopt_level
;
1655 optname
= sopt
->sopt_name
;
1657 VERIFY(sopt
->sopt_dir
== SOPT_SET
);
1658 VERIFY(level
== SOL_SOCKET
|| level
== IPPROTO_TCP
);
1659 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1660 mp_so
= mpte
->mpte_mppcb
->mpp_socket
;
1663 * Record socket options which are applicable to subflow sockets so
1664 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1665 * for the list of eligible socket-level options.
1667 if (level
== SOL_SOCKET
) {
1671 case SO_USELOOPBACK
:
1673 case SO_TRAFFIC_CLASS
:
1674 case SO_RECV_TRAFFIC_CLASS
:
1675 case SO_PRIVILEGED_TRAFFIC_CLASS
:
1677 case SO_RESTRICTIONS
:
1678 case SO_NOWAKEFROMSLEEP
:
1679 case SO_MPTCP_FASTJOIN
:
1680 case SO_NOAPNFALLBK
:
1684 /* don't record it */
1688 /* nothing to do; just return success */
1694 case TCP_RXT_FINDROP
:
1698 case TCP_CONNECTIONTIMEOUT
:
1699 case TCP_RXT_CONNDROPTIME
:
1700 case PERSIST_TIMEOUT
:
1701 /* eligible; record it */
1703 case TCP_NOTSENT_LOWAT
:
1704 /* record at MPTCP level */
1705 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1714 mp_so
->so_flags
&= ~SOF_NOTSENT_LOWAT
;
1715 error
= mptcp_set_notsent_lowat(mpte
,0);
1717 mp_so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1718 error
= mptcp_set_notsent_lowat(mpte
,
1725 error
= ENOPROTOOPT
;
1730 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
1731 sizeof (optval
))) != 0)
1735 /* search for an existing one; if not found, allocate */
1736 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) == NULL
)
1737 mpo
= mptcp_sopt_alloc(M_WAITOK
);
1742 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1743 "%s: mp_so 0x%llx sopt %s "
1744 "val %d %s\n", __func__
,
1745 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1746 mptcp_sopt2str(level
, optname
, buf
,
1747 sizeof (buf
)), optval
,
1748 (mpo
->mpo_flags
& MPOF_ATTACHED
) ?
1749 "updated" : "recorded"),
1750 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1752 /* initialize or update, as needed */
1753 mpo
->mpo_intval
= optval
;
1754 if (!(mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1755 mpo
->mpo_level
= level
;
1756 mpo
->mpo_name
= optname
;
1757 mptcp_sopt_insert(mpte
, mpo
);
1759 VERIFY(mpo
->mpo_flags
& MPOF_ATTACHED
);
1760 /* this can be issued on the subflow socket */
1761 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1764 bzero(&smpo
, sizeof (smpo
));
1766 mpo
->mpo_flags
|= MPOF_SUBFLOW_OK
;
1767 mpo
->mpo_level
= level
;
1768 mpo
->mpo_name
= optname
;
1769 mpo
->mpo_intval
= optval
;
1771 VERIFY(mpo
== NULL
|| error
== 0);
1773 /* issue this socket option on existing subflows */
1775 error
= mptcp_setopt_apply(mpte
, mpo
);
1776 if (error
!= 0 && (mpo
->mpo_flags
& MPOF_ATTACHED
)) {
1777 VERIFY(mpo
!= &smpo
);
1778 mptcp_sopt_remove(mpte
, mpo
);
1779 mptcp_sopt_free(mpo
);
1782 mpo
->mpo_flags
&= ~MPOF_INTERIM
;
1785 if (error
== 0 && mpo
!= NULL
) {
1786 mptcplog((LOG_ERR
, "MPTCP Socket: "
1787 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1788 __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1789 mptcp_sopt2str(level
, optname
, buf
,
1790 sizeof (buf
)), optval
, (mpo
->mpo_flags
& MPOF_INTERIM
) ?
1791 "pending" : "successful"),
1792 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1793 } else if (error
!= 0) {
1794 mptcplog((LOG_ERR
, "MPTCP Socket: "
1795 "%s: mp_so 0x%llx sopt %s can't be issued "
1796 "error %d\n", __func__
,
1797 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
), mptcp_sopt2str(level
,
1798 optname
, buf
, sizeof (buf
)), error
),
1799 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
1805 * Handle SOPT_GET for socket options issued on MP socket.
1808 mptcp_getopt(struct mptses
*mpte
, struct sockopt
*sopt
)
1810 int error
= 0, optval
;
1812 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1813 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1816 * We only handle SOPT_GET for TCP level socket options; we should
1817 * not get here for socket level options since they are already
1818 * handled at the socket layer.
1820 if (sopt
->sopt_level
!= IPPROTO_TCP
) {
1821 error
= ENOPROTOOPT
;
1825 switch (sopt
->sopt_name
) {
1827 case TCP_RXT_FINDROP
:
1831 case TCP_CONNECTIONTIMEOUT
:
1832 case TCP_RXT_CONNDROPTIME
:
1833 case PERSIST_TIMEOUT
:
1834 case TCP_NOTSENT_LOWAT
:
1835 /* eligible; get the default value just in case */
1836 error
= mptcp_default_tcp_optval(mpte
, sopt
, &optval
);
1840 error
= ENOPROTOOPT
;
1844 switch (sopt
->sopt_name
) {
1845 case TCP_NOTSENT_LOWAT
:
1846 if (mpte
->mpte_mppcb
->mpp_socket
->so_flags
& SOF_NOTSENT_LOWAT
)
1847 optval
= mptcp_get_notsent_lowat(mpte
);
1854 * Search for a previously-issued TCP level socket option and
1855 * return the recorded option value. This assumes that the
1856 * value did not get modified by the lower layer after it was
1857 * issued at setsockopt(2) time. If not found, we'll return
1858 * the default value obtained ealier.
1863 if ((mpo
= mptcp_sopt_find(mpte
, sopt
)) != NULL
)
1864 optval
= mpo
->mpo_intval
;
1866 error
= sooptcopyout(sopt
, &optval
, sizeof (int));
1873 * Return default values for TCP socket options. Ideally we would query the
1874 * subflow TCP socket, but that requires creating a subflow socket before
1875 * connectx(2) time. To simplify things, just return the default values
1879 mptcp_default_tcp_optval(struct mptses
*mpte
, struct sockopt
*sopt
, int *optval
)
1883 VERIFY(sopt
->sopt_level
== IPPROTO_TCP
);
1884 VERIFY(sopt
->sopt_dir
== SOPT_GET
);
1885 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1887 /* try to do what tcp_newtcpcb() does */
1888 switch (sopt
->sopt_name
) {
1890 case TCP_RXT_FINDROP
:
1893 case TCP_CONNECTIONTIMEOUT
:
1894 case TCP_RXT_CONNDROPTIME
:
1895 case TCP_NOTSENT_LOWAT
:
1900 *optval
= mptcp_subflow_keeptime
;
1903 case PERSIST_TIMEOUT
:
1904 *optval
= tcp_max_persist_timeout
;
1908 error
= ENOPROTOOPT
;
1915 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1916 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1917 * to those that are allowed by mptcp_usr_socheckopt().
1920 mptcp_ctloutput(struct socket
*mp_so
, struct sockopt
*sopt
)
1922 struct mppcb
*mpp
= sotomppcb(mp_so
);
1923 struct mptses
*mpte
;
1926 if (mpp
== NULL
|| mpp
->mpp_state
== MPPCB_STATE_DEAD
) {
1930 mpte
= mptompte(mpp
);
1931 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
1933 /* we only handle socket and TCP-level socket options for MPTCP */
1934 if (sopt
->sopt_level
!= SOL_SOCKET
&& sopt
->sopt_level
!= IPPROTO_TCP
) {
1936 mptcplog((LOG_DEBUG
, "MPTCP Socket: "
1937 "%s: mp_so 0x%llx sopt %s level not "
1938 "handled\n", __func__
, (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
),
1939 mptcp_sopt2str(sopt
->sopt_level
,
1940 sopt
->sopt_name
, buf
, sizeof (buf
))),
1941 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_LOG
);
1946 switch (sopt
->sopt_dir
) {
1948 error
= mptcp_setopt(mpte
, sopt
);
1952 error
= mptcp_getopt(mpte
, sopt
);
1960 * Return a string representation of <sopt_level,sopt_name>
1963 mptcp_sopt2str(int level
, int optname
, char *dst
, int size
)
1965 char lbuf
[32], obuf
[32];
1966 const char *l
= lbuf
, *o
= obuf
;
1968 (void) snprintf(lbuf
, sizeof (lbuf
), "0x%x", level
);
1969 (void) snprintf(obuf
, sizeof (obuf
), "0x%x", optname
);
1979 o
= "SO_LINGER_SEC";
1987 case SO_USELOOPBACK
:
1988 o
= "SO_USELOOPBACK";
2029 case SO_RESTRICTIONS
:
2030 o
= "SO_RESTRICTIONS";
2039 o
= "SO_RANDOMPORT";
2041 case SO_TRAFFIC_CLASS
:
2042 o
= "SO_TRAFFIC_CLASS";
2044 case SO_RECV_TRAFFIC_CLASS
:
2045 o
= "SO_RECV_TRAFFIC_CLASS";
2047 case SO_TRAFFIC_CLASS_DBG
:
2048 o
= "SO_TRAFFIC_CLASS_DBG";
2050 case SO_PRIVILEGED_TRAFFIC_CLASS
:
2051 o
= "SO_PRIVILEGED_TRAFFIC_CLASS";
2059 case SO_OPPORTUNISTIC
:
2060 o
= "SO_OPPORTUNISTIC";
2066 o
= "SO_RECV_ANYIF";
2068 case SO_NOWAKEFROMSLEEP
:
2069 o
= "SO_NOWAKEFROMSLEEP";
2071 case SO_MPTCP_FASTJOIN
:
2072 o
= "SO_MPTCP_FASTJOIN";
2074 case SO_NOAPNFALLBK
:
2075 o
= "SO_NOAPNFALLBK";
2083 o
= "TCP_KEEPALIVE";
2086 o
= "TCP_KEEPINTVL";
2091 case TCP_CONNECTIONTIMEOUT
:
2092 o
= "TCP_CONNECTIONTIMEOUT";
2094 case TCP_RXT_CONNDROPTIME
:
2095 o
= "TCP_RXT_CONNDROPTIME";
2097 case PERSIST_TIMEOUT
:
2098 o
= "PERSIST_TIMEOUT";
2104 (void) snprintf(dst
, size
, "<%s,%s>", l
, o
);
2109 mptcp_usr_preconnect(struct socket
*mp_so
)
2111 struct mptsub
*mpts
= NULL
;
2112 struct mppcb
*mpp
= sotomppcb(mp_so
);
2113 struct mptses
*mpte
;
2115 struct tcpcb
*tp
= NULL
;
2117 mpte
= mptompte(mpp
);
2118 VERIFY(mpte
!= NULL
);
2119 MPTE_LOCK_ASSERT_HELD(mpte
); /* same as MP socket lock */
2121 mpts
= mptcp_get_subflow(mpte
, NULL
, NULL
);
2123 mptcplog((LOG_ERR
, "MPTCP Socket: "
2124 "%s: mp_so 0x%llx invalid preconnect ", __func__
,
2125 (u_int64_t
)VM_KERNEL_ADDRPERM(mp_so
)),
2126 MPTCP_SOCKET_DBG
, MPTCP_LOGLVL_ERR
);
2130 mpts
->mpts_flags
&= ~MPTSF_TFO_REQD
;
2131 so
= mpts
->mpts_socket
;
2133 tp
= intotcpcb(sotoinpcb(so
));
2134 tp
->t_mpflags
&= ~TMPF_TFO_REQUEST
;
2135 int error
= tcp_output(sototcpcb(so
));
2136 socket_unlock(so
, 0);
2138 mp_so
->so_flags1
&= ~SOF1_PRECONNECT_DATA
;