]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/mptcp_usrreq.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp_usrreq.c
CommitLineData
39236c6e 1/*
3e170ce0 2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32#include <sys/socket.h>
33#include <sys/socketvar.h>
34#include <sys/protosw.h>
35#include <sys/mcache.h>
36#include <sys/syslog.h>
37#include <sys/proc.h>
38#include <sys/proc_internal.h>
39#include <sys/resourcevar.h>
40
41#include <net/if.h>
42#include <netinet/in.h>
43#include <netinet/in_var.h>
44#include <netinet/tcp.h>
45#include <netinet/tcp_fsm.h>
46#include <netinet/tcp_seq.h>
47#include <netinet/tcp_var.h>
48#include <netinet/tcp_timer.h>
49#include <netinet/mptcp_var.h>
50#include <netinet/mptcp_timer.h>
51
52#include <mach/sdt.h>
53
54static int mptcp_usr_attach(struct socket *, int, struct proc *);
55static int mptcp_usr_detach(struct socket *);
56static int mptcp_attach(struct socket *, struct proc *);
57static int mptcp_detach(struct socket *, struct mppcb *);
58static int mptcp_connectx(struct mptses *, struct sockaddr_list **,
3e170ce0
A
59 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
60 sae_connid_t *, uint32_t, void *, uint32_t);
39236c6e 61static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **,
3e170ce0
A
62 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
39236c6e 64static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
3e170ce0 65static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
39236c6e 66 user_addr_t);
3e170ce0 67static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
39236c6e
A
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
71 struct proc *);
3e170ce0 72static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t);
fe8ab488 73static int mptcp_usr_disconnect(struct socket *);
3e170ce0 74static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
39236c6e 75static struct mptses *mptcp_usrclosed(struct mptses *);
3e170ce0
A
76static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **);
77static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **);
39236c6e
A
78static int mptcp_usr_rcvd(struct socket *, int);
79static int mptcp_usr_send(struct socket *, int, struct mbuf *,
80 struct sockaddr *, struct mbuf *, struct proc *);
81static int mptcp_usr_shutdown(struct socket *);
82static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **);
83static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
84 struct mbuf *, struct mbuf *, int);
85static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
86static int mptcp_setopt_apply(struct mptses *, struct mptopt *);
87static int mptcp_setopt(struct mptses *, struct sockopt *);
88static int mptcp_getopt(struct mptses *, struct sockopt *);
89static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *);
90static void mptcp_connorder_helper(struct mptsub *mpts);
490019cf 91static int mptcp_usr_preconnect(struct socket *so);
39236c6e
A
92
93struct pr_usrreqs mptcp_usrreqs = {
94 .pru_attach = mptcp_usr_attach,
95 .pru_connectx = mptcp_usr_connectx,
96 .pru_control = mptcp_usr_control,
97 .pru_detach = mptcp_usr_detach,
fe8ab488 98 .pru_disconnect = mptcp_usr_disconnect,
39236c6e
A
99 .pru_disconnectx = mptcp_usr_disconnectx,
100 .pru_peeloff = mptcp_usr_peeloff,
101 .pru_rcvd = mptcp_usr_rcvd,
102 .pru_send = mptcp_usr_send,
103 .pru_shutdown = mptcp_usr_shutdown,
104 .pru_sosend = mptcp_usr_sosend,
105 .pru_soreceive = soreceive,
106 .pru_socheckopt = mptcp_usr_socheckopt,
490019cf 107 .pru_preconnect = mptcp_usr_preconnect,
39236c6e
A
108};
109
490019cf
A
110/*
111 * Sysctl for testing and tuning mptcp connectx with data api.
112 * Mirrors tcp_preconnect_sbspace for now.
113 */
114#define MPTCP_PRECONNECT_SBSZ_MAX 1460
115#define MPTCP_PRECONNECT_SBSZ_MIN (TCP_MSS)
116#define MPTCP_PRECONNECT_SBSZ_DEF (TCP6_MSS)
117static int mptcp_preconnect_sbspace = MPTCP_PRECONNECT_SBSZ_DEF;
118SYSCTL_INT(_net_inet_mptcp, OID_AUTO, mp_preconn_sbsz, CTLFLAG_RW | CTLFLAG_LOCKED,
119 &mptcp_preconnect_sbspace, 0, "Maximum preconnect space");
120
121
39236c6e
A
122/*
123 * Attaches an MPTCP control block to a socket.
124 */
125static int
126mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
127{
128#pragma unused(proto)
129 int error;
130
131 VERIFY(sotomppcb(mp_so) == NULL);
132
133 error = mptcp_attach(mp_so, p);
134 if (error != 0)
135 goto out;
136 /*
137 * XXX: adi@apple.com
138 *
139 * Might want to use a different SO_LINGER timeout than TCP's?
140 */
141 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0)
142 mp_so->so_linger = TCP_LINGERTIME * hz;
143out:
144 return (error);
145}
146
147/*
148 * Detaches an MPTCP control block from a socket.
149 */
150static int
151mptcp_usr_detach(struct socket *mp_so)
152{
153 struct mppcb *mpp = sotomppcb(mp_so);
154 int error = 0;
155
156 VERIFY(mpp != NULL);
157 VERIFY(mpp->mpp_socket != NULL);
158
159 error = mptcp_detach(mp_so, mpp);
160 return (error);
161}
162
163/*
164 * Attach MPTCP protocol to socket, allocating MP control block,
165 * MPTCP session, control block, buffer space, etc.
166 */
167static int
168mptcp_attach(struct socket *mp_so, struct proc *p)
169{
170#pragma unused(p)
3e170ce0
A
171 struct mptses *mpte = NULL;
172 struct mptcb *mp_tp = NULL;
173 struct mppcb *mpp = NULL;
39236c6e
A
174 int error = 0;
175
176 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
177 error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX);
178 if (error != 0)
179 goto out;
180 }
181
490019cf
A
182 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
183 soreserve_preconnect(mp_so, imin(MPTCP_PRECONNECT_SBSZ_MAX,
184 imax(mptcp_preconnect_sbspace, MPTCP_PRECONNECT_SBSZ_MIN)));
185 }
186
39236c6e
A
187 /*
188 * MPTCP socket buffers cannot be compressed, due to the
189 * fact that each mbuf chained via m_next is a M_PKTHDR
190 * which carries some MPTCP metadata.
191 */
192 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
193 mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS;
194
195 /* Disable socket buffer auto-tuning. */
196 mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
197 mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE;
198
3e170ce0 199 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
39236c6e 200 goto out;
3e170ce0 201 }
39236c6e
A
202
203 mpp = sotomppcb(mp_so);
204 VERIFY(mpp != NULL);
3e170ce0
A
205 mpte = (struct mptses *)mpp->mpp_pcbe;
206 VERIFY(mpte != NULL);
39236c6e
A
207 mp_tp = mpte->mpte_mptcb;
208 VERIFY(mp_tp != NULL);
39236c6e
A
209out:
210 return (error);
211}
212
213/*
214 * Called when the socket layer loses its final reference to the socket;
215 * at this point, there is only one case in which we will keep things
216 * around: time wait.
217 */
218static int
219mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
220{
221 struct mptses *mpte;
222 struct mppcbinfo *mppi;
223
224 VERIFY(mp_so->so_pcb == mpp);
225 VERIFY(mpp->mpp_socket == mp_so);
226
227 mppi = mpp->mpp_pcbinfo;
228 VERIFY(mppi != NULL);
229
3e170ce0 230 __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
39236c6e
A
231 VERIFY(mpte->mpte_mppcb == mpp);
232
233 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
234
235 /*
236 * We are done with this MPTCP socket (it has been closed);
237 * trigger all subflows to be disconnected, if not already,
238 * by initiating the PCB detach sequence (SOF_PCBCLEARING
239 * will be set.)
240 */
241 mp_pcbdetach(mpp);
242
3e170ce0 243 (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
39236c6e
A
244
245 /*
246 * XXX: adi@apple.com
247 *
248 * Here, we would want to handle time wait state.
249 */
250
251 return (0);
252}
253
254/*
255 * Common subroutine to open a MPTCP connection to one of the remote hosts
256 * specified by dst_sl. This includes allocating and establishing a
257 * subflow TCP connection, either initially to establish MPTCP connection,
258 * or to join an existing one. Returns a connection handle upon success.
259 */
260static int
261mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl,
262 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0 263 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
39236c6e
A
264 uint32_t arglen)
265{
266#pragma unused(p, aid, flags, arg, arglen)
267 struct mptsub *mpts;
268 struct socket *mp_so;
269 int error = 0;
270
271 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
272 mp_so = mpte->mpte_mppcb->mpp_socket;
273
274 VERIFY(dst_sl != NULL && *dst_sl != NULL);
275 VERIFY(pcid != NULL);
276
3e170ce0
A
277 mptcplog((LOG_DEBUG, "MPTCP Socket: "
278 "%s: mp_so 0x%llx\n", __func__,
279 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
280 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 281
3e170ce0 282 DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid,
39236c6e
A
283 struct socket *, mp_so);
284
285 mpts = mptcp_subflow_alloc(M_WAITOK);
286 if (mpts == NULL) {
287 error = ENOBUFS;
288 goto out;
289 }
290 MPTS_ADDREF(mpts); /* for this routine */
291
292 if (src_sl != NULL) {
293 mpts->mpts_src_sl = *src_sl;
294 *src_sl = NULL;
295 }
296 mpts->mpts_dst_sl = *dst_sl;
297 *dst_sl = NULL;
298
299 error = mptcp_subflow_add(mpte, mpts, p, ifscope);
300 if (error == 0 && pcid != NULL)
301 *pcid = mpts->mpts_connid;
302
303out:
304 if (mpts != NULL) {
305 if ((error != 0) && (error != EWOULDBLOCK)) {
306 MPTS_LOCK(mpts);
307 if (mpts->mpts_flags & MPTSF_ATTACHED) {
308 MPTS_UNLOCK(mpts);
309 MPTS_REMREF(mpts);
310 mptcp_subflow_del(mpte, mpts, TRUE);
311 return (error);
312 }
313 MPTS_UNLOCK(mpts);
314 }
315 MPTS_REMREF(mpts);
316 }
317
318 return (error);
319}
320
321/*
322 * User-protocol pru_connectx callback.
323 */
324static int
325mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl,
326 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0 327 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
490019cf 328 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
39236c6e 329{
39236c6e 330 struct mppcb *mpp = sotomppcb(mp_so);
3e170ce0
A
331 struct mptses *mpte = NULL;
332 struct mptcb *mp_tp = NULL;
490019cf 333 user_ssize_t datalen;
3e170ce0 334
39236c6e
A
335 int error = 0;
336
337 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
338 error = EINVAL;
339 goto out;
340 }
341 mpte = mptompte(mpp);
342 VERIFY(mpte != NULL);
343
3e170ce0
A
344 mp_tp = mpte->mpte_mptcb;
345 VERIFY(mp_tp != NULL);
346
347 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
348 error = EINVAL;
349 goto out;
350 }
351
39236c6e
A
352 error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope,
353 aid, pcid, flags, arg, arglen);
490019cf
A
354
355 /* If there is data, copy it */
356 if (auio != NULL) {
357 datalen = uio_resid(auio);
358 socket_unlock(mp_so, 0);
359 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
360 (uio_t) auio, NULL, NULL, 0);
361 /* check if this can be supported with fast Join also. XXX */
362 if (error == 0 || error == EWOULDBLOCK)
363 *bytes_written = datalen - uio_resid(auio);
364
365 if (error == EWOULDBLOCK)
366 error = EINPROGRESS;
367
368 socket_lock(mp_so, 0);
369 MPT_LOCK(mp_tp);
370 if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) {
371 *bytes_written = datalen - uio_resid(auio);
372 /*
373 * Override errors like EPIPE that occur as
374 * a result of doing TFO during TCP fallback.
375 */
376 error = EPROTO;
377 }
378 MPT_UNLOCK(mp_tp);
379 }
380
39236c6e
A
381out:
382 return (error);
383}
384
385/*
386 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
387 */
388static int
389mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
390{
391 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
392
393 /* MPTCP has at most 1 association */
3e170ce0 394 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
39236c6e
A
395
396 /* just asking how many there are? */
397 if (aidp == USER_ADDR_NULL)
398 return (0);
399
400 return (copyout(&mpte->mpte_associd, aidp,
401 sizeof (mpte->mpte_associd)));
402}
403
404/*
405 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
406 */
407static int
3e170ce0 408mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
39236c6e
A
409 user_addr_t cidp)
410{
411 struct mptsub *mpts;
412 int error = 0;
413
414 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
415
3e170ce0 416 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
39236c6e
A
417 aid != mpte->mpte_associd)
418 return (EINVAL);
419
420 *cnt = mpte->mpte_numflows;
421
422 /* just asking how many there are? */
423 if (cidp == USER_ADDR_NULL)
424 return (0);
425
426 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
427 if ((error = copyout(&mpts->mpts_connid, cidp,
428 sizeof (mpts->mpts_connid))) != 0)
429 break;
430
431 cidp += sizeof (mpts->mpts_connid);
432 }
433
434 return (error);
435}
436
437/*
438 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
439 */
440static int
3e170ce0 441mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
39236c6e
A
442 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
443 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
444 user_addr_t aux_data, uint32_t *aux_len)
445{
446#pragma unused(aux_data)
447 struct sockaddr_entry *se;
448 struct ifnet *ifp = NULL;
449 struct mptsub *mpts;
450 int error = 0;
451
452 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
453
3e170ce0 454 if (*cid == SAE_CONNID_ALL)
39236c6e
A
455 return (EINVAL);
456
457 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
3e170ce0 458 if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY)
39236c6e
A
459 break;
460 }
461 if (mpts == NULL)
3e170ce0 462 return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL);
39236c6e
A
463
464 MPTS_LOCK(mpts);
465 ifp = mpts->mpts_outif;
466 *cid = mpts->mpts_connid;
467 *ifindex = ((ifp != NULL) ? ifp->if_index : 0);
468 *soerror = mpts->mpts_soerror;
469 *flags = 0;
470 if (mpts->mpts_flags & MPTSF_CONNECTING)
471 *flags |= CIF_CONNECTING;
472 if (mpts->mpts_flags & MPTSF_CONNECTED)
473 *flags |= CIF_CONNECTED;
474 if (mpts->mpts_flags & MPTSF_DISCONNECTING)
475 *flags |= CIF_DISCONNECTING;
476 if (mpts->mpts_flags & MPTSF_DISCONNECTED)
477 *flags |= CIF_DISCONNECTED;
478 if (mpts->mpts_flags & MPTSF_BOUND_IF)
479 *flags |= CIF_BOUND_IF;
480 if (mpts->mpts_flags & MPTSF_BOUND_IP)
481 *flags |= CIF_BOUND_IP;
482 if (mpts->mpts_flags & MPTSF_BOUND_PORT)
483 *flags |= CIF_BOUND_PORT;
484 if (mpts->mpts_flags & MPTSF_PREFERRED)
485 *flags |= CIF_PREFERRED;
486 if (mpts->mpts_flags & MPTSF_MP_CAPABLE)
487 *flags |= CIF_MP_CAPABLE;
488 if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
489 *flags |= CIF_MP_DEGRADED;
490 if (mpts->mpts_flags & MPTSF_MP_READY)
491 *flags |= CIF_MP_READY;
492 if (mpts->mpts_flags & MPTSF_ACTIVE)
493 *flags |= CIF_MP_ACTIVE;
494
495 VERIFY(mpts->mpts_src_sl != NULL);
496 se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
497 VERIFY(se != NULL && se->se_addr != NULL);
498 *src_len = se->se_addr->sa_len;
499 if (src != USER_ADDR_NULL) {
500 error = copyout(se->se_addr, src, se->se_addr->sa_len);
501 if (error != 0)
502 goto out;
503 }
504
505 VERIFY(mpts->mpts_dst_sl != NULL);
506 se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
507 VERIFY(se != NULL && se->se_addr != NULL);
508 *dst_len = se->se_addr->sa_len;
509 if (dst != USER_ADDR_NULL) {
510 error = copyout(se->se_addr, dst, se->se_addr->sa_len);
511 if (error != 0)
512 goto out;
513 }
514
515 *aux_type = 0;
516 *aux_len = 0;
517 if (mpts->mpts_socket != NULL) {
518 struct conninfo_tcp tcp_ci;
519
520 *aux_type = CIAUX_TCP;
521 *aux_len = sizeof (tcp_ci);
522
523 if (aux_data != USER_ADDR_NULL) {
524 struct socket *so = mpts->mpts_socket;
525
526 VERIFY(SOCK_PROTO(so) == IPPROTO_TCP);
527 bzero(&tcp_ci, sizeof (tcp_ci));
528 socket_lock(so, 0);
529 tcp_getconninfo(so, &tcp_ci);
530 socket_unlock(so, 0);
531 error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci));
532 if (error != 0)
533 goto out;
534 }
535 }
3e170ce0
A
536 mptcplog((LOG_DEBUG, "MPTCP Socket: "
537 "%s: cid %d flags %x \n",
538 __func__, mpts->mpts_connid, mpts->mpts_flags),
539 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
540
39236c6e
A
541out:
542 MPTS_UNLOCK(mpts);
543 return (error);
544}
545
546/*
547 * Handle SIOCSCONNORDER
548 */
549int
3e170ce0 550mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank)
39236c6e
A
551{
552 struct mptsub *mpts, *mpts1;
553 int error = 0;
554
555 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
3e170ce0
A
556 mptcplog((LOG_DEBUG, "MPTCP Socket: "
557 "%s: cid %d rank %d \n", __func__, cid, rank),
558 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 559
3e170ce0 560 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
561 error = EINVAL;
562 goto out;
563 }
564
565 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
566 if (mpts->mpts_connid == cid)
567 break;
568 }
569 if (mpts == NULL) {
570 error = ENXIO;
571 goto out;
572 }
573
574 if (rank == 0 || rank > 1) {
575 /*
576 * If rank is 0, determine whether this should be the
577 * primary or backup subflow, depending on what we have.
578 *
579 * Otherwise, if greater than 0, make it a backup flow.
580 */
581 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
582 MPTS_LOCK(mpts1);
583 if (mpts1->mpts_flags & MPTSF_PREFERRED) {
584 MPTS_UNLOCK(mpts1);
585 break;
586 }
587 MPTS_UNLOCK(mpts1);
588 }
589
590 MPTS_LOCK(mpts);
591 mpts->mpts_flags &= ~MPTSF_PREFERRED;
592 mpts->mpts_rank = rank;
593 if (mpts1 != NULL && mpts != mpts1) {
594 /* preferred subflow found; set rank as necessary */
595 if (rank == 0)
596 mpts->mpts_rank = (mpts1->mpts_rank + 1);
597 } else if (rank == 0) {
598 /* no preferred one found; promote this */
599 rank = 1;
600 }
601 MPTS_UNLOCK(mpts);
602 }
603
604 if (rank == 1) {
605 /*
606 * If rank is 1, promote this subflow to be preferred.
607 */
608 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
609 MPTS_LOCK(mpts1);
610 if (mpts1 != mpts &&
611 (mpts1->mpts_flags & MPTSF_PREFERRED)) {
612 mpts1->mpts_flags &= ~MPTSF_PREFERRED;
fe8ab488 613 if (mpte->mpte_nummpcapflows > 1)
39236c6e
A
614 mptcp_connorder_helper(mpts1);
615 } else if (mpts1 == mpts) {
616 mpts1->mpts_rank = 1;
617 if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) {
618 mpts1->mpts_flags |= MPTSF_PREFERRED;
619 if (mpte->mpte_nummpcapflows > 1)
620 mptcp_connorder_helper(mpts1);
621 }
622 }
623 MPTS_UNLOCK(mpts1);
624 }
625 }
626
627out:
628 return (error);
629}
630
631static void
632mptcp_connorder_helper(struct mptsub *mpts)
633{
634 struct socket *so = mpts->mpts_socket;
635 struct tcpcb *tp = NULL;
636
637 socket_lock(so, 0);
638
639 tp = intotcpcb(sotoinpcb(so));
640 tp->t_mpflags |= TMPF_SND_MPPRIO;
641 if (mpts->mpts_flags & MPTSF_PREFERRED)
642 tp->t_mpflags &= ~TMPF_BACKUP_PATH;
643 else
644 tp->t_mpflags |= TMPF_BACKUP_PATH;
3e170ce0 645
39236c6e
A
646 socket_unlock(so, 0);
647
648}
649
650/*
651 * Handle SIOCSGONNORDER
652 */
653int
3e170ce0 654mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank)
39236c6e
A
655{
656 struct mptsub *mpts;
657 int error = 0;
658
659 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
660 VERIFY(rank != NULL);
661 *rank = 0;
662
3e170ce0 663 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
664 error = EINVAL;
665 goto out;
666 }
667
668 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
669 if (mpts->mpts_connid == cid)
670 break;
671 }
672 if (mpts == NULL) {
673 error = ENXIO;
674 goto out;
675 }
676
677 MPTS_LOCK(mpts);
678 *rank = mpts->mpts_rank;
679 MPTS_UNLOCK(mpts);
680out:
681 return (error);
682}
683
684/*
685 * User-protocol pru_control callback.
686 */
687static int
688mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
689 struct ifnet *ifp, struct proc *p)
690{
691#pragma unused(ifp, p)
692 struct mppcb *mpp = sotomppcb(mp_so);
693 struct mptses *mpte;
694 int error = 0;
695
696 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
697 error = EINVAL;
698 goto out;
699 }
700 mpte = mptompte(mpp);
701 VERIFY(mpte != NULL);
702
703 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
704
705 switch (cmd) {
706 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
707 struct so_aidreq32 aidr;
708 bcopy(data, &aidr, sizeof (aidr));
709 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
710 aidr.sar_aidp);
711 if (error == 0)
712 bcopy(&aidr, data, sizeof (aidr));
713 break;
714 }
715
716 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
717 struct so_aidreq64 aidr;
718 bcopy(data, &aidr, sizeof (aidr));
719 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
720 aidr.sar_aidp);
721 if (error == 0)
722 bcopy(&aidr, data, sizeof (aidr));
723 break;
724 }
725
726 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
727 struct so_cidreq32 cidr;
728 bcopy(data, &cidr, sizeof (cidr));
729 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
730 cidr.scr_cidp);
731 if (error == 0)
732 bcopy(&cidr, data, sizeof (cidr));
733 break;
734 }
735
736 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
737 struct so_cidreq64 cidr;
738 bcopy(data, &cidr, sizeof (cidr));
739 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
740 cidr.scr_cidp);
741 if (error == 0)
742 bcopy(&cidr, data, sizeof (cidr));
743 break;
744 }
745
746 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
747 struct so_cinforeq32 cifr;
748 bcopy(data, &cifr, sizeof (cifr));
749 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
750 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
751 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
752 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
753 &cifr.scir_aux_len);
754 if (error == 0)
755 bcopy(&cifr, data, sizeof (cifr));
756 break;
757 }
758
759 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
760 struct so_cinforeq64 cifr;
761 bcopy(data, &cifr, sizeof (cifr));
762 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
763 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
764 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
765 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
766 &cifr.scir_aux_len);
767 if (error == 0)
768 bcopy(&cifr, data, sizeof (cifr));
769 break;
770 }
771
772 case SIOCSCONNORDER: { /* struct so_cordreq */
773 struct so_cordreq cor;
774 bcopy(data, &cor, sizeof (cor));
775 error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank);
776 if (error == 0)
777 bcopy(&cor, data, sizeof (cor));
778 break;
779 }
780
781 case SIOCGCONNORDER: { /* struct so_cordreq */
782 struct so_cordreq cor;
783 bcopy(data, &cor, sizeof (cor));
784 error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank);
785 if (error == 0)
786 bcopy(&cor, data, sizeof (cor));
787 break;
788 }
789
790 default:
791 error = EOPNOTSUPP;
792 break;
793 }
794out:
795 return (error);
796}
797
798/*
799 * Initiate a disconnect. MPTCP-level disconnection is specified by
800 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
801 * connection while keeping the MPTCP-level connection (association).
802 */
803static int
3e170ce0 804mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
805{
806 struct mptsub *mpts;
807 struct socket *mp_so;
808 struct mptcb *mp_tp;
809 int error = 0;
810
811 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
812
813 mp_so = mpte->mpte_mppcb->mpp_socket;
814 mp_tp = mpte->mpte_mptcb;
815
3e170ce0
A
816 mptcplog((LOG_DEBUG, "MPTCP Socket: "
817 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
818 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error),
819 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e 820
3e170ce0
A
821 DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid,
822 sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
823
824 VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL ||
39236c6e
A
825 aid == mpte->mpte_associd);
826
827 /* terminate the association? */
3e170ce0 828 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
829 /* if we're not detached, go thru socket state checks */
830 if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
831 if (!(mp_so->so_state & (SS_ISCONNECTED|
832 SS_ISCONNECTING))) {
833 error = ENOTCONN;
834 goto out;
835 }
836 if (mp_so->so_state & SS_ISDISCONNECTING) {
837 error = EALREADY;
838 goto out;
839 }
840 }
841 MPT_LOCK(mp_tp);
842 mptcp_cancel_all_timers(mp_tp);
843 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
844 (void) mptcp_close(mpte, mp_tp);
845 MPT_UNLOCK(mp_tp);
846 } else if ((mp_so->so_options & SO_LINGER) &&
847 mp_so->so_linger == 0) {
848 (void) mptcp_drop(mpte, mp_tp, 0);
849 MPT_UNLOCK(mp_tp);
850 } else {
851 MPT_UNLOCK(mp_tp);
852 soisdisconnecting(mp_so);
853 sbflush(&mp_so->so_rcv);
854 if (mptcp_usrclosed(mpte) != NULL)
855 (void) mptcp_output(mpte);
856 }
857 } else {
3e170ce0
A
858 bool disconnect_embryonic_subflows = false;
859 struct socket *so = NULL;
860
39236c6e
A
861 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
862 if (mpts->mpts_connid != cid)
863 continue;
3e170ce0 864
39236c6e 865 MPTS_LOCK(mpts);
3e170ce0
A
866 /*
867 * Check if disconnected subflow is the one used
868 * to initiate MPTCP connection.
869 * If it is and the connection is not yet join ready
870 * disconnect all other subflows.
871 */
872 so = mpts->mpts_socket;
873 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) &&
874 so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
875 disconnect_embryonic_subflows = true;
876 }
877
fe8ab488 878 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
39236c6e
A
879 mptcp_subflow_disconnect(mpte, mpts, FALSE);
880 MPTS_UNLOCK(mpts);
881 break;
882 }
883
884 if (mpts == NULL) {
885 error = EINVAL;
886 goto out;
887 }
3e170ce0
A
888
889 if (disconnect_embryonic_subflows) {
890 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
891 if (mpts->mpts_connid == cid)
892 continue;
893 MPTS_LOCK(mpts);
894 mptcp_subflow_disconnect(mpte, mpts, TRUE);
895 MPTS_UNLOCK(mpts);
896 }
897 }
39236c6e
A
898 }
899
900 if (error == 0)
901 mptcp_thread_signal(mpte);
902
903 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
904 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
905 /* the socket has been shutdown, no more sockopt's */
906 mptcp_flush_sopts(mpte);
907 }
908
909out:
910 return (error);
911}
912
fe8ab488
A
913/*
914 * Wrapper function to support disconnect on socket
915 */
916static int
917mptcp_usr_disconnect(struct socket *mp_so)
918{
919 int error = 0;
920
3e170ce0 921 error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
fe8ab488
A
922 return (error);
923}
924
39236c6e
A
925/*
926 * User-protocol pru_disconnectx callback.
927 */
928static int
3e170ce0 929mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
930{
931 struct mppcb *mpp = sotomppcb(mp_so);
932 struct mptses *mpte;
933 int error = 0;
934
935 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
936 error = EINVAL;
937 goto out;
938 }
939 mpte = mptompte(mpp);
940 VERIFY(mpte != NULL);
941 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
942
3e170ce0 943 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
39236c6e
A
944 aid != mpte->mpte_associd) {
945 error = EINVAL;
946 goto out;
947 }
948
949 error = mptcp_disconnectx(mpte, aid, cid);
950out:
951 return (error);
952}
953
954/*
955 * User issued close, and wish to trail thru shutdown states.
956 */
957static struct mptses *
958mptcp_usrclosed(struct mptses *mpte)
959{
960 struct socket *mp_so;
961 struct mptcb *mp_tp;
962 struct mptsub *mpts;
963
964 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
965 mp_so = mpte->mpte_mppcb->mpp_socket;
966 mp_tp = mpte->mpte_mptcb;
967
968 MPT_LOCK(mp_tp);
969 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
970
fe8ab488 971 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
39236c6e
A
972 mpte = mptcp_close(mpte, mp_tp);
973 MPT_UNLOCK(mp_tp);
974 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
975 MPT_UNLOCK(mp_tp);
976 soisdisconnected(mp_so);
fe8ab488
A
977 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
978 MPTS_LOCK(mpts);
979 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
980 MPTS_UNLOCK(mpts);
981 }
39236c6e 982 } else {
39236c6e
A
983 MPT_UNLOCK(mp_tp);
984
985 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
986 MPTS_LOCK(mpts);
fe8ab488 987 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
39236c6e
A
988 mptcp_subflow_disconnect(mpte, mpts, FALSE);
989 MPTS_UNLOCK(mpts);
990 }
991 }
39236c6e
A
992
993 return (mpte);
994}
995
996/*
997 * User-protocol pru_peeloff callback.
998 */
999static int
3e170ce0 1000mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop)
39236c6e
A
1001{
1002 struct mppcb *mpp = sotomppcb(mp_so);
1003 struct mptses *mpte;
1004 int error = 0;
1005
1006 VERIFY(psop != NULL);
1007
1008 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1009 error = EINVAL;
1010 goto out;
1011 }
1012 mpte = mptompte(mpp);
1013 VERIFY(mpte != NULL);
1014
1015 error = mptcp_peeloff(mpte, aid, psop);
1016out:
1017 return (error);
1018}
1019
1020/*
1021 * Transform a previously connected TCP subflow connection which has
1022 * failed to negotiate MPTCP to its own socket which can be externalized
1023 * with a file descriptor. Valid only when the MPTCP socket is not
1024 * yet associated (MPTCP-level connection has not been established.)
1025 */
1026static int
3e170ce0 1027mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop)
39236c6e
A
1028{
1029 struct socket *so = NULL, *mp_so;
1030 struct mptsub *mpts;
1031 int error = 0;
1032
1033 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1034 mp_so = mpte->mpte_mppcb->mpp_socket;
1035
1036 VERIFY(psop != NULL);
1037 *psop = NULL;
1038
3e170ce0 1039 DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid,
39236c6e
A
1040 struct socket *, mp_so);
1041
1042 /* peeloff cannot happen after an association is established */
3e170ce0 1043 if (mpte->mpte_associd != SAE_ASSOCID_ANY) {
39236c6e
A
1044 error = EINVAL;
1045 goto out;
1046 }
1047
3e170ce0 1048 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
39236c6e
A
1049 error = EINVAL;
1050 goto out;
1051 }
1052
1053 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1054 MPTS_LOCK(mpts);
1055 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
1056 panic("%s: so %p is MPTCP capable but mp_so %p "
1057 "aid is %d\n", __func__, so, mp_so,
1058 mpte->mpte_associd);
1059 /* NOTREACHED */
1060 }
1061 MPTS_ADDREF_LOCKED(mpts); /* for us */
1062 so = mpts->mpts_socket;
1063 VERIFY(so != NULL);
1064 /*
1065 * This subflow socket is about to be externalized; make it
1066 * appear as if it has the same properties as the MPTCP socket,
1067 * undo what's done earlier in mptcp_subflow_add().
1068 */
1069 mptcp_subflow_sopeeloff(mpte, mpts, so);
1070 MPTS_UNLOCK(mpts);
1071
1072 mptcp_subflow_del(mpte, mpts, FALSE);
1073 MPTS_REMREF(mpts); /* ours */
1074 /*
1075 * XXX adi@apple.com
1076 *
1077 * Here we need to make sure the subflow socket is not
1078 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1079 * and INP_FLOW_SUSPENDED on the subflow socket, since
1080 * we will no longer be monitoring its events.
1081 */
1082 break;
1083 }
1084
1085 if (so == NULL) {
1086 error = EINVAL;
1087 goto out;
1088 }
1089 *psop = so;
1090
3e170ce0
A
1091 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1092 "%s: mp_so 0x%llx\n", __func__,
1093 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
1094 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1095
39236c6e
A
1096out:
1097 return (error);
1098}
1099
1100/*
1101 * After a receive, possible send some update to peer.
1102 */
1103static int
1104mptcp_usr_rcvd(struct socket *mp_so, int flags)
1105{
1106#pragma unused(flags)
1107 struct mppcb *mpp = sotomppcb(mp_so);
1108 struct mptses *mpte;
1109 int error = 0;
1110
1111 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1112 error = EINVAL;
1113 goto out;
1114 }
1115 mpte = mptompte(mpp);
1116 VERIFY(mpte != NULL);
1117
1118 error = mptcp_output(mpte);
1119out:
1120 return (error);
1121}
1122
1123/*
1124 * Do a send by putting data in the output queue.
1125 */
1126static int
1127mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1128 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1129{
1130#pragma unused(nam, p)
1131 struct mppcb *mpp = sotomppcb(mp_so);
1132 struct mptses *mpte;
1133 int error = 0;
1134
1135 if (prus_flags & (PRUS_OOB|PRUS_EOF)) {
1136 error = EOPNOTSUPP;
1137 goto out;
1138 }
1139
1140 if (nam != NULL) {
1141 error = EOPNOTSUPP;
1142 goto out;
1143 }
1144
1145 if (control != NULL && control->m_len != 0) {
1146 error = EOPNOTSUPP;
1147 goto out;
1148 }
1149
1150 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1151 error = ECONNRESET;
1152 goto out;
1153 }
1154 mpte = mptompte(mpp);
1155 VERIFY(mpte != NULL);
1156
490019cf
A
1157 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1158 (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA))) {
39236c6e
A
1159 error = ENOTCONN;
1160 goto out;
1161 }
1162
1163 mptcp_insert_dsn(mpp, m);
1164 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1165 (void) sbappendstream(&mp_so->so_snd, m);
1166 m = NULL;
1167
490019cf
A
1168 /*
1169 * XXX: adi@apple.com
1170 *
1171 * PRUS_MORETOCOME could be set, but we don't check it now.
1172 */
1173 error = mptcp_output(mpte);
1174 if (error != 0)
1175 goto out;
1176
1177 if (mp_so->so_state & SS_ISCONNECTING) {
1178 if (mp_so->so_state & SS_NBIO)
1179 error = EWOULDBLOCK;
1180 else
1181 error = sbwait(&mp_so->so_snd);
39236c6e 1182 }
490019cf 1183
39236c6e
A
1184out:
1185 if (error) {
1186 if (m != NULL)
1187 m_freem(m);
1188 if (control != NULL)
1189 m_freem(control);
1190 }
1191 return (error);
1192}
1193
1194/*
1195 * Mark the MPTCP connection as being incapable of further output.
1196 */
1197static int
1198mptcp_usr_shutdown(struct socket *mp_so)
1199{
1200 struct mppcb *mpp = sotomppcb(mp_so);
1201 struct mptses *mpte;
1202 int error = 0;
1203
1204 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1205 error = EINVAL;
1206 goto out;
1207 }
1208 mpte = mptompte(mpp);
1209 VERIFY(mpte != NULL);
1210
1211 socantsendmore(mp_so);
1212
1213 mpte = mptcp_usrclosed(mpte);
1214 if (mpte != NULL)
1215 error = mptcp_output(mpte);
1216out:
1217 return (error);
1218}
1219
1220/*
1221 * Copy the contents of uio into a properly sized mbuf chain.
1222 */
1223static int
1224mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
1225 struct mbuf **top)
1226{
1227 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1228 user_ssize_t resid, tot, len, progress; /* must be user_ssize_t */
1229 int error;
1230
1231 VERIFY(top != NULL && *top == NULL);
1232
1233 /*
1234 * space can be zero or an arbitrary large value bound by
1235 * the total data supplied by the uio.
1236 */
1237 resid = uio_resid(uio);
1238 if (space > 0)
1239 tot = imin(resid, space);
1240 else
1241 tot = resid;
1242
1243 /*
1244 * The smallest unit is a single mbuf with pkthdr.
1245 * We can't align past it.
1246 */
1247 if (align >= MHLEN)
1248 return (EINVAL);
1249
1250 /*
1251 * Give us the full allocation or nothing.
1252 * If space is zero return the smallest empty mbuf.
1253 */
1254 if ((len = tot + align) == 0)
1255 len = 1;
1256
1257 /* Loop and append maximum sized mbufs to the chain tail. */
1258 while (len > 0) {
1259 uint32_t m_needed = 1;
1260
1261 if (njcl > 0 && len > MBIGCLBYTES)
1262 mb = m_getpackets_internal(&m_needed, 1,
1263 how, 1, M16KCLBYTES);
1264 else if (len > MCLBYTES)
1265 mb = m_getpackets_internal(&m_needed, 1,
1266 how, 1, MBIGCLBYTES);
1267 else if (len >= (signed)MINCLSIZE)
1268 mb = m_getpackets_internal(&m_needed, 1,
1269 how, 1, MCLBYTES);
1270 else
1271 mb = m_gethdr(how, MT_DATA);
1272
1273 /* Fail the whole operation if one mbuf can't be allocated. */
1274 if (mb == NULL) {
1275 if (nm != NULL)
1276 m_freem(nm);
1277 return (ENOBUFS);
1278 }
1279
1280 /* Book keeping. */
1281 VERIFY(mb->m_flags & M_PKTHDR);
1282 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1283 if (mtail != NULL)
1284 mtail->m_next = mb;
1285 else
1286 nm = mb;
1287 mtail = mb;
1288 }
1289
1290 m = nm;
1291 m->m_data += align;
1292
1293 progress = 0;
1294 /* Fill all mbufs with uio data and update header information. */
1295 for (mb = m; mb != NULL; mb = mb->m_next) {
1296 len = imin(M_TRAILINGSPACE(mb), tot - progress);
1297
1298 error = uiomove(mtod(mb, char *), len, uio);
1299 if (error != 0) {
1300 m_freem(m);
1301 return (error);
1302 }
1303
1304 /* each mbuf is M_PKTHDR chained via m_next */
1305 mb->m_len = len;
1306 mb->m_pkthdr.len = len;
1307
1308 progress += len;
1309 }
1310 VERIFY(progress == tot);
1311 *top = m;
1312 return (0);
1313}
1314
1315/*
1316 * MPTCP socket protocol-user socket send routine, derived from sosend().
1317 */
1318static int
1319mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1320 struct mbuf *top, struct mbuf *control, int flags)
1321{
1322#pragma unused(addr)
1323 int32_t space;
1324 user_ssize_t resid;
1325 int error, sendflags;
1326 struct proc *p = current_proc();
1327 int sblocked = 0;
1328
1329 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1330 if (uio == NULL || top != NULL) {
1331 error = EINVAL;
1332 goto out;
1333 }
1334 resid = uio_resid(uio);
1335
1336 socket_lock(mp_so, 1);
1337 so_update_last_owner_locked(mp_so, p);
1338 so_update_policy(mp_so);
1339
1340 VERIFY(mp_so->so_type == SOCK_STREAM);
1341 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1342
1343 if ((flags & (MSG_OOB|MSG_DONTROUTE|MSG_HOLD|MSG_SEND|MSG_FLUSH)) ||
1344 (mp_so->so_flags & SOF_ENABLE_MSGS)) {
1345 error = EOPNOTSUPP;
1346 socket_unlock(mp_so, 1);
1347 goto out;
1348 }
1349
1350 /*
1351 * In theory resid should be unsigned. However, space must be
1352 * signed, as it might be less than 0 if we over-committed, and we
1353 * must use a signed comparison of space and resid. On the other
1354 * hand, a negative resid causes us to loop sending 0-length
1355 * segments to the protocol.
1356 */
1357 if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
1358 error = EINVAL;
1359 socket_unlock(mp_so, 1);
1360 goto out;
1361 }
1362
1363 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1364
1365 do {
1366 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1367 &sblocked, NULL);
1368 if (error != 0)
1369 goto release;
1370
1371 space = sbspace(&mp_so->so_snd);
1372 do {
1373 socket_unlock(mp_so, 0);
1374 /*
1375 * Copy the data from userland into an mbuf chain.
1376 */
1377 error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
1378 if (error != 0) {
1379 socket_lock(mp_so, 0);
1380 goto release;
1381 }
1382 VERIFY(top != NULL);
1383 space -= resid - uio_resid(uio);
1384 resid = uio_resid(uio);
1385 socket_lock(mp_so, 0);
1386
1387 /*
1388 * Compute flags here, for pru_send and NKEs.
1389 */
1390 sendflags = (resid > 0 && space > 0) ?
1391 PRUS_MORETOCOME : 0;
1392
1393 /*
1394 * Socket filter processing
1395 */
1396 VERIFY(control == NULL);
1397 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1398 if (error != 0) {
1399 if (error == EJUSTRETURN) {
1400 error = 0;
1401 top = NULL;
1402 /* always free control if any */
1403 }
1404 goto release;
1405 }
1406 if (control != NULL) {
1407 m_freem(control);
1408 control = NULL;
1409 }
1410
1411 /*
1412 * Pass data to protocol.
1413 */
1414 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1415 (mp_so, sendflags, top, NULL, NULL, p);
1416
1417 top = NULL;
1418 if (error != 0)
1419 goto release;
1420 } while (resid != 0 && space > 0);
1421 } while (resid != 0);
1422
1423release:
1424 if (sblocked)
1425 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1426 else
1427 socket_unlock(mp_so, 1);
1428out:
1429 if (top != NULL)
1430 m_freem(top);
1431 if (control != NULL)
1432 m_freem(control);
1433
490019cf
A
1434 /* clear SOF1_PRECONNECT_DATA after one write */
1435 if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)
1436 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1437
39236c6e
A
1438 return (error);
1439}
1440
1441/*
1442 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1443 * This routine simply indicates to the caller whether or not to proceed
1444 * further with the given socket option. This is invoked by sosetoptlock()
1445 * and sogetoptlock().
1446 */
1447static int
1448mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1449{
1450#pragma unused(mp_so)
1451 int error = 0;
1452
1453 VERIFY(sopt->sopt_level == SOL_SOCKET);
1454
1455 /*
1456 * We could check for sopt_dir (set/get) here, but we'll just
1457 * let the caller deal with it as appropriate; therefore the
1458 * following is a superset of the socket options which we
1459 * allow for set/get.
1460 *
1461 * XXX: adi@apple.com
1462 *
1463 * Need to consider the following cases:
1464 *
1465 * a. In the event peeloff(2) occurs on the subflow socket,
1466 * we may want to issue those options which are now
1467 * handled at the MP socket. In that case, we will need
1468 * to record them in mptcp_setopt() so that they can
1469 * be replayed during peeloff.
1470 *
1471 * b. Certain socket options don't have a clear definition
1472 * on the expected behavior post connect(2). At the time
1473 * those options are issued on the MP socket, there may
1474 * be existing subflow sockets that are already connected.
1475 */
1476 switch (sopt->sopt_name) {
1477 case SO_LINGER: /* MP */
1478 case SO_LINGER_SEC: /* MP */
1479 case SO_TYPE: /* MP */
1480 case SO_NREAD: /* MP */
1481 case SO_NWRITE: /* MP */
1482 case SO_ERROR: /* MP */
1483 case SO_SNDBUF: /* MP */
1484 case SO_RCVBUF: /* MP */
1485 case SO_SNDLOWAT: /* MP */
1486 case SO_RCVLOWAT: /* MP */
1487 case SO_SNDTIMEO: /* MP */
1488 case SO_RCVTIMEO: /* MP */
1489 case SO_NKE: /* MP */
1490 case SO_NOSIGPIPE: /* MP */
1491 case SO_NOADDRERR: /* MP */
1492 case SO_LABEL: /* MP */
1493 case SO_PEERLABEL: /* MP */
1494 case SO_DEFUNCTOK: /* MP */
1495 case SO_ISDEFUNCT: /* MP */
1496 case SO_TRAFFIC_CLASS_DBG: /* MP */
1497 /*
1498 * Tell the caller that these options are to be processed.
1499 */
1500 break;
1501
1502 case SO_DEBUG: /* MP + subflow */
1503 case SO_KEEPALIVE: /* MP + subflow */
1504 case SO_USELOOPBACK: /* MP + subflow */
1505 case SO_RANDOMPORT: /* MP + subflow */
1506 case SO_TRAFFIC_CLASS: /* MP + subflow */
1507 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1508 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1509 case SO_RECV_ANYIF: /* MP + subflow */
1510 case SO_RESTRICTIONS: /* MP + subflow */
1511 case SO_FLUSH: /* MP + subflow */
fe8ab488
A
1512 case SO_MPTCP_FASTJOIN: /* MP + subflow */
1513 case SO_NOWAKEFROMSLEEP:
39236c6e
A
1514 /*
1515 * Tell the caller that these options are to be processed;
1516 * these will also be recorded later by mptcp_setopt().
1517 *
1518 * NOTE: Only support integer option value for now.
1519 */
1520 if (sopt->sopt_valsize != sizeof (int))
1521 error = EINVAL;
1522 break;
1523
1524 default:
1525 /*
1526 * Tell the caller to stop immediately and return an error.
1527 */
1528 error = ENOPROTOOPT;
1529 break;
1530 }
1531
1532 return (error);
1533}
1534
1535/*
1536 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1537 */
1538static int
1539mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1540{
1541 struct socket *mp_so;
1542 struct mptsub *mpts;
1543 struct mptopt smpo;
1544 int error = 0;
1545
1546 /* just bail now if this isn't applicable to subflow sockets */
1547 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1548 error = ENOPROTOOPT;
1549 goto out;
1550 }
1551
1552 /*
1553 * Skip those that are handled internally; these options
1554 * should not have been recorded and marked with the
1555 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1556 */
1557 if (mpo->mpo_level == SOL_SOCKET &&
1558 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1559 error = ENOPROTOOPT;
1560 goto out;
1561 }
1562
1563 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1564 mp_so = mpte->mpte_mppcb->mpp_socket;
1565
1566 /*
1567 * Don't bother going further if there's no subflow; mark the option
1568 * with MPOF_INTERIM so that we know whether or not to remove this
1569 * option upon encountering an error while issuing it during subflow
1570 * socket creation.
1571 */
1572 if (mpte->mpte_numflows == 0) {
1573 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1574 mpo->mpo_flags |= MPOF_INTERIM;
1575 /* return success */
1576 goto out;
1577 }
1578
1579 bzero(&smpo, sizeof (smpo));
1580 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1581 smpo.mpo_level = mpo->mpo_level;
1582 smpo.mpo_name = mpo->mpo_name;
1583
1584 /* grab exisiting values in case we need to rollback */
1585 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1586 struct socket *so;
1587
1588 MPTS_LOCK(mpts);
1589 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1590 mpts->mpts_oldintval = 0;
1591 smpo.mpo_intval = 0;
1592 VERIFY(mpts->mpts_socket != NULL);
1593 so = mpts->mpts_socket;
1594 socket_lock(so, 0);
1595 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1596 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1597 mpts->mpts_oldintval = smpo.mpo_intval;
1598 }
1599 socket_unlock(so, 0);
1600 MPTS_UNLOCK(mpts);
1601 }
1602
1603 /* apply socket option */
1604 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1605 struct socket *so;
1606
1607 MPTS_LOCK(mpts);
1608 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1609 VERIFY(mpts->mpts_socket != NULL);
1610 so = mpts->mpts_socket;
1611 socket_lock(so, 0);
1612 error = mptcp_subflow_sosetopt(mpte, so, mpo);
1613 socket_unlock(so, 0);
1614 MPTS_UNLOCK(mpts);
1615 if (error != 0)
1616 break;
1617 }
1618
1619 /* cleanup, and rollback if needed */
1620 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1621 struct socket *so;
1622
1623 MPTS_LOCK(mpts);
1624 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1625 /* clear in case it's set */
1626 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1627 mpts->mpts_oldintval = 0;
1628 MPTS_UNLOCK(mpts);
1629 continue;
1630 }
1631 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1632 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1633 VERIFY(mpts->mpts_oldintval == 0);
1634 MPTS_UNLOCK(mpts);
1635 continue;
1636 }
1637 /* error during sosetopt, so roll it back */
1638 if (error != 0) {
1639 VERIFY(mpts->mpts_socket != NULL);
1640 so = mpts->mpts_socket;
1641 socket_lock(so, 0);
1642 smpo.mpo_intval = mpts->mpts_oldintval;
1643 (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
1644 socket_unlock(so, 0);
1645 }
1646 mpts->mpts_oldintval = 0;
1647 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1648 MPTS_UNLOCK(mpts);
1649 }
1650
1651out:
1652 return (error);
1653}
1654
1655/*
1656 * Handle SOPT_SET for socket options issued on MP socket.
1657 */
1658static int
1659mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1660{
1661 int error = 0, optval, level, optname, rec = 1;
1662 struct mptopt smpo, *mpo = NULL;
1663 struct socket *mp_so;
1664 char buf[32];
1665
1666 level = sopt->sopt_level;
1667 optname = sopt->sopt_name;
1668
1669 VERIFY(sopt->sopt_dir == SOPT_SET);
1670 VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP);
1671 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1672 mp_so = mpte->mpte_mppcb->mpp_socket;
1673
1674 /*
1675 * Record socket options which are applicable to subflow sockets so
1676 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1677 * for the list of eligible socket-level options.
1678 */
1679 if (level == SOL_SOCKET) {
1680 switch (optname) {
1681 case SO_DEBUG:
1682 case SO_KEEPALIVE:
1683 case SO_USELOOPBACK:
1684 case SO_RANDOMPORT:
1685 case SO_TRAFFIC_CLASS:
1686 case SO_RECV_TRAFFIC_CLASS:
1687 case SO_PRIVILEGED_TRAFFIC_CLASS:
1688 case SO_RECV_ANYIF:
1689 case SO_RESTRICTIONS:
fe8ab488
A
1690 case SO_NOWAKEFROMSLEEP:
1691 case SO_MPTCP_FASTJOIN:
39236c6e
A
1692 /* record it */
1693 break;
1694 case SO_FLUSH:
1695 /* don't record it */
1696 rec = 0;
1697 break;
1698 default:
1699 /* nothing to do; just return success */
1700 goto out;
1701 }
1702 } else {
1703 switch (optname) {
1704 case TCP_NODELAY:
1705 case TCP_RXT_FINDROP:
1706 case TCP_KEEPALIVE:
1707 case TCP_KEEPINTVL:
1708 case TCP_KEEPCNT:
1709 case TCP_CONNECTIONTIMEOUT:
1710 case TCP_RXT_CONNDROPTIME:
1711 case PERSIST_TIMEOUT:
1712 /* eligible; record it */
1713 break;
fe8ab488
A
1714 case TCP_NOTSENT_LOWAT:
1715 /* record at MPTCP level */
1716 error = sooptcopyin(sopt, &optval, sizeof(optval),
1717 sizeof(optval));
1718 if (error)
1719 goto out;
1720 if (optval < 0) {
1721 error = EINVAL;
1722 goto out;
1723 } else {
1724 if (optval == 0) {
1725 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1726 error = mptcp_set_notsent_lowat(mpte,0);
1727 } else {
1728 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1729 error = mptcp_set_notsent_lowat(mpte,
1730 optval);
1731 }
1732 }
1733 goto out;
39236c6e
A
1734 default:
1735 /* not eligible */
1736 error = ENOPROTOOPT;
1737 goto out;
1738 }
1739 }
1740
1741 if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
1742 sizeof (optval))) != 0)
1743 goto out;
1744
1745 if (rec) {
1746 /* search for an existing one; if not found, allocate */
1747 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL)
1748 mpo = mptcp_sopt_alloc(M_WAITOK);
1749
1750 if (mpo == NULL) {
1751 error = ENOBUFS;
1752 } else {
3e170ce0
A
1753 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1754 "%s: mp_so 0x%llx sopt %s "
39236c6e
A
1755 "val %d %s\n", __func__,
1756 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1757 mptcp_sopt2str(level, optname, buf,
1758 sizeof (buf)), optval,
1759 (mpo->mpo_flags & MPOF_ATTACHED) ?
3e170ce0
A
1760 "updated" : "recorded"),
1761 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
1762
1763 /* initialize or update, as needed */
1764 mpo->mpo_intval = optval;
1765 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1766 mpo->mpo_level = level;
1767 mpo->mpo_name = optname;
1768 mptcp_sopt_insert(mpte, mpo);
1769 }
1770 VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
1771 /* this can be issued on the subflow socket */
1772 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1773 }
1774 } else {
1775 bzero(&smpo, sizeof (smpo));
1776 mpo = &smpo;
1777 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1778 mpo->mpo_level = level;
1779 mpo->mpo_name = optname;
1780 mpo->mpo_intval = optval;
1781 }
1782 VERIFY(mpo == NULL || error == 0);
1783
1784 /* issue this socket option on existing subflows */
1785 if (error == 0) {
1786 error = mptcp_setopt_apply(mpte, mpo);
1787 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1788 VERIFY(mpo != &smpo);
1789 mptcp_sopt_remove(mpte, mpo);
1790 mptcp_sopt_free(mpo);
1791 }
1792 if (mpo == &smpo)
1793 mpo->mpo_flags &= ~MPOF_INTERIM;
1794 }
1795out:
1796 if (error == 0 && mpo != NULL) {
3e170ce0
A
1797 mptcplog((LOG_ERR, "MPTCP Socket: "
1798 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
39236c6e
A
1799 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1800 mptcp_sopt2str(level, optname, buf,
1801 sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ?
3e170ce0
A
1802 "pending" : "successful"),
1803 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
39236c6e 1804 } else if (error != 0) {
3e170ce0
A
1805 mptcplog((LOG_ERR, "MPTCP Socket: "
1806 "%s: mp_so 0x%llx sopt %s can't be issued "
39236c6e
A
1807 "error %d\n", __func__,
1808 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level,
3e170ce0
A
1809 optname, buf, sizeof (buf)), error),
1810 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
39236c6e
A
1811 }
1812 return (error);
1813}
1814
1815/*
1816 * Handle SOPT_GET for socket options issued on MP socket.
1817 */
1818static int
1819mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
1820{
1821 int error = 0, optval;
1822
1823 VERIFY(sopt->sopt_dir == SOPT_GET);
1824 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1825
1826 /*
1827 * We only handle SOPT_GET for TCP level socket options; we should
1828 * not get here for socket level options since they are already
1829 * handled at the socket layer.
1830 */
1831 if (sopt->sopt_level != IPPROTO_TCP) {
1832 error = ENOPROTOOPT;
1833 goto out;
1834 }
1835
1836 switch (sopt->sopt_name) {
1837 case TCP_NODELAY:
1838 case TCP_RXT_FINDROP:
1839 case TCP_KEEPALIVE:
1840 case TCP_KEEPINTVL:
1841 case TCP_KEEPCNT:
1842 case TCP_CONNECTIONTIMEOUT:
1843 case TCP_RXT_CONNDROPTIME:
1844 case PERSIST_TIMEOUT:
fe8ab488 1845 case TCP_NOTSENT_LOWAT:
39236c6e
A
1846 /* eligible; get the default value just in case */
1847 error = mptcp_default_tcp_optval(mpte, sopt, &optval);
1848 break;
1849 default:
1850 /* not eligible */
1851 error = ENOPROTOOPT;
1852 break;
1853 }
1854
fe8ab488
A
1855 switch (sopt->sopt_name) {
1856 case TCP_NOTSENT_LOWAT:
1857 if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT)
1858 optval = mptcp_get_notsent_lowat(mpte);
1859 else
1860 optval = 0;
1861 goto out;
1862 }
1863
39236c6e
A
1864 /*
1865 * Search for a previously-issued TCP level socket option and
1866 * return the recorded option value. This assumes that the
1867 * value did not get modified by the lower layer after it was
1868 * issued at setsockopt(2) time. If not found, we'll return
1869 * the default value obtained ealier.
1870 */
1871 if (error == 0) {
1872 struct mptopt *mpo;
1873
1874 if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL)
1875 optval = mpo->mpo_intval;
1876
1877 error = sooptcopyout(sopt, &optval, sizeof (int));
1878 }
1879out:
1880 return (error);
1881}
1882
1883/*
1884 * Return default values for TCP socket options. Ideally we would query the
1885 * subflow TCP socket, but that requires creating a subflow socket before
1886 * connectx(2) time. To simplify things, just return the default values
1887 * that we know of.
1888 */
1889static int
1890mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
1891{
1892 int error = 0;
1893
1894 VERIFY(sopt->sopt_level == IPPROTO_TCP);
1895 VERIFY(sopt->sopt_dir == SOPT_GET);
1896 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1897
1898 /* try to do what tcp_newtcpcb() does */
1899 switch (sopt->sopt_name) {
1900 case TCP_NODELAY:
1901 case TCP_RXT_FINDROP:
1902 case TCP_KEEPINTVL:
1903 case TCP_KEEPCNT:
1904 case TCP_CONNECTIONTIMEOUT:
1905 case TCP_RXT_CONNDROPTIME:
fe8ab488 1906 case TCP_NOTSENT_LOWAT:
39236c6e
A
1907 *optval = 0;
1908 break;
1909
1910 case TCP_KEEPALIVE:
1911 *optval = mptcp_subflow_keeptime;
1912 break;
1913
1914 case PERSIST_TIMEOUT:
1915 *optval = tcp_max_persist_timeout;
1916 break;
1917
1918 default:
1919 error = ENOPROTOOPT;
1920 break;
1921 }
1922 return (error);
1923}
1924
1925/*
1926 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1927 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1928 * to those that are allowed by mptcp_usr_socheckopt().
1929 */
1930int
1931mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
1932{
1933 struct mppcb *mpp = sotomppcb(mp_so);
1934 struct mptses *mpte;
1935 int error = 0;
1936
1937 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1938 error = EINVAL;
1939 goto out;
1940 }
1941 mpte = mptompte(mpp);
1942 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1943
1944 /* we only handle socket and TCP-level socket options for MPTCP */
1945 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
1946 char buf[32];
3e170ce0
A
1947 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1948 "%s: mp_so 0x%llx sopt %s level not "
39236c6e
A
1949 "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1950 mptcp_sopt2str(sopt->sopt_level,
3e170ce0
A
1951 sopt->sopt_name, buf, sizeof (buf))),
1952 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
1953 error = EINVAL;
1954 goto out;
1955 }
1956
1957 switch (sopt->sopt_dir) {
1958 case SOPT_SET:
1959 error = mptcp_setopt(mpte, sopt);
1960 break;
1961
1962 case SOPT_GET:
1963 error = mptcp_getopt(mpte, sopt);
1964 break;
1965 }
1966out:
1967 return (error);
1968}
1969
1970/*
1971 * Return a string representation of <sopt_level,sopt_name>
1972 */
1973const char *
1974mptcp_sopt2str(int level, int optname, char *dst, int size)
1975{
1976 char lbuf[32], obuf[32];
1977 const char *l = lbuf, *o = obuf;
1978
1979 (void) snprintf(lbuf, sizeof (lbuf), "0x%x", level);
1980 (void) snprintf(obuf, sizeof (obuf), "0x%x", optname);
1981
1982 switch (level) {
1983 case SOL_SOCKET:
1984 l = "SOL_SOCKET";
1985 switch (optname) {
1986 case SO_LINGER:
1987 o = "SO_LINGER";
1988 break;
1989 case SO_LINGER_SEC:
1990 o = "SO_LINGER_SEC";
1991 break;
1992 case SO_DEBUG:
1993 o = "SO_DEBUG";
1994 break;
1995 case SO_KEEPALIVE:
1996 o = "SO_KEEPALIVE";
1997 break;
1998 case SO_USELOOPBACK:
1999 o = "SO_USELOOPBACK";
2000 break;
2001 case SO_TYPE:
2002 o = "SO_TYPE";
2003 break;
2004 case SO_NREAD:
2005 o = "SO_NREAD";
2006 break;
2007 case SO_NWRITE:
2008 o = "SO_NWRITE";
2009 break;
2010 case SO_ERROR:
2011 o = "SO_ERROR";
2012 break;
2013 case SO_SNDBUF:
2014 o = "SO_SNDBUF";
2015 break;
2016 case SO_RCVBUF:
2017 o = "SO_RCVBUF";
2018 break;
2019 case SO_SNDLOWAT:
2020 o = "SO_SNDLOWAT";
2021 break;
2022 case SO_RCVLOWAT:
2023 o = "SO_RCVLOWAT";
2024 break;
2025 case SO_SNDTIMEO:
2026 o = "SO_SNDTIMEO";
2027 break;
2028 case SO_RCVTIMEO:
2029 o = "SO_RCVTIMEO";
2030 break;
2031 case SO_NKE:
2032 o = "SO_NKE";
2033 break;
2034 case SO_NOSIGPIPE:
2035 o = "SO_NOSIGPIPE";
2036 break;
2037 case SO_NOADDRERR:
2038 o = "SO_NOADDRERR";
2039 break;
2040 case SO_RESTRICTIONS:
2041 o = "SO_RESTRICTIONS";
2042 break;
2043 case SO_LABEL:
2044 o = "SO_LABEL";
2045 break;
2046 case SO_PEERLABEL:
2047 o = "SO_PEERLABEL";
2048 break;
2049 case SO_RANDOMPORT:
2050 o = "SO_RANDOMPORT";
2051 break;
2052 case SO_TRAFFIC_CLASS:
2053 o = "SO_TRAFFIC_CLASS";
2054 break;
2055 case SO_RECV_TRAFFIC_CLASS:
2056 o = "SO_RECV_TRAFFIC_CLASS";
2057 break;
2058 case SO_TRAFFIC_CLASS_DBG:
2059 o = "SO_TRAFFIC_CLASS_DBG";
2060 break;
2061 case SO_PRIVILEGED_TRAFFIC_CLASS:
2062 o = "SO_PRIVILEGED_TRAFFIC_CLASS";
2063 break;
2064 case SO_DEFUNCTOK:
2065 o = "SO_DEFUNCTOK";
2066 break;
2067 case SO_ISDEFUNCT:
2068 o = "SO_ISDEFUNCT";
2069 break;
2070 case SO_OPPORTUNISTIC:
2071 o = "SO_OPPORTUNISTIC";
2072 break;
2073 case SO_FLUSH:
2074 o = "SO_FLUSH";
2075 break;
2076 case SO_RECV_ANYIF:
2077 o = "SO_RECV_ANYIF";
2078 break;
fe8ab488
A
2079 case SO_NOWAKEFROMSLEEP:
2080 o = "SO_NOWAKEFROMSLEEP";
2081 break;
2082 case SO_MPTCP_FASTJOIN:
2083 o = "SO_MPTCP_FASTJOIN";
2084 break;
39236c6e
A
2085 }
2086 break;
2087 case IPPROTO_TCP:
2088 l = "IPPROTO_TCP";
2089 switch (optname) {
2090 case TCP_KEEPALIVE:
2091 o = "TCP_KEEPALIVE";
2092 break;
2093 case TCP_KEEPINTVL:
2094 o = "TCP_KEEPINTVL";
2095 break;
2096 case TCP_KEEPCNT:
2097 o = "TCP_KEEPCNT";
2098 break;
2099 case TCP_CONNECTIONTIMEOUT:
2100 o = "TCP_CONNECTIONTIMEOUT";
2101 break;
2102 case TCP_RXT_CONNDROPTIME:
2103 o = "TCP_RXT_CONNDROPTIME";
2104 break;
2105 case PERSIST_TIMEOUT:
2106 o = "PERSIST_TIMEOUT";
2107 break;
2108 }
2109 break;
2110 }
2111
2112 (void) snprintf(dst, size, "<%s,%s>", l, o);
2113 return (dst);
2114}
490019cf
A
2115
2116static int
2117mptcp_usr_preconnect(struct socket *mp_so)
2118{
2119 struct mptsub *mpts = NULL;
2120 struct mppcb *mpp = sotomppcb(mp_so);
2121 struct mptses *mpte;
2122 struct socket *so;
2123 struct tcpcb *tp = NULL;
2124
2125 mpte = mptompte(mpp);
2126 VERIFY(mpte != NULL);
2127 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
2128
2129 mpts = mptcp_get_subflow(mpte, NULL, NULL);
2130 if (mpts == NULL) {
2131 mptcplog((LOG_ERR, "MPTCP Socket: "
2132 "%s: mp_so 0x%llx invalid preconnect ", __func__,
2133 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
2134 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
2135 return (EINVAL);
2136 }
2137 MPTS_LOCK(mpts);
2138 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2139 so = mpts->mpts_socket;
2140 socket_lock(so, 0);
2141 tp = intotcpcb(sotoinpcb(so));
2142 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2143 int error = tcp_output(sototcpcb(so));
2144 socket_unlock(so, 0);
2145 MPTS_UNLOCK(mpts);
2146 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
2147 return (error);
2148}