]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/mptcp_usrreq.c
xnu-3247.1.106.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp_usrreq.c
CommitLineData
39236c6e 1/*
3e170ce0 2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
39236c6e
A
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/kernel.h>
32#include <sys/socket.h>
33#include <sys/socketvar.h>
34#include <sys/protosw.h>
35#include <sys/mcache.h>
36#include <sys/syslog.h>
37#include <sys/proc.h>
38#include <sys/proc_internal.h>
39#include <sys/resourcevar.h>
40
41#include <net/if.h>
42#include <netinet/in.h>
43#include <netinet/in_var.h>
44#include <netinet/tcp.h>
45#include <netinet/tcp_fsm.h>
46#include <netinet/tcp_seq.h>
47#include <netinet/tcp_var.h>
48#include <netinet/tcp_timer.h>
49#include <netinet/mptcp_var.h>
50#include <netinet/mptcp_timer.h>
51
52#include <mach/sdt.h>
53
54static int mptcp_usr_attach(struct socket *, int, struct proc *);
55static int mptcp_usr_detach(struct socket *);
56static int mptcp_attach(struct socket *, struct proc *);
57static int mptcp_detach(struct socket *, struct mppcb *);
58static int mptcp_connectx(struct mptses *, struct sockaddr_list **,
3e170ce0
A
59 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
60 sae_connid_t *, uint32_t, void *, uint32_t);
39236c6e 61static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **,
3e170ce0
A
62 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
39236c6e 64static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
3e170ce0 65static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
39236c6e 66 user_addr_t);
3e170ce0 67static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
39236c6e
A
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
71 struct proc *);
3e170ce0 72static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t);
fe8ab488 73static int mptcp_usr_disconnect(struct socket *);
3e170ce0 74static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
39236c6e 75static struct mptses *mptcp_usrclosed(struct mptses *);
3e170ce0
A
76static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **);
77static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **);
39236c6e
A
78static int mptcp_usr_rcvd(struct socket *, int);
79static int mptcp_usr_send(struct socket *, int, struct mbuf *,
80 struct sockaddr *, struct mbuf *, struct proc *);
81static int mptcp_usr_shutdown(struct socket *);
82static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **);
83static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
84 struct mbuf *, struct mbuf *, int);
85static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
86static int mptcp_setopt_apply(struct mptses *, struct mptopt *);
87static int mptcp_setopt(struct mptses *, struct sockopt *);
88static int mptcp_getopt(struct mptses *, struct sockopt *);
89static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *);
90static void mptcp_connorder_helper(struct mptsub *mpts);
91
92struct pr_usrreqs mptcp_usrreqs = {
93 .pru_attach = mptcp_usr_attach,
94 .pru_connectx = mptcp_usr_connectx,
95 .pru_control = mptcp_usr_control,
96 .pru_detach = mptcp_usr_detach,
fe8ab488 97 .pru_disconnect = mptcp_usr_disconnect,
39236c6e
A
98 .pru_disconnectx = mptcp_usr_disconnectx,
99 .pru_peeloff = mptcp_usr_peeloff,
100 .pru_rcvd = mptcp_usr_rcvd,
101 .pru_send = mptcp_usr_send,
102 .pru_shutdown = mptcp_usr_shutdown,
103 .pru_sosend = mptcp_usr_sosend,
104 .pru_soreceive = soreceive,
105 .pru_socheckopt = mptcp_usr_socheckopt,
106};
107
108/*
109 * Attaches an MPTCP control block to a socket.
110 */
111static int
112mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
113{
114#pragma unused(proto)
115 int error;
116
117 VERIFY(sotomppcb(mp_so) == NULL);
118
119 error = mptcp_attach(mp_so, p);
120 if (error != 0)
121 goto out;
122 /*
123 * XXX: adi@apple.com
124 *
125 * Might want to use a different SO_LINGER timeout than TCP's?
126 */
127 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0)
128 mp_so->so_linger = TCP_LINGERTIME * hz;
129out:
130 return (error);
131}
132
133/*
134 * Detaches an MPTCP control block from a socket.
135 */
136static int
137mptcp_usr_detach(struct socket *mp_so)
138{
139 struct mppcb *mpp = sotomppcb(mp_so);
140 int error = 0;
141
142 VERIFY(mpp != NULL);
143 VERIFY(mpp->mpp_socket != NULL);
144
145 error = mptcp_detach(mp_so, mpp);
146 return (error);
147}
148
149/*
150 * Attach MPTCP protocol to socket, allocating MP control block,
151 * MPTCP session, control block, buffer space, etc.
152 */
153static int
154mptcp_attach(struct socket *mp_so, struct proc *p)
155{
156#pragma unused(p)
3e170ce0
A
157 struct mptses *mpte = NULL;
158 struct mptcb *mp_tp = NULL;
159 struct mppcb *mpp = NULL;
39236c6e
A
160 int error = 0;
161
162 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
163 error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX);
164 if (error != 0)
165 goto out;
166 }
167
168 /*
169 * MPTCP socket buffers cannot be compressed, due to the
170 * fact that each mbuf chained via m_next is a M_PKTHDR
171 * which carries some MPTCP metadata.
172 */
173 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
174 mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS;
175
176 /* Disable socket buffer auto-tuning. */
177 mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
178 mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE;
179
3e170ce0 180 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
39236c6e 181 goto out;
3e170ce0 182 }
39236c6e
A
183
184 mpp = sotomppcb(mp_so);
185 VERIFY(mpp != NULL);
3e170ce0
A
186 mpte = (struct mptses *)mpp->mpp_pcbe;
187 VERIFY(mpte != NULL);
39236c6e
A
188 mp_tp = mpte->mpte_mptcb;
189 VERIFY(mp_tp != NULL);
39236c6e
A
190out:
191 return (error);
192}
193
194/*
195 * Called when the socket layer loses its final reference to the socket;
196 * at this point, there is only one case in which we will keep things
197 * around: time wait.
198 */
199static int
200mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
201{
202 struct mptses *mpte;
203 struct mppcbinfo *mppi;
204
205 VERIFY(mp_so->so_pcb == mpp);
206 VERIFY(mpp->mpp_socket == mp_so);
207
208 mppi = mpp->mpp_pcbinfo;
209 VERIFY(mppi != NULL);
210
3e170ce0 211 __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
39236c6e
A
212 VERIFY(mpte->mpte_mppcb == mpp);
213
214 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
215
216 /*
217 * We are done with this MPTCP socket (it has been closed);
218 * trigger all subflows to be disconnected, if not already,
219 * by initiating the PCB detach sequence (SOF_PCBCLEARING
220 * will be set.)
221 */
222 mp_pcbdetach(mpp);
223
3e170ce0 224 (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
39236c6e
A
225
226 /*
227 * XXX: adi@apple.com
228 *
229 * Here, we would want to handle time wait state.
230 */
231
232 return (0);
233}
234
235/*
236 * Common subroutine to open a MPTCP connection to one of the remote hosts
237 * specified by dst_sl. This includes allocating and establishing a
238 * subflow TCP connection, either initially to establish MPTCP connection,
239 * or to join an existing one. Returns a connection handle upon success.
240 */
241static int
242mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl,
243 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0 244 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
39236c6e
A
245 uint32_t arglen)
246{
247#pragma unused(p, aid, flags, arg, arglen)
248 struct mptsub *mpts;
249 struct socket *mp_so;
250 int error = 0;
251
252 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
253 mp_so = mpte->mpte_mppcb->mpp_socket;
254
255 VERIFY(dst_sl != NULL && *dst_sl != NULL);
256 VERIFY(pcid != NULL);
257
3e170ce0
A
258 mptcplog((LOG_DEBUG, "MPTCP Socket: "
259 "%s: mp_so 0x%llx\n", __func__,
260 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
261 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
fe8ab488 262
3e170ce0 263 DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid,
39236c6e
A
264 struct socket *, mp_so);
265
266 mpts = mptcp_subflow_alloc(M_WAITOK);
267 if (mpts == NULL) {
268 error = ENOBUFS;
269 goto out;
270 }
271 MPTS_ADDREF(mpts); /* for this routine */
272
273 if (src_sl != NULL) {
274 mpts->mpts_src_sl = *src_sl;
275 *src_sl = NULL;
276 }
277 mpts->mpts_dst_sl = *dst_sl;
278 *dst_sl = NULL;
279
280 error = mptcp_subflow_add(mpte, mpts, p, ifscope);
281 if (error == 0 && pcid != NULL)
282 *pcid = mpts->mpts_connid;
283
284out:
285 if (mpts != NULL) {
286 if ((error != 0) && (error != EWOULDBLOCK)) {
287 MPTS_LOCK(mpts);
288 if (mpts->mpts_flags & MPTSF_ATTACHED) {
289 MPTS_UNLOCK(mpts);
290 MPTS_REMREF(mpts);
291 mptcp_subflow_del(mpte, mpts, TRUE);
292 return (error);
293 }
294 MPTS_UNLOCK(mpts);
295 }
296 MPTS_REMREF(mpts);
297 }
298
299 return (error);
300}
301
302/*
303 * User-protocol pru_connectx callback.
304 */
305static int
306mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl,
307 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
3e170ce0
A
308 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
309 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
39236c6e 310{
3e170ce0 311#pragma unused(arg, arglen, uio, bytes_written)
39236c6e 312 struct mppcb *mpp = sotomppcb(mp_so);
3e170ce0
A
313 struct mptses *mpte = NULL;
314 struct mptcb *mp_tp = NULL;
315
39236c6e
A
316 int error = 0;
317
318 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
319 error = EINVAL;
320 goto out;
321 }
322 mpte = mptompte(mpp);
323 VERIFY(mpte != NULL);
324
3e170ce0
A
325 mp_tp = mpte->mpte_mptcb;
326 VERIFY(mp_tp != NULL);
327
328 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
329 error = EINVAL;
330 goto out;
331 }
332
39236c6e
A
333 error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope,
334 aid, pcid, flags, arg, arglen);
335out:
336 return (error);
337}
338
339/*
340 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
341 */
342static int
343mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
344{
345 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
346
347 /* MPTCP has at most 1 association */
3e170ce0 348 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
39236c6e
A
349
350 /* just asking how many there are? */
351 if (aidp == USER_ADDR_NULL)
352 return (0);
353
354 return (copyout(&mpte->mpte_associd, aidp,
355 sizeof (mpte->mpte_associd)));
356}
357
358/*
359 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
360 */
361static int
3e170ce0 362mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
39236c6e
A
363 user_addr_t cidp)
364{
365 struct mptsub *mpts;
366 int error = 0;
367
368 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
369
3e170ce0 370 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
39236c6e
A
371 aid != mpte->mpte_associd)
372 return (EINVAL);
373
374 *cnt = mpte->mpte_numflows;
375
376 /* just asking how many there are? */
377 if (cidp == USER_ADDR_NULL)
378 return (0);
379
380 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
381 if ((error = copyout(&mpts->mpts_connid, cidp,
382 sizeof (mpts->mpts_connid))) != 0)
383 break;
384
385 cidp += sizeof (mpts->mpts_connid);
386 }
387
388 return (error);
389}
390
391/*
392 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
393 */
394static int
3e170ce0 395mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
39236c6e
A
396 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
397 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
398 user_addr_t aux_data, uint32_t *aux_len)
399{
400#pragma unused(aux_data)
401 struct sockaddr_entry *se;
402 struct ifnet *ifp = NULL;
403 struct mptsub *mpts;
404 int error = 0;
405
406 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
407
3e170ce0 408 if (*cid == SAE_CONNID_ALL)
39236c6e
A
409 return (EINVAL);
410
411 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
3e170ce0 412 if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY)
39236c6e
A
413 break;
414 }
415 if (mpts == NULL)
3e170ce0 416 return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL);
39236c6e
A
417
418 MPTS_LOCK(mpts);
419 ifp = mpts->mpts_outif;
420 *cid = mpts->mpts_connid;
421 *ifindex = ((ifp != NULL) ? ifp->if_index : 0);
422 *soerror = mpts->mpts_soerror;
423 *flags = 0;
424 if (mpts->mpts_flags & MPTSF_CONNECTING)
425 *flags |= CIF_CONNECTING;
426 if (mpts->mpts_flags & MPTSF_CONNECTED)
427 *flags |= CIF_CONNECTED;
428 if (mpts->mpts_flags & MPTSF_DISCONNECTING)
429 *flags |= CIF_DISCONNECTING;
430 if (mpts->mpts_flags & MPTSF_DISCONNECTED)
431 *flags |= CIF_DISCONNECTED;
432 if (mpts->mpts_flags & MPTSF_BOUND_IF)
433 *flags |= CIF_BOUND_IF;
434 if (mpts->mpts_flags & MPTSF_BOUND_IP)
435 *flags |= CIF_BOUND_IP;
436 if (mpts->mpts_flags & MPTSF_BOUND_PORT)
437 *flags |= CIF_BOUND_PORT;
438 if (mpts->mpts_flags & MPTSF_PREFERRED)
439 *flags |= CIF_PREFERRED;
440 if (mpts->mpts_flags & MPTSF_MP_CAPABLE)
441 *flags |= CIF_MP_CAPABLE;
442 if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
443 *flags |= CIF_MP_DEGRADED;
444 if (mpts->mpts_flags & MPTSF_MP_READY)
445 *flags |= CIF_MP_READY;
446 if (mpts->mpts_flags & MPTSF_ACTIVE)
447 *flags |= CIF_MP_ACTIVE;
448
449 VERIFY(mpts->mpts_src_sl != NULL);
450 se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
451 VERIFY(se != NULL && se->se_addr != NULL);
452 *src_len = se->se_addr->sa_len;
453 if (src != USER_ADDR_NULL) {
454 error = copyout(se->se_addr, src, se->se_addr->sa_len);
455 if (error != 0)
456 goto out;
457 }
458
459 VERIFY(mpts->mpts_dst_sl != NULL);
460 se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
461 VERIFY(se != NULL && se->se_addr != NULL);
462 *dst_len = se->se_addr->sa_len;
463 if (dst != USER_ADDR_NULL) {
464 error = copyout(se->se_addr, dst, se->se_addr->sa_len);
465 if (error != 0)
466 goto out;
467 }
468
469 *aux_type = 0;
470 *aux_len = 0;
471 if (mpts->mpts_socket != NULL) {
472 struct conninfo_tcp tcp_ci;
473
474 *aux_type = CIAUX_TCP;
475 *aux_len = sizeof (tcp_ci);
476
477 if (aux_data != USER_ADDR_NULL) {
478 struct socket *so = mpts->mpts_socket;
479
480 VERIFY(SOCK_PROTO(so) == IPPROTO_TCP);
481 bzero(&tcp_ci, sizeof (tcp_ci));
482 socket_lock(so, 0);
483 tcp_getconninfo(so, &tcp_ci);
484 socket_unlock(so, 0);
485 error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci));
486 if (error != 0)
487 goto out;
488 }
489 }
3e170ce0
A
490 mptcplog((LOG_DEBUG, "MPTCP Socket: "
491 "%s: cid %d flags %x \n",
492 __func__, mpts->mpts_connid, mpts->mpts_flags),
493 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
494
39236c6e
A
495out:
496 MPTS_UNLOCK(mpts);
497 return (error);
498}
499
500/*
501 * Handle SIOCSCONNORDER
502 */
503int
3e170ce0 504mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank)
39236c6e
A
505{
506 struct mptsub *mpts, *mpts1;
507 int error = 0;
508
509 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
3e170ce0
A
510 mptcplog((LOG_DEBUG, "MPTCP Socket: "
511 "%s: cid %d rank %d \n", __func__, cid, rank),
512 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
39236c6e 513
3e170ce0 514 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
515 error = EINVAL;
516 goto out;
517 }
518
519 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
520 if (mpts->mpts_connid == cid)
521 break;
522 }
523 if (mpts == NULL) {
524 error = ENXIO;
525 goto out;
526 }
527
528 if (rank == 0 || rank > 1) {
529 /*
530 * If rank is 0, determine whether this should be the
531 * primary or backup subflow, depending on what we have.
532 *
533 * Otherwise, if greater than 0, make it a backup flow.
534 */
535 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
536 MPTS_LOCK(mpts1);
537 if (mpts1->mpts_flags & MPTSF_PREFERRED) {
538 MPTS_UNLOCK(mpts1);
539 break;
540 }
541 MPTS_UNLOCK(mpts1);
542 }
543
544 MPTS_LOCK(mpts);
545 mpts->mpts_flags &= ~MPTSF_PREFERRED;
546 mpts->mpts_rank = rank;
547 if (mpts1 != NULL && mpts != mpts1) {
548 /* preferred subflow found; set rank as necessary */
549 if (rank == 0)
550 mpts->mpts_rank = (mpts1->mpts_rank + 1);
551 } else if (rank == 0) {
552 /* no preferred one found; promote this */
553 rank = 1;
554 }
555 MPTS_UNLOCK(mpts);
556 }
557
558 if (rank == 1) {
559 /*
560 * If rank is 1, promote this subflow to be preferred.
561 */
562 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
563 MPTS_LOCK(mpts1);
564 if (mpts1 != mpts &&
565 (mpts1->mpts_flags & MPTSF_PREFERRED)) {
566 mpts1->mpts_flags &= ~MPTSF_PREFERRED;
fe8ab488 567 if (mpte->mpte_nummpcapflows > 1)
39236c6e
A
568 mptcp_connorder_helper(mpts1);
569 } else if (mpts1 == mpts) {
570 mpts1->mpts_rank = 1;
571 if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) {
572 mpts1->mpts_flags |= MPTSF_PREFERRED;
573 if (mpte->mpte_nummpcapflows > 1)
574 mptcp_connorder_helper(mpts1);
575 }
576 }
577 MPTS_UNLOCK(mpts1);
578 }
579 }
580
581out:
582 return (error);
583}
584
585static void
586mptcp_connorder_helper(struct mptsub *mpts)
587{
588 struct socket *so = mpts->mpts_socket;
589 struct tcpcb *tp = NULL;
590
591 socket_lock(so, 0);
592
593 tp = intotcpcb(sotoinpcb(so));
594 tp->t_mpflags |= TMPF_SND_MPPRIO;
595 if (mpts->mpts_flags & MPTSF_PREFERRED)
596 tp->t_mpflags &= ~TMPF_BACKUP_PATH;
597 else
598 tp->t_mpflags |= TMPF_BACKUP_PATH;
3e170ce0 599
39236c6e
A
600 socket_unlock(so, 0);
601
602}
603
604/*
605 * Handle SIOCSGONNORDER
606 */
607int
3e170ce0 608mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank)
39236c6e
A
609{
610 struct mptsub *mpts;
611 int error = 0;
612
613 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
614 VERIFY(rank != NULL);
615 *rank = 0;
616
3e170ce0 617 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
618 error = EINVAL;
619 goto out;
620 }
621
622 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
623 if (mpts->mpts_connid == cid)
624 break;
625 }
626 if (mpts == NULL) {
627 error = ENXIO;
628 goto out;
629 }
630
631 MPTS_LOCK(mpts);
632 *rank = mpts->mpts_rank;
633 MPTS_UNLOCK(mpts);
634out:
635 return (error);
636}
637
638/*
639 * User-protocol pru_control callback.
640 */
641static int
642mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
643 struct ifnet *ifp, struct proc *p)
644{
645#pragma unused(ifp, p)
646 struct mppcb *mpp = sotomppcb(mp_so);
647 struct mptses *mpte;
648 int error = 0;
649
650 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
651 error = EINVAL;
652 goto out;
653 }
654 mpte = mptompte(mpp);
655 VERIFY(mpte != NULL);
656
657 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
658
659 switch (cmd) {
660 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
661 struct so_aidreq32 aidr;
662 bcopy(data, &aidr, sizeof (aidr));
663 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
664 aidr.sar_aidp);
665 if (error == 0)
666 bcopy(&aidr, data, sizeof (aidr));
667 break;
668 }
669
670 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
671 struct so_aidreq64 aidr;
672 bcopy(data, &aidr, sizeof (aidr));
673 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
674 aidr.sar_aidp);
675 if (error == 0)
676 bcopy(&aidr, data, sizeof (aidr));
677 break;
678 }
679
680 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
681 struct so_cidreq32 cidr;
682 bcopy(data, &cidr, sizeof (cidr));
683 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
684 cidr.scr_cidp);
685 if (error == 0)
686 bcopy(&cidr, data, sizeof (cidr));
687 break;
688 }
689
690 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
691 struct so_cidreq64 cidr;
692 bcopy(data, &cidr, sizeof (cidr));
693 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
694 cidr.scr_cidp);
695 if (error == 0)
696 bcopy(&cidr, data, sizeof (cidr));
697 break;
698 }
699
700 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
701 struct so_cinforeq32 cifr;
702 bcopy(data, &cifr, sizeof (cifr));
703 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
704 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
705 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
706 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
707 &cifr.scir_aux_len);
708 if (error == 0)
709 bcopy(&cifr, data, sizeof (cifr));
710 break;
711 }
712
713 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
714 struct so_cinforeq64 cifr;
715 bcopy(data, &cifr, sizeof (cifr));
716 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
717 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
718 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
719 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
720 &cifr.scir_aux_len);
721 if (error == 0)
722 bcopy(&cifr, data, sizeof (cifr));
723 break;
724 }
725
726 case SIOCSCONNORDER: { /* struct so_cordreq */
727 struct so_cordreq cor;
728 bcopy(data, &cor, sizeof (cor));
729 error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank);
730 if (error == 0)
731 bcopy(&cor, data, sizeof (cor));
732 break;
733 }
734
735 case SIOCGCONNORDER: { /* struct so_cordreq */
736 struct so_cordreq cor;
737 bcopy(data, &cor, sizeof (cor));
738 error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank);
739 if (error == 0)
740 bcopy(&cor, data, sizeof (cor));
741 break;
742 }
743
744 default:
745 error = EOPNOTSUPP;
746 break;
747 }
748out:
749 return (error);
750}
751
752/*
753 * Initiate a disconnect. MPTCP-level disconnection is specified by
754 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
755 * connection while keeping the MPTCP-level connection (association).
756 */
757static int
3e170ce0 758mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
759{
760 struct mptsub *mpts;
761 struct socket *mp_so;
762 struct mptcb *mp_tp;
763 int error = 0;
764
765 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
766
767 mp_so = mpte->mpte_mppcb->mpp_socket;
768 mp_tp = mpte->mpte_mptcb;
769
3e170ce0
A
770 mptcplog((LOG_DEBUG, "MPTCP Socket: "
771 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
772 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error),
773 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e 774
3e170ce0
A
775 DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid,
776 sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
777
778 VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL ||
39236c6e
A
779 aid == mpte->mpte_associd);
780
781 /* terminate the association? */
3e170ce0 782 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
39236c6e
A
783 /* if we're not detached, go thru socket state checks */
784 if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
785 if (!(mp_so->so_state & (SS_ISCONNECTED|
786 SS_ISCONNECTING))) {
787 error = ENOTCONN;
788 goto out;
789 }
790 if (mp_so->so_state & SS_ISDISCONNECTING) {
791 error = EALREADY;
792 goto out;
793 }
794 }
795 MPT_LOCK(mp_tp);
796 mptcp_cancel_all_timers(mp_tp);
797 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
798 (void) mptcp_close(mpte, mp_tp);
799 MPT_UNLOCK(mp_tp);
800 } else if ((mp_so->so_options & SO_LINGER) &&
801 mp_so->so_linger == 0) {
802 (void) mptcp_drop(mpte, mp_tp, 0);
803 MPT_UNLOCK(mp_tp);
804 } else {
805 MPT_UNLOCK(mp_tp);
806 soisdisconnecting(mp_so);
807 sbflush(&mp_so->so_rcv);
808 if (mptcp_usrclosed(mpte) != NULL)
809 (void) mptcp_output(mpte);
810 }
811 } else {
3e170ce0
A
812 bool disconnect_embryonic_subflows = false;
813 struct socket *so = NULL;
814
39236c6e
A
815 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
816 if (mpts->mpts_connid != cid)
817 continue;
3e170ce0 818
39236c6e 819 MPTS_LOCK(mpts);
3e170ce0
A
820 /*
821 * Check if disconnected subflow is the one used
822 * to initiate MPTCP connection.
823 * If it is and the connection is not yet join ready
824 * disconnect all other subflows.
825 */
826 so = mpts->mpts_socket;
827 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) &&
828 so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
829 disconnect_embryonic_subflows = true;
830 }
831
fe8ab488 832 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
39236c6e
A
833 mptcp_subflow_disconnect(mpte, mpts, FALSE);
834 MPTS_UNLOCK(mpts);
835 break;
836 }
837
838 if (mpts == NULL) {
839 error = EINVAL;
840 goto out;
841 }
3e170ce0
A
842
843 if (disconnect_embryonic_subflows) {
844 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
845 if (mpts->mpts_connid == cid)
846 continue;
847 MPTS_LOCK(mpts);
848 mptcp_subflow_disconnect(mpte, mpts, TRUE);
849 MPTS_UNLOCK(mpts);
850 }
851 }
39236c6e
A
852 }
853
854 if (error == 0)
855 mptcp_thread_signal(mpte);
856
857 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
858 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
859 /* the socket has been shutdown, no more sockopt's */
860 mptcp_flush_sopts(mpte);
861 }
862
863out:
864 return (error);
865}
866
fe8ab488
A
867/*
868 * Wrapper function to support disconnect on socket
869 */
870static int
871mptcp_usr_disconnect(struct socket *mp_so)
872{
873 int error = 0;
874
3e170ce0 875 error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
fe8ab488
A
876 return (error);
877}
878
39236c6e
A
879/*
880 * User-protocol pru_disconnectx callback.
881 */
882static int
3e170ce0 883mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
884{
885 struct mppcb *mpp = sotomppcb(mp_so);
886 struct mptses *mpte;
887 int error = 0;
888
889 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
890 error = EINVAL;
891 goto out;
892 }
893 mpte = mptompte(mpp);
894 VERIFY(mpte != NULL);
895 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
896
3e170ce0 897 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
39236c6e
A
898 aid != mpte->mpte_associd) {
899 error = EINVAL;
900 goto out;
901 }
902
903 error = mptcp_disconnectx(mpte, aid, cid);
904out:
905 return (error);
906}
907
908/*
909 * User issued close, and wish to trail thru shutdown states.
910 */
911static struct mptses *
912mptcp_usrclosed(struct mptses *mpte)
913{
914 struct socket *mp_so;
915 struct mptcb *mp_tp;
916 struct mptsub *mpts;
917
918 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
919 mp_so = mpte->mpte_mppcb->mpp_socket;
920 mp_tp = mpte->mpte_mptcb;
921
922 MPT_LOCK(mp_tp);
923 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
924
fe8ab488 925 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
39236c6e
A
926 mpte = mptcp_close(mpte, mp_tp);
927 MPT_UNLOCK(mp_tp);
928 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
929 MPT_UNLOCK(mp_tp);
930 soisdisconnected(mp_so);
fe8ab488
A
931 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
932 MPTS_LOCK(mpts);
933 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
934 MPTS_UNLOCK(mpts);
935 }
39236c6e 936 } else {
39236c6e
A
937 MPT_UNLOCK(mp_tp);
938
939 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
940 MPTS_LOCK(mpts);
fe8ab488 941 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
39236c6e
A
942 mptcp_subflow_disconnect(mpte, mpts, FALSE);
943 MPTS_UNLOCK(mpts);
944 }
945 }
39236c6e
A
946
947 return (mpte);
948}
949
950/*
951 * User-protocol pru_peeloff callback.
952 */
953static int
3e170ce0 954mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop)
39236c6e
A
955{
956 struct mppcb *mpp = sotomppcb(mp_so);
957 struct mptses *mpte;
958 int error = 0;
959
960 VERIFY(psop != NULL);
961
962 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
963 error = EINVAL;
964 goto out;
965 }
966 mpte = mptompte(mpp);
967 VERIFY(mpte != NULL);
968
969 error = mptcp_peeloff(mpte, aid, psop);
970out:
971 return (error);
972}
973
974/*
975 * Transform a previously connected TCP subflow connection which has
976 * failed to negotiate MPTCP to its own socket which can be externalized
977 * with a file descriptor. Valid only when the MPTCP socket is not
978 * yet associated (MPTCP-level connection has not been established.)
979 */
980static int
3e170ce0 981mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop)
39236c6e
A
982{
983 struct socket *so = NULL, *mp_so;
984 struct mptsub *mpts;
985 int error = 0;
986
987 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
988 mp_so = mpte->mpte_mppcb->mpp_socket;
989
990 VERIFY(psop != NULL);
991 *psop = NULL;
992
3e170ce0 993 DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid,
39236c6e
A
994 struct socket *, mp_so);
995
996 /* peeloff cannot happen after an association is established */
3e170ce0 997 if (mpte->mpte_associd != SAE_ASSOCID_ANY) {
39236c6e
A
998 error = EINVAL;
999 goto out;
1000 }
1001
3e170ce0 1002 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
39236c6e
A
1003 error = EINVAL;
1004 goto out;
1005 }
1006
1007 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1008 MPTS_LOCK(mpts);
1009 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
1010 panic("%s: so %p is MPTCP capable but mp_so %p "
1011 "aid is %d\n", __func__, so, mp_so,
1012 mpte->mpte_associd);
1013 /* NOTREACHED */
1014 }
1015 MPTS_ADDREF_LOCKED(mpts); /* for us */
1016 so = mpts->mpts_socket;
1017 VERIFY(so != NULL);
1018 /*
1019 * This subflow socket is about to be externalized; make it
1020 * appear as if it has the same properties as the MPTCP socket,
1021 * undo what's done earlier in mptcp_subflow_add().
1022 */
1023 mptcp_subflow_sopeeloff(mpte, mpts, so);
1024 MPTS_UNLOCK(mpts);
1025
1026 mptcp_subflow_del(mpte, mpts, FALSE);
1027 MPTS_REMREF(mpts); /* ours */
1028 /*
1029 * XXX adi@apple.com
1030 *
1031 * Here we need to make sure the subflow socket is not
1032 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1033 * and INP_FLOW_SUSPENDED on the subflow socket, since
1034 * we will no longer be monitoring its events.
1035 */
1036 break;
1037 }
1038
1039 if (so == NULL) {
1040 error = EINVAL;
1041 goto out;
1042 }
1043 *psop = so;
1044
3e170ce0
A
1045 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1046 "%s: mp_so 0x%llx\n", __func__,
1047 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
1048 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1049
39236c6e
A
1050out:
1051 return (error);
1052}
1053
1054/*
1055 * After a receive, possible send some update to peer.
1056 */
1057static int
1058mptcp_usr_rcvd(struct socket *mp_so, int flags)
1059{
1060#pragma unused(flags)
1061 struct mppcb *mpp = sotomppcb(mp_so);
1062 struct mptses *mpte;
1063 int error = 0;
1064
1065 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1066 error = EINVAL;
1067 goto out;
1068 }
1069 mpte = mptompte(mpp);
1070 VERIFY(mpte != NULL);
1071
1072 error = mptcp_output(mpte);
1073out:
1074 return (error);
1075}
1076
1077/*
1078 * Do a send by putting data in the output queue.
1079 */
1080static int
1081mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1082 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1083{
1084#pragma unused(nam, p)
1085 struct mppcb *mpp = sotomppcb(mp_so);
1086 struct mptses *mpte;
1087 int error = 0;
1088
1089 if (prus_flags & (PRUS_OOB|PRUS_EOF)) {
1090 error = EOPNOTSUPP;
1091 goto out;
1092 }
1093
1094 if (nam != NULL) {
1095 error = EOPNOTSUPP;
1096 goto out;
1097 }
1098
1099 if (control != NULL && control->m_len != 0) {
1100 error = EOPNOTSUPP;
1101 goto out;
1102 }
1103
1104 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1105 error = ECONNRESET;
1106 goto out;
1107 }
1108 mpte = mptompte(mpp);
1109 VERIFY(mpte != NULL);
1110
1111 if (!(mp_so->so_state & SS_ISCONNECTED)) {
1112 error = ENOTCONN;
1113 goto out;
1114 }
1115
1116 mptcp_insert_dsn(mpp, m);
1117 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1118 (void) sbappendstream(&mp_so->so_snd, m);
1119 m = NULL;
1120
1121 if (mpte != NULL) {
1122 /*
1123 * XXX: adi@apple.com
1124 *
1125 * PRUS_MORETOCOME could be set, but we don't check it now.
1126 */
1127 error = mptcp_output(mpte);
1128 }
1129
1130out:
1131 if (error) {
1132 if (m != NULL)
1133 m_freem(m);
1134 if (control != NULL)
1135 m_freem(control);
1136 }
1137 return (error);
1138}
1139
1140/*
1141 * Mark the MPTCP connection as being incapable of further output.
1142 */
1143static int
1144mptcp_usr_shutdown(struct socket *mp_so)
1145{
1146 struct mppcb *mpp = sotomppcb(mp_so);
1147 struct mptses *mpte;
1148 int error = 0;
1149
1150 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1151 error = EINVAL;
1152 goto out;
1153 }
1154 mpte = mptompte(mpp);
1155 VERIFY(mpte != NULL);
1156
1157 socantsendmore(mp_so);
1158
1159 mpte = mptcp_usrclosed(mpte);
1160 if (mpte != NULL)
1161 error = mptcp_output(mpte);
1162out:
1163 return (error);
1164}
1165
1166/*
1167 * Copy the contents of uio into a properly sized mbuf chain.
1168 */
1169static int
1170mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
1171 struct mbuf **top)
1172{
1173 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1174 user_ssize_t resid, tot, len, progress; /* must be user_ssize_t */
1175 int error;
1176
1177 VERIFY(top != NULL && *top == NULL);
1178
1179 /*
1180 * space can be zero or an arbitrary large value bound by
1181 * the total data supplied by the uio.
1182 */
1183 resid = uio_resid(uio);
1184 if (space > 0)
1185 tot = imin(resid, space);
1186 else
1187 tot = resid;
1188
1189 /*
1190 * The smallest unit is a single mbuf with pkthdr.
1191 * We can't align past it.
1192 */
1193 if (align >= MHLEN)
1194 return (EINVAL);
1195
1196 /*
1197 * Give us the full allocation or nothing.
1198 * If space is zero return the smallest empty mbuf.
1199 */
1200 if ((len = tot + align) == 0)
1201 len = 1;
1202
1203 /* Loop and append maximum sized mbufs to the chain tail. */
1204 while (len > 0) {
1205 uint32_t m_needed = 1;
1206
1207 if (njcl > 0 && len > MBIGCLBYTES)
1208 mb = m_getpackets_internal(&m_needed, 1,
1209 how, 1, M16KCLBYTES);
1210 else if (len > MCLBYTES)
1211 mb = m_getpackets_internal(&m_needed, 1,
1212 how, 1, MBIGCLBYTES);
1213 else if (len >= (signed)MINCLSIZE)
1214 mb = m_getpackets_internal(&m_needed, 1,
1215 how, 1, MCLBYTES);
1216 else
1217 mb = m_gethdr(how, MT_DATA);
1218
1219 /* Fail the whole operation if one mbuf can't be allocated. */
1220 if (mb == NULL) {
1221 if (nm != NULL)
1222 m_freem(nm);
1223 return (ENOBUFS);
1224 }
1225
1226 /* Book keeping. */
1227 VERIFY(mb->m_flags & M_PKTHDR);
1228 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1229 if (mtail != NULL)
1230 mtail->m_next = mb;
1231 else
1232 nm = mb;
1233 mtail = mb;
1234 }
1235
1236 m = nm;
1237 m->m_data += align;
1238
1239 progress = 0;
1240 /* Fill all mbufs with uio data and update header information. */
1241 for (mb = m; mb != NULL; mb = mb->m_next) {
1242 len = imin(M_TRAILINGSPACE(mb), tot - progress);
1243
1244 error = uiomove(mtod(mb, char *), len, uio);
1245 if (error != 0) {
1246 m_freem(m);
1247 return (error);
1248 }
1249
1250 /* each mbuf is M_PKTHDR chained via m_next */
1251 mb->m_len = len;
1252 mb->m_pkthdr.len = len;
1253
1254 progress += len;
1255 }
1256 VERIFY(progress == tot);
1257 *top = m;
1258 return (0);
1259}
1260
1261/*
1262 * MPTCP socket protocol-user socket send routine, derived from sosend().
1263 */
1264static int
1265mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1266 struct mbuf *top, struct mbuf *control, int flags)
1267{
1268#pragma unused(addr)
1269 int32_t space;
1270 user_ssize_t resid;
1271 int error, sendflags;
1272 struct proc *p = current_proc();
1273 int sblocked = 0;
1274
1275 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1276 if (uio == NULL || top != NULL) {
1277 error = EINVAL;
1278 goto out;
1279 }
1280 resid = uio_resid(uio);
1281
1282 socket_lock(mp_so, 1);
1283 so_update_last_owner_locked(mp_so, p);
1284 so_update_policy(mp_so);
1285
1286 VERIFY(mp_so->so_type == SOCK_STREAM);
1287 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1288
1289 if ((flags & (MSG_OOB|MSG_DONTROUTE|MSG_HOLD|MSG_SEND|MSG_FLUSH)) ||
1290 (mp_so->so_flags & SOF_ENABLE_MSGS)) {
1291 error = EOPNOTSUPP;
1292 socket_unlock(mp_so, 1);
1293 goto out;
1294 }
1295
1296 /*
1297 * In theory resid should be unsigned. However, space must be
1298 * signed, as it might be less than 0 if we over-committed, and we
1299 * must use a signed comparison of space and resid. On the other
1300 * hand, a negative resid causes us to loop sending 0-length
1301 * segments to the protocol.
1302 */
1303 if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
1304 error = EINVAL;
1305 socket_unlock(mp_so, 1);
1306 goto out;
1307 }
1308
1309 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1310
1311 do {
1312 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1313 &sblocked, NULL);
1314 if (error != 0)
1315 goto release;
1316
1317 space = sbspace(&mp_so->so_snd);
1318 do {
1319 socket_unlock(mp_so, 0);
1320 /*
1321 * Copy the data from userland into an mbuf chain.
1322 */
1323 error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
1324 if (error != 0) {
1325 socket_lock(mp_so, 0);
1326 goto release;
1327 }
1328 VERIFY(top != NULL);
1329 space -= resid - uio_resid(uio);
1330 resid = uio_resid(uio);
1331 socket_lock(mp_so, 0);
1332
1333 /*
1334 * Compute flags here, for pru_send and NKEs.
1335 */
1336 sendflags = (resid > 0 && space > 0) ?
1337 PRUS_MORETOCOME : 0;
1338
1339 /*
1340 * Socket filter processing
1341 */
1342 VERIFY(control == NULL);
1343 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1344 if (error != 0) {
1345 if (error == EJUSTRETURN) {
1346 error = 0;
1347 top = NULL;
1348 /* always free control if any */
1349 }
1350 goto release;
1351 }
1352 if (control != NULL) {
1353 m_freem(control);
1354 control = NULL;
1355 }
1356
1357 /*
1358 * Pass data to protocol.
1359 */
1360 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1361 (mp_so, sendflags, top, NULL, NULL, p);
1362
1363 top = NULL;
1364 if (error != 0)
1365 goto release;
1366 } while (resid != 0 && space > 0);
1367 } while (resid != 0);
1368
1369release:
1370 if (sblocked)
1371 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1372 else
1373 socket_unlock(mp_so, 1);
1374out:
1375 if (top != NULL)
1376 m_freem(top);
1377 if (control != NULL)
1378 m_freem(control);
1379
1380 return (error);
1381}
1382
1383/*
1384 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1385 * This routine simply indicates to the caller whether or not to proceed
1386 * further with the given socket option. This is invoked by sosetoptlock()
1387 * and sogetoptlock().
1388 */
1389static int
1390mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1391{
1392#pragma unused(mp_so)
1393 int error = 0;
1394
1395 VERIFY(sopt->sopt_level == SOL_SOCKET);
1396
1397 /*
1398 * We could check for sopt_dir (set/get) here, but we'll just
1399 * let the caller deal with it as appropriate; therefore the
1400 * following is a superset of the socket options which we
1401 * allow for set/get.
1402 *
1403 * XXX: adi@apple.com
1404 *
1405 * Need to consider the following cases:
1406 *
1407 * a. In the event peeloff(2) occurs on the subflow socket,
1408 * we may want to issue those options which are now
1409 * handled at the MP socket. In that case, we will need
1410 * to record them in mptcp_setopt() so that they can
1411 * be replayed during peeloff.
1412 *
1413 * b. Certain socket options don't have a clear definition
1414 * on the expected behavior post connect(2). At the time
1415 * those options are issued on the MP socket, there may
1416 * be existing subflow sockets that are already connected.
1417 */
1418 switch (sopt->sopt_name) {
1419 case SO_LINGER: /* MP */
1420 case SO_LINGER_SEC: /* MP */
1421 case SO_TYPE: /* MP */
1422 case SO_NREAD: /* MP */
1423 case SO_NWRITE: /* MP */
1424 case SO_ERROR: /* MP */
1425 case SO_SNDBUF: /* MP */
1426 case SO_RCVBUF: /* MP */
1427 case SO_SNDLOWAT: /* MP */
1428 case SO_RCVLOWAT: /* MP */
1429 case SO_SNDTIMEO: /* MP */
1430 case SO_RCVTIMEO: /* MP */
1431 case SO_NKE: /* MP */
1432 case SO_NOSIGPIPE: /* MP */
1433 case SO_NOADDRERR: /* MP */
1434 case SO_LABEL: /* MP */
1435 case SO_PEERLABEL: /* MP */
1436 case SO_DEFUNCTOK: /* MP */
1437 case SO_ISDEFUNCT: /* MP */
1438 case SO_TRAFFIC_CLASS_DBG: /* MP */
1439 /*
1440 * Tell the caller that these options are to be processed.
1441 */
1442 break;
1443
1444 case SO_DEBUG: /* MP + subflow */
1445 case SO_KEEPALIVE: /* MP + subflow */
1446 case SO_USELOOPBACK: /* MP + subflow */
1447 case SO_RANDOMPORT: /* MP + subflow */
1448 case SO_TRAFFIC_CLASS: /* MP + subflow */
1449 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1450 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1451 case SO_RECV_ANYIF: /* MP + subflow */
1452 case SO_RESTRICTIONS: /* MP + subflow */
1453 case SO_FLUSH: /* MP + subflow */
fe8ab488
A
1454 case SO_MPTCP_FASTJOIN: /* MP + subflow */
1455 case SO_NOWAKEFROMSLEEP:
39236c6e
A
1456 /*
1457 * Tell the caller that these options are to be processed;
1458 * these will also be recorded later by mptcp_setopt().
1459 *
1460 * NOTE: Only support integer option value for now.
1461 */
1462 if (sopt->sopt_valsize != sizeof (int))
1463 error = EINVAL;
1464 break;
1465
1466 default:
1467 /*
1468 * Tell the caller to stop immediately and return an error.
1469 */
1470 error = ENOPROTOOPT;
1471 break;
1472 }
1473
1474 return (error);
1475}
1476
1477/*
1478 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1479 */
1480static int
1481mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1482{
1483 struct socket *mp_so;
1484 struct mptsub *mpts;
1485 struct mptopt smpo;
1486 int error = 0;
1487
1488 /* just bail now if this isn't applicable to subflow sockets */
1489 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1490 error = ENOPROTOOPT;
1491 goto out;
1492 }
1493
1494 /*
1495 * Skip those that are handled internally; these options
1496 * should not have been recorded and marked with the
1497 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1498 */
1499 if (mpo->mpo_level == SOL_SOCKET &&
1500 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1501 error = ENOPROTOOPT;
1502 goto out;
1503 }
1504
1505 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1506 mp_so = mpte->mpte_mppcb->mpp_socket;
1507
1508 /*
1509 * Don't bother going further if there's no subflow; mark the option
1510 * with MPOF_INTERIM so that we know whether or not to remove this
1511 * option upon encountering an error while issuing it during subflow
1512 * socket creation.
1513 */
1514 if (mpte->mpte_numflows == 0) {
1515 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1516 mpo->mpo_flags |= MPOF_INTERIM;
1517 /* return success */
1518 goto out;
1519 }
1520
1521 bzero(&smpo, sizeof (smpo));
1522 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1523 smpo.mpo_level = mpo->mpo_level;
1524 smpo.mpo_name = mpo->mpo_name;
1525
1526 /* grab exisiting values in case we need to rollback */
1527 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1528 struct socket *so;
1529
1530 MPTS_LOCK(mpts);
1531 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1532 mpts->mpts_oldintval = 0;
1533 smpo.mpo_intval = 0;
1534 VERIFY(mpts->mpts_socket != NULL);
1535 so = mpts->mpts_socket;
1536 socket_lock(so, 0);
1537 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1538 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1539 mpts->mpts_oldintval = smpo.mpo_intval;
1540 }
1541 socket_unlock(so, 0);
1542 MPTS_UNLOCK(mpts);
1543 }
1544
1545 /* apply socket option */
1546 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1547 struct socket *so;
1548
1549 MPTS_LOCK(mpts);
1550 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1551 VERIFY(mpts->mpts_socket != NULL);
1552 so = mpts->mpts_socket;
1553 socket_lock(so, 0);
1554 error = mptcp_subflow_sosetopt(mpte, so, mpo);
1555 socket_unlock(so, 0);
1556 MPTS_UNLOCK(mpts);
1557 if (error != 0)
1558 break;
1559 }
1560
1561 /* cleanup, and rollback if needed */
1562 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1563 struct socket *so;
1564
1565 MPTS_LOCK(mpts);
1566 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1567 /* clear in case it's set */
1568 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1569 mpts->mpts_oldintval = 0;
1570 MPTS_UNLOCK(mpts);
1571 continue;
1572 }
1573 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1574 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1575 VERIFY(mpts->mpts_oldintval == 0);
1576 MPTS_UNLOCK(mpts);
1577 continue;
1578 }
1579 /* error during sosetopt, so roll it back */
1580 if (error != 0) {
1581 VERIFY(mpts->mpts_socket != NULL);
1582 so = mpts->mpts_socket;
1583 socket_lock(so, 0);
1584 smpo.mpo_intval = mpts->mpts_oldintval;
1585 (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
1586 socket_unlock(so, 0);
1587 }
1588 mpts->mpts_oldintval = 0;
1589 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1590 MPTS_UNLOCK(mpts);
1591 }
1592
1593out:
1594 return (error);
1595}
1596
1597/*
1598 * Handle SOPT_SET for socket options issued on MP socket.
1599 */
1600static int
1601mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1602{
1603 int error = 0, optval, level, optname, rec = 1;
1604 struct mptopt smpo, *mpo = NULL;
1605 struct socket *mp_so;
1606 char buf[32];
1607
1608 level = sopt->sopt_level;
1609 optname = sopt->sopt_name;
1610
1611 VERIFY(sopt->sopt_dir == SOPT_SET);
1612 VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP);
1613 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1614 mp_so = mpte->mpte_mppcb->mpp_socket;
1615
1616 /*
1617 * Record socket options which are applicable to subflow sockets so
1618 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1619 * for the list of eligible socket-level options.
1620 */
1621 if (level == SOL_SOCKET) {
1622 switch (optname) {
1623 case SO_DEBUG:
1624 case SO_KEEPALIVE:
1625 case SO_USELOOPBACK:
1626 case SO_RANDOMPORT:
1627 case SO_TRAFFIC_CLASS:
1628 case SO_RECV_TRAFFIC_CLASS:
1629 case SO_PRIVILEGED_TRAFFIC_CLASS:
1630 case SO_RECV_ANYIF:
1631 case SO_RESTRICTIONS:
fe8ab488
A
1632 case SO_NOWAKEFROMSLEEP:
1633 case SO_MPTCP_FASTJOIN:
39236c6e
A
1634 /* record it */
1635 break;
1636 case SO_FLUSH:
1637 /* don't record it */
1638 rec = 0;
1639 break;
1640 default:
1641 /* nothing to do; just return success */
1642 goto out;
1643 }
1644 } else {
1645 switch (optname) {
1646 case TCP_NODELAY:
1647 case TCP_RXT_FINDROP:
1648 case TCP_KEEPALIVE:
1649 case TCP_KEEPINTVL:
1650 case TCP_KEEPCNT:
1651 case TCP_CONNECTIONTIMEOUT:
1652 case TCP_RXT_CONNDROPTIME:
1653 case PERSIST_TIMEOUT:
1654 /* eligible; record it */
1655 break;
fe8ab488
A
1656 case TCP_NOTSENT_LOWAT:
1657 /* record at MPTCP level */
1658 error = sooptcopyin(sopt, &optval, sizeof(optval),
1659 sizeof(optval));
1660 if (error)
1661 goto out;
1662 if (optval < 0) {
1663 error = EINVAL;
1664 goto out;
1665 } else {
1666 if (optval == 0) {
1667 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1668 error = mptcp_set_notsent_lowat(mpte,0);
1669 } else {
1670 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1671 error = mptcp_set_notsent_lowat(mpte,
1672 optval);
1673 }
1674 }
1675 goto out;
39236c6e
A
1676 default:
1677 /* not eligible */
1678 error = ENOPROTOOPT;
1679 goto out;
1680 }
1681 }
1682
1683 if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
1684 sizeof (optval))) != 0)
1685 goto out;
1686
1687 if (rec) {
1688 /* search for an existing one; if not found, allocate */
1689 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL)
1690 mpo = mptcp_sopt_alloc(M_WAITOK);
1691
1692 if (mpo == NULL) {
1693 error = ENOBUFS;
1694 } else {
3e170ce0
A
1695 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1696 "%s: mp_so 0x%llx sopt %s "
39236c6e
A
1697 "val %d %s\n", __func__,
1698 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1699 mptcp_sopt2str(level, optname, buf,
1700 sizeof (buf)), optval,
1701 (mpo->mpo_flags & MPOF_ATTACHED) ?
3e170ce0
A
1702 "updated" : "recorded"),
1703 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
1704
1705 /* initialize or update, as needed */
1706 mpo->mpo_intval = optval;
1707 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1708 mpo->mpo_level = level;
1709 mpo->mpo_name = optname;
1710 mptcp_sopt_insert(mpte, mpo);
1711 }
1712 VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
1713 /* this can be issued on the subflow socket */
1714 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1715 }
1716 } else {
1717 bzero(&smpo, sizeof (smpo));
1718 mpo = &smpo;
1719 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1720 mpo->mpo_level = level;
1721 mpo->mpo_name = optname;
1722 mpo->mpo_intval = optval;
1723 }
1724 VERIFY(mpo == NULL || error == 0);
1725
1726 /* issue this socket option on existing subflows */
1727 if (error == 0) {
1728 error = mptcp_setopt_apply(mpte, mpo);
1729 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1730 VERIFY(mpo != &smpo);
1731 mptcp_sopt_remove(mpte, mpo);
1732 mptcp_sopt_free(mpo);
1733 }
1734 if (mpo == &smpo)
1735 mpo->mpo_flags &= ~MPOF_INTERIM;
1736 }
1737out:
1738 if (error == 0 && mpo != NULL) {
3e170ce0
A
1739 mptcplog((LOG_ERR, "MPTCP Socket: "
1740 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
39236c6e
A
1741 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1742 mptcp_sopt2str(level, optname, buf,
1743 sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ?
3e170ce0
A
1744 "pending" : "successful"),
1745 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
39236c6e 1746 } else if (error != 0) {
3e170ce0
A
1747 mptcplog((LOG_ERR, "MPTCP Socket: "
1748 "%s: mp_so 0x%llx sopt %s can't be issued "
39236c6e
A
1749 "error %d\n", __func__,
1750 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level,
3e170ce0
A
1751 optname, buf, sizeof (buf)), error),
1752 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
39236c6e
A
1753 }
1754 return (error);
1755}
1756
1757/*
1758 * Handle SOPT_GET for socket options issued on MP socket.
1759 */
1760static int
1761mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
1762{
1763 int error = 0, optval;
1764
1765 VERIFY(sopt->sopt_dir == SOPT_GET);
1766 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1767
1768 /*
1769 * We only handle SOPT_GET for TCP level socket options; we should
1770 * not get here for socket level options since they are already
1771 * handled at the socket layer.
1772 */
1773 if (sopt->sopt_level != IPPROTO_TCP) {
1774 error = ENOPROTOOPT;
1775 goto out;
1776 }
1777
1778 switch (sopt->sopt_name) {
1779 case TCP_NODELAY:
1780 case TCP_RXT_FINDROP:
1781 case TCP_KEEPALIVE:
1782 case TCP_KEEPINTVL:
1783 case TCP_KEEPCNT:
1784 case TCP_CONNECTIONTIMEOUT:
1785 case TCP_RXT_CONNDROPTIME:
1786 case PERSIST_TIMEOUT:
fe8ab488 1787 case TCP_NOTSENT_LOWAT:
39236c6e
A
1788 /* eligible; get the default value just in case */
1789 error = mptcp_default_tcp_optval(mpte, sopt, &optval);
1790 break;
1791 default:
1792 /* not eligible */
1793 error = ENOPROTOOPT;
1794 break;
1795 }
1796
fe8ab488
A
1797 switch (sopt->sopt_name) {
1798 case TCP_NOTSENT_LOWAT:
1799 if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT)
1800 optval = mptcp_get_notsent_lowat(mpte);
1801 else
1802 optval = 0;
1803 goto out;
1804 }
1805
39236c6e
A
1806 /*
1807 * Search for a previously-issued TCP level socket option and
1808 * return the recorded option value. This assumes that the
1809 * value did not get modified by the lower layer after it was
1810 * issued at setsockopt(2) time. If not found, we'll return
1811 * the default value obtained ealier.
1812 */
1813 if (error == 0) {
1814 struct mptopt *mpo;
1815
1816 if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL)
1817 optval = mpo->mpo_intval;
1818
1819 error = sooptcopyout(sopt, &optval, sizeof (int));
1820 }
1821out:
1822 return (error);
1823}
1824
1825/*
1826 * Return default values for TCP socket options. Ideally we would query the
1827 * subflow TCP socket, but that requires creating a subflow socket before
1828 * connectx(2) time. To simplify things, just return the default values
1829 * that we know of.
1830 */
1831static int
1832mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
1833{
1834 int error = 0;
1835
1836 VERIFY(sopt->sopt_level == IPPROTO_TCP);
1837 VERIFY(sopt->sopt_dir == SOPT_GET);
1838 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1839
1840 /* try to do what tcp_newtcpcb() does */
1841 switch (sopt->sopt_name) {
1842 case TCP_NODELAY:
1843 case TCP_RXT_FINDROP:
1844 case TCP_KEEPINTVL:
1845 case TCP_KEEPCNT:
1846 case TCP_CONNECTIONTIMEOUT:
1847 case TCP_RXT_CONNDROPTIME:
fe8ab488 1848 case TCP_NOTSENT_LOWAT:
39236c6e
A
1849 *optval = 0;
1850 break;
1851
1852 case TCP_KEEPALIVE:
1853 *optval = mptcp_subflow_keeptime;
1854 break;
1855
1856 case PERSIST_TIMEOUT:
1857 *optval = tcp_max_persist_timeout;
1858 break;
1859
1860 default:
1861 error = ENOPROTOOPT;
1862 break;
1863 }
1864 return (error);
1865}
1866
1867/*
1868 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1869 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1870 * to those that are allowed by mptcp_usr_socheckopt().
1871 */
1872int
1873mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
1874{
1875 struct mppcb *mpp = sotomppcb(mp_so);
1876 struct mptses *mpte;
1877 int error = 0;
1878
1879 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1880 error = EINVAL;
1881 goto out;
1882 }
1883 mpte = mptompte(mpp);
1884 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1885
1886 /* we only handle socket and TCP-level socket options for MPTCP */
1887 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
1888 char buf[32];
3e170ce0
A
1889 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1890 "%s: mp_so 0x%llx sopt %s level not "
39236c6e
A
1891 "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1892 mptcp_sopt2str(sopt->sopt_level,
3e170ce0
A
1893 sopt->sopt_name, buf, sizeof (buf))),
1894 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
39236c6e
A
1895 error = EINVAL;
1896 goto out;
1897 }
1898
1899 switch (sopt->sopt_dir) {
1900 case SOPT_SET:
1901 error = mptcp_setopt(mpte, sopt);
1902 break;
1903
1904 case SOPT_GET:
1905 error = mptcp_getopt(mpte, sopt);
1906 break;
1907 }
1908out:
1909 return (error);
1910}
1911
1912/*
1913 * Return a string representation of <sopt_level,sopt_name>
1914 */
1915const char *
1916mptcp_sopt2str(int level, int optname, char *dst, int size)
1917{
1918 char lbuf[32], obuf[32];
1919 const char *l = lbuf, *o = obuf;
1920
1921 (void) snprintf(lbuf, sizeof (lbuf), "0x%x", level);
1922 (void) snprintf(obuf, sizeof (obuf), "0x%x", optname);
1923
1924 switch (level) {
1925 case SOL_SOCKET:
1926 l = "SOL_SOCKET";
1927 switch (optname) {
1928 case SO_LINGER:
1929 o = "SO_LINGER";
1930 break;
1931 case SO_LINGER_SEC:
1932 o = "SO_LINGER_SEC";
1933 break;
1934 case SO_DEBUG:
1935 o = "SO_DEBUG";
1936 break;
1937 case SO_KEEPALIVE:
1938 o = "SO_KEEPALIVE";
1939 break;
1940 case SO_USELOOPBACK:
1941 o = "SO_USELOOPBACK";
1942 break;
1943 case SO_TYPE:
1944 o = "SO_TYPE";
1945 break;
1946 case SO_NREAD:
1947 o = "SO_NREAD";
1948 break;
1949 case SO_NWRITE:
1950 o = "SO_NWRITE";
1951 break;
1952 case SO_ERROR:
1953 o = "SO_ERROR";
1954 break;
1955 case SO_SNDBUF:
1956 o = "SO_SNDBUF";
1957 break;
1958 case SO_RCVBUF:
1959 o = "SO_RCVBUF";
1960 break;
1961 case SO_SNDLOWAT:
1962 o = "SO_SNDLOWAT";
1963 break;
1964 case SO_RCVLOWAT:
1965 o = "SO_RCVLOWAT";
1966 break;
1967 case SO_SNDTIMEO:
1968 o = "SO_SNDTIMEO";
1969 break;
1970 case SO_RCVTIMEO:
1971 o = "SO_RCVTIMEO";
1972 break;
1973 case SO_NKE:
1974 o = "SO_NKE";
1975 break;
1976 case SO_NOSIGPIPE:
1977 o = "SO_NOSIGPIPE";
1978 break;
1979 case SO_NOADDRERR:
1980 o = "SO_NOADDRERR";
1981 break;
1982 case SO_RESTRICTIONS:
1983 o = "SO_RESTRICTIONS";
1984 break;
1985 case SO_LABEL:
1986 o = "SO_LABEL";
1987 break;
1988 case SO_PEERLABEL:
1989 o = "SO_PEERLABEL";
1990 break;
1991 case SO_RANDOMPORT:
1992 o = "SO_RANDOMPORT";
1993 break;
1994 case SO_TRAFFIC_CLASS:
1995 o = "SO_TRAFFIC_CLASS";
1996 break;
1997 case SO_RECV_TRAFFIC_CLASS:
1998 o = "SO_RECV_TRAFFIC_CLASS";
1999 break;
2000 case SO_TRAFFIC_CLASS_DBG:
2001 o = "SO_TRAFFIC_CLASS_DBG";
2002 break;
2003 case SO_PRIVILEGED_TRAFFIC_CLASS:
2004 o = "SO_PRIVILEGED_TRAFFIC_CLASS";
2005 break;
2006 case SO_DEFUNCTOK:
2007 o = "SO_DEFUNCTOK";
2008 break;
2009 case SO_ISDEFUNCT:
2010 o = "SO_ISDEFUNCT";
2011 break;
2012 case SO_OPPORTUNISTIC:
2013 o = "SO_OPPORTUNISTIC";
2014 break;
2015 case SO_FLUSH:
2016 o = "SO_FLUSH";
2017 break;
2018 case SO_RECV_ANYIF:
2019 o = "SO_RECV_ANYIF";
2020 break;
fe8ab488
A
2021 case SO_NOWAKEFROMSLEEP:
2022 o = "SO_NOWAKEFROMSLEEP";
2023 break;
2024 case SO_MPTCP_FASTJOIN:
2025 o = "SO_MPTCP_FASTJOIN";
2026 break;
39236c6e
A
2027 }
2028 break;
2029 case IPPROTO_TCP:
2030 l = "IPPROTO_TCP";
2031 switch (optname) {
2032 case TCP_KEEPALIVE:
2033 o = "TCP_KEEPALIVE";
2034 break;
2035 case TCP_KEEPINTVL:
2036 o = "TCP_KEEPINTVL";
2037 break;
2038 case TCP_KEEPCNT:
2039 o = "TCP_KEEPCNT";
2040 break;
2041 case TCP_CONNECTIONTIMEOUT:
2042 o = "TCP_CONNECTIONTIMEOUT";
2043 break;
2044 case TCP_RXT_CONNDROPTIME:
2045 o = "TCP_RXT_CONNDROPTIME";
2046 break;
2047 case PERSIST_TIMEOUT:
2048 o = "PERSIST_TIMEOUT";
2049 break;
2050 }
2051 break;
2052 }
2053
2054 (void) snprintf(dst, size, "<%s,%s>", l, o);
2055 return (dst);
2056}