]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/mptcp_usrreq.c
xnu-3789.21.4.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp_usrreq.c
1 /*
2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40
41 #include <net/if.h>
42 #include <netinet/in.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/tcp_seq.h>
47 #include <netinet/tcp_var.h>
48 #include <netinet/tcp_timer.h>
49 #include <netinet/mptcp_var.h>
50 #include <netinet/mptcp_timer.h>
51
52 #include <mach/sdt.h>
53
54 static int mptcp_usr_attach(struct socket *, int, struct proc *);
55 static int mptcp_usr_detach(struct socket *);
56 static int mptcp_attach(struct socket *, struct proc *);
57 static int mptcp_detach(struct socket *, struct mppcb *);
58 static int mptcp_connectx(struct mptses *, struct sockaddr_list **,
59 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
60 sae_connid_t *, uint32_t, void *, uint32_t);
61 static int mptcp_usr_connectx(struct socket *, struct sockaddr_list **,
62 struct sockaddr_list **, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
64 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
65 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
66 user_addr_t);
67 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
71 struct proc *);
72 static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t);
73 static int mptcp_usr_disconnect(struct socket *);
74 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
75 static struct mptses *mptcp_usrclosed(struct mptses *);
76 static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **);
77 static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **);
78 static int mptcp_usr_rcvd(struct socket *, int);
79 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
80 struct sockaddr *, struct mbuf *, struct proc *);
81 static int mptcp_usr_shutdown(struct socket *);
82 static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **);
83 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
84 struct mbuf *, struct mbuf *, int);
85 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
86 static int mptcp_setopt_apply(struct mptses *, struct mptopt *);
87 static int mptcp_setopt(struct mptses *, struct sockopt *);
88 static int mptcp_getopt(struct mptses *, struct sockopt *);
89 static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *);
90 static void mptcp_connorder_helper(struct mptsub *mpts);
91 static int mptcp_usr_preconnect(struct socket *so);
92
93 struct pr_usrreqs mptcp_usrreqs = {
94 .pru_attach = mptcp_usr_attach,
95 .pru_connectx = mptcp_usr_connectx,
96 .pru_control = mptcp_usr_control,
97 .pru_detach = mptcp_usr_detach,
98 .pru_disconnect = mptcp_usr_disconnect,
99 .pru_disconnectx = mptcp_usr_disconnectx,
100 .pru_peeloff = mptcp_usr_peeloff,
101 .pru_rcvd = mptcp_usr_rcvd,
102 .pru_send = mptcp_usr_send,
103 .pru_shutdown = mptcp_usr_shutdown,
104 .pru_sosend = mptcp_usr_sosend,
105 .pru_soreceive = soreceive,
106 .pru_socheckopt = mptcp_usr_socheckopt,
107 .pru_preconnect = mptcp_usr_preconnect,
108 };
109
110 /*
111 * Attaches an MPTCP control block to a socket.
112 */
113 static int
114 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
115 {
116 #pragma unused(proto)
117 int error;
118
119 VERIFY(sotomppcb(mp_so) == NULL);
120
121 error = mptcp_attach(mp_so, p);
122 if (error != 0)
123 goto out;
124 /*
125 * XXX: adi@apple.com
126 *
127 * Might want to use a different SO_LINGER timeout than TCP's?
128 */
129 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0)
130 mp_so->so_linger = TCP_LINGERTIME * hz;
131 out:
132 return (error);
133 }
134
135 /*
136 * Detaches an MPTCP control block from a socket.
137 */
138 static int
139 mptcp_usr_detach(struct socket *mp_so)
140 {
141 struct mppcb *mpp = sotomppcb(mp_so);
142 int error = 0;
143
144 VERIFY(mpp != NULL);
145 VERIFY(mpp->mpp_socket != NULL);
146
147 error = mptcp_detach(mp_so, mpp);
148 return (error);
149 }
150
151 /*
152 * Attach MPTCP protocol to socket, allocating MP control block,
153 * MPTCP session, control block, buffer space, etc.
154 */
155 static int
156 mptcp_attach(struct socket *mp_so, struct proc *p)
157 {
158 #pragma unused(p)
159 struct mptses *mpte = NULL;
160 struct mptcb *mp_tp = NULL;
161 struct mppcb *mpp = NULL;
162 int error = 0;
163
164 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
165 error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX);
166 if (error != 0)
167 goto out;
168 }
169
170 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
171 soreserve_preconnect(mp_so, 2048);
172 }
173
174 /*
175 * MPTCP socket buffers cannot be compressed, due to the
176 * fact that each mbuf chained via m_next is a M_PKTHDR
177 * which carries some MPTCP metadata.
178 */
179 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
180 mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS;
181
182 /* Disable socket buffer auto-tuning. */
183 mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
184 mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE;
185
186 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
187 goto out;
188 }
189
190 mpp = sotomppcb(mp_so);
191 VERIFY(mpp != NULL);
192 mpte = (struct mptses *)mpp->mpp_pcbe;
193 VERIFY(mpte != NULL);
194 mp_tp = mpte->mpte_mptcb;
195 VERIFY(mp_tp != NULL);
196 out:
197 return (error);
198 }
199
200 /*
201 * Called when the socket layer loses its final reference to the socket;
202 * at this point, there is only one case in which we will keep things
203 * around: time wait.
204 */
205 static int
206 mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
207 {
208 struct mptses *mpte;
209 struct mppcbinfo *mppi;
210
211 VERIFY(mp_so->so_pcb == mpp);
212 VERIFY(mpp->mpp_socket == mp_so);
213
214 mppi = mpp->mpp_pcbinfo;
215 VERIFY(mppi != NULL);
216
217 __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
218 VERIFY(mpte->mpte_mppcb == mpp);
219
220 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
221
222 /*
223 * We are done with this MPTCP socket (it has been closed);
224 * trigger all subflows to be disconnected, if not already,
225 * by initiating the PCB detach sequence (SOF_PCBCLEARING
226 * will be set.)
227 */
228 mp_pcbdetach(mpp);
229
230 (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
231
232 /*
233 * XXX: adi@apple.com
234 *
235 * Here, we would want to handle time wait state.
236 */
237
238 return (0);
239 }
240
241 /*
242 * Common subroutine to open a MPTCP connection to one of the remote hosts
243 * specified by dst_sl. This includes allocating and establishing a
244 * subflow TCP connection, either initially to establish MPTCP connection,
245 * or to join an existing one. Returns a connection handle upon success.
246 */
247 static int
248 mptcp_connectx(struct mptses *mpte, struct sockaddr_list **src_sl,
249 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
250 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
251 uint32_t arglen)
252 {
253 #pragma unused(p, aid, flags, arg, arglen)
254 struct mptsub *mpts;
255 struct socket *mp_so;
256 int error = 0;
257
258 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
259 mp_so = mpte->mpte_mppcb->mpp_socket;
260
261 VERIFY(dst_sl != NULL && *dst_sl != NULL);
262 VERIFY(pcid != NULL);
263
264 mptcplog((LOG_DEBUG, "MPTCP Socket: "
265 "%s: mp_so 0x%llx\n", __func__,
266 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
267 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
268
269 DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid,
270 struct socket *, mp_so);
271
272 mpts = mptcp_subflow_alloc(M_WAITOK);
273 if (mpts == NULL) {
274 error = ENOBUFS;
275 goto out;
276 }
277 MPTS_ADDREF(mpts); /* for this routine */
278
279 if (src_sl != NULL) {
280 mpts->mpts_src_sl = *src_sl;
281 *src_sl = NULL;
282 }
283 mpts->mpts_dst_sl = *dst_sl;
284 *dst_sl = NULL;
285
286 error = mptcp_subflow_add(mpte, mpts, p, ifscope);
287 if (error == 0 && pcid != NULL)
288 *pcid = mpts->mpts_connid;
289
290 out:
291 if (mpts != NULL) {
292 if ((error != 0) && (error != EWOULDBLOCK)) {
293 MPTS_LOCK(mpts);
294 if (mpts->mpts_flags & MPTSF_ATTACHED) {
295 MPTS_UNLOCK(mpts);
296 MPTS_REMREF(mpts);
297 mptcp_subflow_del(mpte, mpts, TRUE);
298 return (error);
299 }
300 MPTS_UNLOCK(mpts);
301 }
302 MPTS_REMREF(mpts);
303 }
304
305 return (error);
306 }
307
308 /*
309 * User-protocol pru_connectx callback.
310 */
311 static int
312 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr_list **src_sl,
313 struct sockaddr_list **dst_sl, struct proc *p, uint32_t ifscope,
314 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
315 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
316 {
317 struct mppcb *mpp = sotomppcb(mp_so);
318 struct mptses *mpte = NULL;
319 struct mptcb *mp_tp = NULL;
320 user_ssize_t datalen;
321
322 int error = 0;
323
324 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
325 error = EINVAL;
326 goto out;
327 }
328 mpte = mptompte(mpp);
329 VERIFY(mpte != NULL);
330
331 mp_tp = mpte->mpte_mptcb;
332 VERIFY(mp_tp != NULL);
333
334 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
335 error = EINVAL;
336 goto out;
337 }
338
339 error = mptcp_connectx(mpte, src_sl, dst_sl, p, ifscope,
340 aid, pcid, flags, arg, arglen);
341
342 /* If there is data, copy it */
343 if (auio != NULL) {
344 datalen = uio_resid(auio);
345 socket_unlock(mp_so, 0);
346 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
347 (uio_t) auio, NULL, NULL, 0);
348 /* check if this can be supported with fast Join also. XXX */
349 if (error == 0 || error == EWOULDBLOCK)
350 *bytes_written = datalen - uio_resid(auio);
351
352 if (error == EWOULDBLOCK)
353 error = EINPROGRESS;
354
355 socket_lock(mp_so, 0);
356 MPT_LOCK(mp_tp);
357 if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) {
358 *bytes_written = datalen - uio_resid(auio);
359 /*
360 * Override errors like EPIPE that occur as
361 * a result of doing TFO during TCP fallback.
362 */
363 error = EPROTO;
364 }
365 MPT_UNLOCK(mp_tp);
366 }
367
368 out:
369 return (error);
370 }
371
372 /*
373 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
374 */
375 static int
376 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
377 {
378 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
379
380 /* MPTCP has at most 1 association */
381 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
382
383 /* just asking how many there are? */
384 if (aidp == USER_ADDR_NULL)
385 return (0);
386
387 return (copyout(&mpte->mpte_associd, aidp,
388 sizeof (mpte->mpte_associd)));
389 }
390
391 /*
392 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
393 */
394 static int
395 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
396 user_addr_t cidp)
397 {
398 struct mptsub *mpts;
399 int error = 0;
400
401 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
402
403 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
404 aid != mpte->mpte_associd)
405 return (EINVAL);
406
407 *cnt = mpte->mpte_numflows;
408
409 /* just asking how many there are? */
410 if (cidp == USER_ADDR_NULL)
411 return (0);
412
413 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
414 if ((error = copyout(&mpts->mpts_connid, cidp,
415 sizeof (mpts->mpts_connid))) != 0)
416 break;
417
418 cidp += sizeof (mpts->mpts_connid);
419 }
420
421 return (error);
422 }
423
424 /*
425 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
426 */
427 static int
428 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
429 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
430 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
431 user_addr_t aux_data, uint32_t *aux_len)
432 {
433 #pragma unused(aux_data)
434 struct sockaddr_entry *se;
435 struct ifnet *ifp = NULL;
436 struct mptsub *mpts;
437 int error = 0;
438
439 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
440
441 if (*cid == SAE_CONNID_ALL)
442 return (EINVAL);
443
444 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
445 if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY)
446 break;
447 }
448 if (mpts == NULL)
449 return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL);
450
451 MPTS_LOCK(mpts);
452 ifp = mpts->mpts_outif;
453 *cid = mpts->mpts_connid;
454 *ifindex = ((ifp != NULL) ? ifp->if_index : 0);
455 *soerror = mpts->mpts_soerror;
456 *flags = 0;
457 if (mpts->mpts_flags & MPTSF_CONNECTING)
458 *flags |= CIF_CONNECTING;
459 if (mpts->mpts_flags & MPTSF_CONNECTED)
460 *flags |= CIF_CONNECTED;
461 if (mpts->mpts_flags & MPTSF_DISCONNECTING)
462 *flags |= CIF_DISCONNECTING;
463 if (mpts->mpts_flags & MPTSF_DISCONNECTED)
464 *flags |= CIF_DISCONNECTED;
465 if (mpts->mpts_flags & MPTSF_BOUND_IF)
466 *flags |= CIF_BOUND_IF;
467 if (mpts->mpts_flags & MPTSF_BOUND_IP)
468 *flags |= CIF_BOUND_IP;
469 if (mpts->mpts_flags & MPTSF_BOUND_PORT)
470 *flags |= CIF_BOUND_PORT;
471 if (mpts->mpts_flags & MPTSF_PREFERRED)
472 *flags |= CIF_PREFERRED;
473 if (mpts->mpts_flags & MPTSF_MP_CAPABLE)
474 *flags |= CIF_MP_CAPABLE;
475 if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
476 *flags |= CIF_MP_DEGRADED;
477 if (mpts->mpts_flags & MPTSF_MP_READY)
478 *flags |= CIF_MP_READY;
479 if (mpts->mpts_flags & MPTSF_ACTIVE)
480 *flags |= CIF_MP_ACTIVE;
481
482 VERIFY(mpts->mpts_src_sl != NULL);
483 se = TAILQ_FIRST(&mpts->mpts_src_sl->sl_head);
484 VERIFY(se != NULL && se->se_addr != NULL);
485 *src_len = se->se_addr->sa_len;
486 if (src != USER_ADDR_NULL) {
487 error = copyout(se->se_addr, src, se->se_addr->sa_len);
488 if (error != 0)
489 goto out;
490 }
491
492 VERIFY(mpts->mpts_dst_sl != NULL);
493 se = TAILQ_FIRST(&mpts->mpts_dst_sl->sl_head);
494 VERIFY(se != NULL && se->se_addr != NULL);
495 *dst_len = se->se_addr->sa_len;
496 if (dst != USER_ADDR_NULL) {
497 error = copyout(se->se_addr, dst, se->se_addr->sa_len);
498 if (error != 0)
499 goto out;
500 }
501
502 *aux_type = 0;
503 *aux_len = 0;
504 if (mpts->mpts_socket != NULL) {
505 struct conninfo_tcp tcp_ci;
506
507 *aux_type = CIAUX_TCP;
508 *aux_len = sizeof (tcp_ci);
509
510 if (aux_data != USER_ADDR_NULL) {
511 struct socket *so = mpts->mpts_socket;
512
513 VERIFY(SOCK_PROTO(so) == IPPROTO_TCP);
514 bzero(&tcp_ci, sizeof (tcp_ci));
515 socket_lock(so, 0);
516 tcp_getconninfo(so, &tcp_ci);
517 socket_unlock(so, 0);
518 error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci));
519 if (error != 0)
520 goto out;
521 }
522 }
523 mptcplog((LOG_DEBUG, "MPTCP Socket: "
524 "%s: cid %d flags %x \n",
525 __func__, mpts->mpts_connid, mpts->mpts_flags),
526 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
527
528 out:
529 MPTS_UNLOCK(mpts);
530 return (error);
531 }
532
533 /*
534 * Handle SIOCSCONNORDER
535 */
536 int
537 mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank)
538 {
539 struct mptsub *mpts, *mpts1;
540 int error = 0;
541
542 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
543 mptcplog((LOG_DEBUG, "MPTCP Socket: "
544 "%s: cid %d rank %d \n", __func__, cid, rank),
545 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
546
547 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
548 error = EINVAL;
549 goto out;
550 }
551
552 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
553 if (mpts->mpts_connid == cid)
554 break;
555 }
556 if (mpts == NULL) {
557 error = ENXIO;
558 goto out;
559 }
560
561 if (rank == 0 || rank > 1) {
562 /*
563 * If rank is 0, determine whether this should be the
564 * primary or backup subflow, depending on what we have.
565 *
566 * Otherwise, if greater than 0, make it a backup flow.
567 */
568 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
569 MPTS_LOCK(mpts1);
570 if (mpts1->mpts_flags & MPTSF_PREFERRED) {
571 MPTS_UNLOCK(mpts1);
572 break;
573 }
574 MPTS_UNLOCK(mpts1);
575 }
576
577 MPTS_LOCK(mpts);
578 mpts->mpts_flags &= ~MPTSF_PREFERRED;
579 mpts->mpts_rank = rank;
580 if (mpts1 != NULL && mpts != mpts1) {
581 /* preferred subflow found; set rank as necessary */
582 if (rank == 0)
583 mpts->mpts_rank = (mpts1->mpts_rank + 1);
584 } else if (rank == 0) {
585 /* no preferred one found; promote this */
586 rank = 1;
587 }
588 MPTS_UNLOCK(mpts);
589 }
590
591 if (rank == 1) {
592 /*
593 * If rank is 1, promote this subflow to be preferred.
594 */
595 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
596 MPTS_LOCK(mpts1);
597 if (mpts1 != mpts &&
598 (mpts1->mpts_flags & MPTSF_PREFERRED)) {
599 mpts1->mpts_flags &= ~MPTSF_PREFERRED;
600 if (mpte->mpte_nummpcapflows > 1)
601 mptcp_connorder_helper(mpts1);
602 } else if (mpts1 == mpts) {
603 mpts1->mpts_rank = 1;
604 if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) {
605 mpts1->mpts_flags |= MPTSF_PREFERRED;
606 if (mpte->mpte_nummpcapflows > 1)
607 mptcp_connorder_helper(mpts1);
608 }
609 }
610 MPTS_UNLOCK(mpts1);
611 }
612 }
613
614 out:
615 return (error);
616 }
617
618 static void
619 mptcp_connorder_helper(struct mptsub *mpts)
620 {
621 struct socket *so = mpts->mpts_socket;
622 struct tcpcb *tp = NULL;
623
624 socket_lock(so, 0);
625
626 tp = intotcpcb(sotoinpcb(so));
627 tp->t_mpflags |= TMPF_SND_MPPRIO;
628 if (mpts->mpts_flags & MPTSF_PREFERRED)
629 tp->t_mpflags &= ~TMPF_BACKUP_PATH;
630 else
631 tp->t_mpflags |= TMPF_BACKUP_PATH;
632
633 socket_unlock(so, 0);
634
635 }
636
637 /*
638 * Handle SIOCSGONNORDER
639 */
640 int
641 mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank)
642 {
643 struct mptsub *mpts;
644 int error = 0;
645
646 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
647 VERIFY(rank != NULL);
648 *rank = 0;
649
650 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
651 error = EINVAL;
652 goto out;
653 }
654
655 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
656 if (mpts->mpts_connid == cid)
657 break;
658 }
659 if (mpts == NULL) {
660 error = ENXIO;
661 goto out;
662 }
663
664 MPTS_LOCK(mpts);
665 *rank = mpts->mpts_rank;
666 MPTS_UNLOCK(mpts);
667 out:
668 return (error);
669 }
670
671 /*
672 * User-protocol pru_control callback.
673 */
674 static int
675 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
676 struct ifnet *ifp, struct proc *p)
677 {
678 #pragma unused(ifp, p)
679 struct mppcb *mpp = sotomppcb(mp_so);
680 struct mptses *mpte;
681 int error = 0;
682
683 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
684 error = EINVAL;
685 goto out;
686 }
687 mpte = mptompte(mpp);
688 VERIFY(mpte != NULL);
689
690 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
691
692 switch (cmd) {
693 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
694 struct so_aidreq32 aidr;
695 bcopy(data, &aidr, sizeof (aidr));
696 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
697 aidr.sar_aidp);
698 if (error == 0)
699 bcopy(&aidr, data, sizeof (aidr));
700 break;
701 }
702
703 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
704 struct so_aidreq64 aidr;
705 bcopy(data, &aidr, sizeof (aidr));
706 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
707 aidr.sar_aidp);
708 if (error == 0)
709 bcopy(&aidr, data, sizeof (aidr));
710 break;
711 }
712
713 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
714 struct so_cidreq32 cidr;
715 bcopy(data, &cidr, sizeof (cidr));
716 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
717 cidr.scr_cidp);
718 if (error == 0)
719 bcopy(&cidr, data, sizeof (cidr));
720 break;
721 }
722
723 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
724 struct so_cidreq64 cidr;
725 bcopy(data, &cidr, sizeof (cidr));
726 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
727 cidr.scr_cidp);
728 if (error == 0)
729 bcopy(&cidr, data, sizeof (cidr));
730 break;
731 }
732
733 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
734 struct so_cinforeq32 cifr;
735 bcopy(data, &cifr, sizeof (cifr));
736 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
737 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
738 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
739 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
740 &cifr.scir_aux_len);
741 if (error == 0)
742 bcopy(&cifr, data, sizeof (cifr));
743 break;
744 }
745
746 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
747 struct so_cinforeq64 cifr;
748 bcopy(data, &cifr, sizeof (cifr));
749 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
750 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
751 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
752 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
753 &cifr.scir_aux_len);
754 if (error == 0)
755 bcopy(&cifr, data, sizeof (cifr));
756 break;
757 }
758
759 case SIOCSCONNORDER: { /* struct so_cordreq */
760 struct so_cordreq cor;
761 bcopy(data, &cor, sizeof (cor));
762 error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank);
763 if (error == 0)
764 bcopy(&cor, data, sizeof (cor));
765 break;
766 }
767
768 case SIOCGCONNORDER: { /* struct so_cordreq */
769 struct so_cordreq cor;
770 bcopy(data, &cor, sizeof (cor));
771 error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank);
772 if (error == 0)
773 bcopy(&cor, data, sizeof (cor));
774 break;
775 }
776
777 default:
778 error = EOPNOTSUPP;
779 break;
780 }
781 out:
782 return (error);
783 }
784
785 /*
786 * Initiate a disconnect. MPTCP-level disconnection is specified by
787 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
788 * connection while keeping the MPTCP-level connection (association).
789 */
790 static int
791 mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid)
792 {
793 struct mptsub *mpts;
794 struct socket *mp_so;
795 struct mptcb *mp_tp;
796 int error = 0;
797
798 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
799
800 mp_so = mpte->mpte_mppcb->mpp_socket;
801 mp_tp = mpte->mpte_mptcb;
802
803 mptcplog((LOG_DEBUG, "MPTCP Socket: "
804 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
805 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error),
806 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
807
808 DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid,
809 sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
810
811 VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL ||
812 aid == mpte->mpte_associd);
813
814 /* terminate the association? */
815 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
816 /* if we're not detached, go thru socket state checks */
817 if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
818 if (!(mp_so->so_state & (SS_ISCONNECTED|
819 SS_ISCONNECTING))) {
820 error = ENOTCONN;
821 goto out;
822 }
823 if (mp_so->so_state & SS_ISDISCONNECTING) {
824 error = EALREADY;
825 goto out;
826 }
827 }
828 MPT_LOCK(mp_tp);
829 mptcp_cancel_all_timers(mp_tp);
830 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
831 (void) mptcp_close(mpte, mp_tp);
832 MPT_UNLOCK(mp_tp);
833 } else if ((mp_so->so_options & SO_LINGER) &&
834 mp_so->so_linger == 0) {
835 (void) mptcp_drop(mpte, mp_tp, 0);
836 MPT_UNLOCK(mp_tp);
837 } else {
838 MPT_UNLOCK(mp_tp);
839 soisdisconnecting(mp_so);
840 sbflush(&mp_so->so_rcv);
841 if (mptcp_usrclosed(mpte) != NULL)
842 (void) mptcp_output(mpte);
843 }
844 } else {
845 bool disconnect_embryonic_subflows = false;
846 struct socket *so = NULL;
847
848 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
849 if (mpts->mpts_connid != cid)
850 continue;
851
852 MPTS_LOCK(mpts);
853 /*
854 * Check if disconnected subflow is the one used
855 * to initiate MPTCP connection.
856 * If it is and the connection is not yet join ready
857 * disconnect all other subflows.
858 */
859 so = mpts->mpts_socket;
860 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) &&
861 so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
862 disconnect_embryonic_subflows = true;
863 }
864
865 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
866 mptcp_subflow_disconnect(mpte, mpts, FALSE);
867 MPTS_UNLOCK(mpts);
868 break;
869 }
870
871 if (mpts == NULL) {
872 error = EINVAL;
873 goto out;
874 }
875
876 if (disconnect_embryonic_subflows) {
877 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
878 if (mpts->mpts_connid == cid)
879 continue;
880 MPTS_LOCK(mpts);
881 mptcp_subflow_disconnect(mpte, mpts, TRUE);
882 MPTS_UNLOCK(mpts);
883 }
884 }
885 }
886
887 if (error == 0)
888 mptcp_thread_signal(mpte);
889
890 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
891 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
892 /* the socket has been shutdown, no more sockopt's */
893 mptcp_flush_sopts(mpte);
894 }
895
896 out:
897 return (error);
898 }
899
900 /*
901 * Wrapper function to support disconnect on socket
902 */
903 static int
904 mptcp_usr_disconnect(struct socket *mp_so)
905 {
906 int error = 0;
907
908 error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
909 return (error);
910 }
911
912 /*
913 * User-protocol pru_disconnectx callback.
914 */
915 static int
916 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
917 {
918 struct mppcb *mpp = sotomppcb(mp_so);
919 struct mptses *mpte;
920 int error = 0;
921
922 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
923 error = EINVAL;
924 goto out;
925 }
926 mpte = mptompte(mpp);
927 VERIFY(mpte != NULL);
928 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
929
930 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
931 aid != mpte->mpte_associd) {
932 error = EINVAL;
933 goto out;
934 }
935
936 error = mptcp_disconnectx(mpte, aid, cid);
937 out:
938 return (error);
939 }
940
941 /*
942 * User issued close, and wish to trail thru shutdown states.
943 */
944 static struct mptses *
945 mptcp_usrclosed(struct mptses *mpte)
946 {
947 struct socket *mp_so;
948 struct mptcb *mp_tp;
949 struct mptsub *mpts;
950
951 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
952 mp_so = mpte->mpte_mppcb->mpp_socket;
953 mp_tp = mpte->mpte_mptcb;
954
955 MPT_LOCK(mp_tp);
956 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
957
958 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
959 mpte = mptcp_close(mpte, mp_tp);
960 MPT_UNLOCK(mp_tp);
961 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
962 MPT_UNLOCK(mp_tp);
963 soisdisconnected(mp_so);
964 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
965 MPTS_LOCK(mpts);
966 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
967 MPTS_UNLOCK(mpts);
968 }
969 } else {
970 MPT_UNLOCK(mp_tp);
971
972 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
973 MPTS_LOCK(mpts);
974 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
975 mptcp_subflow_disconnect(mpte, mpts, FALSE);
976 MPTS_UNLOCK(mpts);
977 }
978 }
979
980 return (mpte);
981 }
982
983 /*
984 * User-protocol pru_peeloff callback.
985 */
986 static int
987 mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop)
988 {
989 struct mppcb *mpp = sotomppcb(mp_so);
990 struct mptses *mpte;
991 int error = 0;
992
993 VERIFY(psop != NULL);
994
995 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
996 error = EINVAL;
997 goto out;
998 }
999 mpte = mptompte(mpp);
1000 VERIFY(mpte != NULL);
1001
1002 error = mptcp_peeloff(mpte, aid, psop);
1003 out:
1004 return (error);
1005 }
1006
1007 /*
1008 * Transform a previously connected TCP subflow connection which has
1009 * failed to negotiate MPTCP to its own socket which can be externalized
1010 * with a file descriptor. Valid only when the MPTCP socket is not
1011 * yet associated (MPTCP-level connection has not been established.)
1012 */
1013 static int
1014 mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop)
1015 {
1016 struct socket *so = NULL, *mp_so;
1017 struct mptsub *mpts;
1018 int error = 0;
1019
1020 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1021 mp_so = mpte->mpte_mppcb->mpp_socket;
1022
1023 VERIFY(psop != NULL);
1024 *psop = NULL;
1025
1026 DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid,
1027 struct socket *, mp_so);
1028
1029 /* peeloff cannot happen after an association is established */
1030 if (mpte->mpte_associd != SAE_ASSOCID_ANY) {
1031 error = EINVAL;
1032 goto out;
1033 }
1034
1035 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
1036 error = EINVAL;
1037 goto out;
1038 }
1039
1040 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1041 MPTS_LOCK(mpts);
1042 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
1043 panic("%s: so %p is MPTCP capable but mp_so %p "
1044 "aid is %d\n", __func__, so, mp_so,
1045 mpte->mpte_associd);
1046 /* NOTREACHED */
1047 }
1048 MPTS_ADDREF_LOCKED(mpts); /* for us */
1049 so = mpts->mpts_socket;
1050 VERIFY(so != NULL);
1051 /*
1052 * This subflow socket is about to be externalized; make it
1053 * appear as if it has the same properties as the MPTCP socket,
1054 * undo what's done earlier in mptcp_subflow_add().
1055 */
1056 mptcp_subflow_sopeeloff(mpte, mpts, so);
1057 MPTS_UNLOCK(mpts);
1058
1059 mptcp_subflow_del(mpte, mpts, FALSE);
1060 MPTS_REMREF(mpts); /* ours */
1061 /*
1062 * XXX adi@apple.com
1063 *
1064 * Here we need to make sure the subflow socket is not
1065 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1066 * and INP_FLOW_SUSPENDED on the subflow socket, since
1067 * we will no longer be monitoring its events.
1068 */
1069 break;
1070 }
1071
1072 if (so == NULL) {
1073 error = EINVAL;
1074 goto out;
1075 }
1076 *psop = so;
1077
1078 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1079 "%s: mp_so 0x%llx\n", __func__,
1080 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
1081 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1082
1083 out:
1084 return (error);
1085 }
1086
1087 /*
1088 * After a receive, possible send some update to peer.
1089 */
1090 static int
1091 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1092 {
1093 #pragma unused(flags)
1094 struct mppcb *mpp = sotomppcb(mp_so);
1095 struct mptses *mpte;
1096 int error = 0;
1097
1098 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1099 error = EINVAL;
1100 goto out;
1101 }
1102 mpte = mptompte(mpp);
1103 VERIFY(mpte != NULL);
1104
1105 error = mptcp_output(mpte);
1106 out:
1107 return (error);
1108 }
1109
1110 /*
1111 * Do a send by putting data in the output queue.
1112 */
1113 static int
1114 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1115 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1116 {
1117 #pragma unused(nam, p)
1118 struct mppcb *mpp = sotomppcb(mp_so);
1119 struct mptses *mpte;
1120 int error = 0;
1121
1122 if (prus_flags & (PRUS_OOB|PRUS_EOF)) {
1123 error = EOPNOTSUPP;
1124 goto out;
1125 }
1126
1127 if (nam != NULL) {
1128 error = EOPNOTSUPP;
1129 goto out;
1130 }
1131
1132 if (control != NULL && control->m_len != 0) {
1133 error = EOPNOTSUPP;
1134 goto out;
1135 }
1136
1137 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1138 error = ECONNRESET;
1139 goto out;
1140 }
1141 mpte = mptompte(mpp);
1142 VERIFY(mpte != NULL);
1143
1144 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1145 (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA))) {
1146 error = ENOTCONN;
1147 goto out;
1148 }
1149
1150 mptcp_insert_dsn(mpp, m);
1151 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1152 (void) sbappendstream(&mp_so->so_snd, m);
1153 m = NULL;
1154
1155 /*
1156 * XXX: adi@apple.com
1157 *
1158 * PRUS_MORETOCOME could be set, but we don't check it now.
1159 */
1160 error = mptcp_output(mpte);
1161 if (error != 0)
1162 goto out;
1163
1164 if (mp_so->so_state & SS_ISCONNECTING) {
1165 if (mp_so->so_state & SS_NBIO)
1166 error = EWOULDBLOCK;
1167 else
1168 error = sbwait(&mp_so->so_snd);
1169 }
1170
1171 out:
1172 if (error) {
1173 if (m != NULL)
1174 m_freem(m);
1175 if (control != NULL)
1176 m_freem(control);
1177 }
1178 return (error);
1179 }
1180
1181 /*
1182 * Mark the MPTCP connection as being incapable of further output.
1183 */
1184 static int
1185 mptcp_usr_shutdown(struct socket *mp_so)
1186 {
1187 struct mppcb *mpp = sotomppcb(mp_so);
1188 struct mptses *mpte;
1189 int error = 0;
1190
1191 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1192 error = EINVAL;
1193 goto out;
1194 }
1195 mpte = mptompte(mpp);
1196 VERIFY(mpte != NULL);
1197
1198 socantsendmore(mp_so);
1199
1200 mpte = mptcp_usrclosed(mpte);
1201 if (mpte != NULL)
1202 error = mptcp_output(mpte);
1203 out:
1204 return (error);
1205 }
1206
1207 /*
1208 * Copy the contents of uio into a properly sized mbuf chain.
1209 */
1210 static int
1211 mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
1212 struct mbuf **top)
1213 {
1214 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1215 user_ssize_t resid, tot, len, progress; /* must be user_ssize_t */
1216 int error;
1217
1218 VERIFY(top != NULL && *top == NULL);
1219
1220 /*
1221 * space can be zero or an arbitrary large value bound by
1222 * the total data supplied by the uio.
1223 */
1224 resid = uio_resid(uio);
1225 if (space > 0)
1226 tot = imin(resid, space);
1227 else
1228 tot = resid;
1229
1230 /*
1231 * The smallest unit is a single mbuf with pkthdr.
1232 * We can't align past it.
1233 */
1234 if (align >= MHLEN)
1235 return (EINVAL);
1236
1237 /*
1238 * Give us the full allocation or nothing.
1239 * If space is zero return the smallest empty mbuf.
1240 */
1241 if ((len = tot + align) == 0)
1242 len = 1;
1243
1244 /* Loop and append maximum sized mbufs to the chain tail. */
1245 while (len > 0) {
1246 uint32_t m_needed = 1;
1247
1248 if (njcl > 0 && len > MBIGCLBYTES)
1249 mb = m_getpackets_internal(&m_needed, 1,
1250 how, 1, M16KCLBYTES);
1251 else if (len > MCLBYTES)
1252 mb = m_getpackets_internal(&m_needed, 1,
1253 how, 1, MBIGCLBYTES);
1254 else if (len >= (signed)MINCLSIZE)
1255 mb = m_getpackets_internal(&m_needed, 1,
1256 how, 1, MCLBYTES);
1257 else
1258 mb = m_gethdr(how, MT_DATA);
1259
1260 /* Fail the whole operation if one mbuf can't be allocated. */
1261 if (mb == NULL) {
1262 if (nm != NULL)
1263 m_freem(nm);
1264 return (ENOBUFS);
1265 }
1266
1267 /* Book keeping. */
1268 VERIFY(mb->m_flags & M_PKTHDR);
1269 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1270 if (mtail != NULL)
1271 mtail->m_next = mb;
1272 else
1273 nm = mb;
1274 mtail = mb;
1275 }
1276
1277 m = nm;
1278 m->m_data += align;
1279
1280 progress = 0;
1281 /* Fill all mbufs with uio data and update header information. */
1282 for (mb = m; mb != NULL; mb = mb->m_next) {
1283 len = imin(M_TRAILINGSPACE(mb), tot - progress);
1284
1285 error = uiomove(mtod(mb, char *), len, uio);
1286 if (error != 0) {
1287 m_freem(m);
1288 return (error);
1289 }
1290
1291 /* each mbuf is M_PKTHDR chained via m_next */
1292 mb->m_len = len;
1293 mb->m_pkthdr.len = len;
1294
1295 progress += len;
1296 }
1297 VERIFY(progress == tot);
1298 *top = m;
1299 return (0);
1300 }
1301
1302 /*
1303 * MPTCP socket protocol-user socket send routine, derived from sosend().
1304 */
1305 static int
1306 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1307 struct mbuf *top, struct mbuf *control, int flags)
1308 {
1309 #pragma unused(addr)
1310 int32_t space;
1311 user_ssize_t resid;
1312 int error, sendflags;
1313 struct proc *p = current_proc();
1314 int sblocked = 0;
1315
1316 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1317 if (uio == NULL || top != NULL) {
1318 error = EINVAL;
1319 goto out;
1320 }
1321 resid = uio_resid(uio);
1322
1323 socket_lock(mp_so, 1);
1324 so_update_last_owner_locked(mp_so, p);
1325 so_update_policy(mp_so);
1326
1327 VERIFY(mp_so->so_type == SOCK_STREAM);
1328 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1329
1330 if ((flags & (MSG_OOB|MSG_DONTROUTE|MSG_HOLD|MSG_SEND|MSG_FLUSH)) ||
1331 (mp_so->so_flags & SOF_ENABLE_MSGS)) {
1332 error = EOPNOTSUPP;
1333 socket_unlock(mp_so, 1);
1334 goto out;
1335 }
1336
1337 /*
1338 * In theory resid should be unsigned. However, space must be
1339 * signed, as it might be less than 0 if we over-committed, and we
1340 * must use a signed comparison of space and resid. On the other
1341 * hand, a negative resid causes us to loop sending 0-length
1342 * segments to the protocol.
1343 */
1344 if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
1345 error = EINVAL;
1346 socket_unlock(mp_so, 1);
1347 goto out;
1348 }
1349
1350 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1351
1352 do {
1353 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1354 &sblocked, NULL);
1355 if (error != 0)
1356 goto release;
1357
1358 space = sbspace(&mp_so->so_snd);
1359 do {
1360 socket_unlock(mp_so, 0);
1361 /*
1362 * Copy the data from userland into an mbuf chain.
1363 */
1364 error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
1365 if (error != 0) {
1366 socket_lock(mp_so, 0);
1367 goto release;
1368 }
1369 VERIFY(top != NULL);
1370 space -= resid - uio_resid(uio);
1371 resid = uio_resid(uio);
1372 socket_lock(mp_so, 0);
1373
1374 /*
1375 * Compute flags here, for pru_send and NKEs.
1376 */
1377 sendflags = (resid > 0 && space > 0) ?
1378 PRUS_MORETOCOME : 0;
1379
1380 /*
1381 * Socket filter processing
1382 */
1383 VERIFY(control == NULL);
1384 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1385 if (error != 0) {
1386 if (error == EJUSTRETURN) {
1387 error = 0;
1388 top = NULL;
1389 /* always free control if any */
1390 }
1391 goto release;
1392 }
1393 if (control != NULL) {
1394 m_freem(control);
1395 control = NULL;
1396 }
1397
1398 /*
1399 * Pass data to protocol.
1400 */
1401 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1402 (mp_so, sendflags, top, NULL, NULL, p);
1403
1404 top = NULL;
1405 if (error != 0)
1406 goto release;
1407 } while (resid != 0 && space > 0);
1408 } while (resid != 0);
1409
1410 release:
1411 if (sblocked)
1412 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1413 else
1414 socket_unlock(mp_so, 1);
1415 out:
1416 if (top != NULL)
1417 m_freem(top);
1418 if (control != NULL)
1419 m_freem(control);
1420
1421 /* clear SOF1_PRECONNECT_DATA after one write */
1422 if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)
1423 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1424
1425 return (error);
1426 }
1427
1428 /*
1429 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1430 * This routine simply indicates to the caller whether or not to proceed
1431 * further with the given socket option. This is invoked by sosetoptlock()
1432 * and sogetoptlock().
1433 */
1434 static int
1435 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1436 {
1437 #pragma unused(mp_so)
1438 int error = 0;
1439
1440 VERIFY(sopt->sopt_level == SOL_SOCKET);
1441
1442 /*
1443 * We could check for sopt_dir (set/get) here, but we'll just
1444 * let the caller deal with it as appropriate; therefore the
1445 * following is a superset of the socket options which we
1446 * allow for set/get.
1447 *
1448 * XXX: adi@apple.com
1449 *
1450 * Need to consider the following cases:
1451 *
1452 * a. In the event peeloff(2) occurs on the subflow socket,
1453 * we may want to issue those options which are now
1454 * handled at the MP socket. In that case, we will need
1455 * to record them in mptcp_setopt() so that they can
1456 * be replayed during peeloff.
1457 *
1458 * b. Certain socket options don't have a clear definition
1459 * on the expected behavior post connect(2). At the time
1460 * those options are issued on the MP socket, there may
1461 * be existing subflow sockets that are already connected.
1462 */
1463 switch (sopt->sopt_name) {
1464 case SO_LINGER: /* MP */
1465 case SO_LINGER_SEC: /* MP */
1466 case SO_TYPE: /* MP */
1467 case SO_NREAD: /* MP */
1468 case SO_NWRITE: /* MP */
1469 case SO_ERROR: /* MP */
1470 case SO_SNDBUF: /* MP */
1471 case SO_RCVBUF: /* MP */
1472 case SO_SNDLOWAT: /* MP */
1473 case SO_RCVLOWAT: /* MP */
1474 case SO_SNDTIMEO: /* MP */
1475 case SO_RCVTIMEO: /* MP */
1476 case SO_NKE: /* MP */
1477 case SO_NOSIGPIPE: /* MP */
1478 case SO_NOADDRERR: /* MP */
1479 case SO_LABEL: /* MP */
1480 case SO_PEERLABEL: /* MP */
1481 case SO_DEFUNCTOK: /* MP */
1482 case SO_ISDEFUNCT: /* MP */
1483 case SO_TRAFFIC_CLASS_DBG: /* MP */
1484 /*
1485 * Tell the caller that these options are to be processed.
1486 */
1487 break;
1488
1489 case SO_DEBUG: /* MP + subflow */
1490 case SO_KEEPALIVE: /* MP + subflow */
1491 case SO_USELOOPBACK: /* MP + subflow */
1492 case SO_RANDOMPORT: /* MP + subflow */
1493 case SO_TRAFFIC_CLASS: /* MP + subflow */
1494 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1495 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1496 case SO_RECV_ANYIF: /* MP + subflow */
1497 case SO_RESTRICTIONS: /* MP + subflow */
1498 case SO_FLUSH: /* MP + subflow */
1499 case SO_MPTCP_FASTJOIN: /* MP + subflow */
1500 case SO_NOWAKEFROMSLEEP:
1501 case SO_NOAPNFALLBK:
1502 /*
1503 * Tell the caller that these options are to be processed;
1504 * these will also be recorded later by mptcp_setopt().
1505 *
1506 * NOTE: Only support integer option value for now.
1507 */
1508 if (sopt->sopt_valsize != sizeof (int))
1509 error = EINVAL;
1510 break;
1511
1512 default:
1513 /*
1514 * Tell the caller to stop immediately and return an error.
1515 */
1516 error = ENOPROTOOPT;
1517 break;
1518 }
1519
1520 return (error);
1521 }
1522
1523 /*
1524 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1525 */
1526 static int
1527 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1528 {
1529 struct socket *mp_so;
1530 struct mptsub *mpts;
1531 struct mptopt smpo;
1532 int error = 0;
1533
1534 /* just bail now if this isn't applicable to subflow sockets */
1535 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1536 error = ENOPROTOOPT;
1537 goto out;
1538 }
1539
1540 /*
1541 * Skip those that are handled internally; these options
1542 * should not have been recorded and marked with the
1543 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1544 */
1545 if (mpo->mpo_level == SOL_SOCKET &&
1546 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1547 error = ENOPROTOOPT;
1548 goto out;
1549 }
1550
1551 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1552 mp_so = mpte->mpte_mppcb->mpp_socket;
1553
1554 /*
1555 * Don't bother going further if there's no subflow; mark the option
1556 * with MPOF_INTERIM so that we know whether or not to remove this
1557 * option upon encountering an error while issuing it during subflow
1558 * socket creation.
1559 */
1560 if (mpte->mpte_numflows == 0) {
1561 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1562 mpo->mpo_flags |= MPOF_INTERIM;
1563 /* return success */
1564 goto out;
1565 }
1566
1567 bzero(&smpo, sizeof (smpo));
1568 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1569 smpo.mpo_level = mpo->mpo_level;
1570 smpo.mpo_name = mpo->mpo_name;
1571
1572 /* grab exisiting values in case we need to rollback */
1573 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1574 struct socket *so;
1575
1576 MPTS_LOCK(mpts);
1577 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1578 mpts->mpts_oldintval = 0;
1579 smpo.mpo_intval = 0;
1580 VERIFY(mpts->mpts_socket != NULL);
1581 so = mpts->mpts_socket;
1582 socket_lock(so, 0);
1583 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1584 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1585 mpts->mpts_oldintval = smpo.mpo_intval;
1586 }
1587 socket_unlock(so, 0);
1588 MPTS_UNLOCK(mpts);
1589 }
1590
1591 /* apply socket option */
1592 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1593 struct socket *so;
1594
1595 MPTS_LOCK(mpts);
1596 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1597 VERIFY(mpts->mpts_socket != NULL);
1598 so = mpts->mpts_socket;
1599 socket_lock(so, 0);
1600 error = mptcp_subflow_sosetopt(mpte, so, mpo);
1601 socket_unlock(so, 0);
1602 MPTS_UNLOCK(mpts);
1603 if (error != 0)
1604 break;
1605 }
1606
1607 /* cleanup, and rollback if needed */
1608 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1609 struct socket *so;
1610
1611 MPTS_LOCK(mpts);
1612 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1613 /* clear in case it's set */
1614 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1615 mpts->mpts_oldintval = 0;
1616 MPTS_UNLOCK(mpts);
1617 continue;
1618 }
1619 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1620 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1621 VERIFY(mpts->mpts_oldintval == 0);
1622 MPTS_UNLOCK(mpts);
1623 continue;
1624 }
1625 /* error during sosetopt, so roll it back */
1626 if (error != 0) {
1627 VERIFY(mpts->mpts_socket != NULL);
1628 so = mpts->mpts_socket;
1629 socket_lock(so, 0);
1630 smpo.mpo_intval = mpts->mpts_oldintval;
1631 (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
1632 socket_unlock(so, 0);
1633 }
1634 mpts->mpts_oldintval = 0;
1635 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1636 MPTS_UNLOCK(mpts);
1637 }
1638
1639 out:
1640 return (error);
1641 }
1642
1643 /*
1644 * Handle SOPT_SET for socket options issued on MP socket.
1645 */
1646 static int
1647 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1648 {
1649 int error = 0, optval, level, optname, rec = 1;
1650 struct mptopt smpo, *mpo = NULL;
1651 struct socket *mp_so;
1652 char buf[32];
1653
1654 level = sopt->sopt_level;
1655 optname = sopt->sopt_name;
1656
1657 VERIFY(sopt->sopt_dir == SOPT_SET);
1658 VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP);
1659 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1660 mp_so = mpte->mpte_mppcb->mpp_socket;
1661
1662 /*
1663 * Record socket options which are applicable to subflow sockets so
1664 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1665 * for the list of eligible socket-level options.
1666 */
1667 if (level == SOL_SOCKET) {
1668 switch (optname) {
1669 case SO_DEBUG:
1670 case SO_KEEPALIVE:
1671 case SO_USELOOPBACK:
1672 case SO_RANDOMPORT:
1673 case SO_TRAFFIC_CLASS:
1674 case SO_RECV_TRAFFIC_CLASS:
1675 case SO_PRIVILEGED_TRAFFIC_CLASS:
1676 case SO_RECV_ANYIF:
1677 case SO_RESTRICTIONS:
1678 case SO_NOWAKEFROMSLEEP:
1679 case SO_MPTCP_FASTJOIN:
1680 case SO_NOAPNFALLBK:
1681 /* record it */
1682 break;
1683 case SO_FLUSH:
1684 /* don't record it */
1685 rec = 0;
1686 break;
1687 default:
1688 /* nothing to do; just return success */
1689 goto out;
1690 }
1691 } else {
1692 switch (optname) {
1693 case TCP_NODELAY:
1694 case TCP_RXT_FINDROP:
1695 case TCP_KEEPALIVE:
1696 case TCP_KEEPINTVL:
1697 case TCP_KEEPCNT:
1698 case TCP_CONNECTIONTIMEOUT:
1699 case TCP_RXT_CONNDROPTIME:
1700 case PERSIST_TIMEOUT:
1701 /* eligible; record it */
1702 break;
1703 case TCP_NOTSENT_LOWAT:
1704 /* record at MPTCP level */
1705 error = sooptcopyin(sopt, &optval, sizeof(optval),
1706 sizeof(optval));
1707 if (error)
1708 goto out;
1709 if (optval < 0) {
1710 error = EINVAL;
1711 goto out;
1712 } else {
1713 if (optval == 0) {
1714 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1715 error = mptcp_set_notsent_lowat(mpte,0);
1716 } else {
1717 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1718 error = mptcp_set_notsent_lowat(mpte,
1719 optval);
1720 }
1721 }
1722 goto out;
1723 default:
1724 /* not eligible */
1725 error = ENOPROTOOPT;
1726 goto out;
1727 }
1728 }
1729
1730 if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
1731 sizeof (optval))) != 0)
1732 goto out;
1733
1734 if (rec) {
1735 /* search for an existing one; if not found, allocate */
1736 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL)
1737 mpo = mptcp_sopt_alloc(M_WAITOK);
1738
1739 if (mpo == NULL) {
1740 error = ENOBUFS;
1741 } else {
1742 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1743 "%s: mp_so 0x%llx sopt %s "
1744 "val %d %s\n", __func__,
1745 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1746 mptcp_sopt2str(level, optname, buf,
1747 sizeof (buf)), optval,
1748 (mpo->mpo_flags & MPOF_ATTACHED) ?
1749 "updated" : "recorded"),
1750 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1751
1752 /* initialize or update, as needed */
1753 mpo->mpo_intval = optval;
1754 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1755 mpo->mpo_level = level;
1756 mpo->mpo_name = optname;
1757 mptcp_sopt_insert(mpte, mpo);
1758 }
1759 VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
1760 /* this can be issued on the subflow socket */
1761 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1762 }
1763 } else {
1764 bzero(&smpo, sizeof (smpo));
1765 mpo = &smpo;
1766 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1767 mpo->mpo_level = level;
1768 mpo->mpo_name = optname;
1769 mpo->mpo_intval = optval;
1770 }
1771 VERIFY(mpo == NULL || error == 0);
1772
1773 /* issue this socket option on existing subflows */
1774 if (error == 0) {
1775 error = mptcp_setopt_apply(mpte, mpo);
1776 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1777 VERIFY(mpo != &smpo);
1778 mptcp_sopt_remove(mpte, mpo);
1779 mptcp_sopt_free(mpo);
1780 }
1781 if (mpo == &smpo)
1782 mpo->mpo_flags &= ~MPOF_INTERIM;
1783 }
1784 out:
1785 if (error == 0 && mpo != NULL) {
1786 mptcplog((LOG_ERR, "MPTCP Socket: "
1787 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1788 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1789 mptcp_sopt2str(level, optname, buf,
1790 sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ?
1791 "pending" : "successful"),
1792 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1793 } else if (error != 0) {
1794 mptcplog((LOG_ERR, "MPTCP Socket: "
1795 "%s: mp_so 0x%llx sopt %s can't be issued "
1796 "error %d\n", __func__,
1797 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level,
1798 optname, buf, sizeof (buf)), error),
1799 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1800 }
1801 return (error);
1802 }
1803
1804 /*
1805 * Handle SOPT_GET for socket options issued on MP socket.
1806 */
1807 static int
1808 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
1809 {
1810 int error = 0, optval;
1811
1812 VERIFY(sopt->sopt_dir == SOPT_GET);
1813 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1814
1815 /*
1816 * We only handle SOPT_GET for TCP level socket options; we should
1817 * not get here for socket level options since they are already
1818 * handled at the socket layer.
1819 */
1820 if (sopt->sopt_level != IPPROTO_TCP) {
1821 error = ENOPROTOOPT;
1822 goto out;
1823 }
1824
1825 switch (sopt->sopt_name) {
1826 case TCP_NODELAY:
1827 case TCP_RXT_FINDROP:
1828 case TCP_KEEPALIVE:
1829 case TCP_KEEPINTVL:
1830 case TCP_KEEPCNT:
1831 case TCP_CONNECTIONTIMEOUT:
1832 case TCP_RXT_CONNDROPTIME:
1833 case PERSIST_TIMEOUT:
1834 case TCP_NOTSENT_LOWAT:
1835 /* eligible; get the default value just in case */
1836 error = mptcp_default_tcp_optval(mpte, sopt, &optval);
1837 break;
1838 default:
1839 /* not eligible */
1840 error = ENOPROTOOPT;
1841 break;
1842 }
1843
1844 switch (sopt->sopt_name) {
1845 case TCP_NOTSENT_LOWAT:
1846 if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT)
1847 optval = mptcp_get_notsent_lowat(mpte);
1848 else
1849 optval = 0;
1850 goto out;
1851 }
1852
1853 /*
1854 * Search for a previously-issued TCP level socket option and
1855 * return the recorded option value. This assumes that the
1856 * value did not get modified by the lower layer after it was
1857 * issued at setsockopt(2) time. If not found, we'll return
1858 * the default value obtained ealier.
1859 */
1860 if (error == 0) {
1861 struct mptopt *mpo;
1862
1863 if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL)
1864 optval = mpo->mpo_intval;
1865
1866 error = sooptcopyout(sopt, &optval, sizeof (int));
1867 }
1868 out:
1869 return (error);
1870 }
1871
1872 /*
1873 * Return default values for TCP socket options. Ideally we would query the
1874 * subflow TCP socket, but that requires creating a subflow socket before
1875 * connectx(2) time. To simplify things, just return the default values
1876 * that we know of.
1877 */
1878 static int
1879 mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
1880 {
1881 int error = 0;
1882
1883 VERIFY(sopt->sopt_level == IPPROTO_TCP);
1884 VERIFY(sopt->sopt_dir == SOPT_GET);
1885 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1886
1887 /* try to do what tcp_newtcpcb() does */
1888 switch (sopt->sopt_name) {
1889 case TCP_NODELAY:
1890 case TCP_RXT_FINDROP:
1891 case TCP_KEEPINTVL:
1892 case TCP_KEEPCNT:
1893 case TCP_CONNECTIONTIMEOUT:
1894 case TCP_RXT_CONNDROPTIME:
1895 case TCP_NOTSENT_LOWAT:
1896 *optval = 0;
1897 break;
1898
1899 case TCP_KEEPALIVE:
1900 *optval = mptcp_subflow_keeptime;
1901 break;
1902
1903 case PERSIST_TIMEOUT:
1904 *optval = tcp_max_persist_timeout;
1905 break;
1906
1907 default:
1908 error = ENOPROTOOPT;
1909 break;
1910 }
1911 return (error);
1912 }
1913
1914 /*
1915 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1916 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1917 * to those that are allowed by mptcp_usr_socheckopt().
1918 */
1919 int
1920 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
1921 {
1922 struct mppcb *mpp = sotomppcb(mp_so);
1923 struct mptses *mpte;
1924 int error = 0;
1925
1926 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1927 error = EINVAL;
1928 goto out;
1929 }
1930 mpte = mptompte(mpp);
1931 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1932
1933 /* we only handle socket and TCP-level socket options for MPTCP */
1934 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
1935 char buf[32];
1936 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1937 "%s: mp_so 0x%llx sopt %s level not "
1938 "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1939 mptcp_sopt2str(sopt->sopt_level,
1940 sopt->sopt_name, buf, sizeof (buf))),
1941 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1942 error = EINVAL;
1943 goto out;
1944 }
1945
1946 switch (sopt->sopt_dir) {
1947 case SOPT_SET:
1948 error = mptcp_setopt(mpte, sopt);
1949 break;
1950
1951 case SOPT_GET:
1952 error = mptcp_getopt(mpte, sopt);
1953 break;
1954 }
1955 out:
1956 return (error);
1957 }
1958
1959 /*
1960 * Return a string representation of <sopt_level,sopt_name>
1961 */
1962 const char *
1963 mptcp_sopt2str(int level, int optname, char *dst, int size)
1964 {
1965 char lbuf[32], obuf[32];
1966 const char *l = lbuf, *o = obuf;
1967
1968 (void) snprintf(lbuf, sizeof (lbuf), "0x%x", level);
1969 (void) snprintf(obuf, sizeof (obuf), "0x%x", optname);
1970
1971 switch (level) {
1972 case SOL_SOCKET:
1973 l = "SOL_SOCKET";
1974 switch (optname) {
1975 case SO_LINGER:
1976 o = "SO_LINGER";
1977 break;
1978 case SO_LINGER_SEC:
1979 o = "SO_LINGER_SEC";
1980 break;
1981 case SO_DEBUG:
1982 o = "SO_DEBUG";
1983 break;
1984 case SO_KEEPALIVE:
1985 o = "SO_KEEPALIVE";
1986 break;
1987 case SO_USELOOPBACK:
1988 o = "SO_USELOOPBACK";
1989 break;
1990 case SO_TYPE:
1991 o = "SO_TYPE";
1992 break;
1993 case SO_NREAD:
1994 o = "SO_NREAD";
1995 break;
1996 case SO_NWRITE:
1997 o = "SO_NWRITE";
1998 break;
1999 case SO_ERROR:
2000 o = "SO_ERROR";
2001 break;
2002 case SO_SNDBUF:
2003 o = "SO_SNDBUF";
2004 break;
2005 case SO_RCVBUF:
2006 o = "SO_RCVBUF";
2007 break;
2008 case SO_SNDLOWAT:
2009 o = "SO_SNDLOWAT";
2010 break;
2011 case SO_RCVLOWAT:
2012 o = "SO_RCVLOWAT";
2013 break;
2014 case SO_SNDTIMEO:
2015 o = "SO_SNDTIMEO";
2016 break;
2017 case SO_RCVTIMEO:
2018 o = "SO_RCVTIMEO";
2019 break;
2020 case SO_NKE:
2021 o = "SO_NKE";
2022 break;
2023 case SO_NOSIGPIPE:
2024 o = "SO_NOSIGPIPE";
2025 break;
2026 case SO_NOADDRERR:
2027 o = "SO_NOADDRERR";
2028 break;
2029 case SO_RESTRICTIONS:
2030 o = "SO_RESTRICTIONS";
2031 break;
2032 case SO_LABEL:
2033 o = "SO_LABEL";
2034 break;
2035 case SO_PEERLABEL:
2036 o = "SO_PEERLABEL";
2037 break;
2038 case SO_RANDOMPORT:
2039 o = "SO_RANDOMPORT";
2040 break;
2041 case SO_TRAFFIC_CLASS:
2042 o = "SO_TRAFFIC_CLASS";
2043 break;
2044 case SO_RECV_TRAFFIC_CLASS:
2045 o = "SO_RECV_TRAFFIC_CLASS";
2046 break;
2047 case SO_TRAFFIC_CLASS_DBG:
2048 o = "SO_TRAFFIC_CLASS_DBG";
2049 break;
2050 case SO_PRIVILEGED_TRAFFIC_CLASS:
2051 o = "SO_PRIVILEGED_TRAFFIC_CLASS";
2052 break;
2053 case SO_DEFUNCTOK:
2054 o = "SO_DEFUNCTOK";
2055 break;
2056 case SO_ISDEFUNCT:
2057 o = "SO_ISDEFUNCT";
2058 break;
2059 case SO_OPPORTUNISTIC:
2060 o = "SO_OPPORTUNISTIC";
2061 break;
2062 case SO_FLUSH:
2063 o = "SO_FLUSH";
2064 break;
2065 case SO_RECV_ANYIF:
2066 o = "SO_RECV_ANYIF";
2067 break;
2068 case SO_NOWAKEFROMSLEEP:
2069 o = "SO_NOWAKEFROMSLEEP";
2070 break;
2071 case SO_MPTCP_FASTJOIN:
2072 o = "SO_MPTCP_FASTJOIN";
2073 break;
2074 case SO_NOAPNFALLBK:
2075 o = "SO_NOAPNFALLBK";
2076 break;
2077 }
2078 break;
2079 case IPPROTO_TCP:
2080 l = "IPPROTO_TCP";
2081 switch (optname) {
2082 case TCP_KEEPALIVE:
2083 o = "TCP_KEEPALIVE";
2084 break;
2085 case TCP_KEEPINTVL:
2086 o = "TCP_KEEPINTVL";
2087 break;
2088 case TCP_KEEPCNT:
2089 o = "TCP_KEEPCNT";
2090 break;
2091 case TCP_CONNECTIONTIMEOUT:
2092 o = "TCP_CONNECTIONTIMEOUT";
2093 break;
2094 case TCP_RXT_CONNDROPTIME:
2095 o = "TCP_RXT_CONNDROPTIME";
2096 break;
2097 case PERSIST_TIMEOUT:
2098 o = "PERSIST_TIMEOUT";
2099 break;
2100 }
2101 break;
2102 }
2103
2104 (void) snprintf(dst, size, "<%s,%s>", l, o);
2105 return (dst);
2106 }
2107
2108 static int
2109 mptcp_usr_preconnect(struct socket *mp_so)
2110 {
2111 struct mptsub *mpts = NULL;
2112 struct mppcb *mpp = sotomppcb(mp_so);
2113 struct mptses *mpte;
2114 struct socket *so;
2115 struct tcpcb *tp = NULL;
2116
2117 mpte = mptompte(mpp);
2118 VERIFY(mpte != NULL);
2119 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
2120
2121 mpts = mptcp_get_subflow(mpte, NULL, NULL);
2122 if (mpts == NULL) {
2123 mptcplog((LOG_ERR, "MPTCP Socket: "
2124 "%s: mp_so 0x%llx invalid preconnect ", __func__,
2125 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
2126 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
2127 return (EINVAL);
2128 }
2129 MPTS_LOCK(mpts);
2130 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2131 so = mpts->mpts_socket;
2132 socket_lock(so, 0);
2133 tp = intotcpcb(sotoinpcb(so));
2134 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2135 int error = tcp_output(sototcpcb(so));
2136 socket_unlock(so, 0);
2137 MPTS_UNLOCK(mpts);
2138 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
2139 return (error);
2140 }