]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/mptcp_usrreq.c
xnu-3789.70.16.tar.gz
[apple/xnu.git] / bsd / netinet / mptcp_usrreq.c
1 /*
2 * Copyright (c) 2012-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40
41 #include <net/if.h>
42 #include <netinet/in.h>
43 #include <netinet/in_var.h>
44 #include <netinet/tcp.h>
45 #include <netinet/tcp_fsm.h>
46 #include <netinet/tcp_seq.h>
47 #include <netinet/tcp_var.h>
48 #include <netinet/tcp_timer.h>
49 #include <netinet/mptcp_var.h>
50 #include <netinet/mptcp_timer.h>
51
52 #include <mach/sdt.h>
53
54 static int mptcp_usr_attach(struct socket *, int, struct proc *);
55 static int mptcp_usr_detach(struct socket *);
56 static int mptcp_attach(struct socket *, struct proc *);
57 static int mptcp_detach(struct socket *, struct mppcb *);
58 static int mptcp_connectx(struct mptses *, struct sockaddr *,
59 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
60 sae_connid_t *, uint32_t, void *, uint32_t);
61 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
62 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
63 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
64 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
65 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
66 user_addr_t);
67 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
68 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
69 uint32_t *, user_addr_t, uint32_t *);
70 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
71 struct proc *);
72 static int mptcp_disconnectx(struct mptses *, sae_associd_t, sae_connid_t);
73 static int mptcp_usr_disconnect(struct socket *);
74 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
75 static struct mptses *mptcp_usrclosed(struct mptses *);
76 static int mptcp_usr_peeloff(struct socket *, sae_associd_t, struct socket **);
77 static int mptcp_peeloff(struct mptses *, sae_associd_t, struct socket **);
78 static int mptcp_usr_rcvd(struct socket *, int);
79 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
80 struct sockaddr *, struct mbuf *, struct proc *);
81 static int mptcp_usr_shutdown(struct socket *);
82 static int mptcp_uiotombuf(struct uio *, int, int, uint32_t, struct mbuf **);
83 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
84 struct mbuf *, struct mbuf *, int);
85 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
86 static int mptcp_setopt_apply(struct mptses *, struct mptopt *);
87 static int mptcp_setopt(struct mptses *, struct sockopt *);
88 static int mptcp_getopt(struct mptses *, struct sockopt *);
89 static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *);
90 static void mptcp_connorder_helper(struct mptsub *mpts);
91 static int mptcp_usr_preconnect(struct socket *so);
92
93 struct pr_usrreqs mptcp_usrreqs = {
94 .pru_attach = mptcp_usr_attach,
95 .pru_connectx = mptcp_usr_connectx,
96 .pru_control = mptcp_usr_control,
97 .pru_detach = mptcp_usr_detach,
98 .pru_disconnect = mptcp_usr_disconnect,
99 .pru_disconnectx = mptcp_usr_disconnectx,
100 .pru_peeloff = mptcp_usr_peeloff,
101 .pru_rcvd = mptcp_usr_rcvd,
102 .pru_send = mptcp_usr_send,
103 .pru_shutdown = mptcp_usr_shutdown,
104 .pru_sosend = mptcp_usr_sosend,
105 .pru_soreceive = soreceive,
106 .pru_socheckopt = mptcp_usr_socheckopt,
107 .pru_preconnect = mptcp_usr_preconnect,
108 };
109
110 /*
111 * Attaches an MPTCP control block to a socket.
112 */
113 static int
114 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
115 {
116 #pragma unused(proto)
117 int error;
118
119 VERIFY(sotomppcb(mp_so) == NULL);
120
121 error = mptcp_attach(mp_so, p);
122 if (error != 0)
123 goto out;
124 /*
125 * XXX: adi@apple.com
126 *
127 * Might want to use a different SO_LINGER timeout than TCP's?
128 */
129 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0)
130 mp_so->so_linger = TCP_LINGERTIME * hz;
131 out:
132 return (error);
133 }
134
135 /*
136 * Detaches an MPTCP control block from a socket.
137 */
138 static int
139 mptcp_usr_detach(struct socket *mp_so)
140 {
141 struct mppcb *mpp = sotomppcb(mp_so);
142 int error = 0;
143
144 VERIFY(mpp != NULL);
145 VERIFY(mpp->mpp_socket != NULL);
146
147 error = mptcp_detach(mp_so, mpp);
148 return (error);
149 }
150
151 /*
152 * Attach MPTCP protocol to socket, allocating MP control block,
153 * MPTCP session, control block, buffer space, etc.
154 */
155 static int
156 mptcp_attach(struct socket *mp_so, struct proc *p)
157 {
158 #pragma unused(p)
159 struct mptses *mpte = NULL;
160 struct mptcb *mp_tp = NULL;
161 struct mppcb *mpp = NULL;
162 int error = 0;
163
164 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
165 error = soreserve(mp_so, tcp_sendspace, MPTCP_RWIN_MAX);
166 if (error != 0)
167 goto out;
168 }
169
170 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
171 soreserve_preconnect(mp_so, 2048);
172 }
173
174 /*
175 * MPTCP socket buffers cannot be compressed, due to the
176 * fact that each mbuf chained via m_next is a M_PKTHDR
177 * which carries some MPTCP metadata.
178 */
179 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
180 mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS;
181
182 /* Disable socket buffer auto-tuning. */
183 mp_so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
184 mp_so->so_snd.sb_flags &= ~SB_AUTOSIZE;
185
186 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
187 goto out;
188 }
189
190 mpp = sotomppcb(mp_so);
191 VERIFY(mpp != NULL);
192 mpte = (struct mptses *)mpp->mpp_pcbe;
193 VERIFY(mpte != NULL);
194 mp_tp = mpte->mpte_mptcb;
195 VERIFY(mp_tp != NULL);
196 out:
197 return (error);
198 }
199
200 /*
201 * Called when the socket layer loses its final reference to the socket;
202 * at this point, there is only one case in which we will keep things
203 * around: time wait.
204 */
205 static int
206 mptcp_detach(struct socket *mp_so, struct mppcb *mpp)
207 {
208 struct mptses *mpte;
209 struct mppcbinfo *mppi;
210
211 VERIFY(mp_so->so_pcb == mpp);
212 VERIFY(mpp->mpp_socket == mp_so);
213
214 mppi = mpp->mpp_pcbinfo;
215 VERIFY(mppi != NULL);
216
217 __IGNORE_WCASTALIGN(mpte = &((struct mpp_mtp *)mpp)->mpp_ses);
218 VERIFY(mpte->mpte_mppcb == mpp);
219
220 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
221
222 /*
223 * We are done with this MPTCP socket (it has been closed);
224 * trigger all subflows to be disconnected, if not already,
225 * by initiating the PCB detach sequence (SOF_PCBCLEARING
226 * will be set.)
227 */
228 mp_pcbdetach(mpp);
229
230 (void) mptcp_disconnectx(mpte, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
231
232 /*
233 * XXX: adi@apple.com
234 *
235 * Here, we would want to handle time wait state.
236 */
237
238 return (0);
239 }
240
241 /*
242 * Common subroutine to open a MPTCP connection to one of the remote hosts
243 * specified by dst_sl. This includes allocating and establishing a
244 * subflow TCP connection, either initially to establish MPTCP connection,
245 * or to join an existing one. Returns a connection handle upon success.
246 */
247 static int
248 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
249 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
250 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
251 uint32_t arglen)
252 {
253 #pragma unused(p, aid, flags, arg, arglen)
254 struct mptsub *mpts;
255 struct socket *mp_so;
256 int error = 0;
257
258 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
259 mp_so = mpte->mpte_mppcb->mpp_socket;
260
261 VERIFY(dst != NULL);
262 VERIFY(pcid != NULL);
263
264 mptcplog((LOG_DEBUG, "MPTCP Socket: "
265 "%s: mp_so 0x%llx\n", __func__,
266 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
267 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
268
269 DTRACE_MPTCP3(connectx, struct mptses *, mpte, sae_associd_t, aid,
270 struct socket *, mp_so);
271
272 mpts = mptcp_subflow_alloc(M_WAITOK);
273 if (mpts == NULL) {
274 error = ENOBUFS;
275 goto out;
276 }
277 MPTS_ADDREF(mpts); /* for this routine */
278
279 if (src != NULL) {
280 int len = src->sa_len;
281
282 MALLOC(mpts->mpts_src, struct sockaddr *, len, M_SONAME,
283 M_WAITOK | M_ZERO);
284 if (mpts->mpts_src == NULL) {
285 error = ENOBUFS;
286 goto out;
287 }
288 bcopy(src, mpts->mpts_src, len);
289 }
290
291 MALLOC(mpts->mpts_dst, struct sockaddr *, dst->sa_len, M_SONAME,
292 M_WAITOK | M_ZERO);
293 if (mpts->mpts_dst == NULL) {
294 error = ENOBUFS;
295 goto out;
296 }
297 bcopy(dst, mpts->mpts_dst, dst->sa_len);
298
299 error = mptcp_subflow_add(mpte, mpts, p, ifscope);
300 if (error == 0 && pcid != NULL)
301 *pcid = mpts->mpts_connid;
302
303 out:
304 if (mpts != NULL) {
305 if ((error != 0) && (error != EWOULDBLOCK)) {
306 MPTS_LOCK(mpts);
307 if (mpts->mpts_flags & MPTSF_ATTACHED) {
308 MPTS_UNLOCK(mpts);
309 MPTS_REMREF(mpts);
310 mptcp_subflow_del(mpte, mpts, TRUE);
311 return (error);
312 }
313 MPTS_UNLOCK(mpts);
314 }
315 MPTS_REMREF(mpts);
316 }
317
318 return (error);
319 }
320
321 /*
322 * User-protocol pru_connectx callback.
323 */
324 static int
325 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
326 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
327 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
328 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
329 {
330 struct mppcb *mpp = sotomppcb(mp_so);
331 struct mptses *mpte = NULL;
332 struct mptcb *mp_tp = NULL;
333 user_ssize_t datalen;
334
335 int error = 0;
336
337 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
338 error = EINVAL;
339 goto out;
340 }
341 mpte = mptompte(mpp);
342 VERIFY(mpte != NULL);
343
344 mp_tp = mpte->mpte_mptcb;
345 VERIFY(mp_tp != NULL);
346
347 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
348 error = EINVAL;
349 goto out;
350 }
351
352 error = mptcp_connectx(mpte, src, dst, p, ifscope,
353 aid, pcid, flags, arg, arglen);
354
355 /* If there is data, copy it */
356 if (auio != NULL) {
357 datalen = uio_resid(auio);
358 socket_unlock(mp_so, 0);
359 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
360 (uio_t) auio, NULL, NULL, 0);
361 /* check if this can be supported with fast Join also. XXX */
362 if (error == 0 || error == EWOULDBLOCK)
363 *bytes_written = datalen - uio_resid(auio);
364
365 if (error == EWOULDBLOCK)
366 error = EINPROGRESS;
367
368 socket_lock(mp_so, 0);
369 MPT_LOCK(mp_tp);
370 if (mp_tp->mpt_flags & MPTCPF_PEEL_OFF) {
371 *bytes_written = datalen - uio_resid(auio);
372 /*
373 * Override errors like EPIPE that occur as
374 * a result of doing TFO during TCP fallback.
375 */
376 error = EPROTO;
377 }
378 MPT_UNLOCK(mp_tp);
379 }
380
381 out:
382 return (error);
383 }
384
385 /*
386 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
387 */
388 static int
389 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
390 {
391 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
392
393 /* MPTCP has at most 1 association */
394 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
395
396 /* just asking how many there are? */
397 if (aidp == USER_ADDR_NULL)
398 return (0);
399
400 return (copyout(&mpte->mpte_associd, aidp,
401 sizeof (mpte->mpte_associd)));
402 }
403
404 /*
405 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
406 */
407 static int
408 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
409 user_addr_t cidp)
410 {
411 struct mptsub *mpts;
412 int error = 0;
413
414 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
415
416 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
417 aid != mpte->mpte_associd)
418 return (EINVAL);
419
420 *cnt = mpte->mpte_numflows;
421
422 /* just asking how many there are? */
423 if (cidp == USER_ADDR_NULL)
424 return (0);
425
426 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
427 if ((error = copyout(&mpts->mpts_connid, cidp,
428 sizeof (mpts->mpts_connid))) != 0)
429 break;
430
431 cidp += sizeof (mpts->mpts_connid);
432 }
433
434 return (error);
435 }
436
437 /*
438 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
439 */
440 static int
441 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
442 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
443 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
444 user_addr_t aux_data, uint32_t *aux_len)
445 {
446 #pragma unused(aux_data)
447 struct ifnet *ifp = NULL;
448 struct mptsub *mpts;
449 int error = 0;
450
451 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
452
453 if (*cid == SAE_CONNID_ALL)
454 return (EINVAL);
455
456 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
457 if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY)
458 break;
459 }
460 if (mpts == NULL)
461 return ((*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL);
462
463 MPTS_LOCK(mpts);
464 ifp = mpts->mpts_outif;
465 *cid = mpts->mpts_connid;
466 *ifindex = ((ifp != NULL) ? ifp->if_index : 0);
467 *soerror = mpts->mpts_soerror;
468 *flags = 0;
469 if (mpts->mpts_flags & MPTSF_CONNECTING)
470 *flags |= CIF_CONNECTING;
471 if (mpts->mpts_flags & MPTSF_CONNECTED)
472 *flags |= CIF_CONNECTED;
473 if (mpts->mpts_flags & MPTSF_DISCONNECTING)
474 *flags |= CIF_DISCONNECTING;
475 if (mpts->mpts_flags & MPTSF_DISCONNECTED)
476 *flags |= CIF_DISCONNECTED;
477 if (mpts->mpts_flags & MPTSF_BOUND_IF)
478 *flags |= CIF_BOUND_IF;
479 if (mpts->mpts_flags & MPTSF_BOUND_IP)
480 *flags |= CIF_BOUND_IP;
481 if (mpts->mpts_flags & MPTSF_BOUND_PORT)
482 *flags |= CIF_BOUND_PORT;
483 if (mpts->mpts_flags & MPTSF_PREFERRED)
484 *flags |= CIF_PREFERRED;
485 if (mpts->mpts_flags & MPTSF_MP_CAPABLE)
486 *flags |= CIF_MP_CAPABLE;
487 if (mpts->mpts_flags & MPTSF_MP_DEGRADED)
488 *flags |= CIF_MP_DEGRADED;
489 if (mpts->mpts_flags & MPTSF_MP_READY)
490 *flags |= CIF_MP_READY;
491 if (mpts->mpts_flags & MPTSF_ACTIVE)
492 *flags |= CIF_MP_ACTIVE;
493
494 VERIFY(mpts->mpts_src != NULL);
495 *src_len = mpts->mpts_src->sa_len;
496 if (src != USER_ADDR_NULL) {
497 error = copyout(mpts->mpts_src, src, mpts->mpts_src->sa_len);
498 if (error != 0)
499 goto out;
500 }
501
502 VERIFY(mpts->mpts_dst != NULL);
503 *dst_len = mpts->mpts_dst->sa_len;
504 if (dst != USER_ADDR_NULL) {
505 error = copyout(mpts->mpts_dst, dst, mpts->mpts_dst->sa_len);
506 if (error != 0)
507 goto out;
508 }
509
510 *aux_type = 0;
511 *aux_len = 0;
512 if (mpts->mpts_socket != NULL) {
513 struct conninfo_tcp tcp_ci;
514
515 *aux_type = CIAUX_TCP;
516 *aux_len = sizeof (tcp_ci);
517
518 if (aux_data != USER_ADDR_NULL) {
519 struct socket *so = mpts->mpts_socket;
520
521 VERIFY(SOCK_PROTO(so) == IPPROTO_TCP);
522 bzero(&tcp_ci, sizeof (tcp_ci));
523 socket_lock(so, 0);
524 tcp_getconninfo(so, &tcp_ci);
525 socket_unlock(so, 0);
526 error = copyout(&tcp_ci, aux_data, sizeof (tcp_ci));
527 if (error != 0)
528 goto out;
529 }
530 }
531 mptcplog((LOG_DEBUG, "MPTCP Socket: "
532 "%s: cid %d flags %x \n",
533 __func__, mpts->mpts_connid, mpts->mpts_flags),
534 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
535
536 out:
537 MPTS_UNLOCK(mpts);
538 return (error);
539 }
540
541 /*
542 * Handle SIOCSCONNORDER
543 */
544 int
545 mptcp_setconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t rank)
546 {
547 struct mptsub *mpts, *mpts1;
548 int error = 0;
549
550 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
551 mptcplog((LOG_DEBUG, "MPTCP Socket: "
552 "%s: cid %d rank %d \n", __func__, cid, rank),
553 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
554
555 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
556 error = EINVAL;
557 goto out;
558 }
559
560 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
561 if (mpts->mpts_connid == cid)
562 break;
563 }
564 if (mpts == NULL) {
565 error = ENXIO;
566 goto out;
567 }
568
569 if (rank == 0 || rank > 1) {
570 /*
571 * If rank is 0, determine whether this should be the
572 * primary or backup subflow, depending on what we have.
573 *
574 * Otherwise, if greater than 0, make it a backup flow.
575 */
576 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
577 MPTS_LOCK(mpts1);
578 if (mpts1->mpts_flags & MPTSF_PREFERRED) {
579 MPTS_UNLOCK(mpts1);
580 break;
581 }
582 MPTS_UNLOCK(mpts1);
583 }
584
585 MPTS_LOCK(mpts);
586 mpts->mpts_flags &= ~MPTSF_PREFERRED;
587 mpts->mpts_rank = rank;
588 if (mpts1 != NULL && mpts != mpts1) {
589 /* preferred subflow found; set rank as necessary */
590 if (rank == 0)
591 mpts->mpts_rank = (mpts1->mpts_rank + 1);
592 } else if (rank == 0) {
593 /* no preferred one found; promote this */
594 rank = 1;
595 }
596 MPTS_UNLOCK(mpts);
597 }
598
599 if (rank == 1) {
600 /*
601 * If rank is 1, promote this subflow to be preferred.
602 */
603 TAILQ_FOREACH(mpts1, &mpte->mpte_subflows, mpts_entry) {
604 MPTS_LOCK(mpts1);
605 if (mpts1 != mpts &&
606 (mpts1->mpts_flags & MPTSF_PREFERRED)) {
607 mpts1->mpts_flags &= ~MPTSF_PREFERRED;
608 if (mpte->mpte_nummpcapflows > 1)
609 mptcp_connorder_helper(mpts1);
610 } else if (mpts1 == mpts) {
611 mpts1->mpts_rank = 1;
612 if (mpts1->mpts_flags & MPTSF_MP_CAPABLE) {
613 mpts1->mpts_flags |= MPTSF_PREFERRED;
614 if (mpte->mpte_nummpcapflows > 1)
615 mptcp_connorder_helper(mpts1);
616 }
617 }
618 MPTS_UNLOCK(mpts1);
619 }
620 }
621
622 out:
623 return (error);
624 }
625
626 static void
627 mptcp_connorder_helper(struct mptsub *mpts)
628 {
629 struct socket *so = mpts->mpts_socket;
630 struct tcpcb *tp = NULL;
631
632 socket_lock(so, 0);
633
634 tp = intotcpcb(sotoinpcb(so));
635 tp->t_mpflags |= TMPF_SND_MPPRIO;
636 if (mpts->mpts_flags & MPTSF_PREFERRED)
637 tp->t_mpflags &= ~TMPF_BACKUP_PATH;
638 else
639 tp->t_mpflags |= TMPF_BACKUP_PATH;
640
641 socket_unlock(so, 0);
642
643 }
644
645 /*
646 * Handle SIOCSGONNORDER
647 */
648 int
649 mptcp_getconnorder(struct mptses *mpte, sae_connid_t cid, uint32_t *rank)
650 {
651 struct mptsub *mpts;
652 int error = 0;
653
654 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
655 VERIFY(rank != NULL);
656 *rank = 0;
657
658 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
659 error = EINVAL;
660 goto out;
661 }
662
663 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
664 if (mpts->mpts_connid == cid)
665 break;
666 }
667 if (mpts == NULL) {
668 error = ENXIO;
669 goto out;
670 }
671
672 MPTS_LOCK(mpts);
673 *rank = mpts->mpts_rank;
674 MPTS_UNLOCK(mpts);
675 out:
676 return (error);
677 }
678
679 /*
680 * User-protocol pru_control callback.
681 */
682 static int
683 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
684 struct ifnet *ifp, struct proc *p)
685 {
686 #pragma unused(ifp, p)
687 struct mppcb *mpp = sotomppcb(mp_so);
688 struct mptses *mpte;
689 int error = 0;
690
691 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
692 error = EINVAL;
693 goto out;
694 }
695 mpte = mptompte(mpp);
696 VERIFY(mpte != NULL);
697
698 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
699
700 switch (cmd) {
701 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
702 struct so_aidreq32 aidr;
703 bcopy(data, &aidr, sizeof (aidr));
704 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
705 aidr.sar_aidp);
706 if (error == 0)
707 bcopy(&aidr, data, sizeof (aidr));
708 break;
709 }
710
711 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
712 struct so_aidreq64 aidr;
713 bcopy(data, &aidr, sizeof (aidr));
714 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
715 aidr.sar_aidp);
716 if (error == 0)
717 bcopy(&aidr, data, sizeof (aidr));
718 break;
719 }
720
721 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
722 struct so_cidreq32 cidr;
723 bcopy(data, &cidr, sizeof (cidr));
724 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
725 cidr.scr_cidp);
726 if (error == 0)
727 bcopy(&cidr, data, sizeof (cidr));
728 break;
729 }
730
731 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
732 struct so_cidreq64 cidr;
733 bcopy(data, &cidr, sizeof (cidr));
734 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
735 cidr.scr_cidp);
736 if (error == 0)
737 bcopy(&cidr, data, sizeof (cidr));
738 break;
739 }
740
741 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
742 struct so_cinforeq32 cifr;
743 bcopy(data, &cifr, sizeof (cifr));
744 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
745 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
746 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
747 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
748 &cifr.scir_aux_len);
749 if (error == 0)
750 bcopy(&cifr, data, sizeof (cifr));
751 break;
752 }
753
754 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
755 struct so_cinforeq64 cifr;
756 bcopy(data, &cifr, sizeof (cifr));
757 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
758 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
759 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
760 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
761 &cifr.scir_aux_len);
762 if (error == 0)
763 bcopy(&cifr, data, sizeof (cifr));
764 break;
765 }
766
767 case SIOCSCONNORDER: { /* struct so_cordreq */
768 struct so_cordreq cor;
769 bcopy(data, &cor, sizeof (cor));
770 error = mptcp_setconnorder(mpte, cor.sco_cid, cor.sco_rank);
771 if (error == 0)
772 bcopy(&cor, data, sizeof (cor));
773 break;
774 }
775
776 case SIOCGCONNORDER: { /* struct so_cordreq */
777 struct so_cordreq cor;
778 bcopy(data, &cor, sizeof (cor));
779 error = mptcp_getconnorder(mpte, cor.sco_cid, &cor.sco_rank);
780 if (error == 0)
781 bcopy(&cor, data, sizeof (cor));
782 break;
783 }
784
785 default:
786 error = EOPNOTSUPP;
787 break;
788 }
789 out:
790 return (error);
791 }
792
793 /*
794 * Initiate a disconnect. MPTCP-level disconnection is specified by
795 * CONNID_{ANY,ALL}. Otherwise, selectively disconnect a subflow
796 * connection while keeping the MPTCP-level connection (association).
797 */
798 static int
799 mptcp_disconnectx(struct mptses *mpte, sae_associd_t aid, sae_connid_t cid)
800 {
801 struct mptsub *mpts;
802 struct socket *mp_so;
803 struct mptcb *mp_tp;
804 int error = 0;
805
806 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
807
808 mp_so = mpte->mpte_mppcb->mpp_socket;
809 mp_tp = mpte->mpte_mptcb;
810
811 mptcplog((LOG_DEBUG, "MPTCP Socket: "
812 "%s: mp_so 0x%llx aid %d cid %d %d\n", __func__,
813 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), aid, cid, mp_so->so_error),
814 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
815
816 DTRACE_MPTCP5(disconnectx, struct mptses *, mpte, sae_associd_t, aid,
817 sae_connid_t, cid, struct socket *, mp_so, struct mptcb *, mp_tp);
818
819 VERIFY(aid == SAE_ASSOCID_ANY || aid == SAE_ASSOCID_ALL ||
820 aid == mpte->mpte_associd);
821
822 /* terminate the association? */
823 if (cid == SAE_CONNID_ANY || cid == SAE_CONNID_ALL) {
824 /* if we're not detached, go thru socket state checks */
825 if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
826 if (!(mp_so->so_state & (SS_ISCONNECTED|
827 SS_ISCONNECTING))) {
828 error = ENOTCONN;
829 goto out;
830 }
831 if (mp_so->so_state & SS_ISDISCONNECTING) {
832 error = EALREADY;
833 goto out;
834 }
835 }
836 MPT_LOCK(mp_tp);
837 mptcp_cancel_all_timers(mp_tp);
838 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
839 (void) mptcp_close(mpte, mp_tp);
840 MPT_UNLOCK(mp_tp);
841 } else if ((mp_so->so_options & SO_LINGER) &&
842 mp_so->so_linger == 0) {
843 (void) mptcp_drop(mpte, mp_tp, 0);
844 MPT_UNLOCK(mp_tp);
845 } else {
846 MPT_UNLOCK(mp_tp);
847 soisdisconnecting(mp_so);
848 sbflush(&mp_so->so_rcv);
849 if (mptcp_usrclosed(mpte) != NULL)
850 (void) mptcp_output(mpte);
851 }
852 } else {
853 bool disconnect_embryonic_subflows = false;
854 struct socket *so = NULL;
855
856 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
857 if (mpts->mpts_connid != cid)
858 continue;
859
860 MPTS_LOCK(mpts);
861 /*
862 * Check if disconnected subflow is the one used
863 * to initiate MPTCP connection.
864 * If it is and the connection is not yet join ready
865 * disconnect all other subflows.
866 */
867 so = mpts->mpts_socket;
868 if (!(mp_tp->mpt_flags & MPTCPF_JOIN_READY) &&
869 so && !(so->so_flags & SOF_MP_SEC_SUBFLOW)) {
870 disconnect_embryonic_subflows = true;
871 }
872
873 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
874 mptcp_subflow_disconnect(mpte, mpts, FALSE);
875 MPTS_UNLOCK(mpts);
876 break;
877 }
878
879 if (mpts == NULL) {
880 error = EINVAL;
881 goto out;
882 }
883
884 if (disconnect_embryonic_subflows) {
885 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
886 if (mpts->mpts_connid == cid)
887 continue;
888 MPTS_LOCK(mpts);
889 mptcp_subflow_disconnect(mpte, mpts, TRUE);
890 MPTS_UNLOCK(mpts);
891 }
892 }
893 }
894
895 if (error == 0)
896 mptcp_thread_signal(mpte);
897
898 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
899 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
900 /* the socket has been shutdown, no more sockopt's */
901 mptcp_flush_sopts(mpte);
902 }
903
904 out:
905 return (error);
906 }
907
908 /*
909 * Wrapper function to support disconnect on socket
910 */
911 static int
912 mptcp_usr_disconnect(struct socket *mp_so)
913 {
914 int error = 0;
915
916 error = mptcp_usr_disconnectx(mp_so, SAE_ASSOCID_ALL, SAE_CONNID_ALL);
917 return (error);
918 }
919
920 /*
921 * User-protocol pru_disconnectx callback.
922 */
923 static int
924 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
925 {
926 struct mppcb *mpp = sotomppcb(mp_so);
927 struct mptses *mpte;
928 int error = 0;
929
930 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
931 error = EINVAL;
932 goto out;
933 }
934 mpte = mptompte(mpp);
935 VERIFY(mpte != NULL);
936 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
937
938 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
939 aid != mpte->mpte_associd) {
940 error = EINVAL;
941 goto out;
942 }
943
944 error = mptcp_disconnectx(mpte, aid, cid);
945 out:
946 return (error);
947 }
948
949 /*
950 * User issued close, and wish to trail thru shutdown states.
951 */
952 static struct mptses *
953 mptcp_usrclosed(struct mptses *mpte)
954 {
955 struct socket *mp_so;
956 struct mptcb *mp_tp;
957 struct mptsub *mpts;
958
959 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
960 mp_so = mpte->mpte_mppcb->mpp_socket;
961 mp_tp = mpte->mpte_mptcb;
962
963 MPT_LOCK(mp_tp);
964 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
965
966 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
967 mpte = mptcp_close(mpte, mp_tp);
968 MPT_UNLOCK(mp_tp);
969 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
970 MPT_UNLOCK(mp_tp);
971 soisdisconnected(mp_so);
972 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
973 MPTS_LOCK(mpts);
974 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
975 MPTS_UNLOCK(mpts);
976 }
977 } else {
978 MPT_UNLOCK(mp_tp);
979
980 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
981 MPTS_LOCK(mpts);
982 mpts->mpts_flags |= MPTSF_USER_DISCONNECT;
983 mptcp_subflow_disconnect(mpte, mpts, FALSE);
984 MPTS_UNLOCK(mpts);
985 }
986 }
987
988 return (mpte);
989 }
990
991 /*
992 * User-protocol pru_peeloff callback.
993 */
994 static int
995 mptcp_usr_peeloff(struct socket *mp_so, sae_associd_t aid, struct socket **psop)
996 {
997 struct mppcb *mpp = sotomppcb(mp_so);
998 struct mptses *mpte;
999 int error = 0;
1000
1001 VERIFY(psop != NULL);
1002
1003 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1004 error = EINVAL;
1005 goto out;
1006 }
1007 mpte = mptompte(mpp);
1008 VERIFY(mpte != NULL);
1009
1010 error = mptcp_peeloff(mpte, aid, psop);
1011 out:
1012 return (error);
1013 }
1014
1015 /*
1016 * Transform a previously connected TCP subflow connection which has
1017 * failed to negotiate MPTCP to its own socket which can be externalized
1018 * with a file descriptor. Valid only when the MPTCP socket is not
1019 * yet associated (MPTCP-level connection has not been established.)
1020 */
1021 static int
1022 mptcp_peeloff(struct mptses *mpte, sae_associd_t aid, struct socket **psop)
1023 {
1024 struct socket *so = NULL, *mp_so;
1025 struct mptsub *mpts;
1026 int error = 0;
1027
1028 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1029 mp_so = mpte->mpte_mppcb->mpp_socket;
1030
1031 VERIFY(psop != NULL);
1032 *psop = NULL;
1033
1034 DTRACE_MPTCP3(peeloff, struct mptses *, mpte, sae_associd_t, aid,
1035 struct socket *, mp_so);
1036
1037 /* peeloff cannot happen after an association is established */
1038 if (mpte->mpte_associd != SAE_ASSOCID_ANY) {
1039 error = EINVAL;
1040 goto out;
1041 }
1042
1043 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
1044 error = EINVAL;
1045 goto out;
1046 }
1047
1048 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1049 MPTS_LOCK(mpts);
1050 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
1051 panic("%s: so %p is MPTCP capable but mp_so %p "
1052 "aid is %d\n", __func__, so, mp_so,
1053 mpte->mpte_associd);
1054 /* NOTREACHED */
1055 }
1056 MPTS_ADDREF_LOCKED(mpts); /* for us */
1057 so = mpts->mpts_socket;
1058 VERIFY(so != NULL);
1059 /*
1060 * This subflow socket is about to be externalized; make it
1061 * appear as if it has the same properties as the MPTCP socket,
1062 * undo what's done earlier in mptcp_subflow_add().
1063 */
1064 mptcp_subflow_sopeeloff(mpte, mpts, so);
1065 MPTS_UNLOCK(mpts);
1066
1067 mptcp_subflow_del(mpte, mpts, FALSE);
1068 MPTS_REMREF(mpts); /* ours */
1069 /*
1070 * XXX adi@apple.com
1071 *
1072 * Here we need to make sure the subflow socket is not
1073 * flow controlled; need to clear both INP_FLOW_CONTROLLED
1074 * and INP_FLOW_SUSPENDED on the subflow socket, since
1075 * we will no longer be monitoring its events.
1076 */
1077 break;
1078 }
1079
1080 if (so == NULL) {
1081 error = EINVAL;
1082 goto out;
1083 }
1084 *psop = so;
1085
1086 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1087 "%s: mp_so 0x%llx\n", __func__,
1088 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
1089 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1090
1091 out:
1092 return (error);
1093 }
1094
1095 /*
1096 * After a receive, possible send some update to peer.
1097 */
1098 static int
1099 mptcp_usr_rcvd(struct socket *mp_so, int flags)
1100 {
1101 #pragma unused(flags)
1102 struct mppcb *mpp = sotomppcb(mp_so);
1103 struct mptses *mpte;
1104 int error = 0;
1105
1106 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1107 error = EINVAL;
1108 goto out;
1109 }
1110 mpte = mptompte(mpp);
1111 VERIFY(mpte != NULL);
1112
1113 error = mptcp_output(mpte);
1114 out:
1115 return (error);
1116 }
1117
1118 /*
1119 * Do a send by putting data in the output queue.
1120 */
1121 static int
1122 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
1123 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1124 {
1125 #pragma unused(nam, p)
1126 struct mppcb *mpp = sotomppcb(mp_so);
1127 struct mptses *mpte;
1128 int error = 0;
1129
1130 if (prus_flags & (PRUS_OOB|PRUS_EOF)) {
1131 error = EOPNOTSUPP;
1132 goto out;
1133 }
1134
1135 if (nam != NULL) {
1136 error = EOPNOTSUPP;
1137 goto out;
1138 }
1139
1140 if (control != NULL && control->m_len != 0) {
1141 error = EOPNOTSUPP;
1142 goto out;
1143 }
1144
1145 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1146 error = ECONNRESET;
1147 goto out;
1148 }
1149 mpte = mptompte(mpp);
1150 VERIFY(mpte != NULL);
1151
1152 if (!(mp_so->so_state & SS_ISCONNECTED) &&
1153 (!(mp_so->so_flags1 & SOF1_PRECONNECT_DATA))) {
1154 error = ENOTCONN;
1155 goto out;
1156 }
1157
1158 mptcp_insert_dsn(mpp, m);
1159 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
1160 (void) sbappendstream(&mp_so->so_snd, m);
1161 m = NULL;
1162
1163 /*
1164 * XXX: adi@apple.com
1165 *
1166 * PRUS_MORETOCOME could be set, but we don't check it now.
1167 */
1168 error = mptcp_output(mpte);
1169 if (error != 0)
1170 goto out;
1171
1172 if (mp_so->so_state & SS_ISCONNECTING) {
1173 if (mp_so->so_state & SS_NBIO)
1174 error = EWOULDBLOCK;
1175 else
1176 error = sbwait(&mp_so->so_snd);
1177 }
1178
1179 out:
1180 if (error) {
1181 if (m != NULL)
1182 m_freem(m);
1183 if (control != NULL)
1184 m_freem(control);
1185 }
1186 return (error);
1187 }
1188
1189 /*
1190 * Mark the MPTCP connection as being incapable of further output.
1191 */
1192 static int
1193 mptcp_usr_shutdown(struct socket *mp_so)
1194 {
1195 struct mppcb *mpp = sotomppcb(mp_so);
1196 struct mptses *mpte;
1197 int error = 0;
1198
1199 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1200 error = EINVAL;
1201 goto out;
1202 }
1203 mpte = mptompte(mpp);
1204 VERIFY(mpte != NULL);
1205
1206 socantsendmore(mp_so);
1207
1208 mpte = mptcp_usrclosed(mpte);
1209 if (mpte != NULL)
1210 error = mptcp_output(mpte);
1211 out:
1212 return (error);
1213 }
1214
1215 /*
1216 * Copy the contents of uio into a properly sized mbuf chain.
1217 */
1218 static int
1219 mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
1220 struct mbuf **top)
1221 {
1222 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
1223 user_ssize_t resid, tot, len, progress; /* must be user_ssize_t */
1224 int error;
1225
1226 VERIFY(top != NULL && *top == NULL);
1227
1228 /*
1229 * space can be zero or an arbitrary large value bound by
1230 * the total data supplied by the uio.
1231 */
1232 resid = uio_resid(uio);
1233 if (space > 0)
1234 tot = imin(resid, space);
1235 else
1236 tot = resid;
1237
1238 /*
1239 * The smallest unit is a single mbuf with pkthdr.
1240 * We can't align past it.
1241 */
1242 if (align >= MHLEN)
1243 return (EINVAL);
1244
1245 /*
1246 * Give us the full allocation or nothing.
1247 * If space is zero return the smallest empty mbuf.
1248 */
1249 if ((len = tot + align) == 0)
1250 len = 1;
1251
1252 /* Loop and append maximum sized mbufs to the chain tail. */
1253 while (len > 0) {
1254 uint32_t m_needed = 1;
1255
1256 if (njcl > 0 && len > MBIGCLBYTES)
1257 mb = m_getpackets_internal(&m_needed, 1,
1258 how, 1, M16KCLBYTES);
1259 else if (len > MCLBYTES)
1260 mb = m_getpackets_internal(&m_needed, 1,
1261 how, 1, MBIGCLBYTES);
1262 else if (len >= (signed)MINCLSIZE)
1263 mb = m_getpackets_internal(&m_needed, 1,
1264 how, 1, MCLBYTES);
1265 else
1266 mb = m_gethdr(how, MT_DATA);
1267
1268 /* Fail the whole operation if one mbuf can't be allocated. */
1269 if (mb == NULL) {
1270 if (nm != NULL)
1271 m_freem(nm);
1272 return (ENOBUFS);
1273 }
1274
1275 /* Book keeping. */
1276 VERIFY(mb->m_flags & M_PKTHDR);
1277 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1278 if (mtail != NULL)
1279 mtail->m_next = mb;
1280 else
1281 nm = mb;
1282 mtail = mb;
1283 }
1284
1285 m = nm;
1286 m->m_data += align;
1287
1288 progress = 0;
1289 /* Fill all mbufs with uio data and update header information. */
1290 for (mb = m; mb != NULL; mb = mb->m_next) {
1291 len = imin(M_TRAILINGSPACE(mb), tot - progress);
1292
1293 error = uiomove(mtod(mb, char *), len, uio);
1294 if (error != 0) {
1295 m_freem(m);
1296 return (error);
1297 }
1298
1299 /* each mbuf is M_PKTHDR chained via m_next */
1300 mb->m_len = len;
1301 mb->m_pkthdr.len = len;
1302
1303 progress += len;
1304 }
1305 VERIFY(progress == tot);
1306 *top = m;
1307 return (0);
1308 }
1309
1310 /*
1311 * MPTCP socket protocol-user socket send routine, derived from sosend().
1312 */
1313 static int
1314 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1315 struct mbuf *top, struct mbuf *control, int flags)
1316 {
1317 #pragma unused(addr)
1318 int32_t space;
1319 user_ssize_t resid;
1320 int error, sendflags;
1321 struct proc *p = current_proc();
1322 int sblocked = 0;
1323
1324 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1325 if (uio == NULL || top != NULL) {
1326 error = EINVAL;
1327 goto out;
1328 }
1329 resid = uio_resid(uio);
1330
1331 socket_lock(mp_so, 1);
1332 so_update_last_owner_locked(mp_so, p);
1333 so_update_policy(mp_so);
1334
1335 VERIFY(mp_so->so_type == SOCK_STREAM);
1336 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1337
1338 if ((flags & (MSG_OOB|MSG_DONTROUTE|MSG_HOLD|MSG_SEND|MSG_FLUSH)) ||
1339 (mp_so->so_flags & SOF_ENABLE_MSGS)) {
1340 error = EOPNOTSUPP;
1341 socket_unlock(mp_so, 1);
1342 goto out;
1343 }
1344
1345 /*
1346 * In theory resid should be unsigned. However, space must be
1347 * signed, as it might be less than 0 if we over-committed, and we
1348 * must use a signed comparison of space and resid. On the other
1349 * hand, a negative resid causes us to loop sending 0-length
1350 * segments to the protocol.
1351 */
1352 if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
1353 error = EINVAL;
1354 socket_unlock(mp_so, 1);
1355 goto out;
1356 }
1357
1358 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1359
1360 do {
1361 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1362 &sblocked, NULL);
1363 if (error != 0)
1364 goto release;
1365
1366 space = sbspace(&mp_so->so_snd);
1367 do {
1368 socket_unlock(mp_so, 0);
1369 /*
1370 * Copy the data from userland into an mbuf chain.
1371 */
1372 error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
1373 if (error != 0) {
1374 socket_lock(mp_so, 0);
1375 goto release;
1376 }
1377 VERIFY(top != NULL);
1378 space -= resid - uio_resid(uio);
1379 resid = uio_resid(uio);
1380 socket_lock(mp_so, 0);
1381
1382 /*
1383 * Compute flags here, for pru_send and NKEs.
1384 */
1385 sendflags = (resid > 0 && space > 0) ?
1386 PRUS_MORETOCOME : 0;
1387
1388 /*
1389 * Socket filter processing
1390 */
1391 VERIFY(control == NULL);
1392 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1393 if (error != 0) {
1394 if (error == EJUSTRETURN) {
1395 error = 0;
1396 top = NULL;
1397 /* always free control if any */
1398 }
1399 goto release;
1400 }
1401 if (control != NULL) {
1402 m_freem(control);
1403 control = NULL;
1404 }
1405
1406 /*
1407 * Pass data to protocol.
1408 */
1409 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1410 (mp_so, sendflags, top, NULL, NULL, p);
1411
1412 top = NULL;
1413 if (error != 0)
1414 goto release;
1415 } while (resid != 0 && space > 0);
1416 } while (resid != 0);
1417
1418 release:
1419 if (sblocked)
1420 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1421 else
1422 socket_unlock(mp_so, 1);
1423 out:
1424 if (top != NULL)
1425 m_freem(top);
1426 if (control != NULL)
1427 m_freem(control);
1428
1429 /* clear SOF1_PRECONNECT_DATA after one write */
1430 if (mp_so->so_flags1 & SOF1_PRECONNECT_DATA)
1431 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
1432
1433 return (error);
1434 }
1435
1436 /*
1437 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1438 * This routine simply indicates to the caller whether or not to proceed
1439 * further with the given socket option. This is invoked by sosetoptlock()
1440 * and sogetoptlock().
1441 */
1442 static int
1443 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1444 {
1445 #pragma unused(mp_so)
1446 int error = 0;
1447
1448 VERIFY(sopt->sopt_level == SOL_SOCKET);
1449
1450 /*
1451 * We could check for sopt_dir (set/get) here, but we'll just
1452 * let the caller deal with it as appropriate; therefore the
1453 * following is a superset of the socket options which we
1454 * allow for set/get.
1455 *
1456 * XXX: adi@apple.com
1457 *
1458 * Need to consider the following cases:
1459 *
1460 * a. In the event peeloff(2) occurs on the subflow socket,
1461 * we may want to issue those options which are now
1462 * handled at the MP socket. In that case, we will need
1463 * to record them in mptcp_setopt() so that they can
1464 * be replayed during peeloff.
1465 *
1466 * b. Certain socket options don't have a clear definition
1467 * on the expected behavior post connect(2). At the time
1468 * those options are issued on the MP socket, there may
1469 * be existing subflow sockets that are already connected.
1470 */
1471 switch (sopt->sopt_name) {
1472 case SO_LINGER: /* MP */
1473 case SO_LINGER_SEC: /* MP */
1474 case SO_TYPE: /* MP */
1475 case SO_NREAD: /* MP */
1476 case SO_NWRITE: /* MP */
1477 case SO_ERROR: /* MP */
1478 case SO_SNDBUF: /* MP */
1479 case SO_RCVBUF: /* MP */
1480 case SO_SNDLOWAT: /* MP */
1481 case SO_RCVLOWAT: /* MP */
1482 case SO_SNDTIMEO: /* MP */
1483 case SO_RCVTIMEO: /* MP */
1484 case SO_NKE: /* MP */
1485 case SO_NOSIGPIPE: /* MP */
1486 case SO_NOADDRERR: /* MP */
1487 case SO_LABEL: /* MP */
1488 case SO_PEERLABEL: /* MP */
1489 case SO_DEFUNCTOK: /* MP */
1490 case SO_ISDEFUNCT: /* MP */
1491 case SO_TRAFFIC_CLASS_DBG: /* MP */
1492 /*
1493 * Tell the caller that these options are to be processed.
1494 */
1495 break;
1496
1497 case SO_DEBUG: /* MP + subflow */
1498 case SO_KEEPALIVE: /* MP + subflow */
1499 case SO_USELOOPBACK: /* MP + subflow */
1500 case SO_RANDOMPORT: /* MP + subflow */
1501 case SO_TRAFFIC_CLASS: /* MP + subflow */
1502 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1503 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1504 case SO_RECV_ANYIF: /* MP + subflow */
1505 case SO_RESTRICTIONS: /* MP + subflow */
1506 case SO_FLUSH: /* MP + subflow */
1507 case SO_MPTCP_FASTJOIN: /* MP + subflow */
1508 case SO_NOWAKEFROMSLEEP:
1509 case SO_NOAPNFALLBK:
1510 /*
1511 * Tell the caller that these options are to be processed;
1512 * these will also be recorded later by mptcp_setopt().
1513 *
1514 * NOTE: Only support integer option value for now.
1515 */
1516 if (sopt->sopt_valsize != sizeof (int))
1517 error = EINVAL;
1518 break;
1519
1520 default:
1521 /*
1522 * Tell the caller to stop immediately and return an error.
1523 */
1524 error = ENOPROTOOPT;
1525 break;
1526 }
1527
1528 return (error);
1529 }
1530
1531 /*
1532 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1533 */
1534 static int
1535 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1536 {
1537 struct socket *mp_so;
1538 struct mptsub *mpts;
1539 struct mptopt smpo;
1540 int error = 0;
1541
1542 /* just bail now if this isn't applicable to subflow sockets */
1543 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1544 error = ENOPROTOOPT;
1545 goto out;
1546 }
1547
1548 /*
1549 * Skip those that are handled internally; these options
1550 * should not have been recorded and marked with the
1551 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1552 */
1553 if (mpo->mpo_level == SOL_SOCKET &&
1554 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1555 error = ENOPROTOOPT;
1556 goto out;
1557 }
1558
1559 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1560 mp_so = mpte->mpte_mppcb->mpp_socket;
1561
1562 /*
1563 * Don't bother going further if there's no subflow; mark the option
1564 * with MPOF_INTERIM so that we know whether or not to remove this
1565 * option upon encountering an error while issuing it during subflow
1566 * socket creation.
1567 */
1568 if (mpte->mpte_numflows == 0) {
1569 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1570 mpo->mpo_flags |= MPOF_INTERIM;
1571 /* return success */
1572 goto out;
1573 }
1574
1575 bzero(&smpo, sizeof (smpo));
1576 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1577 smpo.mpo_level = mpo->mpo_level;
1578 smpo.mpo_name = mpo->mpo_name;
1579
1580 /* grab exisiting values in case we need to rollback */
1581 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1582 struct socket *so;
1583
1584 MPTS_LOCK(mpts);
1585 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1586 mpts->mpts_oldintval = 0;
1587 smpo.mpo_intval = 0;
1588 VERIFY(mpts->mpts_socket != NULL);
1589 so = mpts->mpts_socket;
1590 socket_lock(so, 0);
1591 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1592 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1593 mpts->mpts_oldintval = smpo.mpo_intval;
1594 }
1595 socket_unlock(so, 0);
1596 MPTS_UNLOCK(mpts);
1597 }
1598
1599 /* apply socket option */
1600 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1601 struct socket *so;
1602
1603 MPTS_LOCK(mpts);
1604 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1605 VERIFY(mpts->mpts_socket != NULL);
1606 so = mpts->mpts_socket;
1607 socket_lock(so, 0);
1608 error = mptcp_subflow_sosetopt(mpte, so, mpo);
1609 socket_unlock(so, 0);
1610 MPTS_UNLOCK(mpts);
1611 if (error != 0)
1612 break;
1613 }
1614
1615 /* cleanup, and rollback if needed */
1616 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1617 struct socket *so;
1618
1619 MPTS_LOCK(mpts);
1620 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1621 /* clear in case it's set */
1622 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1623 mpts->mpts_oldintval = 0;
1624 MPTS_UNLOCK(mpts);
1625 continue;
1626 }
1627 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1628 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1629 VERIFY(mpts->mpts_oldintval == 0);
1630 MPTS_UNLOCK(mpts);
1631 continue;
1632 }
1633 /* error during sosetopt, so roll it back */
1634 if (error != 0) {
1635 VERIFY(mpts->mpts_socket != NULL);
1636 so = mpts->mpts_socket;
1637 socket_lock(so, 0);
1638 smpo.mpo_intval = mpts->mpts_oldintval;
1639 (void) mptcp_subflow_sosetopt(mpte, so, &smpo);
1640 socket_unlock(so, 0);
1641 }
1642 mpts->mpts_oldintval = 0;
1643 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL|MPTSF_SOPT_INPROG);
1644 MPTS_UNLOCK(mpts);
1645 }
1646
1647 out:
1648 return (error);
1649 }
1650
1651 /*
1652 * Handle SOPT_SET for socket options issued on MP socket.
1653 */
1654 static int
1655 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1656 {
1657 int error = 0, optval, level, optname, rec = 1;
1658 struct mptopt smpo, *mpo = NULL;
1659 struct socket *mp_so;
1660 char buf[32];
1661
1662 level = sopt->sopt_level;
1663 optname = sopt->sopt_name;
1664
1665 VERIFY(sopt->sopt_dir == SOPT_SET);
1666 VERIFY(level == SOL_SOCKET || level == IPPROTO_TCP);
1667 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1668 mp_so = mpte->mpte_mppcb->mpp_socket;
1669
1670 /*
1671 * Record socket options which are applicable to subflow sockets so
1672 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1673 * for the list of eligible socket-level options.
1674 */
1675 if (level == SOL_SOCKET) {
1676 switch (optname) {
1677 case SO_DEBUG:
1678 case SO_KEEPALIVE:
1679 case SO_USELOOPBACK:
1680 case SO_RANDOMPORT:
1681 case SO_TRAFFIC_CLASS:
1682 case SO_RECV_TRAFFIC_CLASS:
1683 case SO_PRIVILEGED_TRAFFIC_CLASS:
1684 case SO_RECV_ANYIF:
1685 case SO_RESTRICTIONS:
1686 case SO_NOWAKEFROMSLEEP:
1687 case SO_MPTCP_FASTJOIN:
1688 case SO_NOAPNFALLBK:
1689 /* record it */
1690 break;
1691 case SO_FLUSH:
1692 /* don't record it */
1693 rec = 0;
1694 break;
1695 default:
1696 /* nothing to do; just return success */
1697 goto out;
1698 }
1699 } else {
1700 switch (optname) {
1701 case TCP_NODELAY:
1702 case TCP_RXT_FINDROP:
1703 case TCP_KEEPALIVE:
1704 case TCP_KEEPINTVL:
1705 case TCP_KEEPCNT:
1706 case TCP_CONNECTIONTIMEOUT:
1707 case TCP_RXT_CONNDROPTIME:
1708 case PERSIST_TIMEOUT:
1709 /* eligible; record it */
1710 break;
1711 case TCP_NOTSENT_LOWAT:
1712 /* record at MPTCP level */
1713 error = sooptcopyin(sopt, &optval, sizeof(optval),
1714 sizeof(optval));
1715 if (error)
1716 goto out;
1717 if (optval < 0) {
1718 error = EINVAL;
1719 goto out;
1720 } else {
1721 if (optval == 0) {
1722 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1723 error = mptcp_set_notsent_lowat(mpte,0);
1724 } else {
1725 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1726 error = mptcp_set_notsent_lowat(mpte,
1727 optval);
1728 }
1729 }
1730 goto out;
1731 default:
1732 /* not eligible */
1733 error = ENOPROTOOPT;
1734 goto out;
1735 }
1736 }
1737
1738 if ((error = sooptcopyin(sopt, &optval, sizeof (optval),
1739 sizeof (optval))) != 0)
1740 goto out;
1741
1742 if (rec) {
1743 /* search for an existing one; if not found, allocate */
1744 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL)
1745 mpo = mptcp_sopt_alloc(M_WAITOK);
1746
1747 if (mpo == NULL) {
1748 error = ENOBUFS;
1749 } else {
1750 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1751 "%s: mp_so 0x%llx sopt %s "
1752 "val %d %s\n", __func__,
1753 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1754 mptcp_sopt2str(level, optname, buf,
1755 sizeof (buf)), optval,
1756 (mpo->mpo_flags & MPOF_ATTACHED) ?
1757 "updated" : "recorded"),
1758 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1759
1760 /* initialize or update, as needed */
1761 mpo->mpo_intval = optval;
1762 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1763 mpo->mpo_level = level;
1764 mpo->mpo_name = optname;
1765 mptcp_sopt_insert(mpte, mpo);
1766 }
1767 VERIFY(mpo->mpo_flags & MPOF_ATTACHED);
1768 /* this can be issued on the subflow socket */
1769 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1770 }
1771 } else {
1772 bzero(&smpo, sizeof (smpo));
1773 mpo = &smpo;
1774 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1775 mpo->mpo_level = level;
1776 mpo->mpo_name = optname;
1777 mpo->mpo_intval = optval;
1778 }
1779 VERIFY(mpo == NULL || error == 0);
1780
1781 /* issue this socket option on existing subflows */
1782 if (error == 0) {
1783 error = mptcp_setopt_apply(mpte, mpo);
1784 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1785 VERIFY(mpo != &smpo);
1786 mptcp_sopt_remove(mpte, mpo);
1787 mptcp_sopt_free(mpo);
1788 }
1789 if (mpo == &smpo)
1790 mpo->mpo_flags &= ~MPOF_INTERIM;
1791 }
1792 out:
1793 if (error == 0 && mpo != NULL) {
1794 mptcplog((LOG_ERR, "MPTCP Socket: "
1795 "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1796 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1797 mptcp_sopt2str(level, optname, buf,
1798 sizeof (buf)), optval, (mpo->mpo_flags & MPOF_INTERIM) ?
1799 "pending" : "successful"),
1800 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1801 } else if (error != 0) {
1802 mptcplog((LOG_ERR, "MPTCP Socket: "
1803 "%s: mp_so 0x%llx sopt %s can't be issued "
1804 "error %d\n", __func__,
1805 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mptcp_sopt2str(level,
1806 optname, buf, sizeof (buf)), error),
1807 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1808 }
1809 return (error);
1810 }
1811
1812 /*
1813 * Handle SOPT_GET for socket options issued on MP socket.
1814 */
1815 static int
1816 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
1817 {
1818 int error = 0, optval;
1819
1820 VERIFY(sopt->sopt_dir == SOPT_GET);
1821 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1822
1823 /*
1824 * We only handle SOPT_GET for TCP level socket options; we should
1825 * not get here for socket level options since they are already
1826 * handled at the socket layer.
1827 */
1828 if (sopt->sopt_level != IPPROTO_TCP) {
1829 error = ENOPROTOOPT;
1830 goto out;
1831 }
1832
1833 switch (sopt->sopt_name) {
1834 case TCP_NODELAY:
1835 case TCP_RXT_FINDROP:
1836 case TCP_KEEPALIVE:
1837 case TCP_KEEPINTVL:
1838 case TCP_KEEPCNT:
1839 case TCP_CONNECTIONTIMEOUT:
1840 case TCP_RXT_CONNDROPTIME:
1841 case PERSIST_TIMEOUT:
1842 case TCP_NOTSENT_LOWAT:
1843 /* eligible; get the default value just in case */
1844 error = mptcp_default_tcp_optval(mpte, sopt, &optval);
1845 break;
1846 default:
1847 /* not eligible */
1848 error = ENOPROTOOPT;
1849 break;
1850 }
1851
1852 switch (sopt->sopt_name) {
1853 case TCP_NOTSENT_LOWAT:
1854 if (mpte->mpte_mppcb->mpp_socket->so_flags & SOF_NOTSENT_LOWAT)
1855 optval = mptcp_get_notsent_lowat(mpte);
1856 else
1857 optval = 0;
1858 goto out;
1859 }
1860
1861 /*
1862 * Search for a previously-issued TCP level socket option and
1863 * return the recorded option value. This assumes that the
1864 * value did not get modified by the lower layer after it was
1865 * issued at setsockopt(2) time. If not found, we'll return
1866 * the default value obtained ealier.
1867 */
1868 if (error == 0) {
1869 struct mptopt *mpo;
1870
1871 if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL)
1872 optval = mpo->mpo_intval;
1873
1874 error = sooptcopyout(sopt, &optval, sizeof (int));
1875 }
1876 out:
1877 return (error);
1878 }
1879
1880 /*
1881 * Return default values for TCP socket options. Ideally we would query the
1882 * subflow TCP socket, but that requires creating a subflow socket before
1883 * connectx(2) time. To simplify things, just return the default values
1884 * that we know of.
1885 */
1886 static int
1887 mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
1888 {
1889 int error = 0;
1890
1891 VERIFY(sopt->sopt_level == IPPROTO_TCP);
1892 VERIFY(sopt->sopt_dir == SOPT_GET);
1893 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1894
1895 /* try to do what tcp_newtcpcb() does */
1896 switch (sopt->sopt_name) {
1897 case TCP_NODELAY:
1898 case TCP_RXT_FINDROP:
1899 case TCP_KEEPINTVL:
1900 case TCP_KEEPCNT:
1901 case TCP_CONNECTIONTIMEOUT:
1902 case TCP_RXT_CONNDROPTIME:
1903 case TCP_NOTSENT_LOWAT:
1904 *optval = 0;
1905 break;
1906
1907 case TCP_KEEPALIVE:
1908 *optval = mptcp_subflow_keeptime;
1909 break;
1910
1911 case PERSIST_TIMEOUT:
1912 *optval = tcp_max_persist_timeout;
1913 break;
1914
1915 default:
1916 error = ENOPROTOOPT;
1917 break;
1918 }
1919 return (error);
1920 }
1921
1922 /*
1923 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1924 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1925 * to those that are allowed by mptcp_usr_socheckopt().
1926 */
1927 int
1928 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
1929 {
1930 struct mppcb *mpp = sotomppcb(mp_so);
1931 struct mptses *mpte;
1932 int error = 0;
1933
1934 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1935 error = EINVAL;
1936 goto out;
1937 }
1938 mpte = mptompte(mpp);
1939 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
1940
1941 /* we only handle socket and TCP-level socket options for MPTCP */
1942 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
1943 char buf[32];
1944 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1945 "%s: mp_so 0x%llx sopt %s level not "
1946 "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1947 mptcp_sopt2str(sopt->sopt_level,
1948 sopt->sopt_name, buf, sizeof (buf))),
1949 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1950 error = EINVAL;
1951 goto out;
1952 }
1953
1954 switch (sopt->sopt_dir) {
1955 case SOPT_SET:
1956 error = mptcp_setopt(mpte, sopt);
1957 break;
1958
1959 case SOPT_GET:
1960 error = mptcp_getopt(mpte, sopt);
1961 break;
1962 }
1963 out:
1964 return (error);
1965 }
1966
1967 /*
1968 * Return a string representation of <sopt_level,sopt_name>
1969 */
1970 const char *
1971 mptcp_sopt2str(int level, int optname, char *dst, int size)
1972 {
1973 char lbuf[32], obuf[32];
1974 const char *l = lbuf, *o = obuf;
1975
1976 (void) snprintf(lbuf, sizeof (lbuf), "0x%x", level);
1977 (void) snprintf(obuf, sizeof (obuf), "0x%x", optname);
1978
1979 switch (level) {
1980 case SOL_SOCKET:
1981 l = "SOL_SOCKET";
1982 switch (optname) {
1983 case SO_LINGER:
1984 o = "SO_LINGER";
1985 break;
1986 case SO_LINGER_SEC:
1987 o = "SO_LINGER_SEC";
1988 break;
1989 case SO_DEBUG:
1990 o = "SO_DEBUG";
1991 break;
1992 case SO_KEEPALIVE:
1993 o = "SO_KEEPALIVE";
1994 break;
1995 case SO_USELOOPBACK:
1996 o = "SO_USELOOPBACK";
1997 break;
1998 case SO_TYPE:
1999 o = "SO_TYPE";
2000 break;
2001 case SO_NREAD:
2002 o = "SO_NREAD";
2003 break;
2004 case SO_NWRITE:
2005 o = "SO_NWRITE";
2006 break;
2007 case SO_ERROR:
2008 o = "SO_ERROR";
2009 break;
2010 case SO_SNDBUF:
2011 o = "SO_SNDBUF";
2012 break;
2013 case SO_RCVBUF:
2014 o = "SO_RCVBUF";
2015 break;
2016 case SO_SNDLOWAT:
2017 o = "SO_SNDLOWAT";
2018 break;
2019 case SO_RCVLOWAT:
2020 o = "SO_RCVLOWAT";
2021 break;
2022 case SO_SNDTIMEO:
2023 o = "SO_SNDTIMEO";
2024 break;
2025 case SO_RCVTIMEO:
2026 o = "SO_RCVTIMEO";
2027 break;
2028 case SO_NKE:
2029 o = "SO_NKE";
2030 break;
2031 case SO_NOSIGPIPE:
2032 o = "SO_NOSIGPIPE";
2033 break;
2034 case SO_NOADDRERR:
2035 o = "SO_NOADDRERR";
2036 break;
2037 case SO_RESTRICTIONS:
2038 o = "SO_RESTRICTIONS";
2039 break;
2040 case SO_LABEL:
2041 o = "SO_LABEL";
2042 break;
2043 case SO_PEERLABEL:
2044 o = "SO_PEERLABEL";
2045 break;
2046 case SO_RANDOMPORT:
2047 o = "SO_RANDOMPORT";
2048 break;
2049 case SO_TRAFFIC_CLASS:
2050 o = "SO_TRAFFIC_CLASS";
2051 break;
2052 case SO_RECV_TRAFFIC_CLASS:
2053 o = "SO_RECV_TRAFFIC_CLASS";
2054 break;
2055 case SO_TRAFFIC_CLASS_DBG:
2056 o = "SO_TRAFFIC_CLASS_DBG";
2057 break;
2058 case SO_PRIVILEGED_TRAFFIC_CLASS:
2059 o = "SO_PRIVILEGED_TRAFFIC_CLASS";
2060 break;
2061 case SO_DEFUNCTOK:
2062 o = "SO_DEFUNCTOK";
2063 break;
2064 case SO_ISDEFUNCT:
2065 o = "SO_ISDEFUNCT";
2066 break;
2067 case SO_OPPORTUNISTIC:
2068 o = "SO_OPPORTUNISTIC";
2069 break;
2070 case SO_FLUSH:
2071 o = "SO_FLUSH";
2072 break;
2073 case SO_RECV_ANYIF:
2074 o = "SO_RECV_ANYIF";
2075 break;
2076 case SO_NOWAKEFROMSLEEP:
2077 o = "SO_NOWAKEFROMSLEEP";
2078 break;
2079 case SO_MPTCP_FASTJOIN:
2080 o = "SO_MPTCP_FASTJOIN";
2081 break;
2082 case SO_NOAPNFALLBK:
2083 o = "SO_NOAPNFALLBK";
2084 break;
2085 }
2086 break;
2087 case IPPROTO_TCP:
2088 l = "IPPROTO_TCP";
2089 switch (optname) {
2090 case TCP_KEEPALIVE:
2091 o = "TCP_KEEPALIVE";
2092 break;
2093 case TCP_KEEPINTVL:
2094 o = "TCP_KEEPINTVL";
2095 break;
2096 case TCP_KEEPCNT:
2097 o = "TCP_KEEPCNT";
2098 break;
2099 case TCP_CONNECTIONTIMEOUT:
2100 o = "TCP_CONNECTIONTIMEOUT";
2101 break;
2102 case TCP_RXT_CONNDROPTIME:
2103 o = "TCP_RXT_CONNDROPTIME";
2104 break;
2105 case PERSIST_TIMEOUT:
2106 o = "PERSIST_TIMEOUT";
2107 break;
2108 }
2109 break;
2110 }
2111
2112 (void) snprintf(dst, size, "<%s,%s>", l, o);
2113 return (dst);
2114 }
2115
2116 static int
2117 mptcp_usr_preconnect(struct socket *mp_so)
2118 {
2119 struct mptsub *mpts = NULL;
2120 struct mppcb *mpp = sotomppcb(mp_so);
2121 struct mptses *mpte;
2122 struct socket *so;
2123 struct tcpcb *tp = NULL;
2124
2125 mpte = mptompte(mpp);
2126 VERIFY(mpte != NULL);
2127 MPTE_LOCK_ASSERT_HELD(mpte); /* same as MP socket lock */
2128
2129 mpts = mptcp_get_subflow(mpte, NULL, NULL);
2130 if (mpts == NULL) {
2131 mptcplog((LOG_ERR, "MPTCP Socket: "
2132 "%s: mp_so 0x%llx invalid preconnect ", __func__,
2133 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
2134 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
2135 return (EINVAL);
2136 }
2137 MPTS_LOCK(mpts);
2138 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
2139 so = mpts->mpts_socket;
2140 socket_lock(so, 0);
2141 tp = intotcpcb(sotoinpcb(so));
2142 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
2143 int error = tcp_output(sototcpcb(so));
2144 socket_unlock(so, 0);
2145 MPTS_UNLOCK(mpts);
2146 mp_so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
2147 return (error);
2148 }