]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/mptcp_usrreq.c
a73d3339f0aa490e6ab40414e3732574aab8f404
[apple/xnu.git] / bsd / netinet / mptcp_usrreq.c
1 /*
2 * Copyright (c) 2012-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/protosw.h>
35 #include <sys/mcache.h>
36 #include <sys/syslog.h>
37 #include <sys/proc.h>
38 #include <sys/proc_internal.h>
39 #include <sys/resourcevar.h>
40 #include <sys/kauth.h>
41 #include <sys/priv.h>
42
43 #include <net/if.h>
44 #include <netinet/in.h>
45 #include <netinet/in_var.h>
46 #include <netinet/tcp.h>
47 #include <netinet/tcp_fsm.h>
48 #include <netinet/tcp_seq.h>
49 #include <netinet/tcp_var.h>
50 #include <netinet/tcp_timer.h>
51 #include <netinet/mptcp_var.h>
52 #include <netinet/mptcp_timer.h>
53
54 #include <mach/sdt.h>
55
56 static int mptcp_usr_attach(struct socket *, int, struct proc *);
57 static int mptcp_usr_detach(struct socket *);
58 static int mptcp_attach(struct socket *, struct proc *);
59 static int mptcp_usr_connectx(struct socket *, struct sockaddr *,
60 struct sockaddr *, struct proc *, uint32_t, sae_associd_t,
61 sae_connid_t *, uint32_t, void *, uint32_t, struct uio *, user_ssize_t *);
62 static int mptcp_getassocids(struct mptses *, uint32_t *, user_addr_t);
63 static int mptcp_getconnids(struct mptses *, sae_associd_t, uint32_t *,
64 user_addr_t);
65 static int mptcp_getconninfo(struct mptses *, sae_connid_t *, uint32_t *,
66 uint32_t *, int32_t *, user_addr_t, socklen_t *, user_addr_t, socklen_t *,
67 uint32_t *, user_addr_t, uint32_t *);
68 static int mptcp_usr_control(struct socket *, u_long, caddr_t, struct ifnet *,
69 struct proc *);
70 static int mptcp_disconnect(struct mptses *);
71 static int mptcp_usr_disconnect(struct socket *);
72 static int mptcp_usr_disconnectx(struct socket *, sae_associd_t, sae_connid_t);
73 static struct mptses *mptcp_usrclosed(struct mptses *);
74 static int mptcp_usr_rcvd(struct socket *, int);
75 static int mptcp_usr_send(struct socket *, int, struct mbuf *,
76 struct sockaddr *, struct mbuf *, struct proc *);
77 static int mptcp_usr_shutdown(struct socket *);
78 static int mptcp_usr_sosend(struct socket *, struct sockaddr *, struct uio *,
79 struct mbuf *, struct mbuf *, int);
80 static int mptcp_usr_socheckopt(struct socket *, struct sockopt *);
81 static int mptcp_default_tcp_optval(struct mptses *, struct sockopt *, int *);
82 static int mptcp_usr_preconnect(struct socket *so);
83
84 struct pr_usrreqs mptcp_usrreqs = {
85 .pru_attach = mptcp_usr_attach,
86 .pru_connectx = mptcp_usr_connectx,
87 .pru_control = mptcp_usr_control,
88 .pru_detach = mptcp_usr_detach,
89 .pru_disconnect = mptcp_usr_disconnect,
90 .pru_disconnectx = mptcp_usr_disconnectx,
91 .pru_peeraddr = mp_getpeeraddr,
92 .pru_rcvd = mptcp_usr_rcvd,
93 .pru_send = mptcp_usr_send,
94 .pru_shutdown = mptcp_usr_shutdown,
95 .pru_sockaddr = mp_getsockaddr,
96 .pru_sosend = mptcp_usr_sosend,
97 .pru_soreceive = soreceive,
98 .pru_socheckopt = mptcp_usr_socheckopt,
99 .pru_preconnect = mptcp_usr_preconnect,
100 };
101
102
#if (DEVELOPMENT || DEBUG)
/*
 * When non-zero, mptcp_entitlement_check() grants access without
 * consulting the multipath privilege.  Only built on DEVELOPMENT and
 * DEBUG kernels.
 */
static int mptcp_disable_entitlements = 0;
SYSCTL_INT(_net_inet_mptcp, OID_AUTO, disable_entitlements,
    CTLFLAG_RW | CTLFLAG_LOCKED, &mptcp_disable_entitlements, 0,
    "Disable Multipath TCP Entitlement Checking");
#endif
108
109 int mptcp_developer_mode = 0;
110 SYSCTL_INT(_net_inet_mptcp, OID_AUTO, allow_aggregate, CTLFLAG_RW | CTLFLAG_LOCKED,
111 &mptcp_developer_mode, 0, "Allow the Multipath aggregation mode");
112
113
114 /*
115 * Attaches an MPTCP control block to a socket.
116 */
117 static int
118 mptcp_usr_attach(struct socket *mp_so, int proto, struct proc *p)
119 {
120 #pragma unused(proto)
121 int error;
122
123 VERIFY(mpsotomppcb(mp_so) == NULL);
124
125 error = mptcp_attach(mp_so, p);
126 if (error != 0) {
127 goto out;
128 }
129 /*
130 * XXX: adi@apple.com
131 *
132 * Might want to use a different SO_LINGER timeout than TCP's?
133 */
134 if ((mp_so->so_options & SO_LINGER) && mp_so->so_linger == 0) {
135 mp_so->so_linger = TCP_LINGERTIME * hz;
136 }
137 out:
138 return error;
139 }
140
141 /*
142 * Detaches an MPTCP control block from a socket.
143 */
144 static int
145 mptcp_usr_detach(struct socket *mp_so)
146 {
147 struct mptses *mpte = mpsotompte(mp_so);
148 struct mppcb *mpp = mpsotomppcb(mp_so);
149
150 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
151 mptcplog((LOG_ERR, "%s state: %d\n", __func__,
152 mpp ? mpp->mpp_state : -1),
153 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
154 return EINVAL;
155 }
156
157 /*
158 * We are done with this MPTCP socket (it has been closed);
159 * trigger all subflows to be disconnected, if not already,
160 * by initiating the PCB detach sequence (SOF_PCBCLEARING
161 * will be set.)
162 */
163 mp_pcbdetach(mp_so);
164
165 mptcp_disconnect(mpte);
166
167 return 0;
168 }
169
170 /*
171 * Attach MPTCP protocol to socket, allocating MP control block,
172 * MPTCP session, control block, buffer space, etc.
173 */
174 static int
175 mptcp_attach(struct socket *mp_so, struct proc *p)
176 {
177 #pragma unused(p)
178 struct mptses *mpte = NULL;
179 struct mptcb *mp_tp = NULL;
180 struct mppcb *mpp = NULL;
181 int error = 0;
182
183 if (mp_so->so_snd.sb_hiwat == 0 || mp_so->so_rcv.sb_hiwat == 0) {
184 error = soreserve(mp_so, tcp_sendspace, tcp_recvspace);
185 if (error != 0) {
186 goto out;
187 }
188 }
189
190 if (mp_so->so_snd.sb_preconn_hiwat == 0) {
191 soreserve_preconnect(mp_so, 2048);
192 }
193
194 if ((mp_so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
195 mp_so->so_rcv.sb_flags |= SB_AUTOSIZE;
196 }
197 if ((mp_so->so_snd.sb_flags & SB_USRSIZE) == 0) {
198 mp_so->so_snd.sb_flags |= SB_AUTOSIZE;
199 }
200
201 /*
202 * MPTCP socket buffers cannot be compressed, due to the
203 * fact that each mbuf chained via m_next is a M_PKTHDR
204 * which carries some MPTCP metadata.
205 */
206 mp_so->so_snd.sb_flags |= SB_NOCOMPRESS;
207 mp_so->so_rcv.sb_flags |= SB_NOCOMPRESS;
208
209 if ((error = mp_pcballoc(mp_so, &mtcbinfo)) != 0) {
210 goto out;
211 }
212
213 mpp = mpsotomppcb(mp_so);
214 VERIFY(mpp != NULL);
215 mpte = (struct mptses *)mpp->mpp_pcbe;
216 VERIFY(mpte != NULL);
217 mp_tp = mpte->mpte_mptcb;
218 VERIFY(mp_tp != NULL);
219 out:
220 return error;
221 }
222
223 static int
224 mptcp_entitlement_check(struct socket *mp_so)
225 {
226 struct mptses *mpte = mpsotompte(mp_so);
227
228 if (soopt_cred_check(mp_so, PRIV_NET_RESTRICTED_MULTIPATH_EXTENDED, TRUE) == 0) {
229 /*
230 * This means the app has the extended entitlement. Thus,
231 * it's a first party app and can run without restrictions.
232 */
233 mpte->mpte_flags |= MPTE_FIRSTPARTY;
234 goto grant;
235 }
236
237 #if (DEVELOPMENT || DEBUG)
238 if (mptcp_disable_entitlements) {
239 goto grant;
240 }
241 #endif
242
243 if (soopt_cred_check(mp_so, PRIV_NET_PRIVILEGED_MULTIPATH, TRUE)) {
244 mptcplog((LOG_NOTICE, "%s Multipath Capability needed\n", __func__),
245 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
246 return -1;
247 }
248
249 if (mpte->mpte_svctype > MPTCP_SVCTYPE_INTERACTIVE &&
250 mptcp_developer_mode == 0) {
251 mptcplog((LOG_NOTICE, "%s need to set allow_aggregate sysctl\n",
252 __func__), MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
253 return -1;
254 }
255
256 grant:
257 mptcplog((LOG_NOTICE, "%s entitlement granted for %u\n", __func__, mpte->mpte_svctype),
258 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
259
260 return 0;
261 }
262
263 /*
264 * Common subroutine to open a MPTCP connection to one of the remote hosts
265 * specified by dst_sl. This includes allocating and establishing a
266 * subflow TCP connection, either initially to establish MPTCP connection,
267 * or to join an existing one. Returns a connection handle upon success.
268 */
269 static int
270 mptcp_connectx(struct mptses *mpte, struct sockaddr *src,
271 struct sockaddr *dst, uint32_t ifscope, sae_connid_t *pcid)
272 {
273 struct socket *mp_so = mptetoso(mpte);
274 int error = 0;
275
276 VERIFY(dst != NULL);
277 VERIFY(pcid != NULL);
278
279 mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx\n", __func__,
280 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
281 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
282 DTRACE_MPTCP2(connectx, struct mptses *, mpte, struct socket *, mp_so);
283
284 error = mptcp_subflow_add(mpte, src, dst, ifscope, pcid);
285
286 return error;
287 }
288
289 /*
290 * User-protocol pru_connectx callback.
291 */
292 static int
293 mptcp_usr_connectx(struct socket *mp_so, struct sockaddr *src,
294 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
295 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
296 uint32_t arglen, struct uio *auio, user_ssize_t *bytes_written)
297 {
298 #pragma unused(p, aid, flags, arg, arglen)
299 struct mppcb *mpp = mpsotomppcb(mp_so);
300 struct mptses *mpte = NULL;
301 struct mptcb *mp_tp = NULL;
302 user_ssize_t datalen;
303 int error = 0;
304
305 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
306 mptcplog((LOG_ERR, "%s state %d\n", __func__,
307 mpp ? mpp->mpp_state : -1),
308 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
309 error = EINVAL;
310 goto out;
311 }
312 mpte = mptompte(mpp);
313 VERIFY(mpte != NULL);
314 mpte_lock_assert_held(mpte);
315
316 mp_tp = mpte->mpte_mptcb;
317 VERIFY(mp_tp != NULL);
318
319 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
320 mptcplog((LOG_ERR, "%s fell back to TCP\n", __func__),
321 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
322 error = EINVAL;
323 goto out;
324 }
325
326 if (dst->sa_family != AF_INET && dst->sa_family != AF_INET6) {
327 error = EAFNOSUPPORT;
328 goto out;
329 }
330
331 if (dst->sa_family == AF_INET &&
332 dst->sa_len != sizeof(mpte->__mpte_dst_v4)) {
333 mptcplog((LOG_ERR, "%s IPv4 dst len %u\n", __func__,
334 dst->sa_len),
335 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
336 error = EINVAL;
337 goto out;
338 }
339
340 if (dst->sa_family == AF_INET6 &&
341 dst->sa_len != sizeof(mpte->__mpte_dst_v6)) {
342 mptcplog((LOG_ERR, "%s IPv6 dst len %u\n", __func__,
343 dst->sa_len),
344 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
345 error = EINVAL;
346 goto out;
347 }
348
349 if (!(mpte->mpte_flags & MPTE_SVCTYPE_CHECKED)) {
350 if (mptcp_entitlement_check(mp_so) < 0) {
351 error = EPERM;
352 goto out;
353 }
354
355 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
356 }
357
358 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
359 memcpy(&mpte->mpte_dst, dst, dst->sa_len);
360 }
361
362 if (src) {
363 if (src->sa_family != AF_INET && src->sa_family != AF_INET6) {
364 error = EAFNOSUPPORT;
365 goto out;
366 }
367
368 if (src->sa_family == AF_INET &&
369 src->sa_len != sizeof(mpte->__mpte_src_v4)) {
370 mptcplog((LOG_ERR, "%s IPv4 src len %u\n", __func__,
371 src->sa_len),
372 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
373 error = EINVAL;
374 goto out;
375 }
376
377 if (src->sa_family == AF_INET6 &&
378 src->sa_len != sizeof(mpte->__mpte_src_v6)) {
379 mptcplog((LOG_ERR, "%s IPv6 src len %u\n", __func__,
380 src->sa_len),
381 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
382 error = EINVAL;
383 goto out;
384 }
385
386 if ((mp_so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING)) == 0) {
387 memcpy(&mpte->mpte_src, src, src->sa_len);
388 }
389 }
390
391 error = mptcp_connectx(mpte, src, dst, ifscope, pcid);
392
393 /* If there is data, copy it */
394 if (auio != NULL) {
395 datalen = uio_resid(auio);
396 socket_unlock(mp_so, 0);
397 error = mp_so->so_proto->pr_usrreqs->pru_sosend(mp_so, NULL,
398 (uio_t) auio, NULL, NULL, 0);
399
400 if (error == 0 || error == EWOULDBLOCK) {
401 *bytes_written = datalen - uio_resid(auio);
402 }
403
404 if (error == EWOULDBLOCK) {
405 error = EINPROGRESS;
406 }
407
408 socket_lock(mp_so, 0);
409 }
410
411 out:
412 return error;
413 }
414
415 /*
416 * Handle SIOCGASSOCIDS ioctl for PF_MULTIPATH domain.
417 */
418 static int
419 mptcp_getassocids(struct mptses *mpte, uint32_t *cnt, user_addr_t aidp)
420 {
421 mpte_lock_assert_held(mpte); /* same as MP socket lock */
422
423 /* MPTCP has at most 1 association */
424 *cnt = (mpte->mpte_associd != SAE_ASSOCID_ANY) ? 1 : 0;
425
426 /* just asking how many there are? */
427 if (aidp == USER_ADDR_NULL) {
428 return 0;
429 }
430
431 return copyout(&mpte->mpte_associd, aidp,
432 sizeof(mpte->mpte_associd));
433 }
434
435 /*
436 * Handle SIOCGCONNIDS ioctl for PF_MULTIPATH domain.
437 */
438 static int
439 mptcp_getconnids(struct mptses *mpte, sae_associd_t aid, uint32_t *cnt,
440 user_addr_t cidp)
441 {
442 struct mptsub *mpts;
443 int error = 0;
444
445 mpte_lock_assert_held(mpte); /* same as MP socket lock */
446
447 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL &&
448 aid != mpte->mpte_associd) {
449 return EINVAL;
450 }
451
452 *cnt = mpte->mpte_numflows;
453
454 /* just asking how many there are? */
455 if (cidp == USER_ADDR_NULL) {
456 return 0;
457 }
458
459 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
460 if ((error = copyout(&mpts->mpts_connid, cidp,
461 sizeof(mpts->mpts_connid))) != 0) {
462 break;
463 }
464
465 cidp += sizeof(mpts->mpts_connid);
466 }
467
468 return error;
469 }
470
471 /*
472 * Handle SIOCGCONNINFO ioctl for PF_MULTIPATH domain.
473 */
474 static int
475 mptcp_getconninfo(struct mptses *mpte, sae_connid_t *cid, uint32_t *flags,
476 uint32_t *ifindex, int32_t *soerror, user_addr_t src, socklen_t *src_len,
477 user_addr_t dst, socklen_t *dst_len, uint32_t *aux_type,
478 user_addr_t aux_data, uint32_t *aux_len)
479 {
480 struct socket *so;
481 struct inpcb *inp;
482 struct mptsub *mpts;
483 int error = 0;
484
485 *flags = 0;
486 *aux_type = 0;
487 *ifindex = 0;
488 *soerror = 0;
489
490 if (*cid == SAE_CONNID_ALL) {
491 struct socket *mp_so = mptetoso(mpte);
492 struct mptcb *mp_tp = mpte->mpte_mptcb;
493 struct conninfo_multipathtcp mptcp_ci;
494
495 if (*aux_len != 0 && *aux_len != sizeof(mptcp_ci)) {
496 return EINVAL;
497 }
498
499 if (mp_so->so_state & SS_ISCONNECTING) {
500 *flags |= CIF_CONNECTING;
501 }
502 if (mp_so->so_state & SS_ISCONNECTED) {
503 *flags |= CIF_CONNECTED;
504 }
505 if (mp_so->so_state & SS_ISDISCONNECTING) {
506 *flags |= CIF_DISCONNECTING;
507 }
508 if (mp_so->so_state & SS_ISDISCONNECTED) {
509 *flags |= CIF_DISCONNECTED;
510 }
511 if (!(mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP)) {
512 *flags |= CIF_MP_CAPABLE;
513 }
514 if (mp_tp->mpt_flags & MPTCPF_FALLBACK_TO_TCP) {
515 *flags |= CIF_MP_DEGRADED;
516 }
517
518 *src_len = 0;
519 *dst_len = 0;
520
521 *aux_type = CIAUX_MPTCP;
522 *aux_len = sizeof(mptcp_ci);
523
524 if (aux_data != USER_ADDR_NULL) {
525 unsigned long i = 0;
526 int initial_info_set = 0;
527
528 bzero(&mptcp_ci, sizeof(mptcp_ci));
529 mptcp_ci.mptcpci_subflow_count = mpte->mpte_numflows;
530 mptcp_ci.mptcpci_switch_count = mpte->mpte_subflow_switches;
531
532 VERIFY(sizeof(mptcp_ci.mptcpci_itfstats) == sizeof(mpte->mpte_itfstats));
533 memcpy(mptcp_ci.mptcpci_itfstats, mpte->mpte_itfstats, sizeof(mptcp_ci.mptcpci_itfstats));
534
535 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
536 if (i >= sizeof(mptcp_ci.mptcpci_subflow_connids) / sizeof(sae_connid_t)) {
537 break;
538 }
539 mptcp_ci.mptcpci_subflow_connids[i] = mpts->mpts_connid;
540
541 if (mpts->mpts_flags & MPTSF_INITIAL_SUB) {
542 inp = sotoinpcb(mpts->mpts_socket);
543
544 mptcp_ci.mptcpci_init_rxbytes = inp->inp_stat->rxbytes;
545 mptcp_ci.mptcpci_init_txbytes = inp->inp_stat->txbytes;
546 initial_info_set = 1;
547 }
548
549 mptcpstats_update(mptcp_ci.mptcpci_itfstats, mpts);
550
551 i++;
552 }
553
554 if (initial_info_set == 0) {
555 mptcp_ci.mptcpci_init_rxbytes = mpte->mpte_init_rxbytes;
556 mptcp_ci.mptcpci_init_txbytes = mpte->mpte_init_txbytes;
557 }
558
559 if (mpte->mpte_flags & MPTE_FIRSTPARTY) {
560 mptcp_ci.mptcpci_flags |= MPTCPCI_FIRSTPARTY;
561 }
562
563 error = copyout(&mptcp_ci, aux_data, sizeof(mptcp_ci));
564 if (error != 0) {
565 mptcplog((LOG_ERR, "%s copyout failed: %d\n",
566 __func__, error),
567 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
568 return error;
569 }
570 }
571
572 return 0;
573 }
574
575 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
576 if (mpts->mpts_connid == *cid || *cid == SAE_CONNID_ANY) {
577 break;
578 }
579 }
580 if (mpts == NULL) {
581 return (*cid == SAE_CONNID_ANY) ? ENXIO : EINVAL;
582 }
583
584 so = mpts->mpts_socket;
585 inp = sotoinpcb(so);
586
587 if (inp->inp_vflag & INP_IPV4) {
588 error = in_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
589 soerror, src, src_len, dst, dst_len,
590 aux_type, aux_data, aux_len);
591 } else {
592 error = in6_getconninfo(so, SAE_CONNID_ANY, flags, ifindex,
593 soerror, src, src_len, dst, dst_len,
594 aux_type, aux_data, aux_len);
595 }
596
597 if (error != 0) {
598 mptcplog((LOG_ERR, "%s error from in_getconninfo %d\n",
599 __func__, error),
600 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
601 return error;
602 }
603
604 if (mpts->mpts_flags & MPTSF_MP_CAPABLE) {
605 *flags |= CIF_MP_CAPABLE;
606 }
607 if (mpts->mpts_flags & MPTSF_MP_DEGRADED) {
608 *flags |= CIF_MP_DEGRADED;
609 }
610 if (mpts->mpts_flags & MPTSF_MP_READY) {
611 *flags |= CIF_MP_READY;
612 }
613 if (mpts->mpts_flags & MPTSF_ACTIVE) {
614 *flags |= CIF_MP_ACTIVE;
615 }
616
617 mptcplog((LOG_DEBUG, "%s: cid %d flags %x \n", __func__,
618 mpts->mpts_connid, mpts->mpts_flags),
619 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_VERBOSE);
620
621 return 0;
622 }
623
624 /*
625 * User-protocol pru_control callback.
626 */
627 static int
628 mptcp_usr_control(struct socket *mp_so, u_long cmd, caddr_t data,
629 struct ifnet *ifp, struct proc *p)
630 {
631 #pragma unused(ifp, p)
632 struct mppcb *mpp = mpsotomppcb(mp_so);
633 struct mptses *mpte;
634 int error = 0;
635
636 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
637 error = EINVAL;
638 goto out;
639 }
640 mpte = mptompte(mpp);
641 VERIFY(mpte != NULL);
642
643 mpte_lock_assert_held(mpte); /* same as MP socket lock */
644
645 switch (cmd) {
646 case SIOCGASSOCIDS32: { /* struct so_aidreq32 */
647 struct so_aidreq32 aidr;
648 bcopy(data, &aidr, sizeof(aidr));
649 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
650 aidr.sar_aidp);
651 if (error == 0) {
652 bcopy(&aidr, data, sizeof(aidr));
653 }
654 break;
655 }
656
657 case SIOCGASSOCIDS64: { /* struct so_aidreq64 */
658 struct so_aidreq64 aidr;
659 bcopy(data, &aidr, sizeof(aidr));
660 error = mptcp_getassocids(mpte, &aidr.sar_cnt,
661 aidr.sar_aidp);
662 if (error == 0) {
663 bcopy(&aidr, data, sizeof(aidr));
664 }
665 break;
666 }
667
668 case SIOCGCONNIDS32: { /* struct so_cidreq32 */
669 struct so_cidreq32 cidr;
670 bcopy(data, &cidr, sizeof(cidr));
671 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
672 cidr.scr_cidp);
673 if (error == 0) {
674 bcopy(&cidr, data, sizeof(cidr));
675 }
676 break;
677 }
678
679 case SIOCGCONNIDS64: { /* struct so_cidreq64 */
680 struct so_cidreq64 cidr;
681 bcopy(data, &cidr, sizeof(cidr));
682 error = mptcp_getconnids(mpte, cidr.scr_aid, &cidr.scr_cnt,
683 cidr.scr_cidp);
684 if (error == 0) {
685 bcopy(&cidr, data, sizeof(cidr));
686 }
687 break;
688 }
689
690 case SIOCGCONNINFO32: { /* struct so_cinforeq32 */
691 struct so_cinforeq32 cifr;
692 bcopy(data, &cifr, sizeof(cifr));
693 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
694 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
695 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
696 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
697 &cifr.scir_aux_len);
698 if (error == 0) {
699 bcopy(&cifr, data, sizeof(cifr));
700 }
701 break;
702 }
703
704 case SIOCGCONNINFO64: { /* struct so_cinforeq64 */
705 struct so_cinforeq64 cifr;
706 bcopy(data, &cifr, sizeof(cifr));
707 error = mptcp_getconninfo(mpte, &cifr.scir_cid,
708 &cifr.scir_flags, &cifr.scir_ifindex, &cifr.scir_error,
709 cifr.scir_src, &cifr.scir_src_len, cifr.scir_dst,
710 &cifr.scir_dst_len, &cifr.scir_aux_type, cifr.scir_aux_data,
711 &cifr.scir_aux_len);
712 if (error == 0) {
713 bcopy(&cifr, data, sizeof(cifr));
714 }
715 break;
716 }
717
718 default:
719 error = EOPNOTSUPP;
720 break;
721 }
722 out:
723 return error;
724 }
725
726 static int
727 mptcp_disconnect(struct mptses *mpte)
728 {
729 struct socket *mp_so;
730 struct mptcb *mp_tp;
731 int error = 0;
732
733 mpte_lock_assert_held(mpte); /* same as MP socket lock */
734
735 mp_so = mptetoso(mpte);
736 mp_tp = mpte->mpte_mptcb;
737
738 mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx %d\n", __func__,
739 (u_int64_t)VM_KERNEL_ADDRPERM(mp_so), mp_so->so_error),
740 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
741
742 DTRACE_MPTCP3(disconnectx, struct mptses *, mpte,
743 struct socket *, mp_so, struct mptcb *, mp_tp);
744
745 /* if we're not detached, go thru socket state checks */
746 if (!(mp_so->so_flags & SOF_PCBCLEARING)) {
747 if (!(mp_so->so_state & (SS_ISCONNECTED |
748 SS_ISCONNECTING))) {
749 error = ENOTCONN;
750 goto out;
751 }
752 if (mp_so->so_state & SS_ISDISCONNECTING) {
753 error = EALREADY;
754 goto out;
755 }
756 }
757
758 mptcp_cancel_all_timers(mp_tp);
759 if (mp_tp->mpt_state < MPTCPS_ESTABLISHED) {
760 mptcp_close(mpte, mp_tp);
761 } else if ((mp_so->so_options & SO_LINGER) &&
762 mp_so->so_linger == 0) {
763 mptcp_drop(mpte, mp_tp, 0);
764 } else {
765 soisdisconnecting(mp_so);
766 sbflush(&mp_so->so_rcv);
767 if (mptcp_usrclosed(mpte) != NULL) {
768 mptcp_output(mpte);
769 }
770 }
771
772 if (error == 0) {
773 mptcp_subflow_workloop(mpte);
774 }
775
776 out:
777 return error;
778 }
779
/*
 * pru_disconnect callback: resolves the session behind the MP socket and
 * hands it to mptcp_disconnect(), whose result is returned.
 */
static int
mptcp_usr_disconnect(struct socket *mp_so)
{
	struct mptses *mpte = mpsotompte(mp_so);

	return mptcp_disconnect(mpte);
}
788
789 /*
790 * User-protocol pru_disconnectx callback.
791 */
792 static int
793 mptcp_usr_disconnectx(struct socket *mp_so, sae_associd_t aid, sae_connid_t cid)
794 {
795 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
796 return EINVAL;
797 }
798
799 if (cid != SAE_CONNID_ANY && cid != SAE_CONNID_ALL) {
800 return EINVAL;
801 }
802
803 return mptcp_usr_disconnect(mp_so);
804 }
805
806 void
807 mptcp_finish_usrclosed(struct mptses *mpte)
808 {
809 struct mptcb *mp_tp = mpte->mpte_mptcb;
810 struct socket *mp_so = mptetoso(mpte);
811
812 if (mp_tp->mpt_state == MPTCPS_CLOSED) {
813 mpte = mptcp_close(mpte, mp_tp);
814 } else if (mp_tp->mpt_state >= MPTCPS_FIN_WAIT_2) {
815 soisdisconnected(mp_so);
816 } else {
817 struct mptsub *mpts;
818
819 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
820 if ((mp_so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==
821 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
822 mptcp_subflow_disconnect(mpte, mpts);
823 } else {
824 mptcp_subflow_shutdown(mpte, mpts);
825 }
826 }
827 }
828 }
829
830 /*
831 * User issued close, and wish to trail thru shutdown states.
832 */
833 static struct mptses *
834 mptcp_usrclosed(struct mptses *mpte)
835 {
836 struct mptcb *mp_tp = mpte->mpte_mptcb;
837
838 mptcp_close_fsm(mp_tp, MPCE_CLOSE);
839
840 /* Not everything has been acknowledged - don't close the subflows! */
841 if (mp_tp->mpt_sndnxt + 1 != mp_tp->mpt_sndmax) {
842 return mpte;
843 }
844
845 mptcp_finish_usrclosed(mpte);
846
847 return mpte;
848 }
849
850 /*
851 * After a receive, possible send some update to peer.
852 */
853 static int
854 mptcp_usr_rcvd(struct socket *mp_so, int flags)
855 {
856 #pragma unused(flags)
857 struct mppcb *mpp = mpsotomppcb(mp_so);
858 struct mptses *mpte;
859 int error = 0;
860
861 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
862 error = EINVAL;
863 goto out;
864 }
865 mpte = mptompte(mpp);
866 VERIFY(mpte != NULL);
867
868 error = mptcp_output(mpte);
869 out:
870 return error;
871 }
872
873 /*
874 * Do a send by putting data in the output queue.
875 */
876 static int
877 mptcp_usr_send(struct socket *mp_so, int prus_flags, struct mbuf *m,
878 struct sockaddr *nam, struct mbuf *control, struct proc *p)
879 {
880 #pragma unused(nam, p)
881 struct mppcb *mpp = mpsotomppcb(mp_so);
882 struct mptses *mpte;
883 int error = 0;
884
885 if (prus_flags & (PRUS_OOB | PRUS_EOF)) {
886 error = EOPNOTSUPP;
887 goto out;
888 }
889
890 if (nam != NULL) {
891 error = EOPNOTSUPP;
892 goto out;
893 }
894
895 if (control != NULL && control->m_len != 0) {
896 error = EOPNOTSUPP;
897 goto out;
898 }
899
900 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
901 error = ECONNRESET;
902 goto out;
903 }
904 mpte = mptompte(mpp);
905 VERIFY(mpte != NULL);
906
907 if (!(mp_so->so_state & SS_ISCONNECTED) &&
908 !(mp_so->so_flags1 & SOF1_PRECONNECT_DATA)) {
909 error = ENOTCONN;
910 goto out;
911 }
912
913 mptcp_insert_dsn(mpp, m);
914 VERIFY(mp_so->so_snd.sb_flags & SB_NOCOMPRESS);
915 sbappendstream(&mp_so->so_snd, m);
916 m = NULL;
917
918 error = mptcp_output(mpte);
919 if (error != 0) {
920 goto out;
921 }
922
923 if (mp_so->so_state & SS_ISCONNECTING) {
924 if (mp_so->so_state & SS_NBIO) {
925 error = EWOULDBLOCK;
926 } else {
927 error = sbwait(&mp_so->so_snd);
928 }
929 }
930
931 out:
932 if (error) {
933 if (m != NULL) {
934 m_freem(m);
935 }
936 if (control != NULL) {
937 m_freem(control);
938 }
939 }
940 return error;
941 }
942
943 /*
944 * Mark the MPTCP connection as being incapable of further output.
945 */
946 static int
947 mptcp_usr_shutdown(struct socket *mp_so)
948 {
949 struct mppcb *mpp = mpsotomppcb(mp_so);
950 struct mptses *mpte;
951 int error = 0;
952
953 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
954 error = EINVAL;
955 goto out;
956 }
957 mpte = mptompte(mpp);
958 VERIFY(mpte != NULL);
959
960 socantsendmore(mp_so);
961
962 mpte = mptcp_usrclosed(mpte);
963 if (mpte != NULL) {
964 error = mptcp_output(mpte);
965 }
966 out:
967 return error;
968 }
969
970 /*
971 * Copy the contents of uio into a properly sized mbuf chain.
972 */
973 static int
974 mptcp_uiotombuf(struct uio *uio, int how, int space, uint32_t align,
975 struct mbuf **top)
976 {
977 struct mbuf *m, *mb, *nm = NULL, *mtail = NULL;
978 user_ssize_t resid, tot, len, progress; /* must be user_ssize_t */
979 int error;
980
981 VERIFY(top != NULL && *top == NULL);
982
983 /*
984 * space can be zero or an arbitrary large value bound by
985 * the total data supplied by the uio.
986 */
987 resid = uio_resid(uio);
988 if (space > 0) {
989 tot = imin(resid, space);
990 } else {
991 tot = resid;
992 }
993
994 /*
995 * The smallest unit is a single mbuf with pkthdr.
996 * We can't align past it.
997 */
998 if (align >= MHLEN) {
999 return EINVAL;
1000 }
1001
1002 /*
1003 * Give us the full allocation or nothing.
1004 * If space is zero return the smallest empty mbuf.
1005 */
1006 if ((len = tot + align) == 0) {
1007 len = 1;
1008 }
1009
1010 /* Loop and append maximum sized mbufs to the chain tail. */
1011 while (len > 0) {
1012 uint32_t m_needed = 1;
1013
1014 if (njcl > 0 && len > MBIGCLBYTES) {
1015 mb = m_getpackets_internal(&m_needed, 1,
1016 how, 1, M16KCLBYTES);
1017 } else if (len > MCLBYTES) {
1018 mb = m_getpackets_internal(&m_needed, 1,
1019 how, 1, MBIGCLBYTES);
1020 } else if (len >= (signed)MINCLSIZE) {
1021 mb = m_getpackets_internal(&m_needed, 1,
1022 how, 1, MCLBYTES);
1023 } else {
1024 mb = m_gethdr(how, MT_DATA);
1025 }
1026
1027 /* Fail the whole operation if one mbuf can't be allocated. */
1028 if (mb == NULL) {
1029 if (nm != NULL) {
1030 m_freem(nm);
1031 }
1032 return ENOBUFS;
1033 }
1034
1035 /* Book keeping. */
1036 VERIFY(mb->m_flags & M_PKTHDR);
1037 len -= ((mb->m_flags & M_EXT) ? mb->m_ext.ext_size : MHLEN);
1038 if (mtail != NULL) {
1039 mtail->m_next = mb;
1040 } else {
1041 nm = mb;
1042 }
1043 mtail = mb;
1044 }
1045
1046 m = nm;
1047 m->m_data += align;
1048
1049 progress = 0;
1050 /* Fill all mbufs with uio data and update header information. */
1051 for (mb = m; mb != NULL; mb = mb->m_next) {
1052 len = imin(M_TRAILINGSPACE(mb), tot - progress);
1053
1054 error = uiomove(mtod(mb, char *), len, uio);
1055 if (error != 0) {
1056 m_freem(m);
1057 return error;
1058 }
1059
1060 /* each mbuf is M_PKTHDR chained via m_next */
1061 mb->m_len = len;
1062 mb->m_pkthdr.len = len;
1063
1064 progress += len;
1065 }
1066 VERIFY(progress == tot);
1067 *top = m;
1068 return 0;
1069 }
1070
1071 /*
1072 * MPTCP socket protocol-user socket send routine, derived from sosend().
1073 */
1074 static int
1075 mptcp_usr_sosend(struct socket *mp_so, struct sockaddr *addr, struct uio *uio,
1076 struct mbuf *top, struct mbuf *control, int flags)
1077 {
1078 #pragma unused(addr)
1079 int32_t space;
1080 user_ssize_t resid;
1081 int error, sendflags;
1082 struct proc *p = current_proc();
1083 int sblocked = 0;
1084
1085 /* UIO is required for now, due to per-mbuf M_PKTHDR constrains */
1086 if (uio == NULL || top != NULL) {
1087 error = EINVAL;
1088 goto out;
1089 }
1090 resid = uio_resid(uio);
1091
1092 socket_lock(mp_so, 1);
1093 so_update_last_owner_locked(mp_so, p);
1094 so_update_policy(mp_so);
1095
1096 VERIFY(mp_so->so_type == SOCK_STREAM);
1097 VERIFY(!(mp_so->so_flags & SOF_MP_SUBFLOW));
1098
1099 if ((flags & (MSG_OOB | MSG_DONTROUTE | MSG_HOLD | MSG_SEND | MSG_FLUSH)) ||
1100 (mp_so->so_flags & SOF_ENABLE_MSGS)) {
1101 error = EOPNOTSUPP;
1102 socket_unlock(mp_so, 1);
1103 goto out;
1104 }
1105
1106 /*
1107 * In theory resid should be unsigned. However, space must be
1108 * signed, as it might be less than 0 if we over-committed, and we
1109 * must use a signed comparison of space and resid. On the other
1110 * hand, a negative resid causes us to loop sending 0-length
1111 * segments to the protocol.
1112 */
1113 if (resid < 0 || (flags & MSG_EOR) || control != NULL) {
1114 error = EINVAL;
1115 socket_unlock(mp_so, 1);
1116 goto out;
1117 }
1118
1119 OSIncrementAtomicLong(&p->p_stats->p_ru.ru_msgsnd);
1120
1121 do {
1122 error = sosendcheck(mp_so, NULL, resid, 0, 0, flags,
1123 &sblocked, NULL);
1124 if (error != 0) {
1125 goto release;
1126 }
1127
1128 space = sbspace(&mp_so->so_snd);
1129 do {
1130 socket_unlock(mp_so, 0);
1131 /*
1132 * Copy the data from userland into an mbuf chain.
1133 */
1134 error = mptcp_uiotombuf(uio, M_WAITOK, space, 0, &top);
1135 if (error != 0) {
1136 socket_lock(mp_so, 0);
1137 goto release;
1138 }
1139 VERIFY(top != NULL);
1140 space -= resid - uio_resid(uio);
1141 resid = uio_resid(uio);
1142 socket_lock(mp_so, 0);
1143
1144 /*
1145 * Compute flags here, for pru_send and NKEs.
1146 */
1147 sendflags = (resid > 0 && space > 0) ?
1148 PRUS_MORETOCOME : 0;
1149
1150 /*
1151 * Socket filter processing
1152 */
1153 VERIFY(control == NULL);
1154 error = sflt_data_out(mp_so, NULL, &top, &control, 0);
1155 if (error != 0) {
1156 if (error == EJUSTRETURN) {
1157 error = 0;
1158 top = NULL;
1159 /* always free control if any */
1160 }
1161 goto release;
1162 }
1163 if (control != NULL) {
1164 m_freem(control);
1165 control = NULL;
1166 }
1167
1168 /*
1169 * Pass data to protocol.
1170 */
1171 error = (*mp_so->so_proto->pr_usrreqs->pru_send)
1172 (mp_so, sendflags, top, NULL, NULL, p);
1173
1174 top = NULL;
1175 if (error != 0) {
1176 goto release;
1177 }
1178 } while (resid != 0 && space > 0);
1179 } while (resid != 0);
1180
1181 release:
1182 if (sblocked) {
1183 sbunlock(&mp_so->so_snd, FALSE); /* will unlock socket */
1184 } else {
1185 socket_unlock(mp_so, 1);
1186 }
1187 out:
1188 if (top != NULL) {
1189 m_freem(top);
1190 }
1191 if (control != NULL) {
1192 m_freem(control);
1193 }
1194
1195 soclearfastopen(mp_so);
1196
1197 return error;
1198 }
1199
1200 /*
1201 * Called to filter SOPT_{SET,GET} for SOL_SOCKET level socket options.
1202 * This routine simply indicates to the caller whether or not to proceed
1203 * further with the given socket option. This is invoked by sosetoptlock()
1204 * and sogetoptlock().
1205 */
1206 static int
1207 mptcp_usr_socheckopt(struct socket *mp_so, struct sockopt *sopt)
1208 {
1209 #pragma unused(mp_so)
1210 int error = 0;
1211
1212 VERIFY(sopt->sopt_level == SOL_SOCKET);
1213
1214 /*
1215 * We could check for sopt_dir (set/get) here, but we'll just
1216 * let the caller deal with it as appropriate; therefore the
1217 * following is a superset of the socket options which we
1218 * allow for set/get.
1219 *
1220 * XXX: adi@apple.com
1221 *
1222 * Need to consider the following cases:
1223 *
1224 * a. Certain socket options don't have a clear definition
1225 * on the expected behavior post connect(2). At the time
1226 * those options are issued on the MP socket, there may
1227 * be existing subflow sockets that are already connected.
1228 */
1229 switch (sopt->sopt_name) {
1230 case SO_LINGER: /* MP */
1231 case SO_LINGER_SEC: /* MP */
1232 case SO_TYPE: /* MP */
1233 case SO_NREAD: /* MP */
1234 case SO_NWRITE: /* MP */
1235 case SO_ERROR: /* MP */
1236 case SO_SNDBUF: /* MP */
1237 case SO_RCVBUF: /* MP */
1238 case SO_SNDLOWAT: /* MP */
1239 case SO_RCVLOWAT: /* MP */
1240 case SO_SNDTIMEO: /* MP */
1241 case SO_RCVTIMEO: /* MP */
1242 case SO_NKE: /* MP */
1243 case SO_NOSIGPIPE: /* MP */
1244 case SO_NOADDRERR: /* MP */
1245 case SO_LABEL: /* MP */
1246 case SO_PEERLABEL: /* MP */
1247 case SO_DEFUNCTOK: /* MP */
1248 case SO_ISDEFUNCT: /* MP */
1249 case SO_TRAFFIC_CLASS_DBG: /* MP */
1250 case SO_DELEGATED: /* MP */
1251 case SO_DELEGATED_UUID: /* MP */
1252 #if NECP
1253 case SO_NECP_ATTRIBUTES:
1254 case SO_NECP_CLIENTUUID:
1255 #endif /* NECP */
1256 /*
1257 * Tell the caller that these options are to be processed.
1258 */
1259 break;
1260
1261 case SO_DEBUG: /* MP + subflow */
1262 case SO_KEEPALIVE: /* MP + subflow */
1263 case SO_USELOOPBACK: /* MP + subflow */
1264 case SO_RANDOMPORT: /* MP + subflow */
1265 case SO_TRAFFIC_CLASS: /* MP + subflow */
1266 case SO_RECV_TRAFFIC_CLASS: /* MP + subflow */
1267 case SO_PRIVILEGED_TRAFFIC_CLASS: /* MP + subflow */
1268 case SO_RECV_ANYIF: /* MP + subflow */
1269 case SO_RESTRICTIONS: /* MP + subflow */
1270 case SO_FLUSH: /* MP + subflow */
1271 case SO_NOWAKEFROMSLEEP:
1272 case SO_NOAPNFALLBK:
1273 case SO_MARK_CELLFALLBACK:
1274 /*
1275 * Tell the caller that these options are to be processed;
1276 * these will also be recorded later by mptcp_setopt().
1277 *
1278 * NOTE: Only support integer option value for now.
1279 */
1280 if (sopt->sopt_valsize != sizeof(int)) {
1281 error = EINVAL;
1282 }
1283 break;
1284
1285 default:
1286 /*
1287 * Tell the caller to stop immediately and return an error.
1288 */
1289 error = ENOPROTOOPT;
1290 break;
1291 }
1292
1293 return error;
1294 }
1295
1296 /*
1297 * Issue SOPT_SET for all MPTCP subflows (for integer option values.)
1298 */
1299 static int
1300 mptcp_setopt_apply(struct mptses *mpte, struct mptopt *mpo)
1301 {
1302 struct socket *mp_so;
1303 struct mptsub *mpts;
1304 struct mptopt smpo;
1305 int error = 0;
1306
1307 /* just bail now if this isn't applicable to subflow sockets */
1308 if (!(mpo->mpo_flags & MPOF_SUBFLOW_OK)) {
1309 error = ENOPROTOOPT;
1310 goto out;
1311 }
1312
1313 /*
1314 * Skip those that are handled internally; these options
1315 * should not have been recorded and marked with the
1316 * MPOF_SUBFLOW_OK by mptcp_setopt(), but just in case.
1317 */
1318 if (mpo->mpo_level == SOL_SOCKET &&
1319 (mpo->mpo_name == SO_NOSIGPIPE || mpo->mpo_name == SO_NOADDRERR)) {
1320 error = ENOPROTOOPT;
1321 goto out;
1322 }
1323
1324 mpte_lock_assert_held(mpte); /* same as MP socket lock */
1325 mp_so = mptetoso(mpte);
1326
1327 /*
1328 * Don't bother going further if there's no subflow; mark the option
1329 * with MPOF_INTERIM so that we know whether or not to remove this
1330 * option upon encountering an error while issuing it during subflow
1331 * socket creation.
1332 */
1333 if (mpte->mpte_numflows == 0) {
1334 VERIFY(TAILQ_EMPTY(&mpte->mpte_subflows));
1335 mpo->mpo_flags |= MPOF_INTERIM;
1336 /* return success */
1337 goto out;
1338 }
1339
1340 bzero(&smpo, sizeof(smpo));
1341 smpo.mpo_flags |= MPOF_SUBFLOW_OK;
1342 smpo.mpo_level = mpo->mpo_level;
1343 smpo.mpo_name = mpo->mpo_name;
1344
1345 /* grab exisiting values in case we need to rollback */
1346 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1347 struct socket *so;
1348
1349 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1350 mpts->mpts_oldintval = 0;
1351 smpo.mpo_intval = 0;
1352 VERIFY(mpts->mpts_socket != NULL);
1353 so = mpts->mpts_socket;
1354 if (mptcp_subflow_sogetopt(mpte, so, &smpo) == 0) {
1355 mpts->mpts_flags |= MPTSF_SOPT_OLDVAL;
1356 mpts->mpts_oldintval = smpo.mpo_intval;
1357 }
1358 }
1359
1360 /* apply socket option */
1361 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1362 struct socket *so;
1363
1364 mpts->mpts_flags |= MPTSF_SOPT_INPROG;
1365 VERIFY(mpts->mpts_socket != NULL);
1366 so = mpts->mpts_socket;
1367 error = mptcp_subflow_sosetopt(mpte, mpts, mpo);
1368 if (error != 0) {
1369 break;
1370 }
1371 }
1372
1373 /* cleanup, and rollback if needed */
1374 TAILQ_FOREACH(mpts, &mpte->mpte_subflows, mpts_entry) {
1375 struct socket *so;
1376
1377 if (!(mpts->mpts_flags & MPTSF_SOPT_INPROG)) {
1378 /* clear in case it's set */
1379 mpts->mpts_flags &= ~MPTSF_SOPT_OLDVAL;
1380 mpts->mpts_oldintval = 0;
1381 continue;
1382 }
1383 if (!(mpts->mpts_flags & MPTSF_SOPT_OLDVAL)) {
1384 mpts->mpts_flags &= ~MPTSF_SOPT_INPROG;
1385 VERIFY(mpts->mpts_oldintval == 0);
1386 continue;
1387 }
1388 /* error during sosetopt, so roll it back */
1389 if (error != 0) {
1390 VERIFY(mpts->mpts_socket != NULL);
1391 so = mpts->mpts_socket;
1392 smpo.mpo_intval = mpts->mpts_oldintval;
1393 mptcp_subflow_sosetopt(mpte, mpts, &smpo);
1394 }
1395 mpts->mpts_oldintval = 0;
1396 mpts->mpts_flags &= ~(MPTSF_SOPT_OLDVAL | MPTSF_SOPT_INPROG);
1397 }
1398
1399 out:
1400 return error;
1401 }
1402
1403 /*
1404 * Handle SOPT_SET for socket options issued on MP socket.
1405 */
1406 static int
1407 mptcp_setopt(struct mptses *mpte, struct sockopt *sopt)
1408 {
1409 int error = 0, optval = 0, level, optname, rec = 1;
1410 struct mptopt smpo, *mpo = NULL;
1411 struct socket *mp_so;
1412
1413 level = sopt->sopt_level;
1414 optname = sopt->sopt_name;
1415
1416 mp_so = mptetoso(mpte);
1417
1418 /*
1419 * Record socket options which are applicable to subflow sockets so
1420 * that we can replay them for new ones; see mptcp_usr_socheckopt()
1421 * for the list of eligible socket-level options.
1422 */
1423 if (level == SOL_SOCKET) {
1424 switch (optname) {
1425 case SO_DEBUG:
1426 case SO_KEEPALIVE:
1427 case SO_USELOOPBACK:
1428 case SO_RANDOMPORT:
1429 case SO_TRAFFIC_CLASS:
1430 case SO_RECV_TRAFFIC_CLASS:
1431 case SO_PRIVILEGED_TRAFFIC_CLASS:
1432 case SO_RECV_ANYIF:
1433 case SO_RESTRICTIONS:
1434 case SO_NOWAKEFROMSLEEP:
1435 case SO_NOAPNFALLBK:
1436 case SO_MARK_CELLFALLBACK:
1437 /* record it */
1438 break;
1439 case SO_FLUSH:
1440 /* don't record it */
1441 rec = 0;
1442 break;
1443
1444 /* Next ones, record at MPTCP-level */
1445 #if NECP
1446 case SO_NECP_CLIENTUUID:
1447 if (!uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1448 error = EINVAL;
1449 goto out;
1450 }
1451
1452 error = sooptcopyin(sopt, &mpsotomppcb(mp_so)->necp_client_uuid,
1453 sizeof(uuid_t), sizeof(uuid_t));
1454 if (error != 0) {
1455 goto out;
1456 }
1457
1458 mpsotomppcb(mp_so)->necp_cb = mptcp_session_necp_cb;
1459 error = necp_client_register_multipath_cb(mp_so->last_pid,
1460 mpsotomppcb(mp_so)->necp_client_uuid,
1461 mpsotomppcb(mp_so));
1462 if (error) {
1463 goto out;
1464 }
1465
1466 if (uuid_is_null(mpsotomppcb(mp_so)->necp_client_uuid)) {
1467 error = EINVAL;
1468 goto out;
1469 }
1470
1471 goto out;
1472 case SO_NECP_ATTRIBUTES:
1473 #endif /* NECP */
1474 default:
1475 /* nothing to do; just return */
1476 goto out;
1477 }
1478 } else {
1479 switch (optname) {
1480 case TCP_NODELAY:
1481 case TCP_RXT_FINDROP:
1482 case TCP_KEEPALIVE:
1483 case TCP_KEEPINTVL:
1484 case TCP_KEEPCNT:
1485 case TCP_CONNECTIONTIMEOUT:
1486 case TCP_RXT_CONNDROPTIME:
1487 case PERSIST_TIMEOUT:
1488 case TCP_ADAPTIVE_READ_TIMEOUT:
1489 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1490 /* eligible; record it */
1491 break;
1492 case TCP_NOTSENT_LOWAT:
1493 /* record at MPTCP level */
1494 error = sooptcopyin(sopt, &optval, sizeof(optval),
1495 sizeof(optval));
1496 if (error) {
1497 goto out;
1498 }
1499 if (optval < 0) {
1500 error = EINVAL;
1501 goto out;
1502 } else {
1503 if (optval == 0) {
1504 mp_so->so_flags &= ~SOF_NOTSENT_LOWAT;
1505 error = mptcp_set_notsent_lowat(mpte, 0);
1506 } else {
1507 mp_so->so_flags |= SOF_NOTSENT_LOWAT;
1508 error = mptcp_set_notsent_lowat(mpte,
1509 optval);
1510 }
1511 }
1512 goto out;
1513 case MPTCP_SERVICE_TYPE:
1514 /* record at MPTCP level */
1515 error = sooptcopyin(sopt, &optval, sizeof(optval),
1516 sizeof(optval));
1517 if (error) {
1518 goto out;
1519 }
1520 if (optval < 0 || optval >= MPTCP_SVCTYPE_MAX) {
1521 error = EINVAL;
1522 goto out;
1523 }
1524
1525 mpte->mpte_svctype = optval;
1526
1527 if (mptcp_entitlement_check(mp_so) < 0) {
1528 error = EACCES;
1529 goto out;
1530 }
1531
1532 mpte->mpte_flags |= MPTE_SVCTYPE_CHECKED;
1533
1534 goto out;
1535 case MPTCP_ALTERNATE_PORT:
1536 /* record at MPTCP level */
1537 error = sooptcopyin(sopt, &optval, sizeof(optval),
1538 sizeof(optval));
1539 if (error) {
1540 goto out;
1541 }
1542
1543 if (optval < 0 || optval > UINT16_MAX) {
1544 error = EINVAL;
1545 goto out;
1546 }
1547
1548 mpte->mpte_alternate_port = optval;
1549
1550 goto out;
1551 default:
1552 /* not eligible */
1553 error = ENOPROTOOPT;
1554 goto out;
1555 }
1556 }
1557
1558 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
1559 sizeof(optval))) != 0) {
1560 goto out;
1561 }
1562
1563 if (rec) {
1564 /* search for an existing one; if not found, allocate */
1565 if ((mpo = mptcp_sopt_find(mpte, sopt)) == NULL) {
1566 mpo = mptcp_sopt_alloc(M_WAITOK);
1567 }
1568
1569 if (mpo == NULL) {
1570 error = ENOBUFS;
1571 } else {
1572 mptcplog((LOG_DEBUG, "%s: mp_so 0x%llx sopt %s val %d %s\n",
1573 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1574 mptcp_sopt2str(level, optname), optval,
1575 (mpo->mpo_flags & MPOF_ATTACHED) ?
1576 "updated" : "recorded"),
1577 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1578
1579 /* initialize or update, as needed */
1580 mpo->mpo_intval = optval;
1581 if (!(mpo->mpo_flags & MPOF_ATTACHED)) {
1582 mpo->mpo_level = level;
1583 mpo->mpo_name = optname;
1584 mptcp_sopt_insert(mpte, mpo);
1585 }
1586 /* this can be issued on the subflow socket */
1587 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1588 }
1589 } else {
1590 bzero(&smpo, sizeof(smpo));
1591 mpo = &smpo;
1592 mpo->mpo_flags |= MPOF_SUBFLOW_OK;
1593 mpo->mpo_level = level;
1594 mpo->mpo_name = optname;
1595 mpo->mpo_intval = optval;
1596 }
1597
1598 /* issue this socket option on existing subflows */
1599 if (error == 0) {
1600 error = mptcp_setopt_apply(mpte, mpo);
1601 if (error != 0 && (mpo->mpo_flags & MPOF_ATTACHED)) {
1602 VERIFY(mpo != &smpo);
1603 mptcp_sopt_remove(mpte, mpo);
1604 mptcp_sopt_free(mpo);
1605 }
1606 if (mpo == &smpo) {
1607 mpo->mpo_flags &= ~MPOF_INTERIM;
1608 }
1609 }
1610 out:
1611 if (error == 0 && mpo != NULL) {
1612 mptcplog((LOG_INFO, "%s: mp_so 0x%llx sopt %s val %d set %s\n",
1613 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1614 mptcp_sopt2str(level, optname), optval,
1615 (mpo->mpo_flags & MPOF_INTERIM) ?
1616 "pending" : "successful"),
1617 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1618 } else if (error != 0) {
1619 mptcplog((LOG_ERR, "%s: mp_so 0x%llx sopt %s (%d, %d) val %d can't be issued error %d\n",
1620 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1621 mptcp_sopt2str(level, optname), level, optname, optval, error),
1622 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1623 }
1624 return error;
1625 }
1626
1627 /*
1628 * Handle SOPT_GET for socket options issued on MP socket.
1629 */
1630 static int
1631 mptcp_getopt(struct mptses *mpte, struct sockopt *sopt)
1632 {
1633 int error = 0, optval = 0;
1634
1635 /*
1636 * We only handle SOPT_GET for TCP level socket options; we should
1637 * not get here for socket level options since they are already
1638 * handled at the socket layer.
1639 */
1640 if (sopt->sopt_level != IPPROTO_TCP) {
1641 error = ENOPROTOOPT;
1642 goto out;
1643 }
1644
1645 switch (sopt->sopt_name) {
1646 case TCP_NODELAY:
1647 case TCP_RXT_FINDROP:
1648 case TCP_KEEPALIVE:
1649 case TCP_KEEPINTVL:
1650 case TCP_KEEPCNT:
1651 case TCP_CONNECTIONTIMEOUT:
1652 case TCP_RXT_CONNDROPTIME:
1653 case PERSIST_TIMEOUT:
1654 case TCP_ADAPTIVE_READ_TIMEOUT:
1655 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1656 case TCP_NOTSENT_LOWAT:
1657 case MPTCP_SERVICE_TYPE:
1658 case MPTCP_ALTERNATE_PORT:
1659 /* eligible; get the default value just in case */
1660 error = mptcp_default_tcp_optval(mpte, sopt, &optval);
1661 break;
1662 default:
1663 /* not eligible */
1664 error = ENOPROTOOPT;
1665 break;
1666 }
1667
1668 switch (sopt->sopt_name) {
1669 case TCP_NOTSENT_LOWAT:
1670 if (mptetoso(mpte)->so_flags & SOF_NOTSENT_LOWAT) {
1671 optval = mptcp_get_notsent_lowat(mpte);
1672 } else {
1673 optval = 0;
1674 }
1675 goto out;
1676 case MPTCP_SERVICE_TYPE:
1677 optval = mpte->mpte_svctype;
1678 goto out;
1679 case MPTCP_ALTERNATE_PORT:
1680 optval = mpte->mpte_alternate_port;
1681 goto out;
1682 }
1683
1684 /*
1685 * Search for a previously-issued TCP level socket option and
1686 * return the recorded option value. This assumes that the
1687 * value did not get modified by the lower layer after it was
1688 * issued at setsockopt(2) time. If not found, we'll return
1689 * the default value obtained ealier.
1690 */
1691 if (error == 0) {
1692 struct mptopt *mpo;
1693
1694 if ((mpo = mptcp_sopt_find(mpte, sopt)) != NULL) {
1695 optval = mpo->mpo_intval;
1696 }
1697
1698 error = sooptcopyout(sopt, &optval, sizeof(int));
1699 }
1700 out:
1701 return error;
1702 }
1703
1704 /*
1705 * Return default values for TCP socket options. Ideally we would query the
1706 * subflow TCP socket, but that requires creating a subflow socket before
1707 * connectx(2) time. To simplify things, just return the default values
1708 * that we know of.
1709 */
1710 static int
1711 mptcp_default_tcp_optval(struct mptses *mpte, struct sockopt *sopt, int *optval)
1712 {
1713 int error = 0;
1714
1715 VERIFY(sopt->sopt_level == IPPROTO_TCP);
1716 VERIFY(sopt->sopt_dir == SOPT_GET);
1717 mpte_lock_assert_held(mpte); /* same as MP socket lock */
1718
1719 /* try to do what tcp_newtcpcb() does */
1720 switch (sopt->sopt_name) {
1721 case TCP_NODELAY:
1722 case TCP_RXT_FINDROP:
1723 case TCP_KEEPINTVL:
1724 case TCP_KEEPCNT:
1725 case TCP_CONNECTIONTIMEOUT:
1726 case TCP_RXT_CONNDROPTIME:
1727 case TCP_NOTSENT_LOWAT:
1728 case TCP_ADAPTIVE_READ_TIMEOUT:
1729 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1730 case MPTCP_SERVICE_TYPE:
1731 case MPTCP_ALTERNATE_PORT:
1732 *optval = 0;
1733 break;
1734
1735 case TCP_KEEPALIVE:
1736 *optval = mptcp_subflow_keeptime;
1737 break;
1738
1739 case PERSIST_TIMEOUT:
1740 *optval = tcp_max_persist_timeout;
1741 break;
1742
1743 default:
1744 error = ENOPROTOOPT;
1745 break;
1746 }
1747 return error;
1748 }
1749
1750 /*
1751 * MPTCP SOPT_{SET,GET} socket option handler, for options issued on the MP
1752 * socket, at SOL_SOCKET and IPPROTO_TCP levels. The former is restricted
1753 * to those that are allowed by mptcp_usr_socheckopt().
1754 */
1755 int
1756 mptcp_ctloutput(struct socket *mp_so, struct sockopt *sopt)
1757 {
1758 struct mppcb *mpp = mpsotomppcb(mp_so);
1759 struct mptses *mpte;
1760 int error = 0;
1761
1762 if (mpp == NULL || mpp->mpp_state == MPPCB_STATE_DEAD) {
1763 error = EINVAL;
1764 goto out;
1765 }
1766 mpte = mptompte(mpp);
1767 mpte_lock_assert_held(mpte); /* same as MP socket lock */
1768
1769 /* we only handle socket and TCP-level socket options for MPTCP */
1770 if (sopt->sopt_level != SOL_SOCKET && sopt->sopt_level != IPPROTO_TCP) {
1771 mptcplog((LOG_DEBUG, "MPTCP Socket: "
1772 "%s: mp_so 0x%llx sopt %s level not "
1773 "handled\n", __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so),
1774 mptcp_sopt2str(sopt->sopt_level, sopt->sopt_name)),
1775 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_LOG);
1776 error = EINVAL;
1777 goto out;
1778 }
1779
1780 switch (sopt->sopt_dir) {
1781 case SOPT_SET:
1782 error = mptcp_setopt(mpte, sopt);
1783 break;
1784
1785 case SOPT_GET:
1786 error = mptcp_getopt(mpte, sopt);
1787 break;
1788 }
1789 out:
1790 return error;
1791 }
1792
1793 const char *
1794 mptcp_sopt2str(int level, int optname)
1795 {
1796 switch (level) {
1797 case SOL_SOCKET:
1798 switch (optname) {
1799 case SO_LINGER:
1800 return "SO_LINGER";
1801 case SO_LINGER_SEC:
1802 return "SO_LINGER_SEC";
1803 case SO_DEBUG:
1804 return "SO_DEBUG";
1805 case SO_KEEPALIVE:
1806 return "SO_KEEPALIVE";
1807 case SO_USELOOPBACK:
1808 return "SO_USELOOPBACK";
1809 case SO_TYPE:
1810 return "SO_TYPE";
1811 case SO_NREAD:
1812 return "SO_NREAD";
1813 case SO_NWRITE:
1814 return "SO_NWRITE";
1815 case SO_ERROR:
1816 return "SO_ERROR";
1817 case SO_SNDBUF:
1818 return "SO_SNDBUF";
1819 case SO_RCVBUF:
1820 return "SO_RCVBUF";
1821 case SO_SNDLOWAT:
1822 return "SO_SNDLOWAT";
1823 case SO_RCVLOWAT:
1824 return "SO_RCVLOWAT";
1825 case SO_SNDTIMEO:
1826 return "SO_SNDTIMEO";
1827 case SO_RCVTIMEO:
1828 return "SO_RCVTIMEO";
1829 case SO_NKE:
1830 return "SO_NKE";
1831 case SO_NOSIGPIPE:
1832 return "SO_NOSIGPIPE";
1833 case SO_NOADDRERR:
1834 return "SO_NOADDRERR";
1835 case SO_RESTRICTIONS:
1836 return "SO_RESTRICTIONS";
1837 case SO_LABEL:
1838 return "SO_LABEL";
1839 case SO_PEERLABEL:
1840 return "SO_PEERLABEL";
1841 case SO_RANDOMPORT:
1842 return "SO_RANDOMPORT";
1843 case SO_TRAFFIC_CLASS:
1844 return "SO_TRAFFIC_CLASS";
1845 case SO_RECV_TRAFFIC_CLASS:
1846 return "SO_RECV_TRAFFIC_CLASS";
1847 case SO_TRAFFIC_CLASS_DBG:
1848 return "SO_TRAFFIC_CLASS_DBG";
1849 case SO_PRIVILEGED_TRAFFIC_CLASS:
1850 return "SO_PRIVILEGED_TRAFFIC_CLASS";
1851 case SO_DEFUNCTOK:
1852 return "SO_DEFUNCTOK";
1853 case SO_ISDEFUNCT:
1854 return "SO_ISDEFUNCT";
1855 case SO_OPPORTUNISTIC:
1856 return "SO_OPPORTUNISTIC";
1857 case SO_FLUSH:
1858 return "SO_FLUSH";
1859 case SO_RECV_ANYIF:
1860 return "SO_RECV_ANYIF";
1861 case SO_NOWAKEFROMSLEEP:
1862 return "SO_NOWAKEFROMSLEEP";
1863 case SO_NOAPNFALLBK:
1864 return "SO_NOAPNFALLBK";
1865 case SO_MARK_CELLFALLBACK:
1866 return "SO_CELLFALLBACK";
1867 case SO_DELEGATED:
1868 return "SO_DELEGATED";
1869 case SO_DELEGATED_UUID:
1870 return "SO_DELEGATED_UUID";
1871 #if NECP
1872 case SO_NECP_ATTRIBUTES:
1873 return "SO_NECP_ATTRIBUTES";
1874 case SO_NECP_CLIENTUUID:
1875 return "SO_NECP_CLIENTUUID";
1876 #endif /* NECP */
1877 }
1878
1879 break;
1880 case IPPROTO_TCP:
1881 switch (optname) {
1882 case TCP_NODELAY:
1883 return "TCP_NODELAY";
1884 case TCP_KEEPALIVE:
1885 return "TCP_KEEPALIVE";
1886 case TCP_KEEPINTVL:
1887 return "TCP_KEEPINTVL";
1888 case TCP_KEEPCNT:
1889 return "TCP_KEEPCNT";
1890 case TCP_CONNECTIONTIMEOUT:
1891 return "TCP_CONNECTIONTIMEOUT";
1892 case TCP_RXT_CONNDROPTIME:
1893 return "TCP_RXT_CONNDROPTIME";
1894 case PERSIST_TIMEOUT:
1895 return "PERSIST_TIMEOUT";
1896 case TCP_NOTSENT_LOWAT:
1897 return "NOTSENT_LOWAT";
1898 case TCP_ADAPTIVE_READ_TIMEOUT:
1899 return "ADAPTIVE_READ_TIMEOUT";
1900 case TCP_ADAPTIVE_WRITE_TIMEOUT:
1901 return "ADAPTIVE_WRITE_TIMEOUT";
1902 case MPTCP_SERVICE_TYPE:
1903 return "MPTCP_SERVICE_TYPE";
1904 case MPTCP_ALTERNATE_PORT:
1905 return "MPTCP_ALTERNATE_PORT";
1906 }
1907
1908 break;
1909 }
1910
1911 return "unknown";
1912 }
1913
1914 static int
1915 mptcp_usr_preconnect(struct socket *mp_so)
1916 {
1917 struct mptsub *mpts = NULL;
1918 struct mppcb *mpp = mpsotomppcb(mp_so);
1919 struct mptses *mpte;
1920 struct socket *so;
1921 struct tcpcb *tp = NULL;
1922 int error;
1923
1924 mpte = mptompte(mpp);
1925 VERIFY(mpte != NULL);
1926 mpte_lock_assert_held(mpte); /* same as MP socket lock */
1927
1928 mpts = mptcp_get_subflow(mpte, NULL, NULL);
1929 if (mpts == NULL) {
1930 mptcplog((LOG_ERR, "%s: mp_so 0x%llx invalid preconnect ",
1931 __func__, (u_int64_t)VM_KERNEL_ADDRPERM(mp_so)),
1932 MPTCP_SOCKET_DBG, MPTCP_LOGLVL_ERR);
1933 return EINVAL;
1934 }
1935 mpts->mpts_flags &= ~MPTSF_TFO_REQD;
1936 so = mpts->mpts_socket;
1937 tp = intotcpcb(sotoinpcb(so));
1938 tp->t_mpflags &= ~TMPF_TFO_REQUEST;
1939 error = tcp_output(sototcpcb(so));
1940
1941 soclearfastopen(mp_so);
1942
1943 return error;
1944 }