]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/tcp_usrreq.c
xnu-6153.121.1.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_usrreq.c
1 /*
2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
62 */
63
64
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
69 #include <sys/mbuf.h>
70 #if INET6
71 #include <sys/domain.h>
72 #endif /* INET6 */
73 #if !CONFIG_EMBEDDED
74 #include <sys/kasl.h>
75 #endif
76 #include <sys/priv.h>
77 #include <sys/socket.h>
78 #include <sys/socketvar.h>
79 #include <sys/protosw.h>
80 #include <sys/syslog.h>
81
82 #include <net/if.h>
83 #include <net/route.h>
84 #include <net/ntstat.h>
85 #include <net/content_filter.h>
86 #include <net/multi_layer_pkt_log.h>
87
88 #include <netinet/in.h>
89 #include <netinet/in_systm.h>
90 #if INET6
91 #include <netinet/ip6.h>
92 #endif
93 #include <netinet/in_pcb.h>
94 #if INET6
95 #include <netinet6/in6_pcb.h>
96 #endif
97 #include <netinet/in_var.h>
98 #include <netinet/ip_var.h>
99 #if INET6
100 #include <netinet6/ip6_var.h>
101 #endif
102 #include <netinet/tcp.h>
103 #include <netinet/tcp_fsm.h>
104 #include <netinet/tcp_seq.h>
105 #include <netinet/tcp_timer.h>
106 #include <netinet/tcp_var.h>
107 #include <netinet/tcpip.h>
108 #include <netinet/tcp_cc.h>
109 #include <netinet/tcp_log.h>
110 #include <mach/sdt.h>
111 #if TCPDEBUG
112 #include <netinet/tcp_debug.h>
113 #endif
114 #if MPTCP
115 #include <netinet/mptcp_var.h>
116 #endif /* MPTCP */
117
118 #if IPSEC
119 #include <netinet6/ipsec.h>
120 #endif /*IPSEC*/
121
122 #if FLOW_DIVERT
123 #include <netinet/flow_divert.h>
124 #endif /* FLOW_DIVERT */
125
126 errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
127
128 int tcp_sysctl_info(struct sysctl_oid *, void *, int, struct sysctl_req *);
129 static void tcp_connection_fill_info(struct tcpcb *tp,
130 struct tcp_connection_info *tci);
131 static int tcp_get_mpkl_send_info(struct mbuf *, struct so_mpkl_send_info *);
132
133 /*
134 * TCP protocol interface to socket abstraction.
135 */
136 static int tcp_attach(struct socket *, struct proc *);
137 static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
138 #if INET6
139 static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
140 static int tcp6_usr_connect(struct socket *, struct sockaddr *,
141 struct proc *);
142 #endif /* INET6 */
143 static struct tcpcb *tcp_disconnect(struct tcpcb *);
144 static struct tcpcb *tcp_usrclosed(struct tcpcb *);
145 extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
146
/*
 * Tracing hooks used by the pru_*() entry points.
 *
 *   TCPDEBUG0       declares the local `ostate'.
 *   TCPDEBUG1()     stores tp->t_state in `ostate' (0 when tp is NULL).
 *   TCPDEBUG2(req)  calls tcp_trace() for request `req' when tp is non-NULL
 *                   and SO_DEBUG is set in so->so_options.
 *
 * The macros pick up `tp', `so' and `ostate' from the enclosing function by
 * name.  When TCPDEBUG is not enabled they all expand to nothing.
 */
#if TCPDEBUG
#define TCPDEBUG0       int ostate = 0
#define TCPDEBUG1()     ostate = (tp != NULL) ? tp->t_state : 0
/*
 * Wrapped in do { } while (0) so that the expansion is one statement and
 * cannot swallow an `else' that follows the call site.
 */
#define TCPDEBUG2(req) \
	do { \
		if (tp != NULL && (so->so_options & SO_DEBUG)) { \
			tcp_trace(TA_USER, ostate, tp, 0, 0, (req)); \
		} \
	} while (0)
#else
#define TCPDEBUG0
#define TCPDEBUG1()
#define TCPDEBUG2(req)
#endif
157
158 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info,
159 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
160 0, 0, tcp_sysctl_info, "S", "TCP info per tuple");
161
162 /*
163 * TCP attaches to socket via pru_attach(), reserving space,
164 * and an internet control block.
165 *
166 * Returns: 0 Success
167 * EISCONN
168 * tcp_attach:ENOBUFS
169 * tcp_attach:ENOMEM
170 * tcp_attach:??? [IPSEC specific]
171 */
172 static int
173 tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
174 {
175 int error;
176 struct inpcb *inp = sotoinpcb(so);
177 struct tcpcb *tp = 0;
178 TCPDEBUG0;
179
180 TCPDEBUG1();
181 if (inp) {
182 error = EISCONN;
183 goto out;
184 }
185
186 error = tcp_attach(so, p);
187 if (error) {
188 goto out;
189 }
190
191 if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
192 so->so_linger = TCP_LINGERTIME * hz;
193 }
194 tp = sototcpcb(so);
195 out:
196 TCPDEBUG2(PRU_ATTACH);
197 return error;
198 }
199
200 /*
201 * pru_detach() detaches the TCP protocol from the socket.
202 * If the protocol state is non-embryonic, then can't
203 * do this directly: have to initiate a pru_disconnect(),
204 * which may finish later; embryonic TCB's can just
205 * be discarded here.
206 */
207 static int
208 tcp_usr_detach(struct socket *so)
209 {
210 int error = 0;
211 struct inpcb *inp = sotoinpcb(so);
212 struct tcpcb *tp;
213 TCPDEBUG0;
214
215 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
216 return EINVAL; /* XXX */
217 }
218 socket_lock_assert_owned(so);
219 tp = intotcpcb(inp);
220 /* In case we got disconnected from the peer */
221 if (tp == NULL) {
222 goto out;
223 }
224 TCPDEBUG1();
225
226 calculate_tcp_clock();
227
228 tp = tcp_disconnect(tp);
229 out:
230 TCPDEBUG2(PRU_DETACH);
231 return error;
232 }
233
/*
 * Shared prologue and epilogue for the pru_*() handlers.
 *
 * COMMON_START() expects `inp' and `tp' to be declared by the enclosing
 * function.  It declares the trace state, returns EINVAL when there is no
 * PCB or the PCB is dead, returns EPROTOTYPE (NECP builds only) when the
 * socket should use flow divert, then loads `tp' from `inp' and calls
 * calculate_tcp_clock().
 *
 * COMMON_END(req) provides the `out' label, emits the trace record for
 * `req' and returns `error'.  The trailing `goto out' references the label
 * from within the macro itself.
 */
#if NECP
#define COMMON_START_FLOW_DIVERT_CHECK() \
	do { \
		if (necp_socket_should_use_flow_divert(inp)) { \
			return (EPROTOTYPE); \
		} \
	} while (0)
#else /* !NECP */
#define COMMON_START_FLOW_DIVERT_CHECK() \
	do { \
	} while (0)
#endif /* !NECP */

#define COMMON_START()  TCPDEBUG0; \
	do { \
		if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { \
			return (EINVAL); \
		} \
		COMMON_START_FLOW_DIVERT_CHECK(); \
		tp = intotcpcb(inp); \
		TCPDEBUG1(); \
		calculate_tcp_clock(); \
	} while (0)

#define COMMON_END(req) \
	out: TCPDEBUG2(req); \
	return error; \
	goto out
257
258
259 /*
260 * Give the socket an address.
261 *
262 * Returns: 0 Success
263 * EINVAL Invalid argument [COMMON_START]
264 * EAFNOSUPPORT Address family not supported
265 * in_pcbbind:EADDRNOTAVAIL Address not available.
266 * in_pcbbind:EINVAL Invalid argument
267 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
268 * in_pcbbind:EACCES Permission denied
269 * in_pcbbind:EADDRINUSE Address in use
270 * in_pcbbind:EAGAIN Resource unavailable, try again
271 * in_pcbbind:EPERM Operation not permitted
272 */
273 static int
274 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
275 {
276 int error = 0;
277 struct inpcb *inp = sotoinpcb(so);
278 struct tcpcb *tp;
279 struct sockaddr_in *sinp;
280
281 COMMON_START();
282
283 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
284 error = EAFNOSUPPORT;
285 goto out;
286 }
287
288 /*
289 * Must check for multicast addresses and disallow binding
290 * to them.
291 */
292 sinp = (struct sockaddr_in *)(void *)nam;
293 if (sinp->sin_family == AF_INET &&
294 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
295 error = EAFNOSUPPORT;
296 goto out;
297 }
298 error = in_pcbbind(inp, nam, p);
299 if (error) {
300 goto out;
301 }
302
303 #if NECP
304 /* Update NECP client with bind result if not in middle of connect */
305 if ((inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS) &&
306 !uuid_is_null(inp->necp_client_uuid)) {
307 socket_unlock(so, 0);
308 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
309 socket_lock(so, 0);
310 }
311 #endif /* NECP */
312
313 COMMON_END(PRU_BIND);
314 }
315
#if INET6
/*
 * pru_bind() handler for IPv6: give the socket a local address.
 *
 * Only AF_INET6 (or an unset, zero family) is accepted.  Multicast
 * addresses are refused, both native IPv6 multicast and IPv4 multicast
 * carried inside an IPv4-mapped IPv6 address; the latter mirrors the check
 * made by tcp_usr_bind().
 *
 * Unless the socket is IPV6_V6ONLY:
 *   - binding to the unspecified address also enables INP_IPV4;
 *   - binding to an IPv4-mapped address turns the PCB into an IPv4 one and
 *     binds through in_pcbbind().
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument [COMMON_START]
 *		EAFNOSUPPORT		Address family not supported, or
 *					multicast address
 *	in_pcbbind:???
 *	in6_pcbbind:???
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}

	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	sin6p = (struct sockaddr_in6 *)(void *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
			inp->inp_vflag |= INP_IPV4;
		} else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6p);
			/*
			 * IN6_IS_ADDR_MULTICAST() above does not match a
			 * mapped address, so the embedded IPv4 address has
			 * to be checked separately.
			 */
			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
				error = EAFNOSUPPORT;
				goto out;
			}
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
			goto out;
		}
	}
	error = in6_pcbbind(inp, nam, p);
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
364
365 /*
366 * Prepare to accept connections.
367 *
368 * Returns: 0 Success
369 * EINVAL [COMMON_START]
370 * in_pcbbind:EADDRNOTAVAIL Address not available.
371 * in_pcbbind:EINVAL Invalid argument
372 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
373 * in_pcbbind:EACCES Permission denied
374 * in_pcbbind:EADDRINUSE Address in use
375 * in_pcbbind:EAGAIN Resource unavailable, try again
376 * in_pcbbind:EPERM Operation not permitted
377 */
378 static int
379 tcp_usr_listen(struct socket *so, struct proc *p)
380 {
381 int error = 0;
382 struct inpcb *inp = sotoinpcb(so);
383 struct tcpcb *tp;
384
385 COMMON_START();
386 if (inp->inp_lport == 0) {
387 error = in_pcbbind(inp, NULL, p);
388 }
389 if (error == 0) {
390 tp->t_state = TCPS_LISTEN;
391 }
392 TCP_LOG_LISTEN(tp, error);
393 COMMON_END(PRU_LISTEN);
394 }
395
#if INET6
/*
 * pru_listen() handler for IPv6: prepare to accept connections.
 *
 * A socket without a local port is first bound through in6_pcbbind() with
 * no explicit address; before that, INP_IPV4 is set or cleared in
 * inp_vflag depending on whether the socket is IPV6_V6ONLY.  When the bind
 * succeeds (or the socket was already bound) the connection moves to
 * TCPS_LISTEN.
 *
 * Returns:	0			Success
 *		EINVAL			[COMMON_START]
 *	in6_pcbbind:???
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	int error = 0;

	COMMON_START();

	if (inp->inp_lport == 0) {
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			inp->inp_vflag &= ~INP_IPV4;
		} else {
			inp->inp_vflag |= INP_IPV4;
		}
		error = in6_pcbbind(inp, NULL, p);
	}
	if (!error) {
		tp->t_state = TCPS_LISTEN;
	}
	TCP_LOG_LISTEN(tp, error);

	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
419
420 static int
421 tcp_connect_complete(struct socket *so)
422 {
423 struct tcpcb *tp = sototcpcb(so);
424 struct inpcb *inp = sotoinpcb(so);
425 int error = 0;
426
427 /* TFO delays the tcp_output until later, when the app calls write() */
428 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
429 if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, NULL, NULL, NULL)) {
430 TCP_LOG_DROP_NECP(NULL, NULL, tp, true);
431 return EHOSTUNREACH;
432 }
433
434 /* Initialize enough state so that we can actually send data */
435 tcp_mss(tp, -1, IFSCOPE_NONE);
436 tp->snd_wnd = tp->t_maxseg;
437 tp->max_sndwnd = tp->snd_wnd;
438 } else {
439 error = tcp_output(tp);
440 }
441
442 #if NECP
443 /* Update NECP client with connected five-tuple */
444 if (error == 0 && !uuid_is_null(inp->necp_client_uuid)) {
445 socket_unlock(so, 0);
446 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
447 socket_lock(so, 0);
448 }
449 #endif /* NECP */
450
451 return error;
452 }
453
454 /*
455 * Initiate connection to peer.
456 * Create a template for use in transmissions on this connection.
457 * Enter SYN_SENT state, and mark socket as connecting.
458 * Start keep-alive timer, and seed output sequence space.
459 * Send initial segment on connection.
460 */
461 static int
462 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
463 {
464 int error = 0;
465 struct inpcb *inp = sotoinpcb(so);
466 struct tcpcb *tp;
467 struct sockaddr_in *sinp;
468
469 TCPDEBUG0;
470 if (inp == NULL) {
471 return EINVAL;
472 } else if (inp->inp_state == INPCB_STATE_DEAD) {
473 if (so->so_error) {
474 error = so->so_error;
475 so->so_error = 0;
476 return error;
477 } else {
478 return EINVAL;
479 }
480 }
481 #if NECP
482 #if CONTENT_FILTER
483 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
484 if (error != 0) {
485 return error;
486 }
487 #endif /* CONTENT_FILTER */
488 #if FLOW_DIVERT
489 if (necp_socket_should_use_flow_divert(inp)) {
490 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
491 if (fd_ctl_unit > 0) {
492 error = flow_divert_pcb_init(so, fd_ctl_unit);
493 if (error == 0) {
494 error = flow_divert_connect_out(so, nam, p);
495 }
496 } else {
497 error = ENETDOWN;
498 }
499
500 return error;
501 }
502 #endif /* FLOW_DIVERT */
503 #endif /* NECP */
504 tp = intotcpcb(inp);
505 TCPDEBUG1();
506
507 calculate_tcp_clock();
508
509 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
510 error = EAFNOSUPPORT;
511 goto out;
512 }
513 /*
514 * Must disallow TCP ``connections'' to multicast addresses.
515 */
516 sinp = (struct sockaddr_in *)(void *)nam;
517 if (sinp->sin_family == AF_INET
518 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
519 error = EAFNOSUPPORT;
520 goto out;
521 }
522
523 if ((error = tcp_connect(tp, nam, p)) != 0) {
524 TCP_LOG_CONNECT(tp, true, error);
525 goto out;
526 }
527
528 error = tcp_connect_complete(so);
529
530 TCP_LOG_CONNECT(tp, true, error);
531
532 COMMON_END(PRU_CONNECT);
533 }
534
535 static int
536 tcp_usr_connectx_common(struct socket *so, int af,
537 struct sockaddr *src, struct sockaddr *dst,
538 struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
539 uint32_t flags, void *arg, uint32_t arglen, struct uio *auio,
540 user_ssize_t *bytes_written)
541 {
542 #pragma unused(aid, flags, arg, arglen)
543 struct inpcb *inp = sotoinpcb(so);
544 int error = 0;
545 user_ssize_t datalen = 0;
546
547 if (inp == NULL) {
548 return EINVAL;
549 }
550
551 VERIFY(dst != NULL);
552
553 ASSERT(!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS));
554 inp->inp_flags2 |= INP2_CONNECT_IN_PROGRESS;
555
556 #if NECP
557 inp_update_necp_policy(inp, src, dst, ifscope);
558 #endif /* NECP */
559
560 if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) &&
561 (tcp_fastopen & TCP_FASTOPEN_CLIENT)) {
562 sototcpcb(so)->t_flagsext |= TF_FASTOPEN;
563 }
564
565 /* bind socket to the specified interface, if requested */
566 if (ifscope != IFSCOPE_NONE &&
567 (error = inp_bindif(inp, ifscope, NULL)) != 0) {
568 goto done;
569 }
570
571 /* if source address and/or port is specified, bind to it */
572 if (src != NULL) {
573 error = sobindlock(so, src, 0); /* already locked */
574 if (error != 0) {
575 goto done;
576 }
577 }
578
579 switch (af) {
580 case AF_INET:
581 error = tcp_usr_connect(so, dst, p);
582 break;
583 #if INET6
584 case AF_INET6:
585 error = tcp6_usr_connect(so, dst, p);
586 break;
587 #endif /* INET6 */
588 default:
589 VERIFY(0);
590 /* NOTREACHED */
591 }
592
593 if (error != 0) {
594 goto done;
595 }
596
597 /* if there is data, copy it */
598 if (auio != NULL) {
599 socket_unlock(so, 0);
600
601 VERIFY(bytes_written != NULL);
602
603 datalen = uio_resid(auio);
604 error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
605 (uio_t)auio, NULL, NULL, 0);
606 socket_lock(so, 0);
607
608 if (error == 0 || error == EWOULDBLOCK) {
609 *bytes_written = datalen - uio_resid(auio);
610 }
611
612 /*
613 * sosend returns EWOULDBLOCK if it's a non-blocking
614 * socket or a timeout occured (this allows to return
615 * the amount of queued data through sendit()).
616 *
617 * However, connectx() returns EINPROGRESS in case of a
618 * blocking socket. So we change the return value here.
619 */
620 if (error == EWOULDBLOCK) {
621 error = EINPROGRESS;
622 }
623 }
624
625 if (error == 0 && pcid != NULL) {
626 *pcid = 1; /* there is only one connection in regular TCP */
627 }
628 done:
629 if (error && error != EINPROGRESS) {
630 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
631 }
632
633 inp->inp_flags2 &= ~INP2_CONNECT_IN_PROGRESS;
634 return error;
635 }
636
637 static int
638 tcp_usr_connectx(struct socket *so, struct sockaddr *src,
639 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
640 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
641 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
642 {
643 return tcp_usr_connectx_common(so, AF_INET, src, dst, p, ifscope, aid,
644 pcid, flags, arg, arglen, uio, bytes_written);
645 }
646
647 #if INET6
648 static int
649 tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
650 {
651 int error = 0;
652 struct inpcb *inp = sotoinpcb(so);
653 struct tcpcb *tp;
654 struct sockaddr_in6 *sin6p;
655
656 TCPDEBUG0;
657 if (inp == NULL) {
658 return EINVAL;
659 } else if (inp->inp_state == INPCB_STATE_DEAD) {
660 if (so->so_error) {
661 error = so->so_error;
662 so->so_error = 0;
663 return error;
664 } else {
665 return EINVAL;
666 }
667 }
668 #if NECP
669 #if CONTENT_FILTER
670 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
671 if (error != 0) {
672 return error;
673 }
674 #endif /* CONTENT_FILTER */
675 #if FLOW_DIVERT
676 if (necp_socket_should_use_flow_divert(inp)) {
677 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
678 if (fd_ctl_unit > 0) {
679 error = flow_divert_pcb_init(so, fd_ctl_unit);
680 if (error == 0) {
681 error = flow_divert_connect_out(so, nam, p);
682 }
683 } else {
684 error = ENETDOWN;
685 }
686
687 return error;
688 }
689 #endif /* FLOW_DIVERT */
690 #endif /* NECP */
691
692 tp = intotcpcb(inp);
693 TCPDEBUG1();
694
695 calculate_tcp_clock();
696
697 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
698 error = EAFNOSUPPORT;
699 goto out;
700 }
701
702 /*
703 * Must disallow TCP ``connections'' to multicast addresses.
704 */
705 sin6p = (struct sockaddr_in6 *)(void *)nam;
706 if (sin6p->sin6_family == AF_INET6
707 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
708 error = EAFNOSUPPORT;
709 goto out;
710 }
711
712 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
713 struct sockaddr_in sin;
714
715 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
716 return EINVAL;
717 }
718
719 in6_sin6_2_sin(&sin, sin6p);
720 inp->inp_vflag |= INP_IPV4;
721 inp->inp_vflag &= ~INP_IPV6;
722 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
723 TCP_LOG_CONNECT(tp, true, error);
724 goto out;
725 }
726
727 error = tcp_connect_complete(so);
728 goto out;
729 }
730 inp->inp_vflag &= ~INP_IPV4;
731 inp->inp_vflag |= INP_IPV6;
732 if ((error = tcp6_connect(tp, nam, p)) != 0) {
733 TCP_LOG_CONNECT(tp, true, error);
734 goto out;
735 }
736
737 error = tcp_connect_complete(so);
738
739 TCP_LOG_CONNECT(tp, true, error);
740
741 COMMON_END(PRU_CONNECT);
742 }
743
744 static int
745 tcp6_usr_connectx(struct socket *so, struct sockaddr*src,
746 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
747 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
748 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
749 {
750 return tcp_usr_connectx_common(so, AF_INET6, src, dst, p, ifscope, aid,
751 pcid, flags, arg, arglen, uio, bytes_written);
752 }
753 #endif /* INET6 */
754
755 /*
756 * Initiate disconnect from peer.
757 * If connection never passed embryonic stage, just drop;
758 * else if don't need to let data drain, then can just drop anyways,
759 * else have to begin TCP shutdown process: mark socket disconnecting,
760 * drain unread data, state switch to reflect user close, and
761 * send segment (e.g. FIN) to peer. Socket will be really disconnected
762 * when peer sends FIN and acks ours.
763 *
764 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
765 */
766 static int
767 tcp_usr_disconnect(struct socket *so)
768 {
769 int error = 0;
770 struct inpcb *inp = sotoinpcb(so);
771 struct tcpcb *tp;
772
773 socket_lock_assert_owned(so);
774 COMMON_START();
775 /* In case we got disconnected from the peer */
776 if (tp == NULL) {
777 goto out;
778 }
779 tp = tcp_disconnect(tp);
780 COMMON_END(PRU_DISCONNECT);
781 }
782
783 /*
784 * User-protocol pru_disconnectx callback.
785 */
786 static int
787 tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
788 {
789 #pragma unused(cid)
790 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
791 return EINVAL;
792 }
793
794 return tcp_usr_disconnect(so);
795 }
796
797 /*
798 * Accept a connection. Essentially all the work is
799 * done at higher levels; just return the address
800 * of the peer, storing through addr.
801 */
802 static int
803 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
804 {
805 int error = 0;
806 struct inpcb *inp = sotoinpcb(so);
807 struct tcpcb *tp = NULL;
808 TCPDEBUG0;
809
810 in_getpeeraddr(so, nam);
811
812 if (so->so_state & SS_ISDISCONNECTED) {
813 error = ECONNABORTED;
814 goto out;
815 }
816 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
817 return EINVAL;
818 }
819 #if NECP
820 else if (necp_socket_should_use_flow_divert(inp)) {
821 return EPROTOTYPE;
822 }
823
824 #endif /* NECP */
825
826 tp = intotcpcb(inp);
827 TCPDEBUG1();
828
829 TCP_LOG_ACCEPT(tp, 0);
830
831 calculate_tcp_clock();
832
833 COMMON_END(PRU_ACCEPT);
834 }
835
#if INET6
/*
 * pru_accept() handler for IPv6: accept a connection.
 *
 * After the state checks, the peer address is stored through nam by
 * in6_mapped_peeraddr().
 *
 * Returns:	0			Success
 *		ECONNABORTED		Socket is already disconnected
 *		EINVAL			No PCB, or PCB already dead
 *		EPROTOTYPE		Socket should use flow divert [NECP]
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = NULL;
	int error = 0;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) != 0) {
		error = ECONNABORTED;
		goto out;
	}
	if (inp == NULL) {
		return EINVAL;
	}
	if (inp->inp_state == INPCB_STATE_DEAD) {
		return EINVAL;
	}
#if NECP
	if (necp_socket_should_use_flow_divert(inp)) {
		return EPROTOTYPE;
	}
#endif /* NECP */

	tp = intotcpcb(inp);
	TCPDEBUG1();

	TCP_LOG_ACCEPT(tp, 0);

	calculate_tcp_clock();

	in6_mapped_peeraddr(so, nam);

	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
870
871 /*
872 * Mark the connection as being incapable of further output.
873 *
874 * Returns: 0 Success
875 * EINVAL [COMMON_START]
876 * tcp_output:EADDRNOTAVAIL
877 * tcp_output:ENOBUFS
878 * tcp_output:EMSGSIZE
879 * tcp_output:EHOSTUNREACH
880 * tcp_output:ENETUNREACH
881 * tcp_output:ENETDOWN
882 * tcp_output:ENOMEM
883 * tcp_output:EACCES
884 * tcp_output:EMSGSIZE
885 * tcp_output:ENOBUFS
886 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
887 */
888 static int
889 tcp_usr_shutdown(struct socket *so)
890 {
891 int error = 0;
892 struct inpcb *inp = sotoinpcb(so);
893 struct tcpcb *tp;
894
895 TCPDEBUG0;
896 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
897 return EINVAL;
898 }
899
900 socantsendmore(so);
901
902 /*
903 * In case we got disconnected from the peer, or if this is
904 * a socket that is to be flow-diverted (but not yet).
905 */
906 tp = intotcpcb(inp);
907 TCPDEBUG1();
908
909 if (tp == NULL
910 #if NECP
911 || (necp_socket_should_use_flow_divert(inp))
912 #endif /* NECP */
913 ) {
914 if (tp != NULL) {
915 error = EPROTOTYPE;
916 }
917 goto out;
918 }
919
920 calculate_tcp_clock();
921
922 tp = tcp_usrclosed(tp);
923 #if MPTCP
924 /* A reset has been sent but socket exists, do not send FIN */
925 if ((so->so_flags & SOF_MP_SUBFLOW) &&
926 (tp) && (tp->t_mpflags & TMPF_RESET)) {
927 goto out;
928 }
929 #endif
930 #if CONTENT_FILTER
931 /* Don't send a FIN yet */
932 if (tp && !(so->so_state & SS_ISDISCONNECTED) &&
933 cfil_sock_data_pending(&so->so_snd)) {
934 goto out;
935 }
936 #endif /* CONTENT_FILTER */
937 if (tp) {
938 error = tcp_output(tp);
939 }
940 COMMON_END(PRU_SHUTDOWN);
941 }
942
943 /*
944 * After a receive, possibly send window update to peer.
945 */
946 static int
947 tcp_usr_rcvd(struct socket *so, __unused int flags)
948 {
949 int error = 0;
950 struct inpcb *inp = sotoinpcb(so);
951 struct tcpcb *tp;
952
953 COMMON_START();
954 /* In case we got disconnected from the peer */
955 if (tp == NULL) {
956 goto out;
957 }
958 tcp_sbrcv_trim(tp, &so->so_rcv);
959
960 /*
961 * This tcp_output is solely there to trigger window-updates.
962 * However, we really do not want these window-updates while we
963 * are still in SYN_SENT or SYN_RECEIVED.
964 */
965 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
966 tcp_output(tp);
967 }
968
969 #if CONTENT_FILTER
970 cfil_sock_buf_update(&so->so_rcv);
971 #endif /* CONTENT_FILTER */
972
973 COMMON_END(PRU_RCVD);
974 }
975
976 /*
977 * Do a send by putting data in output queue and updating urgent
978 * marker if URG set. Possibly send more data. Unlike the other
979 * pru_*() routines, the mbuf chains are our responsibility. We
980 * must either enqueue them or free them. The other pru_* routines
981 * generally are caller-frees.
982 *
983 * Returns: 0 Success
984 * ECONNRESET
985 * EINVAL
986 * ENOBUFS
987 * tcp_connect:EADDRINUSE Address in use
988 * tcp_connect:EADDRNOTAVAIL Address not available.
989 * tcp_connect:EINVAL Invalid argument
990 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
991 * tcp_connect:EACCES Permission denied
992 * tcp_connect:EAGAIN Resource unavailable, try again
993 * tcp_connect:EPERM Operation not permitted
994 * tcp_output:EADDRNOTAVAIL
995 * tcp_output:ENOBUFS
996 * tcp_output:EMSGSIZE
997 * tcp_output:EHOSTUNREACH
998 * tcp_output:ENETUNREACH
999 * tcp_output:ENETDOWN
1000 * tcp_output:ENOMEM
1001 * tcp_output:EACCES
1002 * tcp_output:EMSGSIZE
1003 * tcp_output:ENOBUFS
1004 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
1005 * tcp6_connect:??? [IPV6 only]
1006 */
1007 static int
1008 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
1009 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1010 {
1011 int error = 0;
1012 struct inpcb *inp = sotoinpcb(so);
1013 struct tcpcb *tp;
1014 uint32_t msgpri = MSG_PRI_DEFAULT;
1015 uint32_t mpkl_len = 0; /* length of mbuf chain */
1016 uint32_t mpkl_seq; /* sequence number where new data is added */
1017 struct so_mpkl_send_info mpkl_send_info = {};
1018
1019 #if INET6
1020 int isipv6;
1021 #endif
1022 TCPDEBUG0;
1023
1024 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD
1025 #if NECP
1026 || (necp_socket_should_use_flow_divert(inp))
1027 #endif /* NECP */
1028 ) {
1029 /*
1030 * OOPS! we lost a race, the TCP session got reset after
1031 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
1032 * network interrupt in the non-splnet() section of sosend().
1033 */
1034 if (m != NULL) {
1035 m_freem(m);
1036 }
1037 if (control != NULL) {
1038 m_freem(control);
1039 control = NULL;
1040 }
1041
1042 if (inp == NULL) {
1043 error = ECONNRESET; /* XXX EPIPE? */
1044 } else {
1045 error = EPROTOTYPE;
1046 }
1047 tp = NULL;
1048 TCPDEBUG1();
1049 goto out;
1050 }
1051 #if INET6
1052 isipv6 = nam && nam->sa_family == AF_INET6;
1053 #endif /* INET6 */
1054 tp = intotcpcb(inp);
1055 TCPDEBUG1();
1056
1057 calculate_tcp_clock();
1058
1059 if (net_mpklog_enabled) {
1060 mpkl_seq = tp->snd_una + so->so_snd.sb_cc;
1061 if (m) {
1062 mpkl_len = m_length(m);
1063 }
1064 if (so->so_flags1 & SOF1_MPKL_SEND_INFO) {
1065 uuid_copy(mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
1066 mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
1067 }
1068 }
1069
1070 if (control != NULL) {
1071 if (so->so_flags & SOF_ENABLE_MSGS) {
1072 /* Get the msg priority from control mbufs */
1073 error = tcp_get_msg_priority(control, &msgpri);
1074 if (error) {
1075 m_freem(control);
1076 if (m != NULL) {
1077 m_freem(m);
1078 }
1079 control = NULL;
1080 m = NULL;
1081 goto out;
1082 }
1083 }
1084 if (control->m_len > 0 && net_mpklog_enabled) {
1085 error = tcp_get_mpkl_send_info(control, &mpkl_send_info);
1086 /*
1087 * Interpretation of the returned code:
1088 * 0: client wants us to use value passed in SCM_MPKL_SEND_INFO
1089 * ENOMSG: SCM_MPKL_SEND_INFO was not present
1090 * other: failure
1091 */
1092 if (error != 0 && error != ENOMSG) {
1093 m_freem(control);
1094 if (m != NULL) {
1095 m_freem(m);
1096 }
1097 control = NULL;
1098 m = NULL;
1099 goto out;
1100 }
1101 }
1102 /*
1103 * Silently drop unsupported ancillary data messages
1104 */
1105 m_freem(control);
1106 control = NULL;
1107 }
1108
1109 if (so->so_flags & SOF_ENABLE_MSGS) {
1110 VERIFY(m->m_flags & M_PKTHDR);
1111 m->m_pkthdr.msg_pri = msgpri;
1112 }
1113
1114 /* MPTCP sublow socket buffers must not be compressed */
1115 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) ||
1116 (so->so_snd.sb_flags & SB_NOCOMPRESS));
1117
1118 if (!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1119 /* Call msg send if message delivery is enabled */
1120 if (so->so_flags & SOF_ENABLE_MSGS) {
1121 sbappendmsg_snd(&so->so_snd, m);
1122 } else {
1123 sbappendstream(&so->so_snd, m);
1124 }
1125
1126 if (nam && tp->t_state < TCPS_SYN_SENT) {
1127 /*
1128 * Do implied connect if not yet connected,
1129 * initialize window to default value, and
1130 * initialize maxseg/maxopd using peer's cached
1131 * MSS.
1132 */
1133 #if INET6
1134 if (isipv6) {
1135 error = tcp6_connect(tp, nam, p);
1136 } else
1137 #endif /* INET6 */
1138 error = tcp_connect(tp, nam, p);
1139 if (error) {
1140 TCP_LOG_CONNECT(tp, true, error);
1141 goto out;
1142 }
1143 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1144 tp->max_sndwnd = tp->snd_wnd;
1145 tcp_mss(tp, -1, IFSCOPE_NONE);
1146
1147 TCP_LOG_CONNECT(tp, true, error);
1148
1149 /* The sequence number of the data is past the SYN */
1150 mpkl_seq = tp->iss + 1;
1151 }
1152
1153 if (flags & PRUS_EOF) {
1154 /*
1155 * Close the send side of the connection after
1156 * the data is sent.
1157 */
1158 socantsendmore(so);
1159 tp = tcp_usrclosed(tp);
1160 }
1161 if (tp != NULL) {
1162 if (flags & PRUS_MORETOCOME) {
1163 tp->t_flags |= TF_MORETOCOME;
1164 }
1165 error = tcp_output(tp);
1166 if (flags & PRUS_MORETOCOME) {
1167 tp->t_flags &= ~TF_MORETOCOME;
1168 }
1169 }
1170 } else {
1171 if (sbspace(&so->so_snd) == 0) {
1172 /* if no space is left in sockbuf,
1173 * do not try to squeeze in OOB traffic */
1174 m_freem(m);
1175 error = ENOBUFS;
1176 goto out;
1177 }
1178 /*
1179 * According to RFC961 (Assigned Protocols),
1180 * the urgent pointer points to the last octet
1181 * of urgent data. We continue, however,
1182 * to consider it to indicate the first octet
1183 * of data past the urgent section.
1184 * Otherwise, snd_up should be one lower.
1185 */
1186 sbappendstream(&so->so_snd, m);
1187 if (nam && tp->t_state < TCPS_SYN_SENT) {
1188 /*
1189 * Do implied connect if not yet connected,
1190 * initialize window to default value, and
1191 * initialize maxseg/maxopd using peer's cached
1192 * MSS.
1193 */
1194 #if INET6
1195 if (isipv6) {
1196 error = tcp6_connect(tp, nam, p);
1197 } else
1198 #endif /* INET6 */
1199 error = tcp_connect(tp, nam, p);
1200 if (error) {
1201 TCP_LOG_CONNECT(tp, true, error);
1202 goto out;
1203 }
1204 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1205 tp->max_sndwnd = tp->snd_wnd;
1206 tcp_mss(tp, -1, IFSCOPE_NONE);
1207
1208 TCP_LOG_CONNECT(tp, true, error);
1209 }
1210 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
1211 tp->t_flagsext |= TF_FORCE;
1212 error = tcp_output(tp);
1213 tp->t_flagsext &= ~TF_FORCE;
1214 }
1215
1216 if (net_mpklog_enabled && (inp = tp->t_inpcb) != NULL &&
1217 ((inp->inp_last_outifp != NULL &&
1218 (inp->inp_last_outifp->if_xflags & IFXF_MPK_LOG)) ||
1219 (inp->inp_boundifp != NULL &&
1220 (inp->inp_boundifp->if_xflags & IFXF_MPK_LOG)))) {
1221 MPKL_TCP_SEND(tcp_mpkl_log_object,
1222 mpkl_send_info.mpkl_proto, mpkl_send_info.mpkl_uuid,
1223 ntohs(inp->inp_lport), ntohs(inp->inp_fport),
1224 mpkl_seq, mpkl_len,
1225 so->last_pid, so->so_log_seqn++);
1226 }
1227
1228 /*
1229 * We wait for the socket to successfully connect before returning.
1230 * This allows us to signal a timeout to the application.
1231 */
1232 if (so->so_state & SS_ISCONNECTING) {
1233 if (so->so_state & SS_NBIO) {
1234 error = EWOULDBLOCK;
1235 } else {
1236 error = sbwait(&so->so_snd);
1237 }
1238 }
1239
1240 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
1241 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1242 }
1243
1244 /*
1245 * Abort the TCP.
1246 */
1247 static int
1248 tcp_usr_abort(struct socket *so)
1249 {
1250 int error = 0;
1251 struct inpcb *inp = sotoinpcb(so);
1252 struct tcpcb *tp;
1253
1254 COMMON_START();
1255 /* In case we got disconnected from the peer */
1256 if (tp == NULL) {
1257 goto out;
1258 }
1259 tp = tcp_drop(tp, ECONNABORTED);
1260 VERIFY(so->so_usecount > 0);
1261 so->so_usecount--;
1262 COMMON_END(PRU_ABORT);
1263 }
1264
1265 /*
1266 * Receive out-of-band data.
1267 *
1268 * Returns: 0 Success
1269 * EINVAL [COMMON_START]
1270 * EINVAL
1271 * EWOULDBLOCK
1272 */
1273 static int
1274 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1275 {
1276 int error = 0;
1277 struct inpcb *inp = sotoinpcb(so);
1278 struct tcpcb *tp;
1279
1280 COMMON_START();
1281 if ((so->so_oobmark == 0 &&
1282 (so->so_state & SS_RCVATMARK) == 0) ||
1283 so->so_options & SO_OOBINLINE ||
1284 tp->t_oobflags & TCPOOB_HADDATA) {
1285 error = EINVAL;
1286 goto out;
1287 }
1288 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1289 error = EWOULDBLOCK;
1290 goto out;
1291 }
1292 m->m_len = 1;
1293 *mtod(m, caddr_t) = tp->t_iobc;
1294 so->so_state &= ~SS_RCVATMARK;
1295 if ((flags & MSG_PEEK) == 0) {
1296 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1297 }
1298 COMMON_END(PRU_RCVOOB);
1299 }
1300
1301 static int
1302 tcp_usr_preconnect(struct socket *so)
1303 {
1304 struct inpcb *inp = sotoinpcb(so);
1305 int error = 0;
1306
1307 #if NECP
1308 if (necp_socket_should_use_flow_divert(inp)) {
1309 /* May happen, if in tcp_usr_connect we did not had a chance
1310 * to set the usrreqs (due to some error). So, let's get out
1311 * of here.
1312 */
1313 goto out;
1314 }
1315 #endif /* NECP */
1316
1317 error = tcp_output(sototcpcb(so));
1318
1319 soclearfastopen(so);
1320
1321 COMMON_END(PRU_PRECONNECT);
1322 }
1323
1324 /* xxx - should be const */
1325 struct pr_usrreqs tcp_usrreqs = {
1326 .pru_abort = tcp_usr_abort,
1327 .pru_accept = tcp_usr_accept,
1328 .pru_attach = tcp_usr_attach,
1329 .pru_bind = tcp_usr_bind,
1330 .pru_connect = tcp_usr_connect,
1331 .pru_connectx = tcp_usr_connectx,
1332 .pru_control = in_control,
1333 .pru_detach = tcp_usr_detach,
1334 .pru_disconnect = tcp_usr_disconnect,
1335 .pru_disconnectx = tcp_usr_disconnectx,
1336 .pru_listen = tcp_usr_listen,
1337 .pru_peeraddr = in_getpeeraddr,
1338 .pru_rcvd = tcp_usr_rcvd,
1339 .pru_rcvoob = tcp_usr_rcvoob,
1340 .pru_send = tcp_usr_send,
1341 .pru_shutdown = tcp_usr_shutdown,
1342 .pru_sockaddr = in_getsockaddr,
1343 .pru_sosend = sosend,
1344 .pru_soreceive = soreceive,
1345 .pru_preconnect = tcp_usr_preconnect,
1346 };
1347
1348 #if INET6
1349 struct pr_usrreqs tcp6_usrreqs = {
1350 .pru_abort = tcp_usr_abort,
1351 .pru_accept = tcp6_usr_accept,
1352 .pru_attach = tcp_usr_attach,
1353 .pru_bind = tcp6_usr_bind,
1354 .pru_connect = tcp6_usr_connect,
1355 .pru_connectx = tcp6_usr_connectx,
1356 .pru_control = in6_control,
1357 .pru_detach = tcp_usr_detach,
1358 .pru_disconnect = tcp_usr_disconnect,
1359 .pru_disconnectx = tcp_usr_disconnectx,
1360 .pru_listen = tcp6_usr_listen,
1361 .pru_peeraddr = in6_mapped_peeraddr,
1362 .pru_rcvd = tcp_usr_rcvd,
1363 .pru_rcvoob = tcp_usr_rcvoob,
1364 .pru_send = tcp_usr_send,
1365 .pru_shutdown = tcp_usr_shutdown,
1366 .pru_sockaddr = in6_mapped_sockaddr,
1367 .pru_sosend = sosend,
1368 .pru_soreceive = soreceive,
1369 .pru_preconnect = tcp_usr_preconnect,
1370 };
1371 #endif /* INET6 */
1372
1373 /*
1374 * Common subroutine to open a TCP connection to remote host specified
1375 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
1376 * port number if needed. Call in_pcbladdr to do the routing and to choose
1377 * a local host address (interface). If there is an existing incarnation
1378 * of the same connection in TIME-WAIT state and if the remote host was
1379 * sending CC options and if the connection duration was < MSL, then
1380 * truncate the previous TIME-WAIT state and proceed.
1381 * Initialize connection parameters and enter SYN-SENT state.
1382 *
1383 * Returns: 0 Success
1384 * EADDRINUSE
1385 * EINVAL
1386 * in_pcbbind:EADDRNOTAVAIL Address not available.
1387 * in_pcbbind:EINVAL Invalid argument
1388 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
1389 * in_pcbbind:EACCES Permission denied
1390 * in_pcbbind:EADDRINUSE Address in use
1391 * in_pcbbind:EAGAIN Resource unavailable, try again
1392 * in_pcbbind:EPERM Operation not permitted
1393 * in_pcbladdr:EINVAL Invalid argument
1394 * in_pcbladdr:EAFNOSUPPORT Address family not supported
1395 * in_pcbladdr:EADDRNOTAVAIL Address not available
1396 */
1397 static int
1398 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1399 {
1400 struct inpcb *inp = tp->t_inpcb, *oinp;
1401 struct socket *so = inp->inp_socket;
1402 struct tcpcb *otp;
1403 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1404 struct in_addr laddr;
1405 int error = 0;
1406 struct ifnet *outif = NULL;
1407
1408 if (inp->inp_lport == 0) {
1409 error = in_pcbbind(inp, NULL, p);
1410 if (error) {
1411 goto done;
1412 }
1413 }
1414
1415 /*
1416 * Cannot simply call in_pcbconnect, because there might be an
1417 * earlier incarnation of this same connection still in
1418 * TIME_WAIT state, creating an ADDRINUSE error.
1419 */
1420 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif, 0);
1421 if (error) {
1422 goto done;
1423 }
1424
1425 socket_unlock(inp->inp_socket, 0);
1426 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
1427 sin->sin_addr, sin->sin_port,
1428 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr,
1429 inp->inp_lport, 0, NULL);
1430
1431 socket_lock(inp->inp_socket, 0);
1432 if (oinp) {
1433 if (oinp != inp) { /* 4143933: avoid deadlock if inp == oinp */
1434 socket_lock(oinp->inp_socket, 1);
1435 }
1436 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1437 if (oinp != inp) {
1438 socket_unlock(oinp->inp_socket, 1);
1439 }
1440 goto skip_oinp;
1441 }
1442
1443 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
1444 otp->t_state == TCPS_TIME_WAIT &&
1445 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
1446 (otp->t_flags & TF_RCVD_CC)) {
1447 otp = tcp_close(otp);
1448 } else {
1449 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n",
1450 (uint64_t)VM_KERNEL_ADDRPERM(inp));
1451 if (oinp != inp) {
1452 socket_unlock(oinp->inp_socket, 1);
1453 }
1454 error = EADDRINUSE;
1455 goto done;
1456 }
1457 if (oinp != inp) {
1458 socket_unlock(oinp->inp_socket, 1);
1459 }
1460 }
1461 skip_oinp:
1462 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr :
1463 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
1464 inp->inp_lport == sin->sin_port) {
1465 error = EINVAL;
1466 goto done;
1467 }
1468 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1469 /*lock inversion issue, mostly with udp multicast packets */
1470 socket_unlock(inp->inp_socket, 0);
1471 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1472 socket_lock(inp->inp_socket, 0);
1473 }
1474 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1475 inp->inp_laddr = laddr;
1476 /* no reference needed */
1477 inp->inp_last_outifp = outif;
1478
1479 inp->inp_flags |= INP_INADDR_ANY;
1480 }
1481 inp->inp_faddr = sin->sin_addr;
1482 inp->inp_fport = sin->sin_port;
1483 in_pcbrehash(inp);
1484 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1485
1486 if (inp->inp_flowhash == 0) {
1487 inp->inp_flowhash = inp_calc_flowhash(inp);
1488 }
1489
1490 tcp_set_max_rwinscale(tp, so, outif);
1491
1492 soisconnecting(so);
1493 tcpstat.tcps_connattempt++;
1494 tp->t_state = TCPS_SYN_SENT;
1495 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp));
1496 tp->iss = tcp_new_isn(tp);
1497 tcp_sendseqinit(tp);
1498 tp->t_connect_time = tcp_now;
1499 if (nstat_collect) {
1500 nstat_route_connect_attempt(inp->inp_route.ro_rt);
1501 }
1502
1503 done:
1504 if (outif != NULL) {
1505 ifnet_release(outif);
1506 }
1507
1508 return error;
1509 }
1510
1511 #if INET6
1512 static int
1513 tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1514 {
1515 struct inpcb *inp = tp->t_inpcb, *oinp;
1516 struct socket *so = inp->inp_socket;
1517 struct tcpcb *otp;
1518 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
1519 struct in6_addr addr6;
1520 int error = 0;
1521 struct ifnet *outif = NULL;
1522
1523 if (inp->inp_lport == 0) {
1524 error = in6_pcbbind(inp, NULL, p);
1525 if (error) {
1526 goto done;
1527 }
1528 }
1529
1530 /*
1531 * Cannot simply call in_pcbconnect, because there might be an
1532 * earlier incarnation of this same connection still in
1533 * TIME_WAIT state, creating an ADDRINUSE error.
1534 *
1535 * in6_pcbladdr() might return an ifp with its reference held
1536 * even in the error case, so make sure that it's released
1537 * whenever it's non-NULL.
1538 */
1539 error = in6_pcbladdr(inp, nam, &addr6, &outif);
1540 if (error) {
1541 goto done;
1542 }
1543 socket_unlock(inp->inp_socket, 0);
1544 oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
1545 &sin6->sin6_addr, sin6->sin6_port,
1546 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
1547 ? &addr6
1548 : &inp->in6p_laddr,
1549 inp->inp_lport, 0, NULL);
1550 socket_lock(inp->inp_socket, 0);
1551 if (oinp) {
1552 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
1553 otp->t_state == TCPS_TIME_WAIT &&
1554 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
1555 (otp->t_flags & TF_RCVD_CC)) {
1556 otp = tcp_close(otp);
1557 } else {
1558 error = EADDRINUSE;
1559 goto done;
1560 }
1561 }
1562 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1563 /*lock inversion issue, mostly with udp multicast packets */
1564 socket_unlock(inp->inp_socket, 0);
1565 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1566 socket_lock(inp->inp_socket, 0);
1567 }
1568 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1569 inp->in6p_laddr = addr6;
1570 inp->in6p_last_outifp = outif; /* no reference needed */
1571 inp->in6p_flags |= INP_IN6ADDR_ANY;
1572 }
1573 inp->in6p_faddr = sin6->sin6_addr;
1574 inp->inp_fport = sin6->sin6_port;
1575 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) {
1576 inp->inp_flow = sin6->sin6_flowinfo;
1577 }
1578 in_pcbrehash(inp);
1579 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1580
1581 if (inp->inp_flowhash == 0) {
1582 inp->inp_flowhash = inp_calc_flowhash(inp);
1583 }
1584 /* update flowinfo - RFC 6437 */
1585 if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) {
1586 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
1587 inp->inp_flow |=
1588 (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK);
1589 }
1590
1591 tcp_set_max_rwinscale(tp, so, outif);
1592
1593 soisconnecting(so);
1594 tcpstat.tcps_connattempt++;
1595 tp->t_state = TCPS_SYN_SENT;
1596 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
1597 TCP_CONN_KEEPINIT(tp));
1598 tp->iss = tcp_new_isn(tp);
1599 tcp_sendseqinit(tp);
1600 tp->t_connect_time = tcp_now;
1601 if (nstat_collect) {
1602 nstat_route_connect_attempt(inp->inp_route.ro_rt);
1603 }
1604
1605 done:
1606 if (outif != NULL) {
1607 ifnet_release(outif);
1608 }
1609
1610 return error;
1611 }
1612 #endif /* INET6 */
1613
1614 /*
1615 * Export TCP internal state information via a struct tcp_info
1616 */
1617 void
1618 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1619 {
1620 struct inpcb *inp = tp->t_inpcb;
1621
1622 bzero(ti, sizeof(*ti));
1623
1624 ti->tcpi_state = tp->t_state;
1625 ti->tcpi_flowhash = inp->inp_flowhash;
1626
1627 if (tp->t_state > TCPS_LISTEN) {
1628 if (TSTMP_SUPPORTED(tp)) {
1629 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1630 }
1631 if (SACK_ENABLED(tp)) {
1632 ti->tcpi_options |= TCPI_OPT_SACK;
1633 }
1634 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1635 ti->tcpi_options |= TCPI_OPT_WSCALE;
1636 ti->tcpi_snd_wscale = tp->snd_scale;
1637 ti->tcpi_rcv_wscale = tp->rcv_scale;
1638 }
1639 if (TCP_ECN_ENABLED(tp)) {
1640 ti->tcpi_options |= TCPI_OPT_ECN;
1641 }
1642
1643 /* Are we in retranmission episode */
1644 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
1645 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
1646 }
1647
1648 if (tp->t_flags & TF_STREAMING_ON) {
1649 ti->tcpi_flags |= TCPI_FLAG_STREAMING_ON;
1650 }
1651
1652 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
1653 ti->tcpi_snd_mss = tp->t_maxseg;
1654 ti->tcpi_rcv_mss = tp->t_maxseg;
1655
1656 ti->tcpi_rttcur = tp->t_rttcur;
1657 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
1658 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
1659 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT;
1660
1661 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1662 ti->tcpi_snd_cwnd = tp->snd_cwnd;
1663 ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1664
1665 ti->tcpi_rcv_space = tp->rcv_wnd;
1666
1667 ti->tcpi_snd_wnd = tp->snd_wnd;
1668 ti->tcpi_snd_nxt = tp->snd_nxt;
1669 ti->tcpi_rcv_nxt = tp->rcv_nxt;
1670
1671 /* convert bytes/msec to bits/sec */
1672 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
1673 tp->t_bwmeas != NULL) {
1674 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
1675 }
1676
1677 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1678 tp->t_inpcb->inp_last_outifp->if_index;
1679
1680 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
1681 ti->tcpi_txpackets = inp->inp_stat->txpackets;
1682 ti->tcpi_txbytes = inp->inp_stat->txbytes;
1683 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
1684 ti->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
1685 ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
1686
1687 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
1688 ti->tcpi_rxpackets = inp->inp_stat->rxpackets;
1689 ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
1690 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
1691 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1692
1693 if (tp->t_state > TCPS_LISTEN) {
1694 ti->tcpi_synrexmits = tp->t_stat.rxmitsyns;
1695 }
1696 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets;
1697 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes;
1698 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets;
1699 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes;
1700
1701 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets;
1702 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes;
1703 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets;
1704 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes;
1705
1706 ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets;
1707 ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes;
1708 ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets;
1709 ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes;
1710 tcp_get_connectivity_status(tp, &ti->tcpi_connstatus);
1711
1712 ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1713 ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1714 ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1715 ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1716
1717 ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1718 ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1719 ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1720 ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1721 ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
1722 ti->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1723 ti->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1724 ti->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1725 ti->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1726 ti->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
1727 ti->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
1728
1729 ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
1730 ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
1731 ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
1732 ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
1733 ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
1734
1735 ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
1736
1737 if (tp->t_inpcb->inp_last_outifp != NULL) {
1738 if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp)) {
1739 ti->tcpi_if_cell = 1;
1740 }
1741 if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
1742 ti->tcpi_if_wifi = 1;
1743 }
1744 if (IFNET_IS_WIRED(tp->t_inpcb->inp_last_outifp)) {
1745 ti->tcpi_if_wired = 1;
1746 }
1747 if (IFNET_IS_WIFI_INFRA(tp->t_inpcb->inp_last_outifp)) {
1748 ti->tcpi_if_wifi_infra = 1;
1749 }
1750 if (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_AWDL) {
1751 ti->tcpi_if_wifi_awdl = 1;
1752 }
1753 }
1754 if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) {
1755 ti->tcpi_snd_background = 1;
1756 }
1757 if (tcp_recv_bg == 1 ||
1758 IS_TCP_RECV_BG(tp->t_inpcb->inp_socket)) {
1759 ti->tcpi_rcv_background = 1;
1760 }
1761
1762 ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
1763 ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
1764
1765 ti->tcpi_rcvoopack = tp->t_rcvoopack;
1766 ti->tcpi_pawsdrop = tp->t_pawsdrop;
1767 ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
1768 ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
1769 ti->tcpi_dsack_sent = tp->t_dsack_sent;
1770 ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
1771 }
1772 }
1773
1774 __private_extern__ errno_t
1775 tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
1776 {
1777 struct inpcbinfo *pcbinfo = NULL;
1778 struct inpcb *inp = NULL;
1779 struct socket *so;
1780 struct tcpcb *tp;
1781
1782 if (itpl->itpl_proto == IPPROTO_TCP) {
1783 pcbinfo = &tcbinfo;
1784 } else {
1785 return EINVAL;
1786 }
1787
1788 if (itpl->itpl_local_sa.sa_family == AF_INET &&
1789 itpl->itpl_remote_sa.sa_family == AF_INET) {
1790 inp = in_pcblookup_hash(pcbinfo,
1791 itpl->itpl_remote_sin.sin_addr,
1792 itpl->itpl_remote_sin.sin_port,
1793 itpl->itpl_local_sin.sin_addr,
1794 itpl->itpl_local_sin.sin_port,
1795 0, NULL);
1796 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
1797 itpl->itpl_remote_sa.sa_family == AF_INET6) {
1798 struct in6_addr ina6_local;
1799 struct in6_addr ina6_remote;
1800
1801 ina6_local = itpl->itpl_local_sin6.sin6_addr;
1802 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) &&
1803 itpl->itpl_local_sin6.sin6_scope_id) {
1804 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id);
1805 }
1806
1807 ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
1808 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) &&
1809 itpl->itpl_remote_sin6.sin6_scope_id) {
1810 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id);
1811 }
1812
1813 inp = in6_pcblookup_hash(pcbinfo,
1814 &ina6_remote,
1815 itpl->itpl_remote_sin6.sin6_port,
1816 &ina6_local,
1817 itpl->itpl_local_sin6.sin6_port,
1818 0, NULL);
1819 } else {
1820 return EINVAL;
1821 }
1822 if (inp == NULL || (so = inp->inp_socket) == NULL) {
1823 return ENOENT;
1824 }
1825
1826 socket_lock(so, 0);
1827 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1828 socket_unlock(so, 0);
1829 return ENOENT;
1830 }
1831 tp = intotcpcb(inp);
1832
1833 tcp_fill_info(tp, ti);
1834 socket_unlock(so, 0);
1835
1836 return 0;
1837 }
1838
1839 static void
1840 tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci)
1841 {
1842 struct inpcb *inp = tp->t_inpcb;
1843
1844 bzero(tci, sizeof(*tci));
1845 tci->tcpi_state = tp->t_state;
1846 if (tp->t_state > TCPS_LISTEN) {
1847 if (TSTMP_SUPPORTED(tp)) {
1848 tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
1849 }
1850 if (SACK_ENABLED(tp)) {
1851 tci->tcpi_options |= TCPCI_OPT_SACK;
1852 }
1853 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1854 tci->tcpi_options |= TCPCI_OPT_WSCALE;
1855 tci->tcpi_snd_wscale = tp->snd_scale;
1856 tci->tcpi_rcv_wscale = tp->rcv_scale;
1857 }
1858 if (TCP_ECN_ENABLED(tp)) {
1859 tci->tcpi_options |= TCPCI_OPT_ECN;
1860 }
1861 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
1862 tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY;
1863 }
1864 if (tp->t_flagsext & TF_PKTS_REORDERED) {
1865 tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED;
1866 }
1867 tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ?
1868 tp->t_rxtcur : 0;
1869 tci->tcpi_maxseg = tp->t_maxseg;
1870 tci->tcpi_snd_ssthresh = tp->snd_ssthresh;
1871 tci->tcpi_snd_cwnd = tp->snd_cwnd;
1872 tci->tcpi_snd_wnd = tp->snd_wnd;
1873 tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1874 tci->tcpi_rcv_wnd = tp->rcv_wnd;
1875 tci->tcpi_rttcur = tp->t_rttcur;
1876 tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT);
1877 tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1878 tci->tcpi_txpackets = inp->inp_stat->txpackets;
1879 tci->tcpi_txbytes = inp->inp_stat->txbytes;
1880 tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
1881 tci->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
1882 tci->tcpi_rxpackets = inp->inp_stat->rxpackets;
1883 tci->tcpi_rxbytes = inp->inp_stat->rxbytes;
1884 tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1885
1886 tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1887 tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1888 tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1889 tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1890 tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1891 tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1892 tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1893 tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1894 tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
1895 tci->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1896 tci->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1897 tci->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1898 tci->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1899 tci->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
1900 tci->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
1901 }
1902 }
1903
1904
1905 __private_extern__ int
1906 tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1907 {
1908 int error;
1909 struct tcp_info ti = {};
1910 struct info_tuple itpl;
1911
1912 if (req->newptr == USER_ADDR_NULL) {
1913 return EINVAL;
1914 }
1915 if (req->newlen < sizeof(struct info_tuple)) {
1916 return EINVAL;
1917 }
1918 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
1919 if (error != 0) {
1920 return error;
1921 }
1922 error = tcp_fill_info_for_info_tuple(&itpl, &ti);
1923 if (error != 0) {
1924 return error;
1925 }
1926 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
1927 if (error != 0) {
1928 return error;
1929 }
1930
1931 return 0;
1932 }
1933
1934 static int
1935 tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
1936 {
1937 int error = EHOSTUNREACH;
1938 *out_pid = -1;
1939 if ((so->so_state & SS_ISCONNECTED) == 0) {
1940 return ENOTCONN;
1941 }
1942
1943 struct inpcb *inp = (struct inpcb*)so->so_pcb;
1944 uint16_t lport = inp->inp_lport;
1945 uint16_t fport = inp->inp_fport;
1946 struct inpcb *finp = NULL;
1947 struct in6_addr laddr6, faddr6;
1948 struct in_addr laddr4, faddr4;
1949
1950 if (inp->inp_vflag & INP_IPV6) {
1951 laddr6 = inp->in6p_laddr;
1952 faddr6 = inp->in6p_faddr;
1953 } else if (inp->inp_vflag & INP_IPV4) {
1954 laddr4 = inp->inp_laddr;
1955 faddr4 = inp->inp_faddr;
1956 }
1957
1958 socket_unlock(so, 0);
1959 if (inp->inp_vflag & INP_IPV6) {
1960 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
1961 } else if (inp->inp_vflag & INP_IPV4) {
1962 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
1963 }
1964
1965 if (finp) {
1966 *out_pid = finp->inp_socket->last_pid;
1967 error = 0;
1968 in_pcb_checkstate(finp, WNT_RELEASE, 0);
1969 }
1970 socket_lock(so, 0);
1971
1972 return error;
1973 }
1974
1975 void
1976 tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci)
1977 {
1978 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid);
1979 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info);
1980 }
1981
1982 void
1983 tcp_clear_keep_alive_offload(struct socket *so)
1984 {
1985 struct inpcb *inp;
1986 struct ifnet *ifp;
1987
1988 inp = sotoinpcb(so);
1989 if (inp == NULL) {
1990 return;
1991 }
1992
1993 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) == 0) {
1994 return;
1995 }
1996
1997 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1998 inp->inp_last_outifp;
1999 if (ifp == NULL) {
2000 panic("%s: so %p inp %p ifp NULL",
2001 __func__, so, inp);
2002 }
2003
2004 ifnet_lock_exclusive(ifp);
2005
2006 if (ifp->if_tcp_kao_cnt == 0) {
2007 panic("%s: so %p inp %p ifp %p if_tcp_kao_cnt == 0",
2008 __func__, so, inp, ifp);
2009 }
2010 ifp->if_tcp_kao_cnt--;
2011 inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
2012
2013 ifnet_lock_done(ifp);
2014 }
2015
2016 static int
2017 tcp_set_keep_alive_offload(struct socket *so, struct proc *proc)
2018 {
2019 int error = 0;
2020 struct inpcb *inp;
2021 struct ifnet *ifp;
2022
2023 inp = sotoinpcb(so);
2024 if (inp == NULL) {
2025 return ECONNRESET;
2026 }
2027 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) != 0) {
2028 return 0;
2029 }
2030
2031 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
2032 inp->inp_last_outifp;
2033 if (ifp == NULL) {
2034 error = ENXIO;
2035 os_log_info(OS_LOG_DEFAULT,
2036 "%s: error %d for proc %s[%u] out ifp is not set\n",
2037 __func__, error,
2038 proc != NULL ? proc->p_comm : "kernel",
2039 proc != NULL ? proc->p_pid : 0);
2040 return ENXIO;
2041 }
2042
2043 error = if_get_tcp_kao_max(ifp);
2044 if (error != 0) {
2045 return error;
2046 }
2047
2048 ifnet_lock_exclusive(ifp);
2049 if (ifp->if_tcp_kao_cnt < ifp->if_tcp_kao_max) {
2050 ifp->if_tcp_kao_cnt++;
2051 inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
2052 } else {
2053 error = ETOOMANYREFS;
2054 os_log_info(OS_LOG_DEFAULT,
2055 "%s: error %d for proc %s[%u] if_tcp_kao_max %u\n",
2056 __func__, error,
2057 proc != NULL ? proc->p_comm : "kernel",
2058 proc != NULL ? proc->p_pid : 0,
2059 ifp->if_tcp_kao_max);
2060 }
2061 ifnet_lock_done(ifp);
2062
2063 return error;
2064 }
2065
2066 /*
2067 * The new sockopt interface makes it possible for us to block in the
2068 * copyin/out step (if we take a page fault). Taking a page fault at
2069 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
2070 * use TSM, there probably isn't any need for this function to run at
2071 * splnet() any more. This needs more examination.)
2072 */
2073 int
2074 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
2075 {
2076 int error = 0, opt = 0, optval = 0;
2077 struct inpcb *inp;
2078 struct tcpcb *tp;
2079
2080 inp = sotoinpcb(so);
2081 if (inp == NULL) {
2082 return ECONNRESET;
2083 }
2084 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */
2085 if (sopt->sopt_level != IPPROTO_TCP &&
2086 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH ||
2087 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) {
2088 #if INET6
2089 if (SOCK_CHECK_DOM(so, PF_INET6)) {
2090 error = ip6_ctloutput(so, sopt);
2091 } else
2092 #endif /* INET6 */
2093 error = ip_ctloutput(so, sopt);
2094 return error;
2095 }
2096 tp = intotcpcb(inp);
2097 if (tp == NULL) {
2098 return ECONNRESET;
2099 }
2100
2101 calculate_tcp_clock();
2102
2103 switch (sopt->sopt_dir) {
2104 case SOPT_SET:
2105 switch (sopt->sopt_name) {
2106 case TCP_NODELAY:
2107 case TCP_NOOPT:
2108 case TCP_NOPUSH:
2109 error = sooptcopyin(sopt, &optval, sizeof optval,
2110 sizeof optval);
2111 if (error) {
2112 break;
2113 }
2114
2115 switch (sopt->sopt_name) {
2116 case TCP_NODELAY:
2117 opt = TF_NODELAY;
2118 break;
2119 case TCP_NOOPT:
2120 opt = TF_NOOPT;
2121 break;
2122 case TCP_NOPUSH:
2123 opt = TF_NOPUSH;
2124 break;
2125 default:
2126 opt = 0; /* dead code to fool gcc */
2127 break;
2128 }
2129
2130 if (optval) {
2131 tp->t_flags |= opt;
2132 } else {
2133 tp->t_flags &= ~opt;
2134 }
2135 break;
2136 case TCP_RXT_FINDROP:
2137 case TCP_NOTIMEWAIT:
2138 error = sooptcopyin(sopt, &optval, sizeof optval,
2139 sizeof optval);
2140 if (error) {
2141 break;
2142 }
2143 switch (sopt->sopt_name) {
2144 case TCP_RXT_FINDROP:
2145 opt = TF_RXTFINDROP;
2146 break;
2147 case TCP_NOTIMEWAIT:
2148 opt = TF_NOTIMEWAIT;
2149 break;
2150 default:
2151 opt = 0;
2152 break;
2153 }
2154 if (optval) {
2155 tp->t_flagsext |= opt;
2156 } else {
2157 tp->t_flagsext &= ~opt;
2158 }
2159 break;
2160 case TCP_MEASURE_SND_BW:
2161 error = sooptcopyin(sopt, &optval, sizeof optval,
2162 sizeof optval);
2163 if (error) {
2164 break;
2165 }
2166 opt = TF_MEASURESNDBW;
2167 if (optval) {
2168 if (tp->t_bwmeas == NULL) {
2169 tp->t_bwmeas = tcp_bwmeas_alloc(tp);
2170 if (tp->t_bwmeas == NULL) {
2171 error = ENOMEM;
2172 break;
2173 }
2174 }
2175 tp->t_flagsext |= opt;
2176 } else {
2177 tp->t_flagsext &= ~opt;
2178 /* Reset snd bw measurement state */
2179 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
2180 if (tp->t_bwmeas != NULL) {
2181 tcp_bwmeas_free(tp);
2182 }
2183 }
2184 break;
2185 case TCP_MEASURE_BW_BURST: {
2186 struct tcp_measure_bw_burst in;
2187 uint32_t minpkts, maxpkts;
2188 bzero(&in, sizeof(in));
2189
2190 error = sooptcopyin(sopt, &in, sizeof(in),
2191 sizeof(in));
2192 if (error) {
2193 break;
2194 }
2195 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
2196 tp->t_bwmeas == NULL) {
2197 error = EINVAL;
2198 break;
2199 }
2200 minpkts = (in.min_burst_size != 0) ? in.min_burst_size :
2201 tp->t_bwmeas->bw_minsizepkts;
2202 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
2203 tp->t_bwmeas->bw_maxsizepkts;
2204 if (minpkts > maxpkts) {
2205 error = EINVAL;
2206 break;
2207 }
2208 tp->t_bwmeas->bw_minsizepkts = minpkts;
2209 tp->t_bwmeas->bw_maxsizepkts = maxpkts;
2210 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
2211 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
2212 break;
2213 }
2214 case TCP_MAXSEG:
2215 error = sooptcopyin(sopt, &optval, sizeof optval,
2216 sizeof optval);
2217 if (error) {
2218 break;
2219 }
2220
2221 if (optval > 0 && optval <= tp->t_maxseg &&
2222 optval + 40 >= tcp_minmss) {
2223 tp->t_maxseg = optval;
2224 } else {
2225 error = EINVAL;
2226 }
2227 break;
2228
2229 case TCP_KEEPALIVE:
2230 error = sooptcopyin(sopt, &optval, sizeof optval,
2231 sizeof optval);
2232 if (error) {
2233 break;
2234 }
2235 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2236 error = EINVAL;
2237 } else {
2238 tp->t_keepidle = optval * TCP_RETRANSHZ;
2239 /* reset the timer to new value */
2240 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
2241 TCP_CONN_KEEPIDLE(tp));
2242 tcp_check_timer_state(tp);
2243 }
2244 break;
2245
2246 case TCP_CONNECTIONTIMEOUT:
2247 error = sooptcopyin(sopt, &optval, sizeof optval,
2248 sizeof optval);
2249 if (error) {
2250 break;
2251 }
2252 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2253 error = EINVAL;
2254 } else {
2255 tp->t_keepinit = optval * TCP_RETRANSHZ;
2256 if (tp->t_state == TCPS_SYN_RECEIVED ||
2257 tp->t_state == TCPS_SYN_SENT) {
2258 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
2259 TCP_CONN_KEEPINIT(tp));
2260 tcp_check_timer_state(tp);
2261 }
2262 }
2263 break;
2264
2265 case TCP_KEEPINTVL:
2266 error = sooptcopyin(sopt, &optval, sizeof(optval),
2267 sizeof(optval));
2268 if (error) {
2269 break;
2270 }
2271 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2272 error = EINVAL;
2273 } else {
2274 tp->t_keepintvl = optval * TCP_RETRANSHZ;
2275 if (tp->t_state == TCPS_FIN_WAIT_2 &&
2276 TCP_CONN_MAXIDLE(tp) > 0) {
2277 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
2278 TCP_CONN_MAXIDLE(tp));
2279 tcp_check_timer_state(tp);
2280 }
2281 }
2282 break;
2283
2284 case TCP_KEEPCNT:
2285 error = sooptcopyin(sopt, &optval, sizeof(optval),
2286 sizeof(optval));
2287 if (error) {
2288 break;
2289 }
2290 if (optval < 0 || optval > INT32_MAX) {
2291 error = EINVAL;
2292 } else {
2293 tp->t_keepcnt = optval;
2294 if (tp->t_state == TCPS_FIN_WAIT_2 &&
2295 TCP_CONN_MAXIDLE(tp) > 0) {
2296 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
2297 TCP_CONN_MAXIDLE(tp));
2298 tcp_check_timer_state(tp);
2299 }
2300 }
2301 break;
2302
2303 case TCP_KEEPALIVE_OFFLOAD:
2304 if ((error = priv_check_cred(kauth_cred_get(),
2305 PRIV_NETINET_TCP_KA_OFFLOAD, 0)) != 0) {
2306 break;
2307 }
2308 error = sooptcopyin(sopt, &optval, sizeof(optval),
2309 sizeof(optval));
2310 if (error) {
2311 break;
2312 }
2313 if (optval < 0 || optval > INT32_MAX) {
2314 error = EINVAL;
2315 break;
2316 }
2317 if (optval != 0) {
2318 error = tcp_set_keep_alive_offload(so,
2319 sopt->sopt_p);
2320 } else {
2321 tcp_clear_keep_alive_offload(so);
2322 }
2323 break;
2324
2325 case PERSIST_TIMEOUT:
2326 error = sooptcopyin(sopt, &optval, sizeof optval,
2327 sizeof optval);
2328 if (error) {
2329 break;
2330 }
2331 if (optval < 0) {
2332 error = EINVAL;
2333 } else {
2334 tp->t_persist_timeout = optval * TCP_RETRANSHZ;
2335 }
2336 break;
2337 case TCP_RXT_CONNDROPTIME:
2338 error = sooptcopyin(sopt, &optval, sizeof(optval),
2339 sizeof(optval));
2340 if (error) {
2341 break;
2342 }
2343 if (optval < 0) {
2344 error = EINVAL;
2345 } else {
2346 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ;
2347 }
2348 break;
2349 case TCP_NOTSENT_LOWAT:
2350 error = sooptcopyin(sopt, &optval, sizeof(optval),
2351 sizeof(optval));
2352 if (error) {
2353 break;
2354 }
2355 if (optval < 0) {
2356 error = EINVAL;
2357 break;
2358 } else {
2359 if (optval == 0) {
2360 so->so_flags &= ~(SOF_NOTSENT_LOWAT);
2361 tp->t_notsent_lowat = 0;
2362 } else {
2363 so->so_flags |= SOF_NOTSENT_LOWAT;
2364 tp->t_notsent_lowat = optval;
2365 }
2366 }
2367 break;
2368 case TCP_ADAPTIVE_READ_TIMEOUT:
2369 error = sooptcopyin(sopt, &optval, sizeof(optval),
2370 sizeof(optval));
2371 if (error) {
2372 break;
2373 }
2374 if (optval < 0 ||
2375 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
2376 error = EINVAL;
2377 break;
2378 } else if (optval == 0) {
2379 tp->t_adaptive_rtimo = 0;
2380 tcp_keepalive_reset(tp);
2381
2382 if (tp->t_mpsub) {
2383 mptcp_reset_keepalive(tp);
2384 }
2385 } else {
2386 tp->t_adaptive_rtimo = optval;
2387 }
2388 break;
2389 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2390 error = sooptcopyin(sopt, &optval, sizeof(optval),
2391 sizeof(optval));
2392 if (error) {
2393 break;
2394 }
2395 if (optval < 0 ||
2396 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
2397 error = EINVAL;
2398 break;
2399 } else {
2400 tp->t_adaptive_wtimo = optval;
2401 }
2402 break;
2403 case TCP_ENABLE_MSGS:
2404 error = sooptcopyin(sopt, &optval, sizeof(optval),
2405 sizeof(optval));
2406 if (error) {
2407 break;
2408 }
2409 if (optval < 0 || optval > 1) {
2410 error = EINVAL;
2411 } else if (optval == 1) {
2412 /*
2413 * Check if messages option is already
2414 * enabled, if so return.
2415 */
2416 if (so->so_flags & SOF_ENABLE_MSGS) {
2417 VERIFY(so->so_msg_state != NULL);
2418 break;
2419 }
2420
2421 /*
2422 * allocate memory for storing message
2423 * related state
2424 */
2425 VERIFY(so->so_msg_state == NULL);
2426 MALLOC(so->so_msg_state,
2427 struct msg_state *,
2428 sizeof(struct msg_state),
2429 M_TEMP, M_WAITOK | M_ZERO);
2430 if (so->so_msg_state == NULL) {
2431 error = ENOMEM;
2432 break;
2433 }
2434
2435 /* Enable message delivery */
2436 so->so_flags |= SOF_ENABLE_MSGS;
2437 } else {
2438 /*
2439 * Can't disable message delivery on socket
2440 * because of restrictions imposed by
2441 * encoding/decoding
2442 */
2443 error = EINVAL;
2444 }
2445 break;
2446 case TCP_SENDMOREACKS:
2447 error = sooptcopyin(sopt, &optval, sizeof(optval),
2448 sizeof(optval));
2449 if (error) {
2450 break;
2451 }
2452 if (optval < 0 || optval > 1) {
2453 error = EINVAL;
2454 } else if (optval == 0) {
2455 tp->t_flagsext &= ~(TF_NOSTRETCHACK);
2456 } else {
2457 tp->t_flagsext |= TF_NOSTRETCHACK;
2458 }
2459 break;
2460 case TCP_DISABLE_BLACKHOLE_DETECTION:
2461 error = sooptcopyin(sopt, &optval, sizeof(optval),
2462 sizeof(optval));
2463 if (error) {
2464 break;
2465 }
2466 if (optval < 0 || optval > 1) {
2467 error = EINVAL;
2468 } else if (optval == 0) {
2469 tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION;
2470 } else {
2471 tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION;
2472 if ((tp->t_flags & TF_BLACKHOLE) &&
2473 tp->t_pmtud_saved_maxopd > 0) {
2474 tcp_pmtud_revert_segment_size(tp);
2475 }
2476 }
2477 break;
2478 case TCP_FASTOPEN:
2479 if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2480 error = ENOTSUP;
2481 break;
2482 }
2483
2484 error = sooptcopyin(sopt, &optval, sizeof(optval),
2485 sizeof(optval));
2486 if (error) {
2487 break;
2488 }
2489 if (optval < 0 || optval > 1) {
2490 error = EINVAL;
2491 break;
2492 }
2493 if (tp->t_state != TCPS_LISTEN) {
2494 error = EINVAL;
2495 break;
2496 }
2497 if (optval) {
2498 tp->t_flagsext |= TF_FASTOPEN;
2499 } else {
2500 tcp_disable_tfo(tp);
2501 }
2502 break;
2503 case TCP_FASTOPEN_FORCE_HEURISTICS:
2504
2505 break;
2506 case TCP_FASTOPEN_FORCE_ENABLE:
2507 error = sooptcopyin(sopt, &optval, sizeof(optval),
2508 sizeof(optval));
2509
2510 if (error) {
2511 break;
2512 }
2513 if (optval < 0 || optval > 1) {
2514 error = EINVAL;
2515 break;
2516 }
2517
2518 if (tp->t_state != TCPS_CLOSED) {
2519 error = EINVAL;
2520 break;
2521 }
2522 if (optval) {
2523 tp->t_flagsext |= TF_FASTOPEN_FORCE_ENABLE;
2524 } else {
2525 tp->t_flagsext &= ~TF_FASTOPEN_FORCE_ENABLE;
2526 }
2527
2528 break;
2529 case TCP_ENABLE_ECN:
2530 error = sooptcopyin(sopt, &optval, sizeof optval,
2531 sizeof optval);
2532 if (error) {
2533 break;
2534 }
2535 if (optval) {
2536 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2537 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2538 } else {
2539 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2540 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2541 }
2542 break;
2543 case TCP_ECN_MODE:
2544 error = sooptcopyin(sopt, &optval, sizeof optval,
2545 sizeof optval);
2546 if (error) {
2547 break;
2548 }
2549 if (optval == ECN_MODE_DEFAULT) {
2550 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2551 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2552 } else if (optval == ECN_MODE_ENABLE) {
2553 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2554 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2555 } else if (optval == ECN_MODE_DISABLE) {
2556 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2557 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2558 } else {
2559 error = EINVAL;
2560 }
2561 break;
2562 case TCP_NOTIFY_ACKNOWLEDGEMENT:
2563 error = sooptcopyin(sopt, &optval,
2564 sizeof(optval), sizeof(optval));
2565 if (error) {
2566 break;
2567 }
2568 if (optval <= 0) {
2569 error = EINVAL;
2570 break;
2571 }
2572 if (tp->t_notify_ack_count >= TCP_MAX_NOTIFY_ACK) {
2573 error = ETOOMANYREFS;
2574 break;
2575 }
2576
2577 /*
2578 * validate that the given marker id is not
2579 * a duplicate to avoid ambiguity
2580 */
2581 if ((error = tcp_notify_ack_id_valid(tp, so,
2582 optval)) != 0) {
2583 break;
2584 }
2585 error = tcp_add_notify_ack_marker(tp, optval);
2586 break;
2587 case SO_FLUSH:
2588 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2589 sizeof(optval))) != 0) {
2590 break;
2591 }
2592
2593 error = inp_flush(inp, optval);
2594 break;
2595
2596 case SO_TRAFFIC_MGT_BACKGROUND:
2597 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2598 sizeof(optval))) != 0) {
2599 break;
2600 }
2601
2602 if (optval) {
2603 socket_set_traffic_mgt_flags_locked(so,
2604 TRAFFIC_MGT_SO_BACKGROUND);
2605 } else {
2606 socket_clear_traffic_mgt_flags_locked(so,
2607 TRAFFIC_MGT_SO_BACKGROUND);
2608 }
2609 break;
2610 case TCP_RXT_MINIMUM_TIMEOUT:
2611 error = sooptcopyin(sopt, &optval, sizeof(optval),
2612 sizeof(optval));
2613 if (error) {
2614 break;
2615 }
2616 if (optval < 0) {
2617 error = EINVAL;
2618 break;
2619 }
2620 if (optval == 0) {
2621 tp->t_rxt_minimum_timeout = 0;
2622 } else {
2623 tp->t_rxt_minimum_timeout = min(optval,
2624 TCP_RXT_MINIMUM_TIMEOUT_LIMIT);
2625 /* convert to milliseconds */
2626 tp->t_rxt_minimum_timeout *= TCP_RETRANSHZ;
2627 }
2628 break;
2629 default:
2630 error = ENOPROTOOPT;
2631 break;
2632 }
2633 break;
2634
2635 case SOPT_GET:
2636 switch (sopt->sopt_name) {
2637 case TCP_NODELAY:
2638 optval = tp->t_flags & TF_NODELAY;
2639 break;
2640 case TCP_MAXSEG:
2641 optval = tp->t_maxseg;
2642 break;
2643 case TCP_KEEPALIVE:
2644 if (tp->t_keepidle > 0) {
2645 optval = tp->t_keepidle / TCP_RETRANSHZ;
2646 } else {
2647 optval = tcp_keepidle / TCP_RETRANSHZ;
2648 }
2649 break;
2650 case TCP_KEEPINTVL:
2651 if (tp->t_keepintvl > 0) {
2652 optval = tp->t_keepintvl / TCP_RETRANSHZ;
2653 } else {
2654 optval = tcp_keepintvl / TCP_RETRANSHZ;
2655 }
2656 break;
2657 case TCP_KEEPCNT:
2658 if (tp->t_keepcnt > 0) {
2659 optval = tp->t_keepcnt;
2660 } else {
2661 optval = tcp_keepcnt;
2662 }
2663 break;
2664 case TCP_KEEPALIVE_OFFLOAD:
2665 optval = !!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD);
2666 break;
2667 case TCP_NOOPT:
2668 optval = tp->t_flags & TF_NOOPT;
2669 break;
2670 case TCP_NOPUSH:
2671 optval = tp->t_flags & TF_NOPUSH;
2672 break;
2673 case TCP_ENABLE_ECN:
2674 optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
2675 break;
2676 case TCP_ECN_MODE:
2677 if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
2678 optval = ECN_MODE_ENABLE;
2679 } else if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
2680 optval = ECN_MODE_DISABLE;
2681 } else {
2682 optval = ECN_MODE_DEFAULT;
2683 }
2684 break;
2685 case TCP_CONNECTIONTIMEOUT:
2686 optval = tp->t_keepinit / TCP_RETRANSHZ;
2687 break;
2688 case PERSIST_TIMEOUT:
2689 optval = tp->t_persist_timeout / TCP_RETRANSHZ;
2690 break;
2691 case TCP_RXT_CONNDROPTIME:
2692 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ;
2693 break;
2694 case TCP_RXT_FINDROP:
2695 optval = tp->t_flagsext & TF_RXTFINDROP;
2696 break;
2697 case TCP_NOTIMEWAIT:
2698 optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0;
2699 break;
2700 case TCP_FASTOPEN:
2701 if (tp->t_state != TCPS_LISTEN ||
2702 !(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2703 error = ENOTSUP;
2704 break;
2705 }
2706 optval = tfo_enabled(tp);
2707 break;
2708 case TCP_FASTOPEN_FORCE_HEURISTICS:
2709 optval = 0;
2710 break;
2711 case TCP_FASTOPEN_FORCE_ENABLE:
2712 optval = (tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) ? 1 : 0;
2713 break;
2714 case TCP_MEASURE_SND_BW:
2715 optval = tp->t_flagsext & TF_MEASURESNDBW;
2716 break;
2717 case TCP_INFO: {
2718 struct tcp_info ti;
2719
2720 tcp_fill_info(tp, &ti);
2721 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2722 goto done;
2723 /* NOT REACHED */
2724 }
2725 case TCP_CONNECTION_INFO: {
2726 struct tcp_connection_info tci;
2727 tcp_connection_fill_info(tp, &tci);
2728 error = sooptcopyout(sopt, &tci,
2729 sizeof(struct tcp_connection_info));
2730 goto done;
2731 }
2732 case TCP_MEASURE_BW_BURST: {
2733 struct tcp_measure_bw_burst out = {};
2734 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
2735 tp->t_bwmeas == NULL) {
2736 error = EINVAL;
2737 break;
2738 }
2739 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
2740 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
2741 error = sooptcopyout(sopt, &out, sizeof(out));
2742 goto done;
2743 }
2744 case TCP_NOTSENT_LOWAT:
2745 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
2746 optval = tp->t_notsent_lowat;
2747 } else {
2748 optval = 0;
2749 }
2750 break;
2751
2752 case TCP_ENABLE_MSGS:
2753 if (so->so_flags & SOF_ENABLE_MSGS) {
2754 optval = 1;
2755 } else {
2756 optval = 0;
2757 }
2758 break;
2759 case TCP_SENDMOREACKS:
2760 if (tp->t_flagsext & TF_NOSTRETCHACK) {
2761 optval = 1;
2762 } else {
2763 optval = 0;
2764 }
2765 break;
2766 case TCP_DISABLE_BLACKHOLE_DETECTION:
2767 if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) {
2768 optval = 1;
2769 } else {
2770 optval = 0;
2771 }
2772 break;
2773 case TCP_PEER_PID: {
2774 pid_t pid;
2775 error = tcp_lookup_peer_pid_locked(so, &pid);
2776 if (error == 0) {
2777 error = sooptcopyout(sopt, &pid, sizeof(pid));
2778 }
2779 goto done;
2780 }
2781 case TCP_ADAPTIVE_READ_TIMEOUT:
2782 optval = tp->t_adaptive_rtimo;
2783 break;
2784 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2785 optval = tp->t_adaptive_wtimo;
2786 break;
2787 case SO_TRAFFIC_MGT_BACKGROUND:
2788 optval = (so->so_flags1 &
2789 SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
2790 break;
2791 case TCP_NOTIFY_ACKNOWLEDGEMENT: {
2792 struct tcp_notify_ack_complete retid;
2793
2794 if (sopt->sopt_valsize != sizeof(retid)) {
2795 error = EINVAL;
2796 break;
2797 }
2798 bzero(&retid, sizeof(retid));
2799 tcp_get_notify_ack_count(tp, &retid);
2800 if (retid.notify_complete_count > 0) {
2801 tcp_get_notify_ack_ids(tp, &retid);
2802 }
2803
2804 error = sooptcopyout(sopt, &retid, sizeof(retid));
2805 goto done;
2806 }
2807 case TCP_RXT_MINIMUM_TIMEOUT:
2808 optval = tp->t_rxt_minimum_timeout / TCP_RETRANSHZ;
2809 break;
2810 default:
2811 error = ENOPROTOOPT;
2812 break;
2813 }
2814 if (error == 0) {
2815 error = sooptcopyout(sopt, &optval, sizeof optval);
2816 }
2817 break;
2818 }
2819 done:
2820 return error;
2821 }
2822
/*
 * Default TCP send and receive window sizes, used by tcp_attach() when
 * it reserves socket buffer space.  Both are expressed as a multiple of
 * 1448 bytes.  These are obsolescent: this information should be set by
 * the route instead.
 */
u_int32_t tcp_sendspace = 256 * 1448;
u_int32_t tcp_recvspace = 384 * 1448;
2830
2831 /* During attach, the size of socket buffer allocated is limited to
2832 * sb_max in sbreserve. Disallow setting the tcp send and recv space
2833 * to be more than sb_max because that will cause tcp_attach to fail
2834 * (see radar 5713060)
2835 */
2836 static int
2837 sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
2838 int arg2, struct sysctl_req *req)
2839 {
2840 #pragma unused(arg2)
2841 u_int32_t new_value = 0, *space_p = NULL;
2842 int changed = 0, error = 0;
2843 u_quad_t sb_effective_max = (sb_max / (MSIZE + MCLBYTES)) * MCLBYTES;
2844
2845 switch (oidp->oid_number) {
2846 case TCPCTL_SENDSPACE:
2847 space_p = &tcp_sendspace;
2848 break;
2849 case TCPCTL_RECVSPACE:
2850 space_p = &tcp_recvspace;
2851 break;
2852 default:
2853 return EINVAL;
2854 }
2855 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
2856 &new_value, &changed);
2857 if (changed) {
2858 if (new_value > 0 && new_value <= sb_effective_max) {
2859 *space_p = new_value;
2860 SYSCTL_SKMEM_UPDATE_AT_OFFSET(arg2, new_value);
2861 } else {
2862 error = ERANGE;
2863 }
2864 }
2865 return error;
2866 }
2867
2868 #if SYSCTL_SKMEM
2869 SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
2870 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_sendspace,
2871 offsetof(skmem_sysctl, tcp.sendspace), sysctl_tcp_sospace,
2872 "IU", "Maximum outgoing TCP datagram size");
2873 SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
2874 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recvspace,
2875 offsetof(skmem_sysctl, tcp.recvspace), sysctl_tcp_sospace,
2876 "IU", "Maximum incoming TCP datagram size");
2877 #else /* SYSCTL_SKMEM */
2878 SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2879 &tcp_sendspace, 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
2880 SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2881 &tcp_recvspace, 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
2882 #endif /* SYSCTL_SKMEM */
2883
2884 /*
2885 * Attach TCP protocol to socket, allocating
2886 * internet protocol control block, tcp control block,
2887 * bufer space, and entering LISTEN state if to accept connections.
2888 *
2889 * Returns: 0 Success
2890 * in_pcballoc:ENOBUFS
2891 * in_pcballoc:ENOMEM
2892 * in_pcballoc:??? [IPSEC specific]
2893 * soreserve:ENOBUFS
2894 */
2895 static int
2896 tcp_attach(struct socket *so, struct proc *p)
2897 {
2898 struct tcpcb *tp;
2899 struct inpcb *inp;
2900 int error;
2901 #if INET6
2902 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0;
2903 #endif
2904
2905 error = in_pcballoc(so, &tcbinfo, p);
2906 if (error) {
2907 return error;
2908 }
2909
2910 inp = sotoinpcb(so);
2911
2912 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
2913 error = soreserve(so, tcp_sendspace, tcp_recvspace);
2914 if (error) {
2915 return error;
2916 }
2917 }
2918
2919 if (so->so_snd.sb_preconn_hiwat == 0) {
2920 soreserve_preconnect(so, 2048);
2921 }
2922
2923 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
2924 so->so_rcv.sb_flags |= SB_AUTOSIZE;
2925 }
2926 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) {
2927 so->so_snd.sb_flags |= SB_AUTOSIZE;
2928 }
2929
2930 #if INET6
2931 if (isipv6) {
2932 inp->inp_vflag |= INP_IPV6;
2933 inp->in6p_hops = -1; /* use kernel default */
2934 } else
2935 #endif /* INET6 */
2936 inp->inp_vflag |= INP_IPV4;
2937 tp = tcp_newtcpcb(inp);
2938 if (tp == NULL) {
2939 int nofd = so->so_state & SS_NOFDREF; /* XXX */
2940
2941 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
2942 #if INET6
2943 if (isipv6) {
2944 in6_pcbdetach(inp);
2945 } else
2946 #endif /* INET6 */
2947 in_pcbdetach(inp);
2948 so->so_state |= nofd;
2949 return ENOBUFS;
2950 }
2951 if (nstat_collect) {
2952 nstat_tcp_new_pcb(inp);
2953 }
2954 tp->t_state = TCPS_CLOSED;
2955 return 0;
2956 }
2957
2958 /*
2959 * Initiate (or continue) disconnect.
2960 * If embryonic state, just send reset (once).
2961 * If in ``let data drain'' option and linger null, just drop.
2962 * Otherwise (hard), mark socket disconnecting and drop
2963 * current input data; switch states based on user close, and
2964 * send segment to peer (with FIN).
2965 */
2966 static struct tcpcb *
2967 tcp_disconnect(struct tcpcb *tp)
2968 {
2969 struct socket *so = tp->t_inpcb->inp_socket;
2970
2971 if (so->so_rcv.sb_cc != 0 || tp->t_reassqlen != 0) {
2972 return tcp_drop(tp, 0);
2973 }
2974
2975 if (tp->t_state < TCPS_ESTABLISHED) {
2976 tp = tcp_close(tp);
2977 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
2978 tp = tcp_drop(tp, 0);
2979 } else {
2980 soisdisconnecting(so);
2981 sbflush(&so->so_rcv);
2982 tp = tcp_usrclosed(tp);
2983 #if MPTCP
2984 /* A reset has been sent but socket exists, do not send FIN */
2985 if ((so->so_flags & SOF_MP_SUBFLOW) &&
2986 (tp) && (tp->t_mpflags & TMPF_RESET)) {
2987 return tp;
2988 }
2989 #endif
2990 if (tp) {
2991 (void) tcp_output(tp);
2992 }
2993 }
2994 return tp;
2995 }
2996
2997 /*
2998 * User issued close, and wish to trail through shutdown states:
2999 * if never received SYN, just forget it. If got a SYN from peer,
3000 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
3001 * If already got a FIN from peer, then almost done; go to LAST_ACK
3002 * state. In all other cases, have already sent FIN to peer (e.g.
3003 * after PRU_SHUTDOWN), and just have to play tedious game waiting
3004 * for peer to send FIN or not respond to keep-alives, etc.
3005 * We can let the user exit from the close as soon as the FIN is acked.
3006 */
3007 static struct tcpcb *
3008 tcp_usrclosed(struct tcpcb *tp)
3009 {
3010 switch (tp->t_state) {
3011 case TCPS_CLOSED:
3012 case TCPS_LISTEN:
3013 case TCPS_SYN_SENT:
3014 tp = tcp_close(tp);
3015 break;
3016
3017 case TCPS_SYN_RECEIVED:
3018 tp->t_flags |= TF_NEEDFIN;
3019 break;
3020
3021 case TCPS_ESTABLISHED:
3022 DTRACE_TCP4(state__change, void, NULL,
3023 struct inpcb *, tp->t_inpcb,
3024 struct tcpcb *, tp,
3025 int32_t, TCPS_FIN_WAIT_1);
3026 tp->t_state = TCPS_FIN_WAIT_1;
3027 TCP_LOG_CONNECTION_SUMMARY(tp);
3028 break;
3029
3030 case TCPS_CLOSE_WAIT:
3031 DTRACE_TCP4(state__change, void, NULL,
3032 struct inpcb *, tp->t_inpcb,
3033 struct tcpcb *, tp,
3034 int32_t, TCPS_LAST_ACK);
3035 tp->t_state = TCPS_LAST_ACK;
3036 TCP_LOG_CONNECTION_SUMMARY(tp);
3037 break;
3038 }
3039 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
3040 soisdisconnected(tp->t_inpcb->inp_socket);
3041 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
3042 if (tp->t_state == TCPS_FIN_WAIT_2) {
3043 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
3044 TCP_CONN_MAXIDLE(tp));
3045 }
3046 }
3047 return tp;
3048 }
3049
3050 void
3051 tcp_in_cksum_stats(u_int32_t len)
3052 {
3053 tcpstat.tcps_rcv_swcsum++;
3054 tcpstat.tcps_rcv_swcsum_bytes += len;
3055 }
3056
3057 void
3058 tcp_out_cksum_stats(u_int32_t len)
3059 {
3060 tcpstat.tcps_snd_swcsum++;
3061 tcpstat.tcps_snd_swcsum_bytes += len;
3062 }
3063
3064 #if INET6
3065 void
3066 tcp_in6_cksum_stats(u_int32_t len)
3067 {
3068 tcpstat.tcps_rcv6_swcsum++;
3069 tcpstat.tcps_rcv6_swcsum_bytes += len;
3070 }
3071
3072 void
3073 tcp_out6_cksum_stats(u_int32_t len)
3074 {
3075 tcpstat.tcps_snd6_swcsum++;
3076 tcpstat.tcps_snd6_swcsum_bytes += len;
3077 }
3078 #endif /* INET6 */
3079
3080 /*
3081 * When messages are enabled on a TCP socket, the message priority
3082 * is sent as a control message. This function will extract it.
3083 */
3084 int
3085 tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri)
3086 {
3087 struct cmsghdr *cm;
3088
3089 if (control == NULL) {
3090 return EINVAL;
3091 }
3092
3093 for (cm = M_FIRST_CMSGHDR(control);
3094 is_cmsg_valid(control, cm);
3095 cm = M_NXT_CMSGHDR(control, cm)) {
3096 if (cm->cmsg_level == SOL_SOCKET &&
3097 cm->cmsg_type == SCM_MSG_PRIORITY) {
3098 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
3099 return EINVAL;
3100 }
3101 *msgpri = *(uint32_t *)(void *)CMSG_DATA(cm);
3102 if (*msgpri < MSG_PRI_MIN || *msgpri > MSG_PRI_MAX) {
3103 return EINVAL;
3104 }
3105 break;
3106 }
3107 }
3108 return 0;
3109 }
3110
3111 int
3112 tcp_get_mpkl_send_info(struct mbuf *control,
3113 struct so_mpkl_send_info *mpkl_send_info)
3114 {
3115 struct cmsghdr *cm;
3116
3117 if (control == NULL || mpkl_send_info == NULL) {
3118 return EINVAL;
3119 }
3120
3121 for (cm = M_FIRST_CMSGHDR(control); cm;
3122 cm = M_NXT_CMSGHDR(control, cm)) {
3123 if (cm->cmsg_len < sizeof(struct cmsghdr) ||
3124 cm->cmsg_len > control->m_len) {
3125 return EINVAL;
3126 }
3127 if (cm->cmsg_level != SOL_SOCKET ||
3128 cm->cmsg_type != SCM_MPKL_SEND_INFO) {
3129 continue;
3130 }
3131 if (cm->cmsg_len != CMSG_LEN(sizeof(struct so_mpkl_send_info))) {
3132 return EINVAL;
3133 }
3134 memcpy(mpkl_send_info, CMSG_DATA(cm),
3135 sizeof(struct so_mpkl_send_info));
3136 return 0;
3137 }
3138 return ENOMSG;
3139 }