1/*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
62 */
63
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <sys/sysctl.h>
69#include <sys/mbuf.h>
70#include <sys/domain.h>
71#if XNU_TARGET_OS_OSX
72#include <sys/kasl.h>
73#endif /* XNU_TARGET_OS_OSX */
74#include <sys/priv.h>
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/protosw.h>
78#include <sys/syslog.h>
79
80#include <net/if.h>
81#include <net/route.h>
82#include <net/ntstat.h>
83#include <net/content_filter.h>
84#include <net/multi_layer_pkt_log.h>
85
86#include <netinet/in.h>
87#include <netinet/in_systm.h>
88#include <netinet/ip6.h>
89#include <netinet/in_pcb.h>
90#include <netinet6/in6_pcb.h>
91#include <netinet/in_var.h>
92#include <netinet/ip_var.h>
93#include <netinet6/ip6_var.h>
94#include <netinet/tcp.h>
95#include <netinet/tcp_fsm.h>
96#include <netinet/tcp_seq.h>
97#include <netinet/tcp_timer.h>
98#include <netinet/tcp_var.h>
99#include <netinet/tcpip.h>
100#include <netinet/tcp_cc.h>
101#include <netinet/tcp_log.h>
102#include <mach/sdt.h>
103#if TCPDEBUG
104#include <netinet/tcp_debug.h>
105#endif
106#if MPTCP
107#include <netinet/mptcp_var.h>
108#endif /* MPTCP */
109
110#if IPSEC
111#include <netinet6/ipsec.h>
112#endif /*IPSEC*/
113
114#if FLOW_DIVERT
115#include <netinet/flow_divert.h>
116#endif /* FLOW_DIVERT */
117
118errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
119
120int tcp_sysctl_info(struct sysctl_oid *, void *, int, struct sysctl_req *);
121static void tcp_connection_fill_info(struct tcpcb *tp,
122 struct tcp_connection_info *tci);
123static int tcp_get_mpkl_send_info(struct mbuf *, struct so_mpkl_send_info *);
124
125/*
126 * TCP protocol interface to socket abstraction.
127 */
128static int tcp_attach(struct socket *, struct proc *);
129static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
130static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
131static int tcp6_usr_connect(struct socket *, struct sockaddr *,
132 struct proc *);
133static struct tcpcb *tcp_disconnect(struct tcpcb *);
134static struct tcpcb *tcp_usrclosed(struct tcpcb *);
135extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
136
137#if TCPDEBUG
138#define TCPDEBUG0 int ostate = 0
139#define TCPDEBUG1() ostate = tp ? tp->t_state : 0
140#define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
141 tcp_trace(TA_USER, ostate, tp, 0, 0, req)
142#else
143#define TCPDEBUG0
144#define TCPDEBUG1()
145#define TCPDEBUG2(req)
146#endif
147
148SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info,
149 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
150 0, 0, tcp_sysctl_info, "S", "TCP info per tuple");
151
152/*
153 * TCP attaches to socket via pru_attach(), reserving space,
154 * and an internet control block.
155 *
156 * Returns: 0 Success
157 * EISCONN
158 * tcp_attach:ENOBUFS
159 * tcp_attach:ENOMEM
160 * tcp_attach:??? [IPSEC specific]
161 */
162static int
163tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
164{
165 int error;
166 struct inpcb *inp = sotoinpcb(so);
167 struct tcpcb *tp = 0;
168 TCPDEBUG0;
169
170 TCPDEBUG1();
171 if (inp) {
172 error = EISCONN;
173 goto out;
174 }
175
176 error = tcp_attach(so, p);
177 if (error) {
178 goto out;
179 }
180
181 if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
182 so->so_linger = (short)(TCP_LINGERTIME * hz);
183 }
184 tp = sototcpcb(so);
185out:
186 TCPDEBUG2(PRU_ATTACH);
187 return error;
188}
189
190/*
191 * pru_detach() detaches the TCP protocol from the socket.
192 * If the protocol state is non-embryonic, we can't
193 * do this directly: we have to initiate a pru_disconnect(),
194 * which may finish later; embryonic TCB's can just
195 * be discarded here.
196 */
197static int
198tcp_usr_detach(struct socket *so)
199{
200 int error = 0;
201 struct inpcb *inp = sotoinpcb(so);
202 struct tcpcb *tp;
203 TCPDEBUG0;
204
205 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
206 return EINVAL; /* XXX */
207 }
208 socket_lock_assert_owned(so);
209 tp = intotcpcb(inp);
210 /* In case we got disconnected from the peer */
211 if (tp == NULL) {
212 goto out;
213 }
214 TCPDEBUG1();
215
216 calculate_tcp_clock();
217
218 tp = tcp_disconnect(tp);
219out:
220 TCPDEBUG2(PRU_DETACH);
221 return error;
222}
223
224#if NECP
225#define COMMON_START_ALLOW_FLOW_DIVERT(allow) TCPDEBUG0; \
226do { \
227 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \
228 return (EINVAL); \
229 if (!(allow) && necp_socket_should_use_flow_divert(inp)) \
230 return (EPROTOTYPE); \
231 tp = intotcpcb(inp); \
232 TCPDEBUG1(); \
233 calculate_tcp_clock(); \
234} while (0)
235#else /* NECP */
236#define COMMON_START_ALLOW_FLOW_DIVERT(allow) TCPDEBUG0; \
237do { \
238 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \
239 return (EINVAL); \
240 tp = intotcpcb(inp); \
241 TCPDEBUG1(); \
242 calculate_tcp_clock(); \
243} while (0)
244#endif /* !NECP */
245
246#define COMMON_START() COMMON_START_ALLOW_FLOW_DIVERT(false)
247#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
248
249
250/*
251 * Give the socket an address.
252 *
253 * Returns: 0 Success
254 * EINVAL Invalid argument [COMMON_START]
255 * EAFNOSUPPORT Address family not supported
256 * in_pcbbind:EADDRNOTAVAIL Address not available.
257 * in_pcbbind:EINVAL Invalid argument
258 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
259 * in_pcbbind:EACCES Permission denied
260 * in_pcbbind:EADDRINUSE Address in use
261 * in_pcbbind:EAGAIN Resource unavailable, try again
262 * in_pcbbind:EPERM Operation not permitted
263 */
264static int
265tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
266{
267 int error = 0;
268 struct inpcb *inp = sotoinpcb(so);
269 struct tcpcb *tp;
270 struct sockaddr_in *sinp;
271
272 COMMON_START_ALLOW_FLOW_DIVERT(true);
273
274 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
275 error = EAFNOSUPPORT;
276 goto out;
277 }
278
279 /*
280 * Must check for multicast addresses and disallow binding
281 * to them.
282 */
283 sinp = (struct sockaddr_in *)(void *)nam;
284 if (sinp->sin_family == AF_INET &&
285 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
286 error = EAFNOSUPPORT;
287 goto out;
288 }
289 error = in_pcbbind(inp, nam, p);
290 if (error) {
291 goto out;
292 }
293
294#if NECP
295 /* Update NECP client with bind result if not in middle of connect */
296	if (!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS) &&
297 !uuid_is_null(inp->necp_client_uuid)) {
298 socket_unlock(so, 0);
299 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
300 socket_lock(so, 0);
301 }
302#endif /* NECP */
303
304 COMMON_END(PRU_BIND);
305}
306
307static int
308tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
309{
310 int error = 0;
311 struct inpcb *inp = sotoinpcb(so);
312 struct tcpcb *tp;
313 struct sockaddr_in6 *sin6p;
314
315 COMMON_START_ALLOW_FLOW_DIVERT(true);
316
317 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
318 error = EAFNOSUPPORT;
319 goto out;
320 }
321
322 /*
323 * Must check for multicast addresses and disallow binding
324 * to them.
325 */
326 sin6p = (struct sockaddr_in6 *)(void *)nam;
327 if (sin6p->sin6_family == AF_INET6 &&
328 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
329 error = EAFNOSUPPORT;
330 goto out;
331 }
332 inp->inp_vflag &= ~INP_IPV4;
333 inp->inp_vflag |= INP_IPV6;
334 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
335 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
336 inp->inp_vflag |= INP_IPV4;
337 } else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
338 struct sockaddr_in sin;
339
340 in6_sin6_2_sin(&sin, sin6p);
341 inp->inp_vflag |= INP_IPV4;
342 inp->inp_vflag &= ~INP_IPV6;
343 error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
344 goto out;
345 }
346 }
347 error = in6_pcbbind(inp, nam, p);
348 if (error) {
349 goto out;
350 }
351 COMMON_END(PRU_BIND);
352}
353
354/*
355 * Prepare to accept connections.
356 *
357 * Returns: 0 Success
358 * EINVAL [COMMON_START]
359 * in_pcbbind:EADDRNOTAVAIL Address not available.
360 * in_pcbbind:EINVAL Invalid argument
361 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
362 * in_pcbbind:EACCES Permission denied
363 * in_pcbbind:EADDRINUSE Address in use
364 * in_pcbbind:EAGAIN Resource unavailable, try again
365 * in_pcbbind:EPERM Operation not permitted
366 */
367static int
368tcp_usr_listen(struct socket *so, struct proc *p)
369{
370 int error = 0;
371 struct inpcb *inp = sotoinpcb(so);
372 struct tcpcb *tp;
373
374 COMMON_START_ALLOW_FLOW_DIVERT(true);
375 if (inp->inp_lport == 0) {
376 error = in_pcbbind(inp, NULL, p);
377 }
378 if (error == 0) {
379 tp->t_state = TCPS_LISTEN;
380 }
381 TCP_LOG_LISTEN(tp, error);
382 COMMON_END(PRU_LISTEN);
383}
384
385static int
386tcp6_usr_listen(struct socket *so, struct proc *p)
387{
388 int error = 0;
389 struct inpcb *inp = sotoinpcb(so);
390 struct tcpcb *tp;
391
392 COMMON_START_ALLOW_FLOW_DIVERT(true);
393 if (inp->inp_lport == 0) {
394 inp->inp_vflag &= ~INP_IPV4;
395 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
396 inp->inp_vflag |= INP_IPV4;
397 }
398 error = in6_pcbbind(inp, NULL, p);
399 }
400 if (error == 0) {
401 tp->t_state = TCPS_LISTEN;
402 }
403 TCP_LOG_LISTEN(tp, error);
404 COMMON_END(PRU_LISTEN);
405}
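#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch (not part of this file's implementation) of the
 * dual-stack listener behaviour handled above: when a wildcard AF_INET6
 * listener is created without IPV6_V6ONLY, tcp6_usr_listen() keeps
 * INP_IPV4 set so the same socket also accepts IPv4 (v4-mapped)
 * connections; setting IPV6_V6ONLY before listen() restricts it to IPv6.
 * The port number and backlog are assumed example values.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

static int
example_listen_v6(int v6only, uint16_t port)
{
	struct sockaddr_in6 sin6;
	int s = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);

	if (s < 0) {
		return -1;
	}
	if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY,
	    &v6only, sizeof(v6only)) < 0) {
		goto fail;
	}

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_len = sizeof(sin6);
	sin6.sin6_port = htons(port);
	sin6.sin6_addr = in6addr_any;	/* wildcard bind -> tcp6_usr_bind() */

	if (bind(s, (struct sockaddr *)&sin6, sizeof(sin6)) < 0 ||
	    listen(s, 128) < 0) {
		goto fail;
	}
	return s;
fail:
	close(s);
	return -1;
}
#endif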
406
407static int
408tcp_connect_complete(struct socket *so)
409{
410 struct tcpcb *tp = sototcpcb(so);
411 struct inpcb *inp = sotoinpcb(so);
412 int error = 0;
413
414 /* TFO delays the tcp_output until later, when the app calls write() */
415 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
416 if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, 0, NULL, NULL, NULL, NULL)) {
417 TCP_LOG_DROP_NECP(NULL, NULL, tp, true);
418 return EHOSTUNREACH;
419 }
420
421 /* Initialize enough state so that we can actually send data */
422 tcp_mss(tp, -1, IFSCOPE_NONE);
423 tp->snd_wnd = tp->t_maxseg;
424 tp->max_sndwnd = tp->snd_wnd;
425 } else {
426 error = tcp_output(tp);
427 }
428
429#if NECP
430 /* Update NECP client with connected five-tuple */
431 if (error == 0 && !uuid_is_null(inp->necp_client_uuid)) {
432 socket_unlock(so, 0);
433 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
434 socket_lock(so, 0);
435 }
436#endif /* NECP */
437
438 return error;
439}
440
441/*
442 * Initiate connection to peer.
443 * Create a template for use in transmissions on this connection.
444 * Enter SYN_SENT state, and mark socket as connecting.
445 * Start keep-alive timer, and seed output sequence space.
446 * Send initial segment on connection.
447 */
448static int
449tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
450{
451 int error = 0;
452 struct inpcb *inp = sotoinpcb(so);
453 struct tcpcb *tp;
454 struct sockaddr_in *sinp;
455
456 TCPDEBUG0;
457 if (inp == NULL) {
458 return EINVAL;
459 } else if (inp->inp_state == INPCB_STATE_DEAD) {
460 if (so->so_error) {
461 error = so->so_error;
462 so->so_error = 0;
463 return error;
464 } else {
465 return EINVAL;
466 }
467 }
468#if NECP
469#if CONTENT_FILTER
470 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
471 if (error != 0) {
472 return error;
473 }
474#endif /* CONTENT_FILTER */
475#if FLOW_DIVERT
476 if (necp_socket_should_use_flow_divert(inp)) {
477 error = flow_divert_pcb_init(so);
478 if (error == 0) {
479 error = flow_divert_connect_out(so, nam, p);
480 }
481 return error;
482 }
483#endif /* FLOW_DIVERT */
484#endif /* NECP */
485 tp = intotcpcb(inp);
486 TCPDEBUG1();
487
488 calculate_tcp_clock();
489
490 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
491 error = EAFNOSUPPORT;
492 goto out;
493 }
494 /*
495 * Must disallow TCP ``connections'' to multicast addresses.
496 */
497 sinp = (struct sockaddr_in *)(void *)nam;
498 if (sinp->sin_family == AF_INET
499 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
500 error = EAFNOSUPPORT;
501 goto out;
502 }
503
504 if ((error = tcp_connect(tp, nam, p)) != 0) {
505 TCP_LOG_CONNECT(tp, true, error);
506 goto out;
507 }
508
509 error = tcp_connect_complete(so);
510
511 TCP_LOG_CONNECT(tp, true, error);
512
513 COMMON_END(PRU_CONNECT);
514}
515
516static int
517tcp_usr_connectx_common(struct socket *so, int af,
518 struct sockaddr *src, struct sockaddr *dst,
519 struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
520 uint32_t flags, void *arg, uint32_t arglen, struct uio *auio,
521 user_ssize_t *bytes_written)
522{
523#pragma unused(aid, flags, arg, arglen)
524 struct inpcb *inp = sotoinpcb(so);
525 int error = 0;
526 user_ssize_t datalen = 0;
527
528 if (inp == NULL) {
529 return EINVAL;
530 }
531
532 VERIFY(dst != NULL);
533
534 ASSERT(!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS));
535 inp->inp_flags2 |= INP2_CONNECT_IN_PROGRESS;
536
537#if NECP
538 inp_update_necp_policy(inp, src, dst, ifscope);
539#endif /* NECP */
540
541 if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) &&
542 (tcp_fastopen & TCP_FASTOPEN_CLIENT)) {
543 sototcpcb(so)->t_flagsext |= TF_FASTOPEN;
544 }
545
546 /* bind socket to the specified interface, if requested */
547 if (ifscope != IFSCOPE_NONE &&
548 (error = inp_bindif(inp, ifscope, NULL)) != 0) {
549 goto done;
550 }
551
552 /* if source address and/or port is specified, bind to it */
553 if (src != NULL) {
554 error = sobindlock(so, src, 0); /* already locked */
555 if (error != 0) {
556 goto done;
557 }
558 }
559
560 switch (af) {
561 case AF_INET:
562 error = tcp_usr_connect(so, dst, p);
563 break;
564 case AF_INET6:
565 error = tcp6_usr_connect(so, dst, p);
566 break;
567 default:
568 VERIFY(0);
569 /* NOTREACHED */
570 }
571
572 if (error != 0) {
573 goto done;
574 }
575
576 /* if there is data, copy it */
577 if (auio != NULL) {
578 socket_unlock(so, 0);
579
580 VERIFY(bytes_written != NULL);
581
582 datalen = uio_resid(auio);
583 error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
584 (uio_t)auio, NULL, NULL, 0);
585 socket_lock(so, 0);
586
587 if (error == 0 || error == EWOULDBLOCK) {
588 *bytes_written = datalen - uio_resid(auio);
589 }
590
591 /*
592 * sosend returns EWOULDBLOCK if it's a non-blocking
593 * socket or a timeout occurred (this allows the amount
594 * of queued data to be returned through sendit()).
595 *
596 * However, connectx() returns EINPROGRESS in case of a
597 * blocking socket. So we change the return value here.
598 */
599 if (error == EWOULDBLOCK) {
600 error = EINPROGRESS;
601 }
602 }
603
604 if (error == 0 && pcid != NULL) {
605 *pcid = 1; /* there is only one connection in regular TCP */
606 }
607done:
608 if (error && error != EINPROGRESS) {
609 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
610 }
611
612 inp->inp_flags2 &= ~INP2_CONNECT_IN_PROGRESS;
613 return error;
614}
615
616static int
617tcp_usr_connectx(struct socket *so, struct sockaddr *src,
618 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
619 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
620 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
621{
622 return tcp_usr_connectx_common(so, AF_INET, src, dst, p, ifscope, aid,
623 pcid, flags, arg, arglen, uio, bytes_written);
624}
625
626static int
627tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
628{
629 int error = 0;
630 struct inpcb *inp = sotoinpcb(so);
631 struct tcpcb *tp;
632 struct sockaddr_in6 *sin6p;
633
634 TCPDEBUG0;
635 if (inp == NULL) {
636 return EINVAL;
637 } else if (inp->inp_state == INPCB_STATE_DEAD) {
638 if (so->so_error) {
639 error = so->so_error;
640 so->so_error = 0;
641 return error;
642 } else {
643 return EINVAL;
644 }
645 }
646#if NECP
647#if CONTENT_FILTER
648 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
649 if (error != 0) {
650 return error;
651 }
652#endif /* CONTENT_FILTER */
653#if FLOW_DIVERT
654 if (necp_socket_should_use_flow_divert(inp)) {
655 error = flow_divert_pcb_init(so);
656 if (error == 0) {
657 error = flow_divert_connect_out(so, nam, p);
658 }
659 return error;
660 }
661#endif /* FLOW_DIVERT */
662#endif /* NECP */
663
664 tp = intotcpcb(inp);
665 TCPDEBUG1();
666
667 calculate_tcp_clock();
668
669 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
670 error = EAFNOSUPPORT;
671 goto out;
672 }
673
674 /*
675 * Must disallow TCP ``connections'' to multicast addresses.
676 */
677 sin6p = (struct sockaddr_in6 *)(void *)nam;
678 if (sin6p->sin6_family == AF_INET6
679 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
680 error = EAFNOSUPPORT;
681 goto out;
682 }
683
684 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
685 struct sockaddr_in sin;
686
687 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
688 error = EINVAL;
689 goto out;
690 }
691
692 in6_sin6_2_sin(&sin, sin6p);
693 /*
694 * Must disallow TCP ``connections'' to multicast addresses.
695 */
696 if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
697 error = EAFNOSUPPORT;
698 goto out;
699 }
700 inp->inp_vflag |= INP_IPV4;
701 inp->inp_vflag &= ~INP_IPV6;
702 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
703 TCP_LOG_CONNECT(tp, true, error);
704 goto out;
705 }
706
707 error = tcp_connect_complete(so);
708 goto out;
709 }
710 inp->inp_vflag &= ~INP_IPV4;
711 inp->inp_vflag |= INP_IPV6;
712 if ((error = tcp6_connect(tp, nam, p)) != 0) {
713 TCP_LOG_CONNECT(tp, true, error);
714 goto out;
715 }
716
717 error = tcp_connect_complete(so);
718
719 TCP_LOG_CONNECT(tp, true, error);
720
721 COMMON_END(PRU_CONNECT);
722}
723
724static int
725tcp6_usr_connectx(struct socket *so, struct sockaddr*src,
726 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
727 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
728 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
729{
730 return tcp_usr_connectx_common(so, AF_INET6, src, dst, p, ifscope, aid,
731 pcid, flags, arg, arglen, uio, bytes_written);
732}
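#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch of how the connectx() entry points above are reached
 * from user space, assuming a reachable IPv4 host/port and request data
 * that is safe to send idempotently.  Passing an iovec together with
 * CONNECT_DATA_IDEMPOTENT is what allows tcp_usr_connectx_common() to
 * queue data before the handshake completes (TCP Fast Open, when the
 * client side of TFO is enabled).
 */
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

static int
example_connectx(const char *ip, uint16_t port)
{
	static const char req[] = "HEAD / HTTP/1.0\r\n\r\n";
	struct sockaddr_in dst;
	sa_endpoints_t sae;
	struct iovec iov;
	sae_connid_t cid;
	size_t sent = 0;
	int s;

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_len = sizeof(dst);
	dst.sin_port = htons(port);
	if (inet_pton(AF_INET, ip, &dst.sin_addr) != 1) {
		return -1;
	}

	memset(&sae, 0, sizeof(sae));
	sae.sae_dstaddr = (struct sockaddr *)&dst;
	sae.sae_dstaddrlen = sizeof(dst);

	iov.iov_base = (void *)req;
	iov.iov_len = sizeof(req) - 1;

	s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (s < 0) {
		return -1;
	}
	/* Data is idempotent, so it may ride on the SYN when TFO is usable. */
	if (connectx(s, &sae, SAE_ASSOCID_ANY, CONNECT_DATA_IDEMPOTENT,
	    &iov, 1, &sent, &cid) < 0) {
		close(s);
		return -1;
	}
	/* 'sent' bytes were queued/sent as part of the connection attempt. */
	return s;
}
#endif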
733
734/*
735 * Initiate disconnect from peer.
736 * If connection never passed embryonic stage, just drop;
737 * else if we don't need to let data drain, we can just drop anyway;
738 * else have to begin TCP shutdown process: mark socket disconnecting,
739 * drain unread data, state switch to reflect user close, and
740 * send segment (e.g. FIN) to peer. Socket will be really disconnected
741 * when peer sends FIN and acks ours.
742 *
743 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
744 */
745static int
746tcp_usr_disconnect(struct socket *so)
747{
748 int error = 0;
749 struct inpcb *inp = sotoinpcb(so);
750 struct tcpcb *tp;
751
752 socket_lock_assert_owned(so);
753 COMMON_START();
754 /* In case we got disconnected from the peer */
755 if (tp == NULL) {
756 goto out;
757 }
758 tp = tcp_disconnect(tp);
759 COMMON_END(PRU_DISCONNECT);
760}
761
762/*
763 * User-protocol pru_disconnectx callback.
764 */
765static int
766tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
767{
768#pragma unused(cid)
769 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
770 return EINVAL;
771 }
772
773 return tcp_usr_disconnect(so);
774}
775
776/*
777 * Accept a connection. Essentially all the work is
778 * done at higher levels; just return the address
779 * of the peer, storing through addr.
780 */
781static int
782tcp_usr_accept(struct socket *so, struct sockaddr **nam)
783{
784 int error = 0;
785 struct inpcb *inp = sotoinpcb(so);
786 struct tcpcb *tp = NULL;
787 TCPDEBUG0;
788
789 in_getpeeraddr(so, nam);
790
791 if (so->so_state & SS_ISDISCONNECTED) {
792 error = ECONNABORTED;
793 goto out;
794 }
795 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
796 return EINVAL;
797 }
798#if NECP
799 else if (necp_socket_should_use_flow_divert(inp)) {
800 return EPROTOTYPE;
801 }
802
803#endif /* NECP */
804
805 tp = intotcpcb(inp);
806 TCPDEBUG1();
807
808 TCP_LOG_ACCEPT(tp, 0);
809
810 calculate_tcp_clock();
811
812 COMMON_END(PRU_ACCEPT);
813}
814
815static int
816tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
817{
818 int error = 0;
819 struct inpcb *inp = sotoinpcb(so);
820 struct tcpcb *tp = NULL;
821 TCPDEBUG0;
822
823 if (so->so_state & SS_ISDISCONNECTED) {
824 error = ECONNABORTED;
825 goto out;
826 }
827 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
828 return EINVAL;
829 }
830#if NECP
831 else if (necp_socket_should_use_flow_divert(inp)) {
832 return EPROTOTYPE;
833 }
834
835#endif /* NECP */
836
837 tp = intotcpcb(inp);
838 TCPDEBUG1();
839
840 TCP_LOG_ACCEPT(tp, 0);
841
842 calculate_tcp_clock();
843
844 in6_mapped_peeraddr(so, nam);
845 COMMON_END(PRU_ACCEPT);
846}
847
848/*
849 * Mark the connection as being incapable of further output.
850 *
851 * Returns: 0 Success
852 * EINVAL [COMMON_START]
853 * tcp_output:EADDRNOTAVAIL
854 * tcp_output:ENOBUFS
855 * tcp_output:EMSGSIZE
856 * tcp_output:EHOSTUNREACH
857 * tcp_output:ENETUNREACH
858 * tcp_output:ENETDOWN
859 * tcp_output:ENOMEM
860 * tcp_output:EACCES
861 * tcp_output:EMSGSIZE
862 * tcp_output:ENOBUFS
863 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
864 */
865static int
866tcp_usr_shutdown(struct socket *so)
867{
868 int error = 0;
869 struct inpcb *inp = sotoinpcb(so);
870 struct tcpcb *tp;
871
872 TCPDEBUG0;
873 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
874 return EINVAL;
875 }
876
877 socantsendmore(so);
878
879 /*
880 * In case we got disconnected from the peer, or if this is
881 * a socket that is to be flow-diverted (but not yet).
882 */
883 tp = intotcpcb(inp);
884 TCPDEBUG1();
885
886 if (tp == NULL
887#if NECP
888 || (necp_socket_should_use_flow_divert(inp))
889#endif /* NECP */
890 ) {
891 if (tp != NULL) {
892 error = EPROTOTYPE;
893 }
894 goto out;
895 }
896
897 calculate_tcp_clock();
898
899 tp = tcp_usrclosed(tp);
900#if MPTCP
901 /* A reset has been sent but socket exists, do not send FIN */
902 if ((so->so_flags & SOF_MP_SUBFLOW) &&
903 (tp) && (tp->t_mpflags & TMPF_RESET)) {
904 goto out;
905 }
906#endif
907#if CONTENT_FILTER
908 /* Don't send a FIN yet */
909 if (tp && !(so->so_state & SS_ISDISCONNECTED) &&
910 cfil_sock_data_pending(&so->so_snd)) {
911 goto out;
912 }
913#endif /* CONTENT_FILTER */
914 if (tp) {
915 error = tcp_output(tp);
916 }
917 COMMON_END(PRU_SHUTDOWN);
918}
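#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch of the half-close that lands in tcp_usr_shutdown()
 * above: shutdown(SHUT_WR) queues a FIN (via tcp_usrclosed()/tcp_output())
 * while the receive side stays open, so the peer's remaining data can
 * still be read until it closes its end.  The descriptor is an assumed,
 * already-connected TCP socket.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

static ssize_t
example_half_close_and_drain(int s, char *buf, size_t buflen)
{
	ssize_t n = 0, total = 0;

	if (shutdown(s, SHUT_WR) < 0) {	/* send FIN, keep reading */
		return -1;
	}
	while ((size_t)total < buflen &&
	    (n = read(s, buf + total, buflen - (size_t)total)) > 0) {
		total += n;
	}
	return (n < 0) ? -1 : total;
}
#endif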
919
920/*
921 * After a receive, possibly send window update to peer.
922 */
923static int
924tcp_usr_rcvd(struct socket *so, int flags)
925{
926 int error = 0;
927 struct inpcb *inp = sotoinpcb(so);
928 struct tcpcb *tp;
929
930 COMMON_START();
931 /* In case we got disconnected from the peer */
932 if (tp == NULL) {
933 goto out;
934 }
935 tcp_sbrcv_trim(tp, &so->so_rcv);
936
937 if (flags & MSG_WAITALL) {
938 tp->t_flags |= TF_ACKNOW;
939 }
940
941 /*
942 * This tcp_output is solely there to trigger window-updates.
943 * However, we really do not want these window-updates while we
944 * are still in SYN_SENT or SYN_RECEIVED.
945 */
946 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
947 tcp_output(tp);
948 }
949
950#if CONTENT_FILTER
951 cfil_sock_buf_update(&so->so_rcv);
952#endif /* CONTENT_FILTER */
953
954 COMMON_END(PRU_RCVD);
955}
956
957/*
958 * Do a send by putting data in output queue and updating urgent
959 * marker if URG set. Possibly send more data. Unlike the other
960 * pru_*() routines, the mbuf chains are our responsibility. We
961 * must either enqueue them or free them. The other pru_* routines
962 * generally are caller-frees.
963 *
964 * Returns: 0 Success
965 * ECONNRESET
966 * EINVAL
967 * ENOBUFS
968 * tcp_connect:EADDRINUSE Address in use
969 * tcp_connect:EADDRNOTAVAIL Address not available.
970 * tcp_connect:EINVAL Invalid argument
971 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
972 * tcp_connect:EACCES Permission denied
973 * tcp_connect:EAGAIN Resource unavailable, try again
974 * tcp_connect:EPERM Operation not permitted
975 * tcp_output:EADDRNOTAVAIL
976 * tcp_output:ENOBUFS
977 * tcp_output:EMSGSIZE
978 * tcp_output:EHOSTUNREACH
979 * tcp_output:ENETUNREACH
980 * tcp_output:ENETDOWN
981 * tcp_output:ENOMEM
982 * tcp_output:EACCES
983 * tcp_output:EMSGSIZE
984 * tcp_output:ENOBUFS
985 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
986 * tcp6_connect:??? [IPV6 only]
987 */
988static int
989tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
990 struct sockaddr *nam, struct mbuf *control, struct proc *p)
991{
992 int error = 0;
993 struct inpcb *inp = sotoinpcb(so);
994 struct tcpcb *tp;
995 uint32_t mpkl_len = 0; /* length of mbuf chain */
996 uint32_t mpkl_seq; /* sequence number where new data is added */
997 struct so_mpkl_send_info mpkl_send_info = {};
998
999 int isipv6;
1000 TCPDEBUG0;
1001
1002 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD
1003#if NECP
1004 || (necp_socket_should_use_flow_divert(inp))
1005#endif /* NECP */
1006 ) {
1007 /*
1008 * OOPS! we lost a race, the TCP session got reset after
1009 * we checked SS_CANTSENDMORE, e.g. while doing uiomove or taking a
1010 * network interrupt in the non-splnet() section of sosend().
1011 */
1012 if (m != NULL) {
1013 m_freem(m);
1014 }
1015 if (control != NULL) {
1016 m_freem(control);
1017 control = NULL;
1018 }
1019
1020 if (inp == NULL) {
1021 error = ECONNRESET; /* XXX EPIPE? */
1022 } else {
1023 error = EPROTOTYPE;
1024 }
1025 tp = NULL;
1026 TCPDEBUG1();
1027 goto out;
1028 }
1029 isipv6 = nam && nam->sa_family == AF_INET6;
1030 tp = intotcpcb(inp);
1031 TCPDEBUG1();
1032
1033 calculate_tcp_clock();
1034
1035 if (net_mpklog_enabled) {
1036 mpkl_seq = tp->snd_una + so->so_snd.sb_cc;
1037 if (m) {
1038 mpkl_len = m_length(m);
1039 }
1040 if (so->so_flags1 & SOF1_MPKL_SEND_INFO) {
1041 uuid_copy(mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
1042 mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
1043 }
1044 }
1045
1046 if (control != NULL) {
1047 if (control->m_len > 0 && net_mpklog_enabled) {
1048 error = tcp_get_mpkl_send_info(control, &mpkl_send_info);
1049 /*
1050 * Interpretation of the returned code:
1051 * 0: client wants us to use value passed in SCM_MPKL_SEND_INFO
1052 * 1: SCM_MPKL_SEND_INFO was not present
1053 * other: failure
1054 */
1055 if (error != 0 && error != ENOMSG) {
1056 m_freem(control);
1057 if (m != NULL) {
1058 m_freem(m);
1059 }
1060 control = NULL;
1061 m = NULL;
1062 goto out;
1063 }
1064 }
1065 /*
1066 * Silently drop unsupported ancillary data messages
1067 */
1068 m_freem(control);
1069 control = NULL;
1070 }
1071
1072	/* MPTCP subflow socket buffers must not be compressed */
1073 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) ||
1074 (so->so_snd.sb_flags & SB_NOCOMPRESS));
1075
1076 if (!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
1077 sbappendstream(&so->so_snd, m);
1078
1079 if (nam && tp->t_state < TCPS_SYN_SENT) {
1080 /*
1081 * Do implied connect if not yet connected,
1082 * initialize window to default value, and
1083 * initialize maxseg/maxopd using peer's cached
1084 * MSS.
1085 */
1086 if (isipv6) {
1087 error = tcp6_connect(tp, nam, p);
1088 } else {
1089 error = tcp_connect(tp, nam, p);
1090 }
1091 if (error) {
1092 TCP_LOG_CONNECT(tp, true, error);
1093 goto out;
1094 }
1095 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1096 tp->max_sndwnd = tp->snd_wnd;
1097 tcp_mss(tp, -1, IFSCOPE_NONE);
1098
1099 TCP_LOG_CONNECT(tp, true, error);
1100
1101 /* The sequence number of the data is past the SYN */
1102 mpkl_seq = tp->iss + 1;
1103 }
1104
1105 if (flags & PRUS_EOF) {
1106 /*
1107 * Close the send side of the connection after
1108 * the data is sent.
1109 */
1110 socantsendmore(so);
1111 tp = tcp_usrclosed(tp);
1112 }
1113 if (tp != NULL) {
1114 if (flags & PRUS_MORETOCOME) {
1115 tp->t_flags |= TF_MORETOCOME;
1116 }
1117 error = tcp_output(tp);
1118 if (flags & PRUS_MORETOCOME) {
1119 tp->t_flags &= ~TF_MORETOCOME;
1120 }
1121 }
1122 } else {
1123 if (sbspace(&so->so_snd) == 0) {
1124 /* if no space is left in sockbuf,
1125 * do not try to squeeze in OOB traffic */
1126 m_freem(m);
1127 error = ENOBUFS;
1128 goto out;
1129 }
1130 /*
1131 * According to RFC961 (Assigned Protocols),
1132 * the urgent pointer points to the last octet
1133 * of urgent data. We continue, however,
1134 * to consider it to indicate the first octet
1135 * of data past the urgent section.
1136 * Otherwise, snd_up should be one lower.
1137 */
1138 sbappendstream(&so->so_snd, m);
1139 if (nam && tp->t_state < TCPS_SYN_SENT) {
1140 /*
1141 * Do implied connect if not yet connected,
1142 * initialize window to default value, and
1143 * initialize maxseg/maxopd using peer's cached
1144 * MSS.
1145 */
1146 if (isipv6) {
1147 error = tcp6_connect(tp, nam, p);
1148 } else {
1149 error = tcp_connect(tp, nam, p);
1150 }
1151 if (error) {
1152 TCP_LOG_CONNECT(tp, true, error);
1153 goto out;
1154 }
1155 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1156 tp->max_sndwnd = tp->snd_wnd;
1157 tcp_mss(tp, -1, IFSCOPE_NONE);
1158
1159 TCP_LOG_CONNECT(tp, true, error);
1160 }
1161 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
1162 tp->t_flagsext |= TF_FORCE;
1163 error = tcp_output(tp);
1164 tp->t_flagsext &= ~TF_FORCE;
1165 }
1166
1167 if (net_mpklog_enabled && (inp = tp->t_inpcb) != NULL &&
1168 ((inp->inp_last_outifp != NULL &&
1169 (inp->inp_last_outifp->if_xflags & IFXF_MPK_LOG)) ||
1170 (inp->inp_boundifp != NULL &&
1171 (inp->inp_boundifp->if_xflags & IFXF_MPK_LOG)))) {
1172 MPKL_TCP_SEND(tcp_mpkl_log_object,
1173 mpkl_send_info.mpkl_proto, mpkl_send_info.mpkl_uuid,
1174 ntohs(inp->inp_lport), ntohs(inp->inp_fport),
1175 mpkl_seq, mpkl_len,
1176 so->last_pid, so->so_log_seqn++);
1177 }
1178
1179 /*
1180 * We wait for the socket to successfully connect before returning.
1181 * This allows us to signal a timeout to the application.
1182 */
1183 if (so->so_state & SS_ISCONNECTING) {
1184 if (so->so_state & SS_NBIO) {
1185 error = EWOULDBLOCK;
1186 } else {
1187 error = sbwait(&so->so_snd);
1188 }
1189 }
1190
1191 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
1192 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1193}
1194
1195/*
1196 * Abort the TCP.
1197 */
1198static int
1199tcp_usr_abort(struct socket *so)
1200{
1201 int error = 0;
1202 struct inpcb *inp = sotoinpcb(so);
1203 struct tcpcb *tp;
1204
1205 COMMON_START();
1206 /* In case we got disconnected from the peer */
1207 if (tp == NULL) {
1208 goto out;
1209 }
1210 tp = tcp_drop(tp, ECONNABORTED);
1211 VERIFY(so->so_usecount > 0);
1212 so->so_usecount--;
1213 COMMON_END(PRU_ABORT);
1214}
1215
1216/*
1217 * Receive out-of-band data.
1218 *
1219 * Returns: 0 Success
1220 * EINVAL [COMMON_START]
1221 * EINVAL
1222 * EWOULDBLOCK
1223 */
1224static int
1225tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1226{
1227 int error = 0;
1228 struct inpcb *inp = sotoinpcb(so);
1229 struct tcpcb *tp;
1230
1231 COMMON_START();
1232 if ((so->so_oobmark == 0 &&
1233 (so->so_state & SS_RCVATMARK) == 0) ||
1234 so->so_options & SO_OOBINLINE ||
1235 tp->t_oobflags & TCPOOB_HADDATA) {
1236 error = EINVAL;
1237 goto out;
1238 }
1239 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1240 error = EWOULDBLOCK;
1241 goto out;
1242 }
1243 m->m_len = 1;
1244 *mtod(m, caddr_t) = tp->t_iobc;
1245 so->so_state &= ~SS_RCVATMARK;
1246 if ((flags & MSG_PEEK) == 0) {
1247 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1248 }
1249 COMMON_END(PRU_RCVOOB);
1250}
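#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch of the urgent-data path handled by tcp_usr_rcvoob()
 * above: one peer sends a single out-of-band byte with MSG_OOB, the other
 * pulls it with recv(..., MSG_OOB).  Per the checks above, the receive
 * fails with EINVAL if SO_OOBINLINE is set (the byte stays in the normal
 * stream) and with EWOULDBLOCK until the urgent byte has actually
 * arrived.  Both descriptors are assumed connected TCP sockets.
 */
#include <sys/socket.h>

static int
example_oob(int sender, int receiver)
{
	char mark = '!';
	char oob = 0;

	if (send(sender, &mark, 1, MSG_OOB) != 1) {
		return -1;
	}
	if (recv(receiver, &oob, 1, MSG_OOB) != 1) {
		return -1;
	}
	return (oob == mark) ? 0 : -1;
}
#endif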
1251
1252static int
1253tcp_usr_preconnect(struct socket *so)
1254{
1255 struct inpcb *inp = sotoinpcb(so);
1256 int error = 0;
1257
1258#if NECP
1259 if (necp_socket_should_use_flow_divert(inp)) {
1260	/* May happen if in tcp_usr_connect we did not have a chance
1261 * to set the usrreqs (due to some error). So, let's get out
1262 * of here.
1263 */
1264 goto out;
1265 }
1266#endif /* NECP */
1267
1268 error = tcp_output(sototcpcb(so));
1269
1270 soclearfastopen(so);
1271
1272 COMMON_END(PRU_PRECONNECT);
1273}
1274
1275/* xxx - should be const */
1276struct pr_usrreqs tcp_usrreqs = {
1277 .pru_abort = tcp_usr_abort,
1278 .pru_accept = tcp_usr_accept,
1279 .pru_attach = tcp_usr_attach,
1280 .pru_bind = tcp_usr_bind,
1281 .pru_connect = tcp_usr_connect,
1282 .pru_connectx = tcp_usr_connectx,
1283 .pru_control = in_control,
1284 .pru_detach = tcp_usr_detach,
1285 .pru_disconnect = tcp_usr_disconnect,
1286 .pru_disconnectx = tcp_usr_disconnectx,
1287 .pru_listen = tcp_usr_listen,
1288 .pru_peeraddr = in_getpeeraddr,
1289 .pru_rcvd = tcp_usr_rcvd,
1290 .pru_rcvoob = tcp_usr_rcvoob,
1291 .pru_send = tcp_usr_send,
1292 .pru_shutdown = tcp_usr_shutdown,
1293 .pru_sockaddr = in_getsockaddr,
1294 .pru_sosend = sosend,
1295 .pru_soreceive = soreceive,
1296 .pru_preconnect = tcp_usr_preconnect,
1297};
1298
1299struct pr_usrreqs tcp6_usrreqs = {
1300 .pru_abort = tcp_usr_abort,
1301 .pru_accept = tcp6_usr_accept,
1302 .pru_attach = tcp_usr_attach,
1303 .pru_bind = tcp6_usr_bind,
1304 .pru_connect = tcp6_usr_connect,
1305 .pru_connectx = tcp6_usr_connectx,
1306 .pru_control = in6_control,
1307 .pru_detach = tcp_usr_detach,
1308 .pru_disconnect = tcp_usr_disconnect,
1309 .pru_disconnectx = tcp_usr_disconnectx,
1310 .pru_listen = tcp6_usr_listen,
1311 .pru_peeraddr = in6_mapped_peeraddr,
1312 .pru_rcvd = tcp_usr_rcvd,
1313 .pru_rcvoob = tcp_usr_rcvoob,
1314 .pru_send = tcp_usr_send,
1315 .pru_shutdown = tcp_usr_shutdown,
1316 .pru_sockaddr = in6_mapped_sockaddr,
1317 .pru_sosend = sosend,
1318 .pru_soreceive = soreceive,
1319 .pru_preconnect = tcp_usr_preconnect,
1320};
1321
1322/*
1323 * Common subroutine to open a TCP connection to remote host specified
1324 * by the struct sockaddr_in pointed to by nam. Call in_pcbbind to assign a local
1325 * port number if needed. Call in_pcbladdr to do the routing and to choose
1326 * a local host address (interface). If there is an existing incarnation
1327 * of the same connection in TIME-WAIT state and if the remote host was
1328 * sending CC options and if the connection duration was < MSL, then
1329 * truncate the previous TIME-WAIT state and proceed.
1330 * Initialize connection parameters and enter SYN-SENT state.
1331 *
1332 * Returns: 0 Success
1333 * EADDRINUSE
1334 * EINVAL
1335 * in_pcbbind:EADDRNOTAVAIL Address not available.
1336 * in_pcbbind:EINVAL Invalid argument
1337 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
1338 * in_pcbbind:EACCES Permission denied
1339 * in_pcbbind:EADDRINUSE Address in use
1340 * in_pcbbind:EAGAIN Resource unavailable, try again
1341 * in_pcbbind:EPERM Operation not permitted
1342 * in_pcbladdr:EINVAL Invalid argument
1343 * in_pcbladdr:EAFNOSUPPORT Address family not supported
1344 * in_pcbladdr:EADDRNOTAVAIL Address not available
1345 */
1346static int
1347tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1348{
1349 struct inpcb *inp = tp->t_inpcb, *oinp;
1350 struct socket *so = inp->inp_socket;
1351 struct tcpcb *otp;
1352 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
1353 struct in_addr laddr;
1354 int error = 0;
1355 struct ifnet *outif = NULL;
1356
1357 if (inp->inp_lport == 0) {
1358 error = in_pcbbind(inp, NULL, p);
1359 if (error) {
1360 goto done;
1361 }
1362 }
1363
1364 /*
1365 * Cannot simply call in_pcbconnect, because there might be an
1366 * earlier incarnation of this same connection still in
1367 * TIME_WAIT state, creating an ADDRINUSE error.
1368 */
1369 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif, 0);
1370 if (error) {
1371 goto done;
1372 }
1373
1374 socket_unlock(inp->inp_socket, 0);
1375 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
1376 sin->sin_addr, sin->sin_port,
1377 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr,
1378 inp->inp_lport, 0, NULL);
1379
1380 socket_lock(inp->inp_socket, 0);
1381 if (oinp) {
1382 if (oinp != inp) { /* 4143933: avoid deadlock if inp == oinp */
1383 socket_lock(oinp->inp_socket, 1);
1384 }
1385 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1386 if (oinp != inp) {
1387 socket_unlock(oinp->inp_socket, 1);
1388 }
1389 goto skip_oinp;
1390 }
1391
1392 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
1393 otp->t_state == TCPS_TIME_WAIT &&
1394 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
1395 (otp->t_flags & TF_RCVD_CC)) {
1396 otp = tcp_close(otp);
1397 } else {
1398 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n",
1399 (uint64_t)VM_KERNEL_ADDRPERM(inp));
1400 if (oinp != inp) {
1401 socket_unlock(oinp->inp_socket, 1);
1402 }
1403 error = EADDRINUSE;
1404 goto done;
1405 }
1406 if (oinp != inp) {
1407 socket_unlock(oinp->inp_socket, 1);
1408 }
1409 }
1410skip_oinp:
1411 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr :
1412 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
1413 inp->inp_lport == sin->sin_port) {
1414 error = EINVAL;
1415 goto done;
1416 }
1417 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1418		/* lock inversion issue, mostly with UDP multicast packets */
1419 socket_unlock(inp->inp_socket, 0);
1420 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1421 socket_lock(inp->inp_socket, 0);
1422 }
1423 if (inp->inp_laddr.s_addr == INADDR_ANY) {
1424 inp->inp_laddr = laddr;
1425 /* no reference needed */
1426 inp->inp_last_outifp = outif;
1427
1428 inp->inp_flags |= INP_INADDR_ANY;
1429 }
1430 inp->inp_faddr = sin->sin_addr;
1431 inp->inp_fport = sin->sin_port;
1432 in_pcbrehash(inp);
1433 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1434
1435 if (inp->inp_flowhash == 0) {
1436 inp->inp_flowhash = inp_calc_flowhash(inp);
1437 }
1438
1439 tcp_set_max_rwinscale(tp, so);
1440
1441 soisconnecting(so);
1442 tcpstat.tcps_connattempt++;
1443 tp->t_state = TCPS_SYN_SENT;
1444 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp));
1445 tp->iss = tcp_new_isn(tp);
1446 tcp_sendseqinit(tp);
1447 tp->t_connect_time = tcp_now;
1448 if (nstat_collect) {
1449 nstat_route_connect_attempt(inp->inp_route.ro_rt);
1450 }
1451
1452 tcp_add_fsw_flow(tp, outif);
1453
1454done:
1455 if (outif != NULL) {
1456 ifnet_release(outif);
1457 }
1458
1459 return error;
1460}
1461
1462static int
1463tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1464{
1465 struct inpcb *inp = tp->t_inpcb, *oinp;
1466 struct socket *so = inp->inp_socket;
1467 struct tcpcb *otp;
1468 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
1469 struct in6_addr addr6;
1470 int error = 0;
1471 struct ifnet *outif = NULL;
1472
1473 if (inp->inp_lport == 0) {
1474 error = in6_pcbbind(inp, NULL, p);
1475 if (error) {
1476 goto done;
1477 }
1478 }
1479
1480 /*
1481 * Cannot simply call in_pcbconnect, because there might be an
1482 * earlier incarnation of this same connection still in
1483 * TIME_WAIT state, creating an ADDRINUSE error.
1484 *
1485 * in6_pcbladdr() might return an ifp with its reference held
1486 * even in the error case, so make sure that it's released
1487 * whenever it's non-NULL.
1488 */
1489 error = in6_pcbladdr(inp, nam, &addr6, &outif);
1490 if (error) {
1491 goto done;
1492 }
1493 socket_unlock(inp->inp_socket, 0);
1494 oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
1495 &sin6->sin6_addr, sin6->sin6_port,
1496 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
1497 ? &addr6
1498 : &inp->in6p_laddr,
1499 inp->inp_lport, 0, NULL);
1500 socket_lock(inp->inp_socket, 0);
1501 if (oinp) {
1502 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
1503 otp->t_state == TCPS_TIME_WAIT &&
1504 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
1505 (otp->t_flags & TF_RCVD_CC)) {
1506 otp = tcp_close(otp);
1507 } else {
1508 error = EADDRINUSE;
1509 goto done;
1510 }
1511 }
1512 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
1513		/* lock inversion issue, mostly with UDP multicast packets */
1514 socket_unlock(inp->inp_socket, 0);
1515 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
1516 socket_lock(inp->inp_socket, 0);
1517 }
1518 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
1519 inp->in6p_laddr = addr6;
1520 inp->in6p_last_outifp = outif; /* no reference needed */
1521 inp->in6p_flags |= INP_IN6ADDR_ANY;
1522 }
1523 inp->in6p_faddr = sin6->sin6_addr;
1524 inp->inp_fport = sin6->sin6_port;
1525 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) {
1526 inp->inp_flow = sin6->sin6_flowinfo;
1527 }
1528 in_pcbrehash(inp);
1529 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1530
1531 if (inp->inp_flowhash == 0) {
1532 inp->inp_flowhash = inp_calc_flowhash(inp);
1533 }
1534 /* update flowinfo - RFC 6437 */
1535 if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) {
1536 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
1537 inp->inp_flow |=
1538 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
1539 }
1540
1541 tcp_set_max_rwinscale(tp, so);
1542
1543 soisconnecting(so);
1544 tcpstat.tcps_connattempt++;
1545 tp->t_state = TCPS_SYN_SENT;
1546 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
1547 TCP_CONN_KEEPINIT(tp));
1548 tp->iss = tcp_new_isn(tp);
1549 tcp_sendseqinit(tp);
1550 tp->t_connect_time = tcp_now;
1551 if (nstat_collect) {
1552 nstat_route_connect_attempt(inp->inp_route.ro_rt);
1553 }
1554
1555 tcp_add_fsw_flow(tp, outif);
1556
1557done:
1558 if (outif != NULL) {
1559 ifnet_release(outif);
1560 }
1561
1562 return error;
1563}
1564
1565/*
1566 * Export TCP internal state information via a struct tcp_info
1567 */
1568void
1569tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1570{
1571 struct inpcb *inp = tp->t_inpcb;
1572
1573 bzero(ti, sizeof(*ti));
1574
1575 ti->tcpi_state = (uint8_t)tp->t_state;
1576 ti->tcpi_flowhash = inp->inp_flowhash;
1577
1578 if (tp->t_state > TCPS_LISTEN) {
1579 if (TSTMP_SUPPORTED(tp)) {
1580 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1581 }
1582 if (SACK_ENABLED(tp)) {
1583 ti->tcpi_options |= TCPI_OPT_SACK;
1584 }
1585 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1586 ti->tcpi_options |= TCPI_OPT_WSCALE;
1587 ti->tcpi_snd_wscale = tp->snd_scale;
1588 ti->tcpi_rcv_wscale = tp->rcv_scale;
1589 }
1590 if (TCP_ECN_ENABLED(tp)) {
1591 ti->tcpi_options |= TCPI_OPT_ECN;
1592 }
1593
1594		/* Are we in a retransmission episode? */
1595 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
1596 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
1597 }
1598
1599 if (tp->t_flags & TF_STREAMING_ON) {
1600 ti->tcpi_flags |= TCPI_FLAG_STREAMING_ON;
1601 }
1602
1603 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
1604 ti->tcpi_snd_mss = tp->t_maxseg;
1605 ti->tcpi_rcv_mss = tp->t_maxseg;
1606
1607 ti->tcpi_rttcur = tp->t_rttcur;
1608 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
1609 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
1610 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT;
1611
1612 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1613 ti->tcpi_snd_cwnd = tp->snd_cwnd;
1614 ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1615
1616 ti->tcpi_rcv_space = tp->rcv_wnd;
1617
1618 ti->tcpi_snd_wnd = tp->snd_wnd;
1619 ti->tcpi_snd_nxt = tp->snd_nxt;
1620 ti->tcpi_rcv_nxt = tp->rcv_nxt;
1621
1622 /* convert bytes/msec to bits/sec */
1623 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
1624 tp->t_bwmeas != NULL) {
1625 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
1626 }
1627
1628 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1629 tp->t_inpcb->inp_last_outifp->if_index;
1630
1631 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
1632 ti->tcpi_txpackets = inp->inp_stat->txpackets;
1633 ti->tcpi_txbytes = inp->inp_stat->txbytes;
1634 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
1635 ti->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
1636 ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
1637
1638 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
1639 ti->tcpi_rxpackets = inp->inp_stat->rxpackets;
1640 ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
1641 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
1642 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1643
1644 if (tp->t_state > TCPS_LISTEN) {
1645 ti->tcpi_synrexmits = (uint8_t)tp->t_stat.rxmitsyns;
1646 }
1647 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets;
1648 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes;
1649 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets;
1650 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes;
1651
1652 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets;
1653 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes;
1654 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets;
1655 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes;
1656
1657 ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets;
1658 ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes;
1659 ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets;
1660 ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes;
1661 tcp_get_connectivity_status(tp, &ti->tcpi_connstatus);
1662
1663 ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1664 ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1665 ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1666 ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1667
1668 ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1669 ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1670 ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1671 ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1672 ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
1673 ti->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1674 ti->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1675 ti->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1676 ti->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1677 ti->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
1678 ti->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
1679
1680 ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
1681 ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
1682 ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
1683 ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
1684 ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
1685
1686 ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
1687
1688 if (tp->t_inpcb->inp_last_outifp != NULL) {
1689 if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp)) {
1690 ti->tcpi_if_cell = 1;
1691 }
1692 if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
1693 ti->tcpi_if_wifi = 1;
1694 }
1695 if (IFNET_IS_WIRED(tp->t_inpcb->inp_last_outifp)) {
1696 ti->tcpi_if_wired = 1;
1697 }
1698 if (IFNET_IS_WIFI_INFRA(tp->t_inpcb->inp_last_outifp)) {
1699 ti->tcpi_if_wifi_infra = 1;
1700 }
1701 if (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_AWDL) {
1702 ti->tcpi_if_wifi_awdl = 1;
1703 }
1704 }
1705 if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) {
1706 ti->tcpi_snd_background = 1;
1707 }
1708 if (tcp_recv_bg == 1 ||
1709 IS_TCP_RECV_BG(tp->t_inpcb->inp_socket)) {
1710 ti->tcpi_rcv_background = 1;
1711 }
1712
1713 ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
1714 ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
1715
1716 ti->tcpi_rcvoopack = tp->t_rcvoopack;
1717 ti->tcpi_pawsdrop = tp->t_pawsdrop;
1718 ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
1719 ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
1720 ti->tcpi_dsack_sent = tp->t_dsack_sent;
1721 ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
1722 }
1723}
1724
1725__private_extern__ errno_t
1726tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
1727{
1728 struct inpcbinfo *pcbinfo = NULL;
1729 struct inpcb *inp = NULL;
1730 struct socket *so;
1731 struct tcpcb *tp;
1732
1733 if (itpl->itpl_proto == IPPROTO_TCP) {
1734 pcbinfo = &tcbinfo;
1735 } else {
1736 return EINVAL;
1737 }
1738
1739 if (itpl->itpl_local_sa.sa_family == AF_INET &&
1740 itpl->itpl_remote_sa.sa_family == AF_INET) {
1741 inp = in_pcblookup_hash(pcbinfo,
1742 itpl->itpl_remote_sin.sin_addr,
1743 itpl->itpl_remote_sin.sin_port,
1744 itpl->itpl_local_sin.sin_addr,
1745 itpl->itpl_local_sin.sin_port,
1746 0, NULL);
1747 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
1748 itpl->itpl_remote_sa.sa_family == AF_INET6) {
1749 struct in6_addr ina6_local;
1750 struct in6_addr ina6_remote;
1751
1752 ina6_local = itpl->itpl_local_sin6.sin6_addr;
1753 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) &&
1754 itpl->itpl_local_sin6.sin6_scope_id) {
1755 ina6_local.s6_addr16[1] = htons((uint16_t)itpl->itpl_local_sin6.sin6_scope_id);
1756 }
1757
1758 ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
1759 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) &&
1760 itpl->itpl_remote_sin6.sin6_scope_id) {
1761 ina6_remote.s6_addr16[1] = htons((uint16_t)itpl->itpl_remote_sin6.sin6_scope_id);
1762 }
1763
1764 inp = in6_pcblookup_hash(pcbinfo,
1765 &ina6_remote,
1766 itpl->itpl_remote_sin6.sin6_port,
1767 &ina6_local,
1768 itpl->itpl_local_sin6.sin6_port,
1769 0, NULL);
1770 } else {
1771 return EINVAL;
1772 }
1773 if (inp == NULL || (so = inp->inp_socket) == NULL) {
1774 return ENOENT;
1775 }
1776
1777 socket_lock(so, 0);
1778 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1779 socket_unlock(so, 0);
1780 return ENOENT;
1781 }
1782 tp = intotcpcb(inp);
1783
1784 tcp_fill_info(tp, ti);
1785 socket_unlock(so, 0);
1786
1787 return 0;
1788}
1789
1790static void
1791tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci)
1792{
1793 struct inpcb *inp = tp->t_inpcb;
1794
1795 bzero(tci, sizeof(*tci));
1796 tci->tcpi_state = (uint8_t)tp->t_state;
1797 if (tp->t_state > TCPS_LISTEN) {
1798 if (TSTMP_SUPPORTED(tp)) {
1799 tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
1800 }
1801 if (SACK_ENABLED(tp)) {
1802 tci->tcpi_options |= TCPCI_OPT_SACK;
1803 }
1804 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1805 tci->tcpi_options |= TCPCI_OPT_WSCALE;
1806 tci->tcpi_snd_wscale = tp->snd_scale;
1807 tci->tcpi_rcv_wscale = tp->rcv_scale;
1808 }
1809 if (TCP_ECN_ENABLED(tp)) {
1810 tci->tcpi_options |= TCPCI_OPT_ECN;
1811 }
1812 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
1813 tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY;
1814 }
1815 if (tp->t_flagsext & TF_PKTS_REORDERED) {
1816 tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED;
1817 }
1818 tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ?
1819 tp->t_rxtcur : 0;
1820 tci->tcpi_maxseg = tp->t_maxseg;
1821 tci->tcpi_snd_ssthresh = tp->snd_ssthresh;
1822 tci->tcpi_snd_cwnd = tp->snd_cwnd;
1823 tci->tcpi_snd_wnd = tp->snd_wnd;
1824 tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1825 tci->tcpi_rcv_wnd = tp->rcv_wnd;
1826 tci->tcpi_rttcur = tp->t_rttcur;
1827 tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT);
1828 tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1829 tci->tcpi_txpackets = inp->inp_stat->txpackets;
1830 tci->tcpi_txbytes = inp->inp_stat->txbytes;
1831 tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
1832 tci->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
1833 tci->tcpi_rxpackets = inp->inp_stat->rxpackets;
1834 tci->tcpi_rxbytes = inp->inp_stat->rxbytes;
1835 tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1836
1837 tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1838 tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1839 tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1840 tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1841 tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1842 tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1843 tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1844 tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1845 tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
1846 tci->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1847 tci->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1848 tci->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1849 tci->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1850 tci->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
1851 tci->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
1852 }
1853}
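#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch of the public consumer of tcp_connection_fill_info():
 * the TCP_CONNECTION_INFO socket option (declared in <netinet/tcp.h>)
 * copies a struct tcp_connection_info out to the caller.  The descriptor
 * is an assumed, connected TCP socket.
 */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>

static int
example_connection_info(int s, struct tcp_connection_info *tci)
{
	socklen_t len = sizeof(*tci);

	memset(tci, 0, sizeof(*tci));
	if (getsockopt(s, IPPROTO_TCP, TCP_CONNECTION_INFO, tci, &len) < 0) {
		return -1;
	}
	/* e.g. tci->tcpi_state, tci->tcpi_srtt, tci->tcpi_snd_cwnd */
	return 0;
}
#endif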
1854
1855
1856__private_extern__ int
1857tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1858{
1859 int error;
1860 struct tcp_info ti = {};
1861 struct info_tuple itpl;
1862
1863 if (req->newptr == USER_ADDR_NULL) {
1864 return EINVAL;
1865 }
1866 if (req->newlen < sizeof(struct info_tuple)) {
1867 return EINVAL;
1868 }
1869 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
1870 if (error != 0) {
1871 return error;
1872 }
1873 error = tcp_fill_info_for_info_tuple(&itpl, &ti);
1874 if (error != 0) {
1875 return error;
1876 }
1877 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
1878 if (error != 0) {
1879 return error;
1880 }
1881
1882 return 0;
1883}
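#if 0	/* Illustrative user-space sketch; not kernel code, never compiled. */
/*
 * A minimal sketch of how the "net.inet.tcp.info" handler above is
 * driven: the caller passes a struct info_tuple describing an existing
 * connection as the sysctl "new" value and gets a struct tcp_info back
 * as the "old" value.  struct info_tuple and struct tcp_info come from
 * PRIVATE headers, so this only builds with kernel/private SDK headers
 * available; an IPv4 tuple is assumed here.
 */
#include <sys/sysctl.h>
#include <netinet/in.h>
#include <netinet/tcp.h>	/* struct tcp_info (PRIVATE) */
#include <netinet/tcp_var.h>	/* struct info_tuple (PRIVATE) */
#include <string.h>

static int
example_tcp_info_for_tuple(const struct sockaddr_in *local,
    const struct sockaddr_in *remote, struct tcp_info *ti)
{
	struct info_tuple itpl;
	size_t len = sizeof(*ti);

	memset(&itpl, 0, sizeof(itpl));
	itpl.itpl_proto = IPPROTO_TCP;
	memcpy(&itpl.itpl_local_sin, local, sizeof(*local));
	memcpy(&itpl.itpl_remote_sin, remote, sizeof(*remote));

	/* new value = the tuple, old value = the returned tcp_info */
	return sysctlbyname("net.inet.tcp.info", ti, &len, &itpl, sizeof(itpl));
}
#endif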
1884
1885static int
1886tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
1887{
1888 int error = EHOSTUNREACH;
1889 *out_pid = -1;
1890 if ((so->so_state & SS_ISCONNECTED) == 0) {
1891 return ENOTCONN;
1892 }
1893
1894 struct inpcb *inp = (struct inpcb*)so->so_pcb;
1895 uint16_t lport = inp->inp_lport;
1896 uint16_t fport = inp->inp_fport;
1897 struct inpcb *finp = NULL;
1898 struct in6_addr laddr6, faddr6;
1899 struct in_addr laddr4, faddr4;
1900
1901 if (inp->inp_vflag & INP_IPV6) {
1902 laddr6 = inp->in6p_laddr;
1903 faddr6 = inp->in6p_faddr;
1904 } else if (inp->inp_vflag & INP_IPV4) {
1905 laddr4 = inp->inp_laddr;
1906 faddr4 = inp->inp_faddr;
1907 }
1908
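	/*
	 * The endpoint tuple was captured above while the socket lock was
	 * still held; drop the lock across the reverse PCB lookup below,
	 * which takes the pcbinfo lock and may block.
	 */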
1909 socket_unlock(so, 0);
1910 if (inp->inp_vflag & INP_IPV6) {
1911 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
1912 } else if (inp->inp_vflag & INP_IPV4) {
1913 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
1914 }
1915
1916 if (finp) {
1917 *out_pid = finp->inp_socket->last_pid;
1918 error = 0;
1919 in_pcb_checkstate(finp, WNT_RELEASE, 0);
1920 }
1921 socket_lock(so, 0);
1922
1923 return error;
1924}
1925
1926void
1927tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci)
1928{
1929 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid);
1930 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info);
1931}
1932
1933void
1934tcp_clear_keep_alive_offload(struct socket *so)
1935{
1936 struct inpcb *inp;
1937 struct ifnet *ifp;
1938
1939 inp = sotoinpcb(so);
1940 if (inp == NULL) {
1941 return;
1942 }
1943
1944 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) == 0) {
1945 return;
1946 }
1947
1948 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1949 inp->inp_last_outifp;
1950 if (ifp == NULL) {
1951 panic("%s: so %p inp %p ifp NULL",
1952 __func__, so, inp);
1953 }
1954
1955 ifnet_lock_exclusive(ifp);
1956
1957 if (ifp->if_tcp_kao_cnt == 0) {
1958 panic("%s: so %p inp %p ifp %p if_tcp_kao_cnt == 0",
1959 __func__, so, inp, ifp);
1960 }
1961 ifp->if_tcp_kao_cnt--;
1962 inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
1963
1964 ifnet_lock_done(ifp);
1965}
1966
1967static int
1968tcp_set_keep_alive_offload(struct socket *so, struct proc *proc)
1969{
1970 int error = 0;
1971 struct inpcb *inp;
1972 struct ifnet *ifp;
1973
1974 inp = sotoinpcb(so);
1975 if (inp == NULL) {
1976 return ECONNRESET;
1977 }
1978 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) != 0) {
1979 return 0;
1980 }
1981
1982 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1983 inp->inp_last_outifp;
1984 if (ifp == NULL) {
1985 error = ENXIO;
1986 os_log_info(OS_LOG_DEFAULT,
1987 "%s: error %d for proc %s[%u] out ifp is not set\n",
1988 __func__, error,
1989 proc != NULL ? proc->p_comm : "kernel",
1990 proc != NULL ? proc->p_pid : 0);
1991 return ENXIO;
1992 }
1993
1994 error = if_get_tcp_kao_max(ifp);
1995 if (error != 0) {
1996 return error;
1997 }
1998
1999 ifnet_lock_exclusive(ifp);
2000 if (ifp->if_tcp_kao_cnt < ifp->if_tcp_kao_max) {
2001 ifp->if_tcp_kao_cnt++;
2002 inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
2003 } else {
2004 error = ETOOMANYREFS;
2005 os_log_info(OS_LOG_DEFAULT,
2006 "%s: error %d for proc %s[%u] if_tcp_kao_max %u\n",
2007 __func__, error,
2008 proc != NULL ? proc->p_comm : "kernel",
2009 proc != NULL ? proc->p_pid : 0,
2010 ifp->if_tcp_kao_max);
2011 }
2012 ifnet_lock_done(ifp);
2013
2014 return error;
2015}
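
/*
 * Illustrative userspace sketch (not part of this file): the handler above is
 * reached through the TCP_KEEPALIVE_OFFLOAD socket option in tcp_ctloutput()
 * below, and the caller must hold the PRIV_NETINET_TCP_KA_OFFLOAD privilege.
 * The option constant is assumed to be visible to the caller (it lives in the
 * private section of the TCP headers in this tree).
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Request keep-alive offload on a connected TCP socket `fd`. */
static int
enable_keepalive_offload(int fd)
{
	int on = 1;

	return setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE_OFFLOAD,
	    &on, sizeof(on));
}
#endif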
2016
2017/*
2018 * The new sockopt interface makes it possible for us to block in the
2019 * copyin/out step (if we take a page fault). Taking a page fault at
2020 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
2021 * use TSM, there probably isn't any need for this function to run at
2022 * splnet() any more. This needs more examination.)
2023 */
2024int
2025tcp_ctloutput(struct socket *so, struct sockopt *sopt)
2026{
2027 int error = 0, opt = 0, optval = 0;
2028 struct inpcb *inp;
2029 struct tcpcb *tp;
2030
2031 inp = sotoinpcb(so);
2032 if (inp == NULL) {
2033 return ECONNRESET;
2034 }
2035 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */
2036 if (sopt->sopt_level != IPPROTO_TCP &&
2037 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH ||
2038 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) {
2039 if (SOCK_CHECK_DOM(so, PF_INET6)) {
2040 error = ip6_ctloutput(so, sopt);
2041 } else {
2042 error = ip_ctloutput(so, sopt);
2043 }
2044 return error;
2045 }
2046 tp = intotcpcb(inp);
2047 if (tp == NULL) {
2048 return ECONNRESET;
2049 }
2050
2051 calculate_tcp_clock();
2052
2053 switch (sopt->sopt_dir) {
2054 case SOPT_SET:
2055 switch (sopt->sopt_name) {
2056 case TCP_NODELAY:
2057 case TCP_NOOPT:
2058 case TCP_NOPUSH:
2059 error = sooptcopyin(sopt, &optval, sizeof optval,
2060 sizeof optval);
2061 if (error) {
2062 break;
2063 }
2064
2065 switch (sopt->sopt_name) {
2066 case TCP_NODELAY:
2067 opt = TF_NODELAY;
2068 break;
2069 case TCP_NOOPT:
2070 opt = TF_NOOPT;
2071 break;
2072 case TCP_NOPUSH:
2073 opt = TF_NOPUSH;
2074 break;
2075 default:
2076 opt = 0; /* dead code to fool gcc */
2077 break;
2078 }
2079
2080 if (optval) {
2081 tp->t_flags |= opt;
2082 } else {
2083 tp->t_flags &= ~opt;
2084 }
2085 break;
2086 case TCP_RXT_FINDROP:
2087 case TCP_NOTIMEWAIT:
2088 error = sooptcopyin(sopt, &optval, sizeof optval,
2089 sizeof optval);
2090 if (error) {
2091 break;
2092 }
2093 switch (sopt->sopt_name) {
2094 case TCP_RXT_FINDROP:
2095 opt = TF_RXTFINDROP;
2096 break;
2097 case TCP_NOTIMEWAIT:
2098 opt = TF_NOTIMEWAIT;
2099 break;
2100 default:
2101 opt = 0;
2102 break;
2103 }
2104 if (optval) {
2105 tp->t_flagsext |= opt;
2106 } else {
2107 tp->t_flagsext &= ~opt;
2108 }
2109 break;
2110 case TCP_MEASURE_SND_BW:
2111 error = sooptcopyin(sopt, &optval, sizeof optval,
2112 sizeof optval);
2113 if (error) {
2114 break;
2115 }
2116 opt = TF_MEASURESNDBW;
2117 if (optval) {
2118 if (tp->t_bwmeas == NULL) {
2119 tp->t_bwmeas = tcp_bwmeas_alloc(tp);
2120 if (tp->t_bwmeas == NULL) {
2121 error = ENOMEM;
2122 break;
2123 }
2124 }
2125 tp->t_flagsext |= opt;
2126 } else {
2127 tp->t_flagsext &= ~opt;
2128 /* Reset snd bw measurement state */
2129 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
2130 if (tp->t_bwmeas != NULL) {
2131 tcp_bwmeas_free(tp);
2132 }
2133 }
2134 break;
2135 case TCP_MEASURE_BW_BURST: {
2136 struct tcp_measure_bw_burst in;
2137 uint32_t minpkts, maxpkts;
2138 bzero(&in, sizeof(in));
2139
2140 error = sooptcopyin(sopt, &in, sizeof(in),
2141 sizeof(in));
2142 if (error) {
2143 break;
2144 }
2145 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
2146 tp->t_bwmeas == NULL) {
2147 error = EINVAL;
2148 break;
2149 }
2150 minpkts = (in.min_burst_size != 0) ? in.min_burst_size :
2151 tp->t_bwmeas->bw_minsizepkts;
2152 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
2153 tp->t_bwmeas->bw_maxsizepkts;
2154 if (minpkts > maxpkts) {
2155 error = EINVAL;
2156 break;
2157 }
2158 tp->t_bwmeas->bw_minsizepkts = minpkts;
2159 tp->t_bwmeas->bw_maxsizepkts = maxpkts;
2160 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
2161 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
2162 break;
2163 }
2164 case TCP_MAXSEG:
2165 error = sooptcopyin(sopt, &optval, sizeof optval,
2166 sizeof optval);
2167 if (error) {
2168 break;
2169 }
2170
2171 if (optval > 0 && optval <= tp->t_maxseg &&
2172 optval + 40 >= tcp_minmss) {
2173 tp->t_maxseg = optval;
2174 } else {
2175 error = EINVAL;
2176 }
2177 break;
2178
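		/*
		 * TCP_KEEPALIVE, TCP_CONNECTIONTIMEOUT and TCP_KEEPINTVL below
		 * take a value in seconds and are stored in TCP_RETRANSHZ
		 * ticks; TCP_KEEPCNT is a plain probe count.
		 */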
2179 case TCP_KEEPALIVE:
2180 error = sooptcopyin(sopt, &optval, sizeof optval,
2181 sizeof optval);
2182 if (error) {
2183 break;
2184 }
2185 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2186 error = EINVAL;
2187 } else {
2188 tp->t_keepidle = optval * TCP_RETRANSHZ;
2189 /* reset the timer to new value */
2190 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
2191 TCP_CONN_KEEPIDLE(tp));
2192 tcp_check_timer_state(tp);
2193 }
2194 break;
2195
2196 case TCP_CONNECTIONTIMEOUT:
2197 error = sooptcopyin(sopt, &optval, sizeof optval,
2198 sizeof optval);
2199 if (error) {
2200 break;
2201 }
2202 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2203 error = EINVAL;
2204 } else {
2205 tp->t_keepinit = optval * TCP_RETRANSHZ;
2206 if (tp->t_state == TCPS_SYN_RECEIVED ||
2207 tp->t_state == TCPS_SYN_SENT) {
2208 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
2209 TCP_CONN_KEEPINIT(tp));
2210 tcp_check_timer_state(tp);
2211 }
2212 }
2213 break;
2214
2215 case TCP_KEEPINTVL:
2216 error = sooptcopyin(sopt, &optval, sizeof(optval),
2217 sizeof(optval));
2218 if (error) {
2219 break;
2220 }
2221 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2222 error = EINVAL;
2223 } else {
2224 tp->t_keepintvl = optval * TCP_RETRANSHZ;
2225 if (tp->t_state == TCPS_FIN_WAIT_2 &&
2226 TCP_CONN_MAXIDLE(tp) > 0) {
2227 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
2228 TCP_CONN_MAXIDLE(tp));
2229 tcp_check_timer_state(tp);
2230 }
2231 }
2232 break;
2233
2234 case TCP_KEEPCNT:
2235 error = sooptcopyin(sopt, &optval, sizeof(optval),
2236 sizeof(optval));
2237 if (error) {
2238 break;
2239 }
2240 if (optval < 0 || optval > INT32_MAX) {
2241 error = EINVAL;
2242 } else {
2243 tp->t_keepcnt = optval;
2244 if (tp->t_state == TCPS_FIN_WAIT_2 &&
2245 TCP_CONN_MAXIDLE(tp) > 0) {
2246 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
2247 TCP_CONN_MAXIDLE(tp));
2248 tcp_check_timer_state(tp);
2249 }
2250 }
2251 break;
2252
2253 case TCP_KEEPALIVE_OFFLOAD:
2254 if ((error = priv_check_cred(kauth_cred_get(),
2255 PRIV_NETINET_TCP_KA_OFFLOAD, 0)) != 0) {
2256 break;
2257 }
2258 error = sooptcopyin(sopt, &optval, sizeof(optval),
2259 sizeof(optval));
2260 if (error) {
2261 break;
2262 }
2263 if (optval < 0 || optval > INT32_MAX) {
2264 error = EINVAL;
2265 break;
2266 }
2267 if (optval != 0) {
2268 error = tcp_set_keep_alive_offload(so,
2269 sopt->sopt_p);
2270 } else {
2271 tcp_clear_keep_alive_offload(so);
2272 }
2273 break;
2274
2275 case PERSIST_TIMEOUT:
2276 error = sooptcopyin(sopt, &optval, sizeof optval,
2277 sizeof optval);
2278 if (error) {
2279 break;
2280 }
2281 if (optval < 0) {
2282 error = EINVAL;
2283 } else {
2284 tp->t_persist_timeout = optval * TCP_RETRANSHZ;
2285 }
2286 break;
2287 case TCP_RXT_CONNDROPTIME:
2288 error = sooptcopyin(sopt, &optval, sizeof(optval),
2289 sizeof(optval));
2290 if (error) {
2291 break;
2292 }
2293 if (optval < 0) {
2294 error = EINVAL;
2295 } else {
2296 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ;
2297 }
2298 break;
2299 case TCP_NOTSENT_LOWAT:
2300 error = sooptcopyin(sopt, &optval, sizeof(optval),
2301 sizeof(optval));
2302 if (error) {
2303 break;
2304 }
2305 if (optval < 0) {
2306 error = EINVAL;
2307 break;
2308 } else {
2309 if (optval == 0) {
2310 so->so_flags &= ~(SOF_NOTSENT_LOWAT);
2311 tp->t_notsent_lowat = 0;
2312 } else {
2313 so->so_flags |= SOF_NOTSENT_LOWAT;
2314 tp->t_notsent_lowat = optval;
2315 }
2316 }
2317 break;
2318 case TCP_ADAPTIVE_READ_TIMEOUT:
2319 error = sooptcopyin(sopt, &optval, sizeof(optval),
2320 sizeof(optval));
2321 if (error) {
2322 break;
2323 }
2324 if (optval < 0 ||
2325 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
2326 error = EINVAL;
2327 break;
2328 } else if (optval == 0) {
2329 tp->t_adaptive_rtimo = 0;
2330 tcp_keepalive_reset(tp);
2331
2332 if (tp->t_mpsub) {
2333 mptcp_reset_keepalive(tp);
2334 }
2335 } else {
2336 tp->t_adaptive_rtimo = (uint8_t)optval;
2337 }
2338 break;
2339 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2340 error = sooptcopyin(sopt, &optval, sizeof(optval),
2341 sizeof(optval));
2342 if (error) {
2343 break;
2344 }
2345 if (optval < 0 ||
2346 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
2347 error = EINVAL;
2348 break;
2349 } else {
2350 tp->t_adaptive_wtimo = (uint8_t)optval;
2351 }
2352 break;
2353 case TCP_SENDMOREACKS:
2354 error = sooptcopyin(sopt, &optval, sizeof(optval),
2355 sizeof(optval));
2356 if (error) {
2357 break;
2358 }
2359 if (optval < 0 || optval > 1) {
2360 error = EINVAL;
2361 } else if (optval == 0) {
2362 tp->t_flagsext &= ~(TF_NOSTRETCHACK);
2363 } else {
2364 tp->t_flagsext |= TF_NOSTRETCHACK;
2365 }
2366 break;
2367 case TCP_DISABLE_BLACKHOLE_DETECTION:
2368 error = sooptcopyin(sopt, &optval, sizeof(optval),
2369 sizeof(optval));
2370 if (error) {
2371 break;
2372 }
2373 if (optval < 0 || optval > 1) {
2374 error = EINVAL;
2375 } else if (optval == 0) {
2376 tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION;
2377 } else {
2378 tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION;
2379 if ((tp->t_flags & TF_BLACKHOLE) &&
2380 tp->t_pmtud_saved_maxopd > 0) {
2381 tcp_pmtud_revert_segment_size(tp);
2382 }
2383 }
2384 break;
2385 case TCP_FASTOPEN:
2386 if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2387 error = ENOTSUP;
2388 break;
2389 }
2390
2391 error = sooptcopyin(sopt, &optval, sizeof(optval),
2392 sizeof(optval));
2393 if (error) {
2394 break;
2395 }
2396 if (optval < 0 || optval > 1) {
2397 error = EINVAL;
2398 break;
2399 }
2400 if (tp->t_state != TCPS_LISTEN) {
2401 error = EINVAL;
2402 break;
2403 }
2404 if (optval) {
2405 tp->t_flagsext |= TF_FASTOPEN;
2406 } else {
2407 tcp_disable_tfo(tp);
2408 }
2409 break;
2410 case TCP_FASTOPEN_FORCE_HEURISTICS:
2411
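			/* Accepted but currently a no-op. */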
2412 break;
2413 case TCP_FASTOPEN_FORCE_ENABLE:
2414 error = sooptcopyin(sopt, &optval, sizeof(optval),
2415 sizeof(optval));
2416
2417 if (error) {
2418 break;
2419 }
2420 if (optval < 0 || optval > 1) {
2421 error = EINVAL;
2422 break;
2423 }
2424
2425 if (tp->t_state != TCPS_CLOSED) {
2426 error = EINVAL;
2427 break;
2428 }
2429 if (optval) {
2430 tp->t_flagsext |= TF_FASTOPEN_FORCE_ENABLE;
2431 } else {
2432 tp->t_flagsext &= ~TF_FASTOPEN_FORCE_ENABLE;
2433 }
2434
2435 break;
2436 case TCP_ENABLE_ECN:
2437 error = sooptcopyin(sopt, &optval, sizeof optval,
2438 sizeof optval);
2439 if (error) {
2440 break;
2441 }
2442 if (optval) {
2443 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2444 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2445 } else {
2446 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2447 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2448 }
2449 break;
2450 case TCP_ECN_MODE:
2451 error = sooptcopyin(sopt, &optval, sizeof optval,
2452 sizeof optval);
2453 if (error) {
2454 break;
2455 }
2456 if (optval == ECN_MODE_DEFAULT) {
2457 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2458 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2459 } else if (optval == ECN_MODE_ENABLE) {
2460 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2461 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2462 } else if (optval == ECN_MODE_DISABLE) {
2463 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2464 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2465 } else {
2466 error = EINVAL;
2467 }
2468 break;
2469 case TCP_NOTIFY_ACKNOWLEDGEMENT:
2470 error = sooptcopyin(sopt, &optval,
2471 sizeof(optval), sizeof(optval));
2472 if (error) {
2473 break;
2474 }
2475 if (optval <= 0) {
2476 error = EINVAL;
2477 break;
2478 }
2479 if (tp->t_notify_ack_count >= TCP_MAX_NOTIFY_ACK) {
2480 error = ETOOMANYREFS;
2481 break;
2482 }
2483
2484 /*
2485 * validate that the given marker id is not
2486 * a duplicate to avoid ambiguity
2487 */
2488 if ((error = tcp_notify_ack_id_valid(tp, so,
2489 optval)) != 0) {
2490 break;
2491 }
2492 error = tcp_add_notify_ack_marker(tp, optval);
2493 break;
2494 case SO_FLUSH:
2495 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2496 sizeof(optval))) != 0) {
2497 break;
2498 }
2499
2500 error = inp_flush(inp, optval);
2501 break;
2502
2503 case SO_TRAFFIC_MGT_BACKGROUND:
2504 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2505 sizeof(optval))) != 0) {
2506 break;
2507 }
2508
2509 if (optval) {
2510 socket_set_traffic_mgt_flags_locked(so,
2511 TRAFFIC_MGT_SO_BACKGROUND);
2512 } else {
2513 socket_clear_traffic_mgt_flags_locked(so,
2514 TRAFFIC_MGT_SO_BACKGROUND);
2515 }
2516 break;
2517 case TCP_RXT_MINIMUM_TIMEOUT:
2518 error = sooptcopyin(sopt, &optval, sizeof(optval),
2519 sizeof(optval));
2520 if (error) {
2521 break;
2522 }
2523 if (optval < 0) {
2524 error = EINVAL;
2525 break;
2526 }
2527 if (optval == 0) {
2528 tp->t_rxt_minimum_timeout = 0;
2529 } else {
2530 tp->t_rxt_minimum_timeout = min(optval,
2531 TCP_RXT_MINIMUM_TIMEOUT_LIMIT);
2532 /* convert to milliseconds */
2533 tp->t_rxt_minimum_timeout *= TCP_RETRANSHZ;
2534 }
2535 break;
2536 default:
2537 error = ENOPROTOOPT;
2538 break;
2539 }
2540 break;
2541
2542 case SOPT_GET:
2543 switch (sopt->sopt_name) {
2544 case TCP_NODELAY:
2545 optval = tp->t_flags & TF_NODELAY;
2546 break;
2547 case TCP_MAXSEG:
2548 optval = tp->t_maxseg;
2549 break;
2550 case TCP_KEEPALIVE:
2551 if (tp->t_keepidle > 0) {
2552 optval = tp->t_keepidle / TCP_RETRANSHZ;
2553 } else {
2554 optval = tcp_keepidle / TCP_RETRANSHZ;
2555 }
2556 break;
2557 case TCP_KEEPINTVL:
2558 if (tp->t_keepintvl > 0) {
2559 optval = tp->t_keepintvl / TCP_RETRANSHZ;
2560 } else {
2561 optval = tcp_keepintvl / TCP_RETRANSHZ;
2562 }
2563 break;
2564 case TCP_KEEPCNT:
2565 if (tp->t_keepcnt > 0) {
2566 optval = tp->t_keepcnt;
2567 } else {
2568 optval = tcp_keepcnt;
2569 }
2570 break;
2571 case TCP_KEEPALIVE_OFFLOAD:
2572 optval = !!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD);
2573 break;
2574 case TCP_NOOPT:
2575 optval = tp->t_flags & TF_NOOPT;
2576 break;
2577 case TCP_NOPUSH:
2578 optval = tp->t_flags & TF_NOPUSH;
2579 break;
2580 case TCP_ENABLE_ECN:
2581 optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
2582 break;
2583 case TCP_ECN_MODE:
2584 if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
2585 optval = ECN_MODE_ENABLE;
2586 } else if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
2587 optval = ECN_MODE_DISABLE;
2588 } else {
2589 optval = ECN_MODE_DEFAULT;
2590 }
2591 break;
2592 case TCP_CONNECTIONTIMEOUT:
2593 optval = tp->t_keepinit / TCP_RETRANSHZ;
2594 break;
2595 case PERSIST_TIMEOUT:
2596 optval = tp->t_persist_timeout / TCP_RETRANSHZ;
2597 break;
2598 case TCP_RXT_CONNDROPTIME:
2599 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ;
2600 break;
2601 case TCP_RXT_FINDROP:
2602 optval = tp->t_flagsext & TF_RXTFINDROP;
2603 break;
2604 case TCP_NOTIMEWAIT:
2605 optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0;
2606 break;
2607 case TCP_FASTOPEN:
2608 if (tp->t_state != TCPS_LISTEN ||
2609 !(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2610 error = ENOTSUP;
2611 break;
2612 }
2613 optval = tfo_enabled(tp);
2614 break;
2615 case TCP_FASTOPEN_FORCE_HEURISTICS:
2616 optval = 0;
2617 break;
2618 case TCP_FASTOPEN_FORCE_ENABLE:
2619 optval = (tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) ? 1 : 0;
2620 break;
2621 case TCP_MEASURE_SND_BW:
2622 optval = tp->t_flagsext & TF_MEASURESNDBW;
2623 break;
2624 case TCP_INFO: {
2625 struct tcp_info ti;
2626
2627 tcp_fill_info(tp, &ti);
2628 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2629 goto done;
2630 /* NOT REACHED */
2631 }
2632 case TCP_CONNECTION_INFO: {
2633 struct tcp_connection_info tci;
2634 tcp_connection_fill_info(tp, &tci);
2635 error = sooptcopyout(sopt, &tci,
2636 sizeof(struct tcp_connection_info));
2637 goto done;
2638 }
2639 case TCP_MEASURE_BW_BURST: {
2640 struct tcp_measure_bw_burst out = {};
2641 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
2642 tp->t_bwmeas == NULL) {
2643 error = EINVAL;
2644 break;
2645 }
2646 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
2647 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
2648 error = sooptcopyout(sopt, &out, sizeof(out));
2649 goto done;
2650 }
2651 case TCP_NOTSENT_LOWAT:
2652 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
2653 optval = tp->t_notsent_lowat;
2654 } else {
2655 optval = 0;
2656 }
2657 break;
2658 case TCP_SENDMOREACKS:
2659 if (tp->t_flagsext & TF_NOSTRETCHACK) {
2660 optval = 1;
2661 } else {
2662 optval = 0;
2663 }
2664 break;
2665 case TCP_DISABLE_BLACKHOLE_DETECTION:
2666 if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) {
2667 optval = 1;
2668 } else {
2669 optval = 0;
2670 }
2671 break;
2672 case TCP_PEER_PID: {
2673 pid_t pid;
2674 error = tcp_lookup_peer_pid_locked(so, &pid);
2675 if (error == 0) {
2676 error = sooptcopyout(sopt, &pid, sizeof(pid));
2677 }
2678 goto done;
2679 }
2680 case TCP_ADAPTIVE_READ_TIMEOUT:
2681 optval = tp->t_adaptive_rtimo;
2682 break;
2683 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2684 optval = tp->t_adaptive_wtimo;
2685 break;
2686 case SO_TRAFFIC_MGT_BACKGROUND:
2687 optval = (so->so_flags1 &
2688 SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
2689 break;
2690 case TCP_NOTIFY_ACKNOWLEDGEMENT: {
2691 struct tcp_notify_ack_complete retid;
2692
2693 if (sopt->sopt_valsize != sizeof(retid)) {
2694 error = EINVAL;
2695 break;
2696 }
2697 bzero(&retid, sizeof(retid));
2698 tcp_get_notify_ack_count(tp, &retid);
2699 if (retid.notify_complete_count > 0) {
2700 tcp_get_notify_ack_ids(tp, &retid);
2701 }
2702
2703 error = sooptcopyout(sopt, &retid, sizeof(retid));
2704 goto done;
2705 }
2706 case TCP_RXT_MINIMUM_TIMEOUT:
2707 optval = tp->t_rxt_minimum_timeout / TCP_RETRANSHZ;
2708 break;
2709 default:
2710 error = ENOPROTOOPT;
2711 break;
2712 }
2713 if (error == 0) {
2714 error = sooptcopyout(sopt, &optval, sizeof optval);
2715 }
2716 break;
2717 }
2718done:
2719 return error;
2720}
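
/*
 * Illustrative userspace sketch (not part of this file): driving a few of the
 * options handled above through the ordinary setsockopt() interface.  Option
 * names follow <netinet/tcp.h>; the particular values are arbitrary examples.
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/* Tune a connected TCP socket `fd`; returns 0 on success, -1 on failure. */
static int
tune_tcp_socket(int fd)
{
	int on = 1;             /* enable TCP_NODELAY / SO_KEEPALIVE */
	int keepidle = 60;      /* TCP_KEEPALIVE: idle seconds before probing */
	int keepintvl = 10;     /* TCP_KEEPINTVL: seconds between probes */
	int keepcnt = 4;        /* TCP_KEEPCNT: probes before dropping */
	int lowat = 16 * 1024;  /* TCP_NOTSENT_LOWAT: unsent-data threshold, bytes */

	if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on)) == -1 ||
	    setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == -1 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepidle, sizeof(keepidle)) == -1 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepintvl, sizeof(keepintvl)) == -1 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepcnt, sizeof(keepcnt)) == -1 ||
	    setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)) == -1) {
		return -1;
	}
	return 0;
}
#endif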
2721
2722/*
2723 * tcp_sendspace and tcp_recvspace are the default send and receive window
2724 * sizes, respectively. These are obsolescent (this information should
2725 * be set by the route).
2726 */
2727u_int32_t tcp_sendspace = 1448 * 256;
2728u_int32_t tcp_recvspace = 1448 * 384;
2729
2730/* During attach, the size of the socket buffer allocated is limited to
2731 * sb_max in sbreserve. Disallow setting the tcp send and recv space
2732 * to be more than sb_max because that will cause tcp_attach to fail
2733 * (see radar 5713060)
2734 */
2735static int
2736sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
2737 int arg2, struct sysctl_req *req)
2738{
2739#pragma unused(arg2)
2740 u_int32_t new_value = 0, *space_p = NULL;
2741 int changed = 0, error = 0;
2742 u_quad_t sb_effective_max = (sb_max / (MSIZE + MCLBYTES)) * MCLBYTES;
2743
2744 switch (oidp->oid_number) {
2745 case TCPCTL_SENDSPACE:
2746 space_p = &tcp_sendspace;
2747 break;
2748 case TCPCTL_RECVSPACE:
2749 space_p = &tcp_recvspace;
2750 break;
2751 default:
2752 return EINVAL;
2753 }
2754 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
2755 &new_value, &changed);
2756 if (changed) {
2757 if (new_value > 0 && new_value <= sb_effective_max) {
2758 *space_p = new_value;
2759 SYSCTL_SKMEM_UPDATE_AT_OFFSET(arg2, new_value);
2760 } else {
2761 error = ERANGE;
2762 }
2763 }
2764 return error;
2765}
2766
2767#if SYSCTL_SKMEM
2768SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
2769 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_sendspace,
2770 offsetof(skmem_sysctl, tcp.sendspace), sysctl_tcp_sospace,
2771 "IU", "Maximum outgoing TCP datagram size");
2772SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
2773 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recvspace,
2774 offsetof(skmem_sysctl, tcp.recvspace), sysctl_tcp_sospace,
2775 "IU", "Maximum incoming TCP datagram size");
2776#else /* SYSCTL_SKMEM */
2777SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2778 &tcp_sendspace, 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
2779SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
2780 &tcp_recvspace, 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
2781#endif /* SYSCTL_SKMEM */
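
/*
 * Illustrative userspace sketch (not part of this file): the handler above
 * backs the net.inet.tcp.sendspace and net.inet.tcp.recvspace sysctls, so the
 * defaults can be inspected or changed with sysctlbyname(); values above the
 * effective sb_max limit are rejected with ERANGE.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

static void
show_and_bump_sendspace(void)
{
	u_int32_t cur = 0, next;
	size_t len = sizeof(cur);

	if (sysctlbyname("net.inet.tcp.sendspace", &cur, &len, NULL, 0) == -1) {
		perror("sysctlbyname(net.inet.tcp.sendspace)");
		return;
	}
	printf("net.inet.tcp.sendspace = %u\n", cur);

	next = cur * 2;         /* arbitrary example; must stay within sb_max */
	if (sysctlbyname("net.inet.tcp.sendspace", NULL, NULL,
	    &next, sizeof(next)) == -1) {
		perror("sysctlbyname(set)");    /* typically requires root */
	}
}
#endif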
2782
2783/*
2784 * Attach the TCP protocol to a socket, allocating an internet protocol
2785 * control block, a TCP control block, and buffer space, and entering the
2786 * LISTEN state if the socket is to accept connections.
2787 *
2788 * Returns: 0 Success
2789 * in_pcballoc:ENOBUFS
2790 * in_pcballoc:ENOMEM
2791 * in_pcballoc:??? [IPSEC specific]
2792 * soreserve:ENOBUFS
2793 */
2794static int
2795tcp_attach(struct socket *so, struct proc *p)
2796{
2797 struct tcpcb *tp;
2798 struct inpcb *inp;
2799 int error;
2800 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0;
2801
2802 error = in_pcballoc(so, &tcbinfo, p);
2803 if (error) {
2804 return error;
2805 }
2806
2807 inp = sotoinpcb(so);
2808
2809 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
2810 error = soreserve(so, tcp_sendspace, tcp_recvspace);
2811 if (error) {
2812 return error;
2813 }
2814 }
2815
2816 if (so->so_snd.sb_preconn_hiwat == 0) {
2817 soreserve_preconnect(so, 2048);
2818 }
2819
2820 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
2821 so->so_rcv.sb_flags |= SB_AUTOSIZE;
2822 }
2823 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) {
2824 so->so_snd.sb_flags |= SB_AUTOSIZE;
2825 }
2826
2827 if (isipv6) {
2828 inp->inp_vflag |= INP_IPV6;
2829 inp->in6p_hops = -1; /* use kernel default */
2830 } else {
2831 inp->inp_vflag |= INP_IPV4;
2832 }
2833 tp = tcp_newtcpcb(inp);
2834 if (tp == NULL) {
2835 int nofd = so->so_state & SS_NOFDREF; /* XXX */
2836
2837 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
2838 if (isipv6) {
2839 in6_pcbdetach(inp);
2840 } else {
2841 in_pcbdetach(inp);
2842 }
2843 so->so_state |= nofd;
2844 return ENOBUFS;
2845 }
2846 if (nstat_collect) {
2847 nstat_tcp_new_pcb(inp);
2848 }
2849 tp->t_state = TCPS_CLOSED;
2850 return 0;
2851}
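
/*
 * Illustrative userspace sketch (not part of this file): tcp_attach() runs
 * when a TCP socket is created.  The tcp_sendspace/tcp_recvspace defaults
 * above are applied only when no buffer reservation exists yet, and buffer
 * autosizing stays enabled unless the buffers were explicitly sized by the
 * user (SB_USRSIZE).
 */
#if 0
#include <sys/socket.h>
#include <netinet/in.h>

/* Create a TCP socket; `ipv6` selects the address family. */
static int
make_tcp_socket(int ipv6)
{
	return socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, IPPROTO_TCP);
}
#endif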
2852
2853/*
2854 * Initiate (or continue) disconnect.
2855 * If embryonic state, just send reset (once).
2856 * If the ``let data drain'' (SO_LINGER) option is set with a zero linger time, just drop.
2857 * Otherwise (hard), mark socket disconnecting and drop
2858 * current input data; switch states based on user close, and
2859 * send segment to peer (with FIN).
2860 */
2861static struct tcpcb *
2862tcp_disconnect(struct tcpcb *tp)
2863{
2864 struct socket *so = tp->t_inpcb->inp_socket;
2865
2866 if (so->so_rcv.sb_cc != 0 || tp->t_reassqlen != 0) {
2867 return tcp_drop(tp, 0);
2868 }
2869
2870 if (tp->t_state < TCPS_ESTABLISHED) {
2871 tp = tcp_close(tp);
2872 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
2873 tp = tcp_drop(tp, 0);
2874 } else {
2875 soisdisconnecting(so);
2876 sbflush(&so->so_rcv);
2877 tp = tcp_usrclosed(tp);
2878#if MPTCP
2879 /* A reset has been sent but socket exists, do not send FIN */
2880 if ((so->so_flags & SOF_MP_SUBFLOW) &&
2881 (tp) && (tp->t_mpflags & TMPF_RESET)) {
2882 return tp;
2883 }
2884#endif
2885 if (tp) {
2886 (void) tcp_output(tp);
2887 }
2888 }
2889 return tp;
2890}
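
/*
 * Illustrative userspace sketch (not part of this file): the tcp_drop() path
 * above is what a hard close looks like from an application: enabling
 * SO_LINGER with a zero timeout makes close() abort the connection instead of
 * running the normal FIN handshake.
 */
#if 0
#include <sys/socket.h>
#include <unistd.h>

/* Abortively close a connected TCP socket `fd`. */
static int
abort_close(int fd)
{
	struct linger lg = { .l_onoff = 1, .l_linger = 0 };

	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lg, sizeof(lg)) == -1) {
		return -1;
	}
	return close(fd);
}
#endif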
2891
2892/*
2893 * The user issued a close and wishes to trail through the shutdown states:
2894 * if never received SYN, just forget it. If got a SYN from peer,
2895 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2896 * If already got a FIN from peer, then almost done; go to LAST_ACK
2897 * state. In all other cases, we have already sent a FIN to the peer (e.g.
2898 * after PRU_SHUTDOWN), and just have to play the tedious game of waiting
2899 * for the peer to send a FIN or stop responding to keep-alives, etc.
2900 * We can let the user exit from the close as soon as the FIN is acked.
2901 */
2902static struct tcpcb *
2903tcp_usrclosed(struct tcpcb *tp)
2904{
2905 switch (tp->t_state) {
2906 case TCPS_CLOSED:
2907 case TCPS_LISTEN:
2908 case TCPS_SYN_SENT:
2909 tp = tcp_close(tp);
2910 break;
2911
2912 case TCPS_SYN_RECEIVED:
2913 tp->t_flags |= TF_NEEDFIN;
2914 break;
2915
2916 case TCPS_ESTABLISHED:
2917 DTRACE_TCP4(state__change, void, NULL,
2918 struct inpcb *, tp->t_inpcb,
2919 struct tcpcb *, tp,
2920 int32_t, TCPS_FIN_WAIT_1);
2921 tp->t_state = TCPS_FIN_WAIT_1;
2922 TCP_LOG_CONNECTION_SUMMARY(tp);
2923 break;
2924
2925 case TCPS_CLOSE_WAIT:
2926 DTRACE_TCP4(state__change, void, NULL,
2927 struct inpcb *, tp->t_inpcb,
2928 struct tcpcb *, tp,
2929 int32_t, TCPS_LAST_ACK);
2930 tp->t_state = TCPS_LAST_ACK;
2931 TCP_LOG_CONNECTION_SUMMARY(tp);
2932 break;
2933 }
2934 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
2935 soisdisconnected(tp->t_inpcb->inp_socket);
2936 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
2937 if (tp->t_state == TCPS_FIN_WAIT_2) {
2938 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
2939 TCP_CONN_MAXIDLE(tp));
2940 }
2941 }
2942 return tp;
2943}
2944
2945void
2946tcp_in_cksum_stats(u_int32_t len)
2947{
2948 tcpstat.tcps_rcv_swcsum++;
2949 tcpstat.tcps_rcv_swcsum_bytes += len;
2950}
2951
2952void
2953tcp_out_cksum_stats(u_int32_t len)
2954{
2955 tcpstat.tcps_snd_swcsum++;
2956 tcpstat.tcps_snd_swcsum_bytes += len;
2957}
2958
2959void
2960tcp_in6_cksum_stats(u_int32_t len)
2961{
2962 tcpstat.tcps_rcv6_swcsum++;
2963 tcpstat.tcps_rcv6_swcsum_bytes += len;
2964}
2965
2966void
2967tcp_out6_cksum_stats(u_int32_t len)
2968{
2969 tcpstat.tcps_snd6_swcsum++;
2970 tcpstat.tcps_snd6_swcsum_bytes += len;
2971}
2972
2973int
2974tcp_get_mpkl_send_info(struct mbuf *control,
2975 struct so_mpkl_send_info *mpkl_send_info)
2976{
2977 struct cmsghdr *cm;
2978
2979 if (control == NULL || mpkl_send_info == NULL) {
2980 return EINVAL;
2981 }
2982
2983 for (cm = M_FIRST_CMSGHDR(control); cm;
2984 cm = M_NXT_CMSGHDR(control, cm)) {
2985 if (cm->cmsg_len < sizeof(struct cmsghdr) ||
2986 cm->cmsg_len > control->m_len) {
2987 return EINVAL;
2988 }
2989 if (cm->cmsg_level != SOL_SOCKET ||
2990 cm->cmsg_type != SCM_MPKL_SEND_INFO) {
2991 continue;
2992 }
2993 if (cm->cmsg_len != CMSG_LEN(sizeof(struct so_mpkl_send_info))) {
2994 return EINVAL;
2995 }
2996 memcpy(mpkl_send_info, CMSG_DATA(cm),
2997 sizeof(struct so_mpkl_send_info));
2998 return 0;
2999 }
3000 return ENOMSG;
3001}
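
/*
 * Illustrative userspace sketch (not part of this file): the parser above
 * expects a SOL_SOCKET/SCM_MPKL_SEND_INFO control message whose payload is a
 * struct so_mpkl_send_info.  Both names come from the private socket headers
 * in this tree; the payload is passed through opaquely here because its field
 * layout is not shown in this file.
 */
#if 0
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <string.h>

/* Send `buf` on TCP socket `fd` with an attached MPKL send-info record. */
static ssize_t
send_with_mpkl_info(int fd, void *buf, size_t len,
    const struct so_mpkl_send_info *info)
{
	char cbuf[CMSG_SPACE(sizeof(struct so_mpkl_send_info))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg;
	struct cmsghdr *cm;

	memset(&msg, 0, sizeof(msg));
	memset(cbuf, 0, sizeof(cbuf));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	cm = CMSG_FIRSTHDR(&msg);
	cm->cmsg_level = SOL_SOCKET;
	cm->cmsg_type = SCM_MPKL_SEND_INFO;
	cm->cmsg_len = CMSG_LEN(sizeof(struct so_mpkl_send_info));
	memcpy(CMSG_DATA(cm), info, sizeof(*info));

	return sendmsg(fd, &msg, 0);
}
#endif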