]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/tcp_usrreq.c
xnu-7195.101.1.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_usrreq.c
CommitLineData
1c79356b 1/*
f427ee49 2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
9bccf70c 61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
1c79356b
A
62 */
63
1c79356b
A
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <sys/sysctl.h>
69#include <sys/mbuf.h>
1c79356b 70#include <sys/domain.h>
f427ee49 71#if XNU_TARGET_OS_OSX
39236c6e 72#include <sys/kasl.h>
f427ee49 73#endif /* XNU_TARGET_OS_OSX */
cb323159 74#include <sys/priv.h>
1c79356b
A
75#include <sys/socket.h>
76#include <sys/socketvar.h>
77#include <sys/protosw.h>
39236c6e 78#include <sys/syslog.h>
1c79356b
A
79
80#include <net/if.h>
81#include <net/route.h>
6d2010ae 82#include <net/ntstat.h>
fe8ab488 83#include <net/content_filter.h>
cb323159 84#include <net/multi_layer_pkt_log.h>
1c79356b
A
85
86#include <netinet/in.h>
87#include <netinet/in_systm.h>
9bccf70c 88#include <netinet/ip6.h>
1c79356b 89#include <netinet/in_pcb.h>
9bccf70c 90#include <netinet6/in6_pcb.h>
1c79356b
A
91#include <netinet/in_var.h>
92#include <netinet/ip_var.h>
1c79356b 93#include <netinet6/ip6_var.h>
1c79356b
A
94#include <netinet/tcp.h>
95#include <netinet/tcp_fsm.h>
96#include <netinet/tcp_seq.h>
97#include <netinet/tcp_timer.h>
98#include <netinet/tcp_var.h>
99#include <netinet/tcpip.h>
5ba3f43e 100#include <netinet/tcp_cc.h>
cb323159 101#include <netinet/tcp_log.h>
39236c6e 102#include <mach/sdt.h>
1c79356b
A
103#if TCPDEBUG
104#include <netinet/tcp_debug.h>
105#endif
39236c6e
A
106#if MPTCP
107#include <netinet/mptcp_var.h>
108#endif /* MPTCP */
1c79356b
A
109
110#if IPSEC
111#include <netinet6/ipsec.h>
112#endif /*IPSEC*/
113
39236c6e
A
114#if FLOW_DIVERT
115#include <netinet/flow_divert.h>
116#endif /* FLOW_DIVERT */
117
6d2010ae
A
118errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
119
0a7de745 120int tcp_sysctl_info(struct sysctl_oid *, void *, int, struct sysctl_req *);
3e170ce0
A
121static void tcp_connection_fill_info(struct tcpcb *tp,
122 struct tcp_connection_info *tci);
cb323159 123static int tcp_get_mpkl_send_info(struct mbuf *, struct so_mpkl_send_info *);
6d2010ae 124
1c79356b
A
125/*
126 * TCP protocol interface to socket abstraction.
127 */
0a7de745
A
128static int tcp_attach(struct socket *, struct proc *);
129static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
0a7de745
A
130static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
131static int tcp6_usr_connect(struct socket *, struct sockaddr *,
132 struct proc *);
39037602
A
133static struct tcpcb *tcp_disconnect(struct tcpcb *);
134static struct tcpcb *tcp_usrclosed(struct tcpcb *);
316670eb
A
135extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
136
/*
 * Trace helpers shared by the pru_*() handlers.  They rely on the
 * enclosing function having locals named `so' and `tp':
 *
 *   TCPDEBUG0       declares `ostate', the saved connection state
 *   TCPDEBUG1()     snapshots tp->t_state (0 when tp is NULL) into ostate
 *   TCPDEBUG2(req)  emits a TA_USER trace record for request `req' when
 *                   tp is non-NULL and SO_DEBUG is set on the socket
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture an `else' that follows the call site.
 * All three expand to nothing when TCPDEBUG is not configured.
 */
#if TCPDEBUG
#define TCPDEBUG0       int ostate = 0
#define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
#define TCPDEBUG2(req)  do { \
	if (tp && (so->so_options & SO_DEBUG)) { \
	        tcp_trace(TA_USER, ostate, tp, 0, 0, req); \
	} \
} while (0)
#else
#define TCPDEBUG0
#define TCPDEBUG1()
#define TCPDEBUG2(req)
#endif
147
39236c6e
A
148SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info,
149 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
0a7de745 150 0, 0, tcp_sysctl_info, "S", "TCP info per tuple");
6d2010ae 151
1c79356b
A
152/*
153 * TCP attaches to socket via pru_attach(), reserving space,
154 * and an internet control block.
2d21ac55
A
155 *
156 * Returns: 0 Success
157 * EISCONN
158 * tcp_attach:ENOBUFS
159 * tcp_attach:ENOMEM
160 * tcp_attach:??? [IPSEC specific]
1c79356b
A
161 */
162static int
2d21ac55 163tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
1c79356b 164{
1c79356b
A
165 int error;
166 struct inpcb *inp = sotoinpcb(so);
167 struct tcpcb *tp = 0;
168 TCPDEBUG0;
169
170 TCPDEBUG1();
171 if (inp) {
172 error = EISCONN;
173 goto out;
174 }
39037602 175
1c79356b 176 error = tcp_attach(so, p);
0a7de745 177 if (error) {
1c79356b 178 goto out;
0a7de745 179 }
1c79356b 180
0a7de745 181 if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
f427ee49 182 so->so_linger = (short)(TCP_LINGERTIME * hz);
0a7de745 183 }
1c79356b
A
184 tp = sototcpcb(so);
185out:
186 TCPDEBUG2(PRU_ATTACH);
1c79356b
A
187 return error;
188}
189
190/*
191 * pru_detach() detaches the TCP protocol from the socket.
192 * If the protocol state is non-embryonic, then can't
193 * do this directly: have to initiate a pru_disconnect(),
194 * which may finish later; embryonic TCB's can just
195 * be discarded here.
196 */
197static int
198tcp_usr_detach(struct socket *so)
199{
1c79356b
A
200 int error = 0;
201 struct inpcb *inp = sotoinpcb(so);
202 struct tcpcb *tp;
203 TCPDEBUG0;
204
91447636 205 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
0a7de745 206 return EINVAL; /* XXX */
1c79356b 207 }
5ba3f43e 208 socket_lock_assert_owned(so);
1c79356b
A
209 tp = intotcpcb(inp);
210 /* In case we got disconnected from the peer */
0a7de745 211 if (tp == NULL) {
39236c6e 212 goto out;
0a7de745 213 }
1c79356b 214 TCPDEBUG1();
6d2010ae
A
215
216 calculate_tcp_clock();
217
1c79356b
A
218 tp = tcp_disconnect(tp);
219out:
220 TCPDEBUG2(PRU_DETACH);
1c79356b
A
221 return error;
222}
223
/*
 * Shared prologue and epilogue for the pru_*() handlers.  Both rely on
 * the enclosing function declaring `so', `inp', `tp' and `error'.
 *
 * COMMON_START_ALLOW_FLOW_DIVERT(allow):
 *	- returns EINVAL if there is no PCB or the PCB is dead;
 *	- with NECP, returns EPROTOTYPE when `allow' is false and the
 *	  socket should use flow divert;
 *	- loads `tp' from the PCB, snapshots the debug state and calls
 *	  calculate_tcp_clock().
 *
 * COMMON_START() is the variant that does not allow flow divert.
 *
 * COMMON_END(req) supplies the `out:' label, emits the debug trace and
 * returns `error'.  The trailing `goto out' keeps the label referenced
 * in handlers that never jump to it.
 */
#if NECP
#define TCP_USR_REJECT_FLOW_DIVERT(allow) \
	if (!(allow) && necp_socket_should_use_flow_divert(inp)) \
	        return (EPROTOTYPE)
#else /* !NECP */
#define TCP_USR_REJECT_FLOW_DIVERT(allow)
#endif /* !NECP */

#define COMMON_START_ALLOW_FLOW_DIVERT(allow)   TCPDEBUG0; \
do { \
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) \
	        return (EINVAL); \
	TCP_USR_REJECT_FLOW_DIVERT(allow); \
	tp = intotcpcb(inp); \
	TCPDEBUG1(); \
	calculate_tcp_clock(); \
} while (0)

#define COMMON_START()  COMMON_START_ALLOW_FLOW_DIVERT(false)
#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
1c79356b
A
248
249
250/*
251 * Give the socket an address.
2d21ac55
A
252 *
253 * Returns: 0 Success
254 * EINVAL Invalid argument [COMMON_START]
255 * EAFNOSUPPORT Address family not supported
256 * in_pcbbind:EADDRNOTAVAIL Address not available.
257 * in_pcbbind:EINVAL Invalid argument
258 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
259 * in_pcbbind:EACCES Permission denied
260 * in_pcbbind:EADDRINUSE Address in use
261 * in_pcbbind:EAGAIN Resource unavailable, try again
262 * in_pcbbind:EPERM Operation not permitted
1c79356b
A
263 */
264static int
265tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
266{
1c79356b
A
267 int error = 0;
268 struct inpcb *inp = sotoinpcb(so);
269 struct tcpcb *tp;
270 struct sockaddr_in *sinp;
271
f427ee49 272 COMMON_START_ALLOW_FLOW_DIVERT(true);
1c79356b 273
2d21ac55
A
274 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
275 error = EAFNOSUPPORT;
276 goto out;
277 }
278
1c79356b
A
279 /*
280 * Must check for multicast addresses and disallow binding
281 * to them.
282 */
316670eb 283 sinp = (struct sockaddr_in *)(void *)nam;
1c79356b
A
284 if (sinp->sin_family == AF_INET &&
285 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
286 error = EAFNOSUPPORT;
287 goto out;
288 }
289 error = in_pcbbind(inp, nam, p);
0a7de745 290 if (error) {
1c79356b 291 goto out;
0a7de745 292 }
5ba3f43e
A
293
294#if NECP
295 /* Update NECP client with bind result if not in middle of connect */
296 if ((inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS) &&
0a7de745 297 !uuid_is_null(inp->necp_client_uuid)) {
5ba3f43e
A
298 socket_unlock(so, 0);
299 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
300 socket_lock(so, 0);
301 }
302#endif /* NECP */
303
1c79356b 304 COMMON_END(PRU_BIND);
1c79356b
A
305}
306
1c79356b
A
307static int
308tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
309{
1c79356b
A
310 int error = 0;
311 struct inpcb *inp = sotoinpcb(so);
312 struct tcpcb *tp;
313 struct sockaddr_in6 *sin6p;
314
f427ee49 315 COMMON_START_ALLOW_FLOW_DIVERT(true);
1c79356b 316
2d21ac55
A
317 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
318 error = EAFNOSUPPORT;
319 goto out;
320 }
321
1c79356b
A
322 /*
323 * Must check for multicast addresses and disallow binding
324 * to them.
325 */
316670eb 326 sin6p = (struct sockaddr_in6 *)(void *)nam;
1c79356b
A
327 if (sin6p->sin6_family == AF_INET6 &&
328 IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
329 error = EAFNOSUPPORT;
330 goto out;
331 }
332 inp->inp_vflag &= ~INP_IPV4;
333 inp->inp_vflag |= INP_IPV6;
55e303ae 334 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
0a7de745 335 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
1c79356b 336 inp->inp_vflag |= INP_IPV4;
0a7de745 337 } else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
1c79356b
A
338 struct sockaddr_in sin;
339
340 in6_sin6_2_sin(&sin, sin6p);
341 inp->inp_vflag |= INP_IPV4;
342 inp->inp_vflag &= ~INP_IPV6;
343 error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
344 goto out;
345 }
346 }
347 error = in6_pcbbind(inp, nam, p);
0a7de745 348 if (error) {
9bccf70c 349 goto out;
0a7de745 350 }
1c79356b
A
351 COMMON_END(PRU_BIND);
352}
1c79356b
A
353
354/*
355 * Prepare to accept connections.
2d21ac55
A
356 *
357 * Returns: 0 Success
358 * EINVAL [COMMON_START]
359 * in_pcbbind:EADDRNOTAVAIL Address not available.
360 * in_pcbbind:EINVAL Invalid argument
361 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
362 * in_pcbbind:EACCES Permission denied
363 * in_pcbbind:EADDRINUSE Address in use
364 * in_pcbbind:EAGAIN Resource unavailable, try again
365 * in_pcbbind:EPERM Operation not permitted
1c79356b
A
366 */
367static int
368tcp_usr_listen(struct socket *so, struct proc *p)
369{
1c79356b
A
370 int error = 0;
371 struct inpcb *inp = sotoinpcb(so);
372 struct tcpcb *tp;
373
2a1bd2d3 374 COMMON_START_ALLOW_FLOW_DIVERT(true);
0a7de745 375 if (inp->inp_lport == 0) {
39236c6e 376 error = in_pcbbind(inp, NULL, p);
0a7de745
A
377 }
378 if (error == 0) {
1c79356b 379 tp->t_state = TCPS_LISTEN;
0a7de745 380 }
cb323159 381 TCP_LOG_LISTEN(tp, error);
1c79356b
A
382 COMMON_END(PRU_LISTEN);
383}
384
1c79356b
A
385static int
386tcp6_usr_listen(struct socket *so, struct proc *p)
387{
1c79356b
A
388 int error = 0;
389 struct inpcb *inp = sotoinpcb(so);
390 struct tcpcb *tp;
391
2a1bd2d3 392 COMMON_START_ALLOW_FLOW_DIVERT(true);
1c79356b
A
393 if (inp->inp_lport == 0) {
394 inp->inp_vflag &= ~INP_IPV4;
0a7de745 395 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
1c79356b 396 inp->inp_vflag |= INP_IPV4;
0a7de745 397 }
39236c6e 398 error = in6_pcbbind(inp, NULL, p);
1c79356b 399 }
0a7de745 400 if (error == 0) {
1c79356b 401 tp->t_state = TCPS_LISTEN;
0a7de745 402 }
cb323159 403 TCP_LOG_LISTEN(tp, error);
1c79356b
A
404 COMMON_END(PRU_LISTEN);
405}
1c79356b 406
3e170ce0
A
407static int
408tcp_connect_complete(struct socket *so)
409{
410 struct tcpcb *tp = sototcpcb(so);
5ba3f43e 411 struct inpcb *inp = sotoinpcb(so);
3e170ce0
A
412 int error = 0;
413
414 /* TFO delays the tcp_output until later, when the app calls write() */
415 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
f427ee49 416 if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, 0, NULL, NULL, NULL, NULL)) {
cb323159 417 TCP_LOG_DROP_NECP(NULL, NULL, tp, true);
0a7de745
A
418 return EHOSTUNREACH;
419 }
3e170ce0
A
420
421 /* Initialize enough state so that we can actually send data */
422 tcp_mss(tp, -1, IFSCOPE_NONE);
423 tp->snd_wnd = tp->t_maxseg;
d9a64523 424 tp->max_sndwnd = tp->snd_wnd;
3e170ce0
A
425 } else {
426 error = tcp_output(tp);
427 }
428
a39ff7e2
A
429#if NECP
430 /* Update NECP client with connected five-tuple */
431 if (error == 0 && !uuid_is_null(inp->necp_client_uuid)) {
432 socket_unlock(so, 0);
433 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
434 socket_lock(so, 0);
435 }
436#endif /* NECP */
437
0a7de745 438 return error;
3e170ce0
A
439}
440
1c79356b
A
441/*
442 * Initiate connection to peer.
443 * Create a template for use in transmissions on this connection.
444 * Enter SYN_SENT state, and mark socket as connecting.
445 * Start keep-alive timer, and seed output sequence space.
446 * Send initial segment on connection.
447 */
448static int
449tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
450{
1c79356b
A
451 int error = 0;
452 struct inpcb *inp = sotoinpcb(so);
453 struct tcpcb *tp;
454 struct sockaddr_in *sinp;
455
2d21ac55 456 TCPDEBUG0;
39236c6e 457 if (inp == NULL) {
2d21ac55 458 return EINVAL;
39236c6e 459 } else if (inp->inp_state == INPCB_STATE_DEAD) {
2d21ac55
A
460 if (so->so_error) {
461 error = so->so_error;
462 so->so_error = 0;
463 return error;
0a7de745 464 } else {
2d21ac55 465 return EINVAL;
0a7de745 466 }
2d21ac55 467 }
fe8ab488 468#if NECP
cb323159
A
469#if CONTENT_FILTER
470 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
471 if (error != 0) {
472 return error;
473 }
474#endif /* CONTENT_FILTER */
39236c6e 475#if FLOW_DIVERT
cb323159 476 if (necp_socket_should_use_flow_divert(inp)) {
f427ee49
A
477 error = flow_divert_pcb_init(so);
478 if (error == 0) {
479 error = flow_divert_connect_out(so, nam, p);
39236c6e
A
480 }
481 return error;
482 }
483#endif /* FLOW_DIVERT */
fe8ab488 484#endif /* NECP */
2d21ac55
A
485 tp = intotcpcb(inp);
486 TCPDEBUG1();
1c79356b 487
6d2010ae
A
488 calculate_tcp_clock();
489
2d21ac55
A
490 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
491 error = EAFNOSUPPORT;
492 goto out;
493 }
1c79356b
A
494 /*
495 * Must disallow TCP ``connections'' to multicast addresses.
496 */
316670eb 497 sinp = (struct sockaddr_in *)(void *)nam;
1c79356b
A
498 if (sinp->sin_family == AF_INET
499 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
500 error = EAFNOSUPPORT;
501 goto out;
502 }
503
0a7de745 504 if ((error = tcp_connect(tp, nam, p)) != 0) {
cb323159 505 TCP_LOG_CONNECT(tp, true, error);
1c79356b 506 goto out;
0a7de745 507 }
3e170ce0
A
508
509 error = tcp_connect_complete(so);
510
cb323159
A
511 TCP_LOG_CONNECT(tp, true, error);
512
1c79356b
A
513 COMMON_END(PRU_CONNECT);
514}
515
39236c6e
A
516static int
517tcp_usr_connectx_common(struct socket *so, int af,
813fb2f6 518 struct sockaddr *src, struct sockaddr *dst,
3e170ce0
A
519 struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
520 uint32_t flags, void *arg, uint32_t arglen, struct uio *auio,
521 user_ssize_t *bytes_written)
39236c6e 522{
5ba3f43e 523#pragma unused(aid, flags, arg, arglen)
39236c6e 524 struct inpcb *inp = sotoinpcb(so);
5ba3f43e 525 int error = 0;
3e170ce0 526 user_ssize_t datalen = 0;
39236c6e 527
0a7de745
A
528 if (inp == NULL) {
529 return EINVAL;
530 }
39236c6e 531
813fb2f6 532 VERIFY(dst != NULL);
39236c6e 533
5ba3f43e
A
534 ASSERT(!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS));
535 inp->inp_flags2 |= INP2_CONNECT_IN_PROGRESS;
536
fe8ab488 537#if NECP
813fb2f6 538 inp_update_necp_policy(inp, src, dst, ifscope);
fe8ab488 539#endif /* NECP */
3e170ce0
A
540
541 if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) &&
0a7de745 542 (tcp_fastopen & TCP_FASTOPEN_CLIENT)) {
3e170ce0 543 sototcpcb(so)->t_flagsext |= TF_FASTOPEN;
0a7de745 544 }
3e170ce0 545
5ba3f43e
A
546 /* bind socket to the specified interface, if requested */
547 if (ifscope != IFSCOPE_NONE &&
0a7de745 548 (error = inp_bindif(inp, ifscope, NULL)) != 0) {
5ba3f43e
A
549 goto done;
550 }
39236c6e 551
5ba3f43e
A
552 /* if source address and/or port is specified, bind to it */
553 if (src != NULL) {
0a7de745 554 error = sobindlock(so, src, 0); /* already locked */
5ba3f43e
A
555 if (error != 0) {
556 goto done;
39236c6e
A
557 }
558 }
559
560 switch (af) {
561 case AF_INET:
813fb2f6 562 error = tcp_usr_connect(so, dst, p);
39236c6e 563 break;
39236c6e 564 case AF_INET6:
813fb2f6 565 error = tcp6_usr_connect(so, dst, p);
39236c6e 566 break;
39236c6e
A
567 default:
568 VERIFY(0);
569 /* NOTREACHED */
570 }
571
5ba3f43e
A
572 if (error != 0) {
573 goto done;
574 }
3e170ce0
A
575
576 /* if there is data, copy it */
577 if (auio != NULL) {
578 socket_unlock(so, 0);
579
580 VERIFY(bytes_written != NULL);
581
582 datalen = uio_resid(auio);
583 error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
0a7de745 584 (uio_t)auio, NULL, NULL, 0);
3e170ce0
A
585 socket_lock(so, 0);
586
0a7de745 587 if (error == 0 || error == EWOULDBLOCK) {
3e170ce0 588 *bytes_written = datalen - uio_resid(auio);
0a7de745 589 }
3e170ce0
A
590
591 /*
592 * sosend returns EWOULDBLOCK if it's a non-blocking
593 * socket or a timeout occured (this allows to return
594 * the amount of queued data through sendit()).
595 *
596 * However, connectx() returns EINPROGRESS in case of a
597 * blocking socket. So we change the return value here.
598 */
0a7de745 599 if (error == EWOULDBLOCK) {
3e170ce0 600 error = EINPROGRESS;
0a7de745 601 }
3e170ce0
A
602 }
603
0a7de745 604 if (error == 0 && pcid != NULL) {
3e170ce0 605 *pcid = 1; /* there is only one connection in regular TCP */
0a7de745 606 }
5ba3f43e 607done:
0a7de745 608 if (error && error != EINPROGRESS) {
a39ff7e2 609 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
0a7de745 610 }
a39ff7e2 611
5ba3f43e 612 inp->inp_flags2 &= ~INP2_CONNECT_IN_PROGRESS;
0a7de745 613 return error;
39236c6e
A
614}
615
616static int
813fb2f6
A
617tcp_usr_connectx(struct socket *so, struct sockaddr *src,
618 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
619 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
620 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
39236c6e 621{
0a7de745
A
622 return tcp_usr_connectx_common(so, AF_INET, src, dst, p, ifscope, aid,
623 pcid, flags, arg, arglen, uio, bytes_written);
39236c6e
A
624}
625
1c79356b
A
626static int
627tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
628{
1c79356b
A
629 int error = 0;
630 struct inpcb *inp = sotoinpcb(so);
631 struct tcpcb *tp;
632 struct sockaddr_in6 *sin6p;
633
39236c6e
A
634 TCPDEBUG0;
635 if (inp == NULL) {
636 return EINVAL;
637 } else if (inp->inp_state == INPCB_STATE_DEAD) {
638 if (so->so_error) {
639 error = so->so_error;
640 so->so_error = 0;
641 return error;
0a7de745 642 } else {
39236c6e 643 return EINVAL;
0a7de745 644 }
39236c6e 645 }
fe8ab488 646#if NECP
cb323159
A
647#if CONTENT_FILTER
648 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
649 if (error != 0) {
650 return error;
651 }
652#endif /* CONTENT_FILTER */
39236c6e 653#if FLOW_DIVERT
cb323159 654 if (necp_socket_should_use_flow_divert(inp)) {
f427ee49
A
655 error = flow_divert_pcb_init(so);
656 if (error == 0) {
657 error = flow_divert_connect_out(so, nam, p);
39236c6e
A
658 }
659 return error;
660 }
661#endif /* FLOW_DIVERT */
fe8ab488
A
662#endif /* NECP */
663
39236c6e
A
664 tp = intotcpcb(inp);
665 TCPDEBUG1();
666
667 calculate_tcp_clock();
1c79356b 668
2d21ac55
A
669 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
670 error = EAFNOSUPPORT;
671 goto out;
672 }
673
1c79356b
A
674 /*
675 * Must disallow TCP ``connections'' to multicast addresses.
676 */
316670eb 677 sin6p = (struct sockaddr_in6 *)(void *)nam;
1c79356b
A
678 if (sin6p->sin6_family == AF_INET6
679 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
680 error = EAFNOSUPPORT;
681 goto out;
682 }
9bccf70c
A
683
684 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
1c79356b
A
685 struct sockaddr_in sin;
686
0a7de745 687 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
c3c9b80d
A
688 error = EINVAL;
689 goto out;
0a7de745 690 }
9bccf70c 691
1c79356b 692 in6_sin6_2_sin(&sin, sin6p);
c3c9b80d
A
693 /*
694 * Must disallow TCP ``connections'' to multicast addresses.
695 */
696 if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
697 error = EAFNOSUPPORT;
698 goto out;
699 }
1c79356b
A
700 inp->inp_vflag |= INP_IPV4;
701 inp->inp_vflag &= ~INP_IPV6;
0a7de745 702 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
cb323159 703 TCP_LOG_CONNECT(tp, true, error);
1c79356b 704 goto out;
0a7de745 705 }
3e170ce0
A
706
707 error = tcp_connect_complete(so);
1c79356b
A
708 goto out;
709 }
9bccf70c
A
710 inp->inp_vflag &= ~INP_IPV4;
711 inp->inp_vflag |= INP_IPV6;
0a7de745 712 if ((error = tcp6_connect(tp, nam, p)) != 0) {
cb323159 713 TCP_LOG_CONNECT(tp, true, error);
1c79356b 714 goto out;
0a7de745 715 }
3e170ce0
A
716
717 error = tcp_connect_complete(so);
cb323159
A
718
719 TCP_LOG_CONNECT(tp, true, error);
720
1c79356b
A
721 COMMON_END(PRU_CONNECT);
722}
39236c6e
A
723
724static int
813fb2f6
A
725tcp6_usr_connectx(struct socket *so, struct sockaddr*src,
726 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
727 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
728 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
39236c6e 729{
0a7de745
A
730 return tcp_usr_connectx_common(so, AF_INET6, src, dst, p, ifscope, aid,
731 pcid, flags, arg, arglen, uio, bytes_written);
39236c6e 732}
1c79356b
A
733
734/*
735 * Initiate disconnect from peer.
736 * If connection never passed embryonic stage, just drop;
737 * else if don't need to let data drain, then can just drop anyways,
738 * else have to begin TCP shutdown process: mark socket disconnecting,
739 * drain unread data, state switch to reflect user close, and
740 * send segment (e.g. FIN) to peer. Socket will be really disconnected
741 * when peer sends FIN and acks ours.
742 *
743 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
744 */
745static int
746tcp_usr_disconnect(struct socket *so)
747{
1c79356b
A
748 int error = 0;
749 struct inpcb *inp = sotoinpcb(so);
750 struct tcpcb *tp;
39236c6e 751
5ba3f43e 752 socket_lock_assert_owned(so);
1c79356b 753 COMMON_START();
0a7de745
A
754 /* In case we got disconnected from the peer */
755 if (tp == NULL) {
39236c6e 756 goto out;
0a7de745 757 }
1c79356b
A
758 tp = tcp_disconnect(tp);
759 COMMON_END(PRU_DISCONNECT);
760}
761
39236c6e
A
762/*
763 * User-protocol pru_disconnectx callback.
764 */
765static int
3e170ce0 766tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
767{
768#pragma unused(cid)
0a7de745
A
769 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
770 return EINVAL;
771 }
39236c6e 772
0a7de745 773 return tcp_usr_disconnect(so);
39236c6e
A
774}
775
1c79356b
A
776/*
777 * Accept a connection. Essentially all the work is
778 * done at higher levels; just return the address
779 * of the peer, storing through addr.
780 */
781static int
782tcp_usr_accept(struct socket *so, struct sockaddr **nam)
783{
1c79356b
A
784 int error = 0;
785 struct inpcb *inp = sotoinpcb(so);
9bccf70c
A
786 struct tcpcb *tp = NULL;
787 TCPDEBUG0;
1c79356b 788
39236c6e
A
789 in_getpeeraddr(so, nam);
790
9bccf70c
A
791 if (so->so_state & SS_ISDISCONNECTED) {
792 error = ECONNABORTED;
793 goto out;
794 }
0a7de745
A
795 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
796 return EINVAL;
797 }
fe8ab488 798#if NECP
0a7de745
A
799 else if (necp_socket_should_use_flow_divert(inp)) {
800 return EPROTOTYPE;
801 }
cb323159 802
fe8ab488 803#endif /* NECP */
39236c6e 804
9bccf70c
A
805 tp = intotcpcb(inp);
806 TCPDEBUG1();
6d2010ae 807
cb323159
A
808 TCP_LOG_ACCEPT(tp, 0);
809
6d2010ae
A
810 calculate_tcp_clock();
811
1c79356b
A
812 COMMON_END(PRU_ACCEPT);
813}
814
1c79356b
A
815static int
816tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
817{
1c79356b
A
818 int error = 0;
819 struct inpcb *inp = sotoinpcb(so);
9bccf70c
A
820 struct tcpcb *tp = NULL;
821 TCPDEBUG0;
1c79356b 822
9bccf70c
A
823 if (so->so_state & SS_ISDISCONNECTED) {
824 error = ECONNABORTED;
825 goto out;
826 }
0a7de745
A
827 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
828 return EINVAL;
829 }
fe8ab488 830#if NECP
0a7de745
A
831 else if (necp_socket_should_use_flow_divert(inp)) {
832 return EPROTOTYPE;
833 }
cb323159 834
fe8ab488 835#endif /* NECP */
39236c6e 836
9bccf70c
A
837 tp = intotcpcb(inp);
838 TCPDEBUG1();
6d2010ae 839
cb323159
A
840 TCP_LOG_ACCEPT(tp, 0);
841
6d2010ae
A
842 calculate_tcp_clock();
843
1c79356b
A
844 in6_mapped_peeraddr(so, nam);
845 COMMON_END(PRU_ACCEPT);
846}
2d21ac55 847
1c79356b
A
848/*
849 * Mark the connection as being incapable of further output.
2d21ac55
A
850 *
851 * Returns: 0 Success
852 * EINVAL [COMMON_START]
853 * tcp_output:EADDRNOTAVAIL
854 * tcp_output:ENOBUFS
855 * tcp_output:EMSGSIZE
856 * tcp_output:EHOSTUNREACH
857 * tcp_output:ENETUNREACH
858 * tcp_output:ENETDOWN
859 * tcp_output:ENOMEM
860 * tcp_output:EACCES
861 * tcp_output:EMSGSIZE
862 * tcp_output:ENOBUFS
863 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
1c79356b
A
864 */
865static int
866tcp_usr_shutdown(struct socket *so)
867{
1c79356b
A
868 int error = 0;
869 struct inpcb *inp = sotoinpcb(so);
870 struct tcpcb *tp;
871
39236c6e 872 TCPDEBUG0;
0a7de745
A
873 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
874 return EINVAL;
875 }
39236c6e 876
1c79356b 877 socantsendmore(so);
39236c6e 878
0a7de745 879 /*
39236c6e
A
880 * In case we got disconnected from the peer, or if this is
881 * a socket that is to be flow-diverted (but not yet).
882 */
883 tp = intotcpcb(inp);
884 TCPDEBUG1();
fe8ab488
A
885
886 if (tp == NULL
887#if NECP
0a7de745 888 || (necp_socket_should_use_flow_divert(inp))
fe8ab488 889#endif /* NECP */
0a7de745
A
890 ) {
891 if (tp != NULL) {
39236c6e 892 error = EPROTOTYPE;
0a7de745 893 }
39236c6e
A
894 goto out;
895 }
896
897 calculate_tcp_clock();
898
1c79356b 899 tp = tcp_usrclosed(tp);
39236c6e
A
900#if MPTCP
901 /* A reset has been sent but socket exists, do not send FIN */
902 if ((so->so_flags & SOF_MP_SUBFLOW) &&
903 (tp) && (tp->t_mpflags & TMPF_RESET)) {
904 goto out;
905 }
906#endif
fe8ab488
A
907#if CONTENT_FILTER
908 /* Don't send a FIN yet */
909 if (tp && !(so->so_state & SS_ISDISCONNECTED) &&
0a7de745 910 cfil_sock_data_pending(&so->so_snd)) {
fe8ab488 911 goto out;
0a7de745 912 }
fe8ab488 913#endif /* CONTENT_FILTER */
0a7de745 914 if (tp) {
1c79356b 915 error = tcp_output(tp);
0a7de745 916 }
1c79356b
A
917 COMMON_END(PRU_SHUTDOWN);
918}
919
920/*
921 * After a receive, possibly send window update to peer.
922 */
923static int
f427ee49 924tcp_usr_rcvd(struct socket *so, int flags)
1c79356b 925{
1c79356b
A
926 int error = 0;
927 struct inpcb *inp = sotoinpcb(so);
928 struct tcpcb *tp;
929
930 COMMON_START();
0a7de745
A
931 /* In case we got disconnected from the peer */
932 if (tp == NULL) {
39236c6e 933 goto out;
0a7de745 934 }
316670eb
A
935 tcp_sbrcv_trim(tp, &so->so_rcv);
936
f427ee49
A
937 if (flags & MSG_WAITALL) {
938 tp->t_flags |= TF_ACKNOW;
939 }
940
3e170ce0
A
941 /*
942 * This tcp_output is solely there to trigger window-updates.
943 * However, we really do not want these window-updates while we
944 * are still in SYN_SENT or SYN_RECEIVED.
945 */
0a7de745 946 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
3e170ce0 947 tcp_output(tp);
0a7de745 948 }
fe8ab488
A
949
950#if CONTENT_FILTER
951 cfil_sock_buf_update(&so->so_rcv);
952#endif /* CONTENT_FILTER */
953
1c79356b
A
954 COMMON_END(PRU_RCVD);
955}
956
957/*
958 * Do a send by putting data in output queue and updating urgent
9bccf70c
A
959 * marker if URG set. Possibly send more data. Unlike the other
960 * pru_*() routines, the mbuf chains are our responsibility. We
961 * must either enqueue them or free them. The other pru_* routines
962 * generally are caller-frees.
2d21ac55
A
963 *
964 * Returns: 0 Success
965 * ECONNRESET
966 * EINVAL
967 * ENOBUFS
968 * tcp_connect:EADDRINUSE Address in use
969 * tcp_connect:EADDRNOTAVAIL Address not available.
970 * tcp_connect:EINVAL Invalid argument
971 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
972 * tcp_connect:EACCES Permission denied
973 * tcp_connect:EAGAIN Resource unavailable, try again
974 * tcp_connect:EPERM Operation not permitted
975 * tcp_output:EADDRNOTAVAIL
976 * tcp_output:ENOBUFS
977 * tcp_output:EMSGSIZE
978 * tcp_output:EHOSTUNREACH
979 * tcp_output:ENETUNREACH
980 * tcp_output:ENETDOWN
981 * tcp_output:ENOMEM
982 * tcp_output:EACCES
983 * tcp_output:EMSGSIZE
984 * tcp_output:ENOBUFS
985 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
986 * tcp6_connect:??? [IPV6 only]
1c79356b
A
987 */
988static int
39236c6e 989tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
0a7de745 990 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1c79356b 991{
1c79356b
A
992 int error = 0;
993 struct inpcb *inp = sotoinpcb(so);
994 struct tcpcb *tp;
cb323159
A
995 uint32_t mpkl_len = 0; /* length of mbuf chain */
996 uint32_t mpkl_seq; /* sequence number where new data is added */
997 struct so_mpkl_send_info mpkl_send_info = {};
998
1c79356b 999 int isipv6;
9bccf70c 1000 TCPDEBUG0;
1c79356b 1001
fe8ab488
A
1002 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD
1003#if NECP
0a7de745 1004 || (necp_socket_should_use_flow_divert(inp))
fe8ab488 1005#endif /* NECP */
0a7de745 1006 ) {
9bccf70c
A
1007 /*
1008 * OOPS! we lost a race, the TCP session got reset after
1009 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
1010 * network interrupt in the non-splnet() section of sosend().
1011 */
0a7de745 1012 if (m != NULL) {
1c79356b 1013 m_freem(m);
0a7de745 1014 }
39236c6e 1015 if (control != NULL) {
9bccf70c 1016 m_freem(control);
39236c6e
A
1017 control = NULL;
1018 }
fe8ab488 1019
0a7de745
A
1020 if (inp == NULL) {
1021 error = ECONNRESET; /* XXX EPIPE? */
1022 } else {
fe8ab488 1023 error = EPROTOTYPE;
0a7de745 1024 }
9bccf70c
A
1025 tp = NULL;
1026 TCPDEBUG1();
1c79356b
A
1027 goto out;
1028 }
1c79356b 1029 isipv6 = nam && nam->sa_family == AF_INET6;
9bccf70c
A
1030 tp = intotcpcb(inp);
1031 TCPDEBUG1();
6d2010ae
A
1032
1033 calculate_tcp_clock();
1034
cb323159
A
1035 if (net_mpklog_enabled) {
1036 mpkl_seq = tp->snd_una + so->so_snd.sb_cc;
1037 if (m) {
1038 mpkl_len = m_length(m);
1039 }
1040 if (so->so_flags1 & SOF1_MPKL_SEND_INFO) {
1041 uuid_copy(mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
1042 mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
1043 }
1044 }
1045
39236c6e 1046 if (control != NULL) {
cb323159
A
1047 if (control->m_len > 0 && net_mpklog_enabled) {
1048 error = tcp_get_mpkl_send_info(control, &mpkl_send_info);
5ba3f43e 1049 /*
cb323159
A
1050 * Intepretation of the returned code:
1051 * 0: client wants us to use value passed in SCM_MPKL_SEND_INFO
1052 * 1: SCM_MPKL_SEND_INFO was not present
1053 * other: failure
39236c6e 1054 */
cb323159
A
1055 if (error != 0 && error != ENOMSG) {
1056 m_freem(control);
1057 if (m != NULL) {
1058 m_freem(m);
1059 }
1060 control = NULL;
1061 m = NULL;
1062 goto out;
0a7de745 1063 }
9bccf70c 1064 }
cb323159
A
1065 /*
1066 * Silently drop unsupported ancillary data messages
1067 */
1068 m_freem(control);
1069 control = NULL;
9bccf70c 1070 }
39236c6e 1071
39236c6e
A
1072 /* MPTCP sublow socket buffers must not be compressed */
1073 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) ||
1074 (so->so_snd.sb_flags & SB_NOCOMPRESS));
1075
0a7de745 1076 if (!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
f427ee49 1077 sbappendstream(&so->so_snd, m);
39236c6e 1078
1c79356b
A
1079 if (nam && tp->t_state < TCPS_SYN_SENT) {
1080 /*
1081 * Do implied connect if not yet connected,
1082 * initialize window to default value, and
1083 * initialize maxseg/maxopd using peer's cached
1084 * MSS.
1085 */
0a7de745 1086 if (isipv6) {
1c79356b 1087 error = tcp6_connect(tp, nam, p);
f427ee49
A
1088 } else {
1089 error = tcp_connect(tp, nam, p);
1090 }
0a7de745 1091 if (error) {
cb323159 1092 TCP_LOG_CONNECT(tp, true, error);
1c79356b 1093 goto out;
0a7de745 1094 }
1c79356b 1095 tp->snd_wnd = TTCP_CLIENT_SND_WND;
d9a64523 1096 tp->max_sndwnd = tp->snd_wnd;
c910b4d9 1097 tcp_mss(tp, -1, IFSCOPE_NONE);
cb323159
A
1098
1099 TCP_LOG_CONNECT(tp, true, error);
1100
1101 /* The sequence number of the data is past the SYN */
1102 mpkl_seq = tp->iss + 1;
1c79356b
A
1103 }
1104
1105 if (flags & PRUS_EOF) {
1106 /*
1107 * Close the send side of the connection after
1108 * the data is sent.
1109 */
1110 socantsendmore(so);
1111 tp = tcp_usrclosed(tp);
1112 }
1113 if (tp != NULL) {
0a7de745 1114 if (flags & PRUS_MORETOCOME) {
1c79356b 1115 tp->t_flags |= TF_MORETOCOME;
0a7de745 1116 }
1c79356b 1117 error = tcp_output(tp);
0a7de745 1118 if (flags & PRUS_MORETOCOME) {
1c79356b 1119 tp->t_flags &= ~TF_MORETOCOME;
0a7de745 1120 }
1c79356b
A
1121 }
1122 } else {
5ba3f43e
A
1123 if (sbspace(&so->so_snd) == 0) {
1124 /* if no space is left in sockbuf,
b0d623f7 1125 * do not try to squeeze in OOB traffic */
1c79356b
A
1126 m_freem(m);
1127 error = ENOBUFS;
1128 goto out;
1129 }
1130 /*
1131 * According to RFC961 (Assigned Protocols),
1132 * the urgent pointer points to the last octet
1133 * of urgent data. We continue, however,
1134 * to consider it to indicate the first octet
1135 * of data past the urgent section.
1136 * Otherwise, snd_up should be one lower.
1137 */
2d21ac55 1138 sbappendstream(&so->so_snd, m);
1c79356b
A
1139 if (nam && tp->t_state < TCPS_SYN_SENT) {
1140 /*
1141 * Do implied connect if not yet connected,
1142 * initialize window to default value, and
1143 * initialize maxseg/maxopd using peer's cached
1144 * MSS.
1145 */
0a7de745 1146 if (isipv6) {
1c79356b 1147 error = tcp6_connect(tp, nam, p);
f427ee49
A
1148 } else {
1149 error = tcp_connect(tp, nam, p);
1150 }
0a7de745 1151 if (error) {
cb323159 1152 TCP_LOG_CONNECT(tp, true, error);
1c79356b 1153 goto out;
0a7de745 1154 }
1c79356b 1155 tp->snd_wnd = TTCP_CLIENT_SND_WND;
d9a64523 1156 tp->max_sndwnd = tp->snd_wnd;
c910b4d9 1157 tcp_mss(tp, -1, IFSCOPE_NONE);
cb323159
A
1158
1159 TCP_LOG_CONNECT(tp, true, error);
1c79356b
A
1160 }
1161 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
fe8ab488 1162 tp->t_flagsext |= TF_FORCE;
1c79356b 1163 error = tcp_output(tp);
fe8ab488 1164 tp->t_flagsext &= ~TF_FORCE;
1c79356b 1165 }
3e170ce0 1166
cb323159
A
1167 if (net_mpklog_enabled && (inp = tp->t_inpcb) != NULL &&
1168 ((inp->inp_last_outifp != NULL &&
1169 (inp->inp_last_outifp->if_xflags & IFXF_MPK_LOG)) ||
1170 (inp->inp_boundifp != NULL &&
1171 (inp->inp_boundifp->if_xflags & IFXF_MPK_LOG)))) {
1172 MPKL_TCP_SEND(tcp_mpkl_log_object,
1173 mpkl_send_info.mpkl_proto, mpkl_send_info.mpkl_uuid,
1174 ntohs(inp->inp_lport), ntohs(inp->inp_fport),
1175 mpkl_seq, mpkl_len,
1176 so->last_pid, so->so_log_seqn++);
1177 }
3e170ce0
A
1178
1179 /*
1180 * We wait for the socket to successfully connect before returning.
1181 * This allows us to signal a timeout to the application.
1182 */
1183 if (so->so_state & SS_ISCONNECTING) {
0a7de745 1184 if (so->so_state & SS_NBIO) {
3e170ce0 1185 error = EWOULDBLOCK;
0a7de745 1186 } else {
3e170ce0 1187 error = sbwait(&so->so_snd);
0a7de745 1188 }
3e170ce0
A
1189 }
1190
5ba3f43e 1191 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
0a7de745 1192 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1c79356b
A
1193}
1194
1195/*
1196 * Abort the TCP.
1197 */
1198static int
1199tcp_usr_abort(struct socket *so)
1200{
1c79356b
A
1201 int error = 0;
1202 struct inpcb *inp = sotoinpcb(so);
1203 struct tcpcb *tp;
1204
1205 COMMON_START();
0a7de745
A
1206 /* In case we got disconnected from the peer */
1207 if (tp == NULL) {
39236c6e 1208 goto out;
0a7de745 1209 }
1c79356b 1210 tp = tcp_drop(tp, ECONNABORTED);
d190cdc3 1211 VERIFY(so->so_usecount > 0);
91447636 1212 so->so_usecount--;
1c79356b
A
1213 COMMON_END(PRU_ABORT);
1214}
1215
1216/*
1217 * Receive out-of-band data.
2d21ac55
A
1218 *
1219 * Returns: 0 Success
1220 * EINVAL [COMMON_START]
1221 * EINVAL
1222 * EWOULDBLOCK
1c79356b
A
1223 */
1224static int
1225tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1226{
1c79356b
A
1227 int error = 0;
1228 struct inpcb *inp = sotoinpcb(so);
1229 struct tcpcb *tp;
1230
1231 COMMON_START();
1232 if ((so->so_oobmark == 0 &&
0a7de745 1233 (so->so_state & SS_RCVATMARK) == 0) ||
1c79356b
A
1234 so->so_options & SO_OOBINLINE ||
1235 tp->t_oobflags & TCPOOB_HADDATA) {
1236 error = EINVAL;
1237 goto out;
1238 }
1239 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1240 error = EWOULDBLOCK;
1241 goto out;
1242 }
1243 m->m_len = 1;
1244 *mtod(m, caddr_t) = tp->t_iobc;
39037602 1245 so->so_state &= ~SS_RCVATMARK;
0a7de745 1246 if ((flags & MSG_PEEK) == 0) {
1c79356b 1247 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
0a7de745 1248 }
1c79356b
A
1249 COMMON_END(PRU_RCVOOB);
1250}
1251
3e170ce0
A
1252static int
1253tcp_usr_preconnect(struct socket *so)
1254{
490019cf
A
1255 struct inpcb *inp = sotoinpcb(so);
1256 int error = 0;
1257
1258#if NECP
1259 if (necp_socket_should_use_flow_divert(inp)) {
1260 /* May happen, if in tcp_usr_connect we did not had a chance
1261 * to set the usrreqs (due to some error). So, let's get out
1262 * of here.
1263 */
1264 goto out;
1265 }
1266#endif /* NECP */
1267
1268 error = tcp_output(sototcpcb(so));
3e170ce0 1269
5ba3f43e 1270 soclearfastopen(so);
3e170ce0
A
1271
1272 COMMON_END(PRU_PRECONNECT);
1273}
1274
1c79356b
A
1275/* xxx - should be const */
1276struct pr_usrreqs tcp_usrreqs = {
0a7de745
A
1277 .pru_abort = tcp_usr_abort,
1278 .pru_accept = tcp_usr_accept,
1279 .pru_attach = tcp_usr_attach,
1280 .pru_bind = tcp_usr_bind,
1281 .pru_connect = tcp_usr_connect,
1282 .pru_connectx = tcp_usr_connectx,
1283 .pru_control = in_control,
1284 .pru_detach = tcp_usr_detach,
1285 .pru_disconnect = tcp_usr_disconnect,
1286 .pru_disconnectx = tcp_usr_disconnectx,
1287 .pru_listen = tcp_usr_listen,
1288 .pru_peeraddr = in_getpeeraddr,
1289 .pru_rcvd = tcp_usr_rcvd,
1290 .pru_rcvoob = tcp_usr_rcvoob,
1291 .pru_send = tcp_usr_send,
1292 .pru_shutdown = tcp_usr_shutdown,
1293 .pru_sockaddr = in_getsockaddr,
1294 .pru_sosend = sosend,
1295 .pru_soreceive = soreceive,
1296 .pru_preconnect = tcp_usr_preconnect,
1c79356b
A
1297};
1298
1c79356b 1299struct pr_usrreqs tcp6_usrreqs = {
0a7de745
A
1300 .pru_abort = tcp_usr_abort,
1301 .pru_accept = tcp6_usr_accept,
1302 .pru_attach = tcp_usr_attach,
1303 .pru_bind = tcp6_usr_bind,
1304 .pru_connect = tcp6_usr_connect,
1305 .pru_connectx = tcp6_usr_connectx,
1306 .pru_control = in6_control,
1307 .pru_detach = tcp_usr_detach,
1308 .pru_disconnect = tcp_usr_disconnect,
1309 .pru_disconnectx = tcp_usr_disconnectx,
1310 .pru_listen = tcp6_usr_listen,
1311 .pru_peeraddr = in6_mapped_peeraddr,
1312 .pru_rcvd = tcp_usr_rcvd,
1313 .pru_rcvoob = tcp_usr_rcvoob,
1314 .pru_send = tcp_usr_send,
1315 .pru_shutdown = tcp_usr_shutdown,
1316 .pru_sockaddr = in6_mapped_sockaddr,
1317 .pru_sosend = sosend,
1318 .pru_soreceive = soreceive,
1319 .pru_preconnect = tcp_usr_preconnect,
1c79356b 1320};
1c79356b
A
1321
1322/*
 * Common subroutine to open a TCP connection to remote host specified
 * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
 * port number if needed.  Call in_pcbladdr to do the routing and to choose
 * a local host address (interface).  If there is an existing incarnation
 * of the same connection in TIME-WAIT state and if the remote host was
 * sending CC options and if the connection duration was < MSL, then
 * truncate the previous TIME-WAIT state and proceed.
 * Initialize connection parameters and enter SYN-SENT state.
 *
 * Locking: the socket lock of tp's socket is dropped and re-taken inside
 * this function (around the pcb hash lookup, and possibly while waiting
 * for the pcbinfo lock), so callers cannot assume it was held
 * continuously across the call.
 *
 * Returns:	0			Success
 *		EADDRINUSE
 *		EINVAL
 *	in_pcbbind:EADDRNOTAVAIL	Address not available.
 *	in_pcbbind:EINVAL		Invalid argument
 *	in_pcbbind:EAFNOSUPPORT		Address family not supported [notdef]
 *	in_pcbbind:EACCES		Permission denied
 *	in_pcbbind:EADDRINUSE		Address in use
 *	in_pcbbind:EAGAIN		Resource unavailable, try again
 *	in_pcbbind:EPERM		Operation not permitted
 *	in_pcbladdr:EINVAL		Invalid argument
 *	in_pcbladdr:EAFNOSUPPORT	Address family not supported
 *	in_pcbladdr:EADDRNOTAVAIL	Address not available
 */
1346static int
39037602 1347tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1c79356b
A
1348{
1349 struct inpcb *inp = tp->t_inpcb, *oinp;
1350 struct socket *so = inp->inp_socket;
1351 struct tcpcb *otp;
316670eb 1352 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
39236c6e 1353 struct in_addr laddr;
39236c6e 1354 int error = 0;
316670eb 1355 struct ifnet *outif = NULL;
1c79356b
A
1356
/* No local port bound yet: have in_pcbbind() assign one. */
1357 if (inp->inp_lport == 0) {
39236c6e 1358 error = in_pcbbind(inp, NULL, p);
0a7de745 1359 if (error) {
39236c6e 1360 goto done;
0a7de745 1361 }
1c79356b
A
1362 }
1363
1364 /*
 * Cannot simply call in_pcbconnect, because there might be an
 * earlier incarnation of this same connection still in
 * TIME_WAIT state, creating an ADDRINUSE error.
 *
 * in_pcbladdr() fills in the candidate local address (laddr) and the
 * outgoing interface (outif); a non-NULL outif is released at done:.
 */
39037602 1369 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif, 0);
0a7de745 1370 if (error) {
39236c6e 1371 goto done;
0a7de745 1372 }
91447636 1373
/*
 * Look for an existing pcb with the same 4-tuple.  The socket lock is
 * dropped for the duration of the hash lookup and re-taken right after.
 */
5ba3f43e 1374 socket_unlock(inp->inp_socket, 0);
1c79356b
A
1375 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
1376 sin->sin_addr, sin->sin_port,
39236c6e 1377 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr,
0a7de745 1378 inp->inp_lport, 0, NULL);
91447636 1379
5ba3f43e 1380 socket_lock(inp->inp_socket, 0);
1c79356b 1381 if (oinp) {
0a7de745 1382 if (oinp != inp) { /* 4143933: avoid deadlock if inp == oinp */
5ba3f43e 1383 socket_lock(oinp->inp_socket, 1);
0a7de745 1384 }
/*
 * Give up our hold on oinp (presumably the reference taken by the
 * lookup -- confirm against in_pcblookup_hash()).  WNT_STOPUSING
 * means the pcb is on its way out, so carry on as if nothing matched.
 */
91447636 1385 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
0a7de745 1386 if (oinp != inp) {
5ba3f43e 1387 socket_unlock(oinp->inp_socket, 1);
0a7de745 1388 }
91447636
A
1389 goto skip_oinp;
1390 }
1391
/*
 * A different pcb in TIME_WAIT that is younger than tcp_msl and has
 * TF_RCVD_CC set is closed to make room for the new connection; any
 * other match (including oinp == inp) is reported as EADDRINUSE.
 */
1c79356b 1392 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
39236c6e 1393 otp->t_state == TCPS_TIME_WAIT &&
6d2010ae 1394 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
39236c6e 1395 (otp->t_flags & TF_RCVD_CC)) {
1c79356b 1396 otp = tcp_close(otp);
39236c6e
A
1397 } else {
1398 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n",
1399 (uint64_t)VM_KERNEL_ADDRPERM(inp));
0a7de745 1400 if (oinp != inp) {
5ba3f43e 1401 socket_unlock(oinp->inp_socket, 1);
0a7de745 1402 }
39236c6e
A
1403 error = EADDRINUSE;
1404 goto done;
91447636 1405 }
0a7de745 1406 if (oinp != inp) {
5ba3f43e 1407 socket_unlock(oinp->inp_socket, 1);
0a7de745 1408 }
1c79356b 1409 }
91447636 1410skip_oinp:
39236c6e
A
/*
 * Reject a connection whose destination address and port are the same
 * as the local address and port this socket would use.
 */
1411 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr :
1412 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
1413 inp->inp_lport == sin->sin_port) {
1414 error = EINVAL;
1415 goto done;
1416 }
/*
 * Take the pcbinfo lock exclusively for the address update and rehash.
 * If it cannot be taken immediately, the socket lock is dropped while
 * blocking on it and re-taken once it is held.
 */
1417 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
91447636
A
1418 /*lock inversion issue, mostly with udp multicast packets */
1419 socket_unlock(inp->inp_socket, 0);
39236c6e 1420 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
91447636
A
1421 socket_lock(inp->inp_socket, 0);
1422 }
/*
 * Socket was not bound to a specific local address: adopt the one
 * chosen by in_pcbladdr() and record that via INP_INADDR_ANY.
 */
6d2010ae 1423 if (inp->inp_laddr.s_addr == INADDR_ANY) {
39236c6e
A
1424 inp->inp_laddr = laddr;
1425 /* no reference needed */
316670eb 1426 inp->inp_last_outifp = outif;
5ba3f43e 1427
39236c6e 1428 inp->inp_flags |= INP_INADDR_ANY;
6d2010ae 1429 }
1c79356b
A
1430 inp->inp_faddr = sin->sin_addr;
1431 inp->inp_fport = sin->sin_port;
1432 in_pcbrehash(inp);
39236c6e 1433 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1c79356b 1434
/* Compute the flow hash only if none has been assigned yet. */
0a7de745 1435 if (inp->inp_flowhash == 0) {
316670eb 1436 inp->inp_flowhash = inp_calc_flowhash(inp);
0a7de745 1437 }
1c79356b 1438
f427ee49 1439 tcp_set_max_rwinscale(tp, so);
2d21ac55 1440
1c79356b
A
/*
 * Enter SYN_SENT: mark the socket connecting, arm the keep timer with
 * the connection-establishment value, pick the initial send sequence
 * number and initialize the send sequence variables from it.
 */
1441 soisconnecting(so);
1442 tcpstat.tcps_connattempt++;
1443 tp->t_state = TCPS_SYN_SENT;
39236c6e 1444 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp));
9bccf70c 1445 tp->iss = tcp_new_isn(tp);
1c79356b 1446 tcp_sendseqinit(tp);
cb323159 1447 tp->t_connect_time = tcp_now;
0a7de745 1448 if (nstat_collect) {
6d2010ae 1449 nstat_route_connect_attempt(inp->inp_route.ro_rt);
0a7de745 1450 }
1c79356b 1451
f427ee49
A
1452 tcp_add_fsw_flow(tp, outif);
1453
39236c6e 1454done:
/* Common exit: release the interface handed back by in_pcbladdr(). */
0a7de745 1455 if (outif != NULL) {
39236c6e 1456 ifnet_release(outif);
0a7de745 1457 }
39236c6e 1458
0a7de745 1459 return error;
1c79356b
A
1460}
1461
1c79356b 1462static int
39037602 1463tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1c79356b
A
1464{
1465 struct inpcb *inp = tp->t_inpcb, *oinp;
1466 struct socket *so = inp->inp_socket;
1467 struct tcpcb *otp;
316670eb 1468 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
91447636 1469 struct in6_addr addr6;
316670eb
A
1470 int error = 0;
1471 struct ifnet *outif = NULL;
1c79356b
A
1472
1473 if (inp->inp_lport == 0) {
39236c6e 1474 error = in6_pcbbind(inp, NULL, p);
0a7de745 1475 if (error) {
316670eb 1476 goto done;
0a7de745 1477 }
1c79356b
A
1478 }
1479
1480 /*
1481 * Cannot simply call in_pcbconnect, because there might be an
1482 * earlier incarnation of this same connection still in
1483 * TIME_WAIT state, creating an ADDRINUSE error.
316670eb
A
1484 *
1485 * in6_pcbladdr() might return an ifp with its reference held
1486 * even in the error case, so make sure that it's released
1487 * whenever it's non-NULL.
1c79356b 1488 */
6d2010ae 1489 error = in6_pcbladdr(inp, nam, &addr6, &outif);
0a7de745 1490 if (error) {
316670eb 1491 goto done;
0a7de745 1492 }
5ba3f43e 1493 socket_unlock(inp->inp_socket, 0);
1c79356b 1494 oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
0a7de745
A
1495 &sin6->sin6_addr, sin6->sin6_port,
1496 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
1497 ? &addr6
1498 : &inp->in6p_laddr,
1499 inp->inp_lport, 0, NULL);
5ba3f43e 1500 socket_lock(inp->inp_socket, 0);
1c79356b
A
1501 if (oinp) {
1502 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
1503 otp->t_state == TCPS_TIME_WAIT &&
6d2010ae 1504 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
316670eb 1505 (otp->t_flags & TF_RCVD_CC)) {
1c79356b 1506 otp = tcp_close(otp);
316670eb
A
1507 } else {
1508 error = EADDRINUSE;
1509 goto done;
1510 }
1c79356b 1511 }
39236c6e 1512 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
91447636
A
1513 /*lock inversion issue, mostly with udp multicast packets */
1514 socket_unlock(inp->inp_socket, 0);
39236c6e 1515 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
91447636
A
1516 socket_lock(inp->inp_socket, 0);
1517 }
6d2010ae 1518 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
91447636 1519 inp->in6p_laddr = addr6;
0a7de745 1520 inp->in6p_last_outifp = outif; /* no reference needed */
39236c6e 1521 inp->in6p_flags |= INP_IN6ADDR_ANY;
6d2010ae 1522 }
1c79356b
A
1523 inp->in6p_faddr = sin6->sin6_addr;
1524 inp->inp_fport = sin6->sin6_port;
0a7de745 1525 if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) {
39236c6e 1526 inp->inp_flow = sin6->sin6_flowinfo;
0a7de745 1527 }
1c79356b 1528 in_pcbrehash(inp);
39236c6e 1529 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1c79356b 1530
0a7de745 1531 if (inp->inp_flowhash == 0) {
316670eb 1532 inp->inp_flowhash = inp_calc_flowhash(inp);
0a7de745 1533 }
39236c6e
A
1534 /* update flowinfo - RFC 6437 */
1535 if (inp->inp_flow == 0 && inp->in6p_flags & IN6P_AUTOFLOWLABEL) {
1536 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
1537 inp->inp_flow |=
a991bd8d 1538 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
39236c6e 1539 }
316670eb 1540
f427ee49 1541 tcp_set_max_rwinscale(tp, so);
1c79356b
A
1542
1543 soisconnecting(so);
1544 tcpstat.tcps_connattempt++;
1545 tp->t_state = TCPS_SYN_SENT;
5ba3f43e 1546 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
0a7de745 1547 TCP_CONN_KEEPINIT(tp));
9bccf70c 1548 tp->iss = tcp_new_isn(tp);
1c79356b 1549 tcp_sendseqinit(tp);
cb323159 1550 tp->t_connect_time = tcp_now;
0a7de745 1551 if (nstat_collect) {
6d2010ae 1552 nstat_route_connect_attempt(inp->inp_route.ro_rt);
0a7de745 1553 }
1c79356b 1554
f427ee49
A
1555 tcp_add_fsw_flow(tp, outif);
1556
316670eb 1557done:
0a7de745 1558 if (outif != NULL) {
316670eb 1559 ifnet_release(outif);
0a7de745 1560 }
316670eb 1561
0a7de745 1562 return error;
1c79356b 1563}
1c79356b 1564
6d2010ae
A
1565/*
1566 * Export TCP internal state information via a struct tcp_info
1567 */
5ba3f43e 1568void
6d2010ae
A
1569tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1570{
316670eb 1571 struct inpcb *inp = tp->t_inpcb;
5ba3f43e 1572
6d2010ae
A
1573 bzero(ti, sizeof(*ti));
1574
f427ee49 1575 ti->tcpi_state = (uint8_t)tp->t_state;
4bd07ac2
A
1576 ti->tcpi_flowhash = inp->inp_flowhash;
1577
316670eb 1578 if (tp->t_state > TCPS_LISTEN) {
0a7de745 1579 if (TSTMP_SUPPORTED(tp)) {
6d2010ae 1580 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
0a7de745
A
1581 }
1582 if (SACK_ENABLED(tp)) {
6d2010ae 1583 ti->tcpi_options |= TCPI_OPT_SACK;
0a7de745 1584 }
3e170ce0 1585 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
6d2010ae
A
1586 ti->tcpi_options |= TCPI_OPT_WSCALE;
1587 ti->tcpi_snd_wscale = tp->snd_scale;
1588 ti->tcpi_rcv_wscale = tp->rcv_scale;
1589 }
0a7de745 1590 if (TCP_ECN_ENABLED(tp)) {
4bd07ac2 1591 ti->tcpi_options |= TCPI_OPT_ECN;
0a7de745 1592 }
316670eb
A
1593
1594 /* Are we in retranmission episode */
0a7de745 1595 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
316670eb 1596 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
0a7de745 1597 }
316670eb 1598
0a7de745 1599 if (tp->t_flags & TF_STREAMING_ON) {
39037602 1600 ti->tcpi_flags |= TCPI_FLAG_STREAMING_ON;
0a7de745 1601 }
39037602 1602
316670eb 1603 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
6d2010ae
A
1604 ti->tcpi_snd_mss = tp->t_maxseg;
1605 ti->tcpi_rcv_mss = tp->t_maxseg;
1606
316670eb
A
1607 ti->tcpi_rttcur = tp->t_rttcur;
1608 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
1609 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
39236c6e 1610 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT;
316670eb 1611
6d2010ae
A
1612 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1613 ti->tcpi_snd_cwnd = tp->snd_cwnd;
3e170ce0 1614 ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
5ba3f43e 1615
6d2010ae
A
1616 ti->tcpi_rcv_space = tp->rcv_wnd;
1617
1618 ti->tcpi_snd_wnd = tp->snd_wnd;
6d2010ae
A
1619 ti->tcpi_snd_nxt = tp->snd_nxt;
1620 ti->tcpi_rcv_nxt = tp->rcv_nxt;
316670eb
A
1621
1622 /* convert bytes/msec to bits/sec */
1623 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
0a7de745
A
1624 tp->t_bwmeas != NULL) {
1625 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
316670eb 1626 }
813fb2f6 1627
316670eb
A
1628 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1629 tp->t_inpcb->inp_last_outifp->if_index;
1630
1631 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
39236c6e 1632 ti->tcpi_txpackets = inp->inp_stat->txpackets;
316670eb
A
1633 ti->tcpi_txbytes = inp->inp_stat->txbytes;
1634 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
813fb2f6 1635 ti->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
316670eb 1636 ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
813fb2f6 1637
316670eb 1638 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
39236c6e 1639 ti->tcpi_rxpackets = inp->inp_stat->rxpackets;
316670eb
A
1640 ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
1641 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
39236c6e
A
1642 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1643
1644 if (tp->t_state > TCPS_LISTEN) {
f427ee49 1645 ti->tcpi_synrexmits = (uint8_t)tp->t_stat.rxmitsyns;
39236c6e
A
1646 }
1647 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets;
1648 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes;
1649 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets;
1650 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes;
1651
1652 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets;
1653 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes;
1654 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets;
1655 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes;
fe8ab488
A
1656
1657 ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets;
1658 ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes;
1659 ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets;
1660 ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes;
3e170ce0
A
1661 tcp_get_connectivity_status(tp, &ti->tcpi_connstatus);
1662
1663 ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1664 ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1665 ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1666 ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1667
1668 ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1669 ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1670 ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1671 ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1672 ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
39037602
A
1673 ti->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1674 ti->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1675 ti->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1676 ti->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1677 ti->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
5ba3f43e 1678 ti->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
4bd07ac2
A
1679
1680 ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
1681 ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
1682 ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
1683 ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
1684 ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
1685
1686 ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
1687
1688 if (tp->t_inpcb->inp_last_outifp != NULL) {
0a7de745 1689 if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp)) {
4bd07ac2 1690 ti->tcpi_if_cell = 1;
0a7de745
A
1691 }
1692 if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
4bd07ac2 1693 ti->tcpi_if_wifi = 1;
0a7de745
A
1694 }
1695 if (IFNET_IS_WIRED(tp->t_inpcb->inp_last_outifp)) {
5ba3f43e 1696 ti->tcpi_if_wired = 1;
0a7de745
A
1697 }
1698 if (IFNET_IS_WIFI_INFRA(tp->t_inpcb->inp_last_outifp)) {
5ba3f43e 1699 ti->tcpi_if_wifi_infra = 1;
0a7de745
A
1700 }
1701 if (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_AWDL) {
5ba3f43e 1702 ti->tcpi_if_wifi_awdl = 1;
0a7de745 1703 }
4bd07ac2 1704 }
0a7de745 1705 if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) {
5ba3f43e 1706 ti->tcpi_snd_background = 1;
0a7de745 1707 }
5ba3f43e 1708 if (tcp_recv_bg == 1 ||
0a7de745 1709 IS_TCP_RECV_BG(tp->t_inpcb->inp_socket)) {
5ba3f43e 1710 ti->tcpi_rcv_background = 1;
0a7de745 1711 }
4bd07ac2
A
1712
1713 ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
1714 ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
1715
1716 ti->tcpi_rcvoopack = tp->t_rcvoopack;
1717 ti->tcpi_pawsdrop = tp->t_pawsdrop;
1718 ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
1719 ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
1720 ti->tcpi_dsack_sent = tp->t_dsack_sent;
1721 ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
6d2010ae
A
1722 }
1723}
1724
1725__private_extern__ errno_t
1726tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
1727{
1728 struct inpcbinfo *pcbinfo = NULL;
1729 struct inpcb *inp = NULL;
1730 struct socket *so;
1731 struct tcpcb *tp;
5ba3f43e 1732
0a7de745 1733 if (itpl->itpl_proto == IPPROTO_TCP) {
6d2010ae 1734 pcbinfo = &tcbinfo;
0a7de745 1735 } else {
6d2010ae 1736 return EINVAL;
0a7de745 1737 }
5ba3f43e 1738
6d2010ae 1739 if (itpl->itpl_local_sa.sa_family == AF_INET &&
0a7de745 1740 itpl->itpl_remote_sa.sa_family == AF_INET) {
5ba3f43e 1741 inp = in_pcblookup_hash(pcbinfo,
0a7de745
A
1742 itpl->itpl_remote_sin.sin_addr,
1743 itpl->itpl_remote_sin.sin_port,
1744 itpl->itpl_local_sin.sin_addr,
1745 itpl->itpl_local_sin.sin_port,
1746 0, NULL);
6d2010ae 1747 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
0a7de745 1748 itpl->itpl_remote_sa.sa_family == AF_INET6) {
6d2010ae
A
1749 struct in6_addr ina6_local;
1750 struct in6_addr ina6_remote;
5ba3f43e 1751
6d2010ae 1752 ina6_local = itpl->itpl_local_sin6.sin6_addr;
5ba3f43e 1753 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) &&
0a7de745 1754 itpl->itpl_local_sin6.sin6_scope_id) {
f427ee49 1755 ina6_local.s6_addr16[1] = htons((uint16_t)itpl->itpl_local_sin6.sin6_scope_id);
0a7de745 1756 }
6d2010ae
A
1757
1758 ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
5ba3f43e 1759 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) &&
0a7de745 1760 itpl->itpl_remote_sin6.sin6_scope_id) {
f427ee49 1761 ina6_remote.s6_addr16[1] = htons((uint16_t)itpl->itpl_remote_sin6.sin6_scope_id);
0a7de745 1762 }
5ba3f43e
A
1763
1764 inp = in6_pcblookup_hash(pcbinfo,
0a7de745
A
1765 &ina6_remote,
1766 itpl->itpl_remote_sin6.sin6_port,
1767 &ina6_local,
1768 itpl->itpl_local_sin6.sin6_port,
1769 0, NULL);
39236c6e 1770 } else {
6d2010ae 1771 return EINVAL;
39236c6e 1772 }
0a7de745 1773 if (inp == NULL || (so = inp->inp_socket) == NULL) {
6d2010ae 1774 return ENOENT;
0a7de745 1775 }
6d2010ae
A
1776
1777 socket_lock(so, 0);
1778 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1779 socket_unlock(so, 0);
1780 return ENOENT;
1781 }
1782 tp = intotcpcb(inp);
1783
1784 tcp_fill_info(tp, ti);
1785 socket_unlock(so, 0);
1786
1787 return 0;
1788}
1789
3e170ce0
A
1790static void
1791tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci)
1792{
1793 struct inpcb *inp = tp->t_inpcb;
1794
1795 bzero(tci, sizeof(*tci));
f427ee49 1796 tci->tcpi_state = (uint8_t)tp->t_state;
3e170ce0 1797 if (tp->t_state > TCPS_LISTEN) {
0a7de745 1798 if (TSTMP_SUPPORTED(tp)) {
3e170ce0 1799 tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
0a7de745
A
1800 }
1801 if (SACK_ENABLED(tp)) {
3e170ce0 1802 tci->tcpi_options |= TCPCI_OPT_SACK;
0a7de745 1803 }
3e170ce0
A
1804 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1805 tci->tcpi_options |= TCPCI_OPT_WSCALE;
1806 tci->tcpi_snd_wscale = tp->snd_scale;
1807 tci->tcpi_rcv_wscale = tp->rcv_scale;
1808 }
0a7de745 1809 if (TCP_ECN_ENABLED(tp)) {
3e170ce0 1810 tci->tcpi_options |= TCPCI_OPT_ECN;
0a7de745
A
1811 }
1812 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
3e170ce0 1813 tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY;
0a7de745
A
1814 }
1815 if (tp->t_flagsext & TF_PKTS_REORDERED) {
3e170ce0 1816 tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED;
0a7de745 1817 }
3e170ce0 1818 tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ?
0a7de745 1819 tp->t_rxtcur : 0;
3e170ce0
A
1820 tci->tcpi_maxseg = tp->t_maxseg;
1821 tci->tcpi_snd_ssthresh = tp->snd_ssthresh;
1822 tci->tcpi_snd_cwnd = tp->snd_cwnd;
1823 tci->tcpi_snd_wnd = tp->snd_wnd;
1824 tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1825 tci->tcpi_rcv_wnd = tp->rcv_wnd;
1826 tci->tcpi_rttcur = tp->t_rttcur;
1827 tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT);
1828 tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1829 tci->tcpi_txpackets = inp->inp_stat->txpackets;
1830 tci->tcpi_txbytes = inp->inp_stat->txbytes;
1831 tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
813fb2f6 1832 tci->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
3e170ce0
A
1833 tci->tcpi_rxpackets = inp->inp_stat->rxpackets;
1834 tci->tcpi_rxbytes = inp->inp_stat->rxbytes;
1835 tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1836
1837 tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1838 tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1839 tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1840 tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1841 tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1842 tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1843 tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1844 tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1845 tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
39037602
A
1846 tci->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1847 tci->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1848 tci->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1849 tci->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1850 tci->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
5ba3f43e 1851 tci->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
3e170ce0
A
1852 }
1853}
1854
6d2010ae 1855
5ba3f43e 1856__private_extern__ int
6d2010ae
A
1857tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1858{
1859 int error;
527f9951 1860 struct tcp_info ti = {};
6d2010ae 1861 struct info_tuple itpl;
39236c6e 1862
6d2010ae
A
1863 if (req->newptr == USER_ADDR_NULL) {
1864 return EINVAL;
1865 }
1866 if (req->newlen < sizeof(struct info_tuple)) {
1867 return EINVAL;
1868 }
1869 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
1870 if (error != 0) {
1871 return error;
1872 }
1873 error = tcp_fill_info_for_info_tuple(&itpl, &ti);
1874 if (error != 0) {
1875 return error;
1876 }
1877 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
1878 if (error != 0) {
1879 return error;
1880 }
5ba3f43e 1881
6d2010ae
A
1882 return 0;
1883}
1884
316670eb
A
1885static int
1886tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
1887{
1888 int error = EHOSTUNREACH;
1889 *out_pid = -1;
0a7de745
A
1890 if ((so->so_state & SS_ISCONNECTED) == 0) {
1891 return ENOTCONN;
1892 }
5ba3f43e 1893
0a7de745
A
1894 struct inpcb *inp = (struct inpcb*)so->so_pcb;
1895 uint16_t lport = inp->inp_lport;
1896 uint16_t fport = inp->inp_fport;
1897 struct inpcb *finp = NULL;
a39ff7e2
A
1898 struct in6_addr laddr6, faddr6;
1899 struct in_addr laddr4, faddr4;
5ba3f43e 1900
316670eb 1901 if (inp->inp_vflag & INP_IPV6) {
a39ff7e2
A
1902 laddr6 = inp->in6p_laddr;
1903 faddr6 = inp->in6p_faddr;
1904 } else if (inp->inp_vflag & INP_IPV4) {
1905 laddr4 = inp->inp_laddr;
1906 faddr4 = inp->inp_faddr;
1907 }
1908
1909 socket_unlock(so, 0);
1910 if (inp->inp_vflag & INP_IPV6) {
316670eb 1911 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
316670eb 1912 } else if (inp->inp_vflag & INP_IPV4) {
316670eb 1913 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
316670eb 1914 }
5ba3f43e 1915
316670eb
A
1916 if (finp) {
1917 *out_pid = finp->inp_socket->last_pid;
1918 error = 0;
a39ff7e2 1919 in_pcb_checkstate(finp, WNT_RELEASE, 0);
316670eb 1920 }
a39ff7e2 1921 socket_lock(so, 0);
5ba3f43e 1922
316670eb
A
1923 return error;
1924}
1925
39236c6e
A
1926void
1927tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci)
1928{
1929 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid);
1930 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info);
1931}
1932
cb323159
A
1933void
1934tcp_clear_keep_alive_offload(struct socket *so)
1935{
1936 struct inpcb *inp;
1937 struct ifnet *ifp;
1938
1939 inp = sotoinpcb(so);
1940 if (inp == NULL) {
1941 return;
1942 }
1943
1944 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) == 0) {
1945 return;
1946 }
1947
1948 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1949 inp->inp_last_outifp;
1950 if (ifp == NULL) {
1951 panic("%s: so %p inp %p ifp NULL",
1952 __func__, so, inp);
1953 }
1954
1955 ifnet_lock_exclusive(ifp);
1956
1957 if (ifp->if_tcp_kao_cnt == 0) {
1958 panic("%s: so %p inp %p ifp %p if_tcp_kao_cnt == 0",
1959 __func__, so, inp, ifp);
1960 }
1961 ifp->if_tcp_kao_cnt--;
1962 inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
1963
1964 ifnet_lock_done(ifp);
1965}
1966
1967static int
1968tcp_set_keep_alive_offload(struct socket *so, struct proc *proc)
1969{
1970 int error = 0;
1971 struct inpcb *inp;
1972 struct ifnet *ifp;
1973
1974 inp = sotoinpcb(so);
1975 if (inp == NULL) {
1976 return ECONNRESET;
1977 }
1978 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) != 0) {
1979 return 0;
1980 }
1981
1982 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1983 inp->inp_last_outifp;
1984 if (ifp == NULL) {
1985 error = ENXIO;
1986 os_log_info(OS_LOG_DEFAULT,
1987 "%s: error %d for proc %s[%u] out ifp is not set\n",
1988 __func__, error,
1989 proc != NULL ? proc->p_comm : "kernel",
1990 proc != NULL ? proc->p_pid : 0);
1991 return ENXIO;
1992 }
1993
1994 error = if_get_tcp_kao_max(ifp);
1995 if (error != 0) {
1996 return error;
1997 }
1998
1999 ifnet_lock_exclusive(ifp);
2000 if (ifp->if_tcp_kao_cnt < ifp->if_tcp_kao_max) {
2001 ifp->if_tcp_kao_cnt++;
2002 inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
2003 } else {
2004 error = ETOOMANYREFS;
2005 os_log_info(OS_LOG_DEFAULT,
2006 "%s: error %d for proc %s[%u] if_tcp_kao_max %u\n",
2007 __func__, error,
2008 proc != NULL ? proc->p_comm : "kernel",
2009 proc != NULL ? proc->p_pid : 0,
2010 ifp->if_tcp_kao_max);
2011 }
2012 ifnet_lock_done(ifp);
2013
2014 return error;
2015}
2016
1c79356b
A
2017/*
2018 * The new sockopt interface makes it possible for us to block in the
2019 * copyin/out step (if we take a page fault). Taking a page fault at
2020 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
2021 * use TSM, there probably isn't any need for this function to run at
2022 * splnet() any more. This needs more examination.)
2023 */
2024int
39037602 2025tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2026{
0a7de745
A
2027 int error = 0, opt = 0, optval = 0;
2028 struct inpcb *inp;
2029 struct tcpcb *tp;
1c79356b 2030
1c79356b
A
2031 inp = sotoinpcb(so);
2032 if (inp == NULL) {
0a7de745 2033 return ECONNRESET;
1c79356b 2034 }
39236c6e 2035 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */
316670eb 2036 if (sopt->sopt_level != IPPROTO_TCP &&
39236c6e
A
2037 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH ||
2038 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) {
0a7de745 2039 if (SOCK_CHECK_DOM(so, PF_INET6)) {
1c79356b 2040 error = ip6_ctloutput(so, sopt);
f427ee49
A
2041 } else {
2042 error = ip_ctloutput(so, sopt);
2043 }
0a7de745 2044 return error;
1c79356b
A
2045 }
2046 tp = intotcpcb(inp);
39236c6e 2047 if (tp == NULL) {
0a7de745 2048 return ECONNRESET;
39236c6e 2049 }
1c79356b 2050
6d2010ae
A
2051 calculate_tcp_clock();
2052
1c79356b
A
2053 switch (sopt->sopt_dir) {
2054 case SOPT_SET:
2055 switch (sopt->sopt_name) {
2056 case TCP_NODELAY:
2057 case TCP_NOOPT:
2058 case TCP_NOPUSH:
2059 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2060 sizeof optval);
2061 if (error) {
1c79356b 2062 break;
0a7de745 2063 }
1c79356b
A
2064
2065 switch (sopt->sopt_name) {
2066 case TCP_NODELAY:
2067 opt = TF_NODELAY;
2068 break;
2069 case TCP_NOOPT:
2070 opt = TF_NOOPT;
2071 break;
2072 case TCP_NOPUSH:
2073 opt = TF_NOPUSH;
2074 break;
2075 default:
2076 opt = 0; /* dead code to fool gcc */
2077 break;
2078 }
2079
0a7de745 2080 if (optval) {
1c79356b 2081 tp->t_flags |= opt;
0a7de745 2082 } else {
1c79356b 2083 tp->t_flags &= ~opt;
0a7de745 2084 }
1c79356b 2085 break;
6d2010ae 2086 case TCP_RXT_FINDROP:
fe8ab488 2087 case TCP_NOTIMEWAIT:
6d2010ae 2088 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2089 sizeof optval);
2090 if (error) {
6d2010ae 2091 break;
0a7de745 2092 }
fe8ab488
A
2093 switch (sopt->sopt_name) {
2094 case TCP_RXT_FINDROP:
2095 opt = TF_RXTFINDROP;
2096 break;
2097 case TCP_NOTIMEWAIT:
2098 opt = TF_NOTIMEWAIT;
2099 break;
2100 default:
2101 opt = 0;
2102 break;
2103 }
0a7de745 2104 if (optval) {
6d2010ae 2105 tp->t_flagsext |= opt;
0a7de745 2106 } else {
6d2010ae 2107 tp->t_flagsext &= ~opt;
0a7de745 2108 }
6d2010ae 2109 break;
316670eb
A
2110 case TCP_MEASURE_SND_BW:
2111 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2112 sizeof optval);
2113 if (error) {
316670eb 2114 break;
0a7de745 2115 }
316670eb
A
2116 opt = TF_MEASURESNDBW;
2117 if (optval) {
2118 if (tp->t_bwmeas == NULL) {
2119 tp->t_bwmeas = tcp_bwmeas_alloc(tp);
2120 if (tp->t_bwmeas == NULL) {
2121 error = ENOMEM;
2122 break;
2123 }
2124 }
2125 tp->t_flagsext |= opt;
2126 } else {
2127 tp->t_flagsext &= ~opt;
2128 /* Reset snd bw measurement state */
2129 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
2130 if (tp->t_bwmeas != NULL) {
2131 tcp_bwmeas_free(tp);
2132 }
2133 }
2134 break;
2135 case TCP_MEASURE_BW_BURST: {
2136 struct tcp_measure_bw_burst in;
2137 uint32_t minpkts, maxpkts;
2138 bzero(&in, sizeof(in));
2139
2140 error = sooptcopyin(sopt, &in, sizeof(in),
0a7de745
A
2141 sizeof(in));
2142 if (error) {
316670eb 2143 break;
0a7de745 2144 }
316670eb 2145 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
0a7de745 2146 tp->t_bwmeas == NULL) {
316670eb
A
2147 error = EINVAL;
2148 break;
2149 }
5ba3f43e 2150 minpkts = (in.min_burst_size != 0) ? in.min_burst_size :
0a7de745 2151 tp->t_bwmeas->bw_minsizepkts;
316670eb 2152 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
0a7de745 2153 tp->t_bwmeas->bw_maxsizepkts;
316670eb
A
2154 if (minpkts > maxpkts) {
2155 error = EINVAL;
2156 break;
2157 }
2158 tp->t_bwmeas->bw_minsizepkts = minpkts;
2159 tp->t_bwmeas->bw_maxsizepkts = maxpkts;
2160 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
2161 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
2162 break;
2163 }
1c79356b
A
2164 case TCP_MAXSEG:
2165 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2166 sizeof optval);
2167 if (error) {
1c79356b 2168 break;
0a7de745 2169 }
1c79356b 2170
e5568f75 2171 if (optval > 0 && optval <= tp->t_maxseg &&
0a7de745 2172 optval + 40 >= tcp_minmss) {
1c79356b 2173 tp->t_maxseg = optval;
0a7de745 2174 } else {
1c79356b 2175 error = EINVAL;
0a7de745 2176 }
1c79356b
A
2177 break;
2178
2d21ac55
A
2179 case TCP_KEEPALIVE:
2180 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2181 sizeof optval);
2182 if (error) {
2d21ac55 2183 break;
0a7de745
A
2184 }
2185 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2d21ac55 2186 error = EINVAL;
39236c6e 2187 } else {
2d21ac55 2188 tp->t_keepidle = optval * TCP_RETRANSHZ;
39236c6e 2189 /* reset the timer to new value */
5ba3f43e 2190 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
0a7de745 2191 TCP_CONN_KEEPIDLE(tp));
6d2010ae 2192 tcp_check_timer_state(tp);
2d21ac55 2193 }
0a7de745 2194 break;
b0d623f7
A
2195
2196 case TCP_CONNECTIONTIMEOUT:
2197 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2198 sizeof optval);
2199 if (error) {
b0d623f7 2200 break;
0a7de745
A
2201 }
2202 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
b0d623f7 2203 error = EINVAL;
39236c6e 2204 } else {
b0d623f7 2205 tp->t_keepinit = optval * TCP_RETRANSHZ;
39236c6e 2206 if (tp->t_state == TCPS_SYN_RECEIVED ||
0a7de745 2207 tp->t_state == TCPS_SYN_SENT) {
39236c6e 2208 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
0a7de745 2209 TCP_CONN_KEEPINIT(tp));
39236c6e
A
2210 tcp_check_timer_state(tp);
2211 }
2212 }
2213 break;
2214
2215 case TCP_KEEPINTVL:
2216 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2217 sizeof(optval));
2218 if (error) {
39236c6e 2219 break;
0a7de745
A
2220 }
2221 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
39236c6e
A
2222 error = EINVAL;
2223 } else {
2224 tp->t_keepintvl = optval * TCP_RETRANSHZ;
2225 if (tp->t_state == TCPS_FIN_WAIT_2 &&
0a7de745 2226 TCP_CONN_MAXIDLE(tp) > 0) {
39236c6e 2227 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745 2228 TCP_CONN_MAXIDLE(tp));
39236c6e
A
2229 tcp_check_timer_state(tp);
2230 }
2231 }
2232 break;
2233
2234 case TCP_KEEPCNT:
2235 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2236 sizeof(optval));
2237 if (error) {
39236c6e 2238 break;
0a7de745 2239 }
39236c6e
A
2240 if (optval < 0 || optval > INT32_MAX) {
2241 error = EINVAL;
2242 } else {
2243 tp->t_keepcnt = optval;
2244 if (tp->t_state == TCPS_FIN_WAIT_2 &&
0a7de745 2245 TCP_CONN_MAXIDLE(tp) > 0) {
39236c6e 2246 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745 2247 TCP_CONN_MAXIDLE(tp));
39236c6e
A
2248 tcp_check_timer_state(tp);
2249 }
2250 }
b0d623f7 2251 break;
316670eb 2252
39037602 2253 case TCP_KEEPALIVE_OFFLOAD:
cb323159
A
2254 if ((error = priv_check_cred(kauth_cred_get(),
2255 PRIV_NETINET_TCP_KA_OFFLOAD, 0)) != 0) {
2256 break;
2257 }
39037602 2258 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2259 sizeof(optval));
2260 if (error) {
39037602 2261 break;
0a7de745 2262 }
39037602
A
2263 if (optval < 0 || optval > INT32_MAX) {
2264 error = EINVAL;
2265 break;
5ba3f43e 2266 }
0a7de745 2267 if (optval != 0) {
cb323159
A
2268 error = tcp_set_keep_alive_offload(so,
2269 sopt->sopt_p);
0a7de745 2270 } else {
cb323159 2271 tcp_clear_keep_alive_offload(so);
0a7de745 2272 }
39037602
A
2273 break;
2274
6d2010ae
A
2275 case PERSIST_TIMEOUT:
2276 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2277 sizeof optval);
2278 if (error) {
6d2010ae 2279 break;
0a7de745
A
2280 }
2281 if (optval < 0) {
6d2010ae 2282 error = EINVAL;
0a7de745 2283 } else {
6d2010ae 2284 tp->t_persist_timeout = optval * TCP_RETRANSHZ;
0a7de745 2285 }
6d2010ae
A
2286 break;
2287 case TCP_RXT_CONNDROPTIME:
2288 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2289 sizeof(optval));
2290 if (error) {
6d2010ae 2291 break;
0a7de745
A
2292 }
2293 if (optval < 0) {
6d2010ae 2294 error = EINVAL;
0a7de745 2295 } else {
39236c6e 2296 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ;
0a7de745 2297 }
6d2010ae 2298 break;
316670eb
A
2299 case TCP_NOTSENT_LOWAT:
2300 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2301 sizeof(optval));
2302 if (error) {
316670eb 2303 break;
0a7de745 2304 }
316670eb
A
2305 if (optval < 0) {
2306 error = EINVAL;
2307 break;
2308 } else {
2309 if (optval == 0) {
2310 so->so_flags &= ~(SOF_NOTSENT_LOWAT);
2311 tp->t_notsent_lowat = 0;
5ba3f43e 2312 } else {
316670eb
A
2313 so->so_flags |= SOF_NOTSENT_LOWAT;
2314 tp->t_notsent_lowat = optval;
2315 }
2316 }
2317 break;
39236c6e 2318 case TCP_ADAPTIVE_READ_TIMEOUT:
0a7de745 2319 error = sooptcopyin(sopt, &optval, sizeof(optval),
3e170ce0 2320 sizeof(optval));
0a7de745 2321 if (error) {
39236c6e 2322 break;
0a7de745 2323 }
5ba3f43e 2324 if (optval < 0 ||
3e170ce0 2325 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
39236c6e
A
2326 error = EINVAL;
2327 break;
2328 } else if (optval == 0) {
2329 tp->t_adaptive_rtimo = 0;
2330 tcp_keepalive_reset(tp);
5ba3f43e 2331
0a7de745 2332 if (tp->t_mpsub) {
5ba3f43e 2333 mptcp_reset_keepalive(tp);
0a7de745 2334 }
39236c6e 2335 } else {
f427ee49 2336 tp->t_adaptive_rtimo = (uint8_t)optval;
39236c6e
A
2337 }
2338 break;
2339 case TCP_ADAPTIVE_WRITE_TIMEOUT:
0a7de745
A
2340 error = sooptcopyin(sopt, &optval, sizeof(optval),
2341 sizeof(optval));
2342 if (error) {
39236c6e 2343 break;
0a7de745 2344 }
5ba3f43e 2345 if (optval < 0 ||
3e170ce0 2346 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
39236c6e
A
2347 error = EINVAL;
2348 break;
2349 } else {
f427ee49 2350 tp->t_adaptive_wtimo = (uint8_t)optval;
39236c6e
A
2351 }
2352 break;
2353 case TCP_SENDMOREACKS:
2354 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2355 sizeof(optval));
2356 if (error) {
39236c6e 2357 break;
0a7de745 2358 }
39236c6e
A
2359 if (optval < 0 || optval > 1) {
2360 error = EINVAL;
2361 } else if (optval == 0) {
2362 tp->t_flagsext &= ~(TF_NOSTRETCHACK);
2363 } else {
2364 tp->t_flagsext |= TF_NOSTRETCHACK;
2365 }
2366 break;
fe8ab488
A
2367 case TCP_DISABLE_BLACKHOLE_DETECTION:
2368 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2369 sizeof(optval));
2370 if (error) {
fe8ab488 2371 break;
0a7de745 2372 }
fe8ab488
A
2373 if (optval < 0 || optval > 1) {
2374 error = EINVAL;
2375 } else if (optval == 0) {
2376 tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION;
2377 } else {
2378 tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION;
2379 if ((tp->t_flags & TF_BLACKHOLE) &&
0a7de745 2380 tp->t_pmtud_saved_maxopd > 0) {
fe8ab488 2381 tcp_pmtud_revert_segment_size(tp);
0a7de745 2382 }
fe8ab488
A
2383 }
2384 break;
3e170ce0
A
2385 case TCP_FASTOPEN:
2386 if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2387 error = ENOTSUP;
2388 break;
2389 }
2390
2391 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2392 sizeof(optval));
2393 if (error) {
3e170ce0 2394 break;
0a7de745 2395 }
3e170ce0
A
2396 if (optval < 0 || optval > 1) {
2397 error = EINVAL;
2398 break;
2399 }
2400 if (tp->t_state != TCPS_LISTEN) {
2401 error = EINVAL;
2402 break;
2403 }
0a7de745 2404 if (optval) {
3e170ce0 2405 tp->t_flagsext |= TF_FASTOPEN;
0a7de745 2406 } else {
3e170ce0 2407 tcp_disable_tfo(tp);
0a7de745 2408 }
3e170ce0 2409 break;
5ba3f43e 2410 case TCP_FASTOPEN_FORCE_HEURISTICS:
cb323159
A
2411
2412 break;
2413 case TCP_FASTOPEN_FORCE_ENABLE:
5ba3f43e 2414 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745 2415 sizeof(optval));
5ba3f43e 2416
0a7de745 2417 if (error) {
5ba3f43e 2418 break;
0a7de745 2419 }
5ba3f43e
A
2420 if (optval < 0 || optval > 1) {
2421 error = EINVAL;
2422 break;
2423 }
2424
2425 if (tp->t_state != TCPS_CLOSED) {
2426 error = EINVAL;
2427 break;
2428 }
0a7de745 2429 if (optval) {
cb323159 2430 tp->t_flagsext |= TF_FASTOPEN_FORCE_ENABLE;
0a7de745 2431 } else {
cb323159 2432 tp->t_flagsext &= ~TF_FASTOPEN_FORCE_ENABLE;
0a7de745 2433 }
5ba3f43e
A
2434
2435 break;
4bd07ac2
A
2436 case TCP_ENABLE_ECN:
2437 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2438 sizeof optval);
2439 if (error) {
4bd07ac2 2440 break;
0a7de745 2441 }
4bd07ac2
A
2442 if (optval) {
2443 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2444 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2445 } else {
2446 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
5ba3f43e 2447 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
4bd07ac2
A
2448 }
2449 break;
2450 case TCP_ECN_MODE:
2451 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2452 sizeof optval);
2453 if (error) {
4bd07ac2 2454 break;
0a7de745 2455 }
4bd07ac2
A
2456 if (optval == ECN_MODE_DEFAULT) {
2457 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2458 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2459 } else if (optval == ECN_MODE_ENABLE) {
2460 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2461 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2462 } else if (optval == ECN_MODE_DISABLE) {
2463 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2464 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2465 } else {
2466 error = EINVAL;
2467 }
2468 break;
39037602
A
2469 case TCP_NOTIFY_ACKNOWLEDGEMENT:
2470 error = sooptcopyin(sopt, &optval,
2471 sizeof(optval), sizeof(optval));
0a7de745 2472 if (error) {
39037602 2473 break;
0a7de745 2474 }
39037602
A
2475 if (optval <= 0) {
2476 error = EINVAL;
2477 break;
2478 }
2479 if (tp->t_notify_ack_count >= TCP_MAX_NOTIFY_ACK) {
2480 error = ETOOMANYREFS;
2481 break;
2482 }
2483
2484 /*
2485 * validate that the given marker id is not
2486 * a duplicate to avoid ambiguity
2487 */
2488 if ((error = tcp_notify_ack_id_valid(tp, so,
2489 optval)) != 0) {
2490 break;
2491 }
2492 error = tcp_add_notify_ack_marker(tp, optval);
2493 break;
316670eb 2494 case SO_FLUSH:
0a7de745
A
2495 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2496 sizeof(optval))) != 0) {
316670eb 2497 break;
0a7de745 2498 }
316670eb
A
2499
2500 error = inp_flush(inp, optval);
2501 break;
2502
39236c6e 2503 case SO_TRAFFIC_MGT_BACKGROUND:
0a7de745
A
2504 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2505 sizeof(optval))) != 0) {
39236c6e 2506 break;
0a7de745 2507 }
39236c6e
A
2508
2509 if (optval) {
2510 socket_set_traffic_mgt_flags_locked(so,
2511 TRAFFIC_MGT_SO_BACKGROUND);
2512 } else {
2513 socket_clear_traffic_mgt_flags_locked(so,
2514 TRAFFIC_MGT_SO_BACKGROUND);
2515 }
2516 break;
5ba3f43e
A
2517 case TCP_RXT_MINIMUM_TIMEOUT:
2518 error = sooptcopyin(sopt, &optval, sizeof(optval),
2519 sizeof(optval));
0a7de745 2520 if (error) {
5ba3f43e 2521 break;
0a7de745 2522 }
5ba3f43e
A
2523 if (optval < 0) {
2524 error = EINVAL;
2525 break;
2526 }
2527 if (optval == 0) {
2528 tp->t_rxt_minimum_timeout = 0;
2529 } else {
2530 tp->t_rxt_minimum_timeout = min(optval,
2531 TCP_RXT_MINIMUM_TIMEOUT_LIMIT);
2532 /* convert to milliseconds */
2533 tp->t_rxt_minimum_timeout *= TCP_RETRANSHZ;
2534 }
2535 break;
1c79356b
A
2536 default:
2537 error = ENOPROTOOPT;
2538 break;
2539 }
2540 break;
2541
2542 case SOPT_GET:
2543 switch (sopt->sopt_name) {
2544 case TCP_NODELAY:
2545 optval = tp->t_flags & TF_NODELAY;
2546 break;
2547 case TCP_MAXSEG:
2548 optval = tp->t_maxseg;
2549 break;
55e303ae 2550 case TCP_KEEPALIVE:
0a7de745 2551 if (tp->t_keepidle > 0) {
39037602 2552 optval = tp->t_keepidle / TCP_RETRANSHZ;
0a7de745 2553 } else {
39037602 2554 optval = tcp_keepidle / TCP_RETRANSHZ;
0a7de745 2555 }
55e303ae 2556 break;
39236c6e 2557 case TCP_KEEPINTVL:
0a7de745 2558 if (tp->t_keepintvl > 0) {
39037602 2559 optval = tp->t_keepintvl / TCP_RETRANSHZ;
0a7de745 2560 } else {
39037602 2561 optval = tcp_keepintvl / TCP_RETRANSHZ;
0a7de745 2562 }
39236c6e
A
2563 break;
2564 case TCP_KEEPCNT:
0a7de745 2565 if (tp->t_keepcnt > 0) {
39037602 2566 optval = tp->t_keepcnt;
0a7de745 2567 } else {
39037602 2568 optval = tcp_keepcnt;
0a7de745 2569 }
39037602
A
2570 break;
2571 case TCP_KEEPALIVE_OFFLOAD:
2572 optval = !!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD);
39236c6e 2573 break;
1c79356b
A
2574 case TCP_NOOPT:
2575 optval = tp->t_flags & TF_NOOPT;
2576 break;
2577 case TCP_NOPUSH:
2578 optval = tp->t_flags & TF_NOPUSH;
2579 break;
fe8ab488 2580 case TCP_ENABLE_ECN:
4bd07ac2
A
2581 optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
2582 break;
2583 case TCP_ECN_MODE:
0a7de745 2584 if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
4bd07ac2 2585 optval = ECN_MODE_ENABLE;
0a7de745 2586 } else if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
4bd07ac2 2587 optval = ECN_MODE_DISABLE;
0a7de745 2588 } else {
4bd07ac2 2589 optval = ECN_MODE_DEFAULT;
0a7de745 2590 }
fe8ab488 2591 break;
b0d623f7
A
2592 case TCP_CONNECTIONTIMEOUT:
2593 optval = tp->t_keepinit / TCP_RETRANSHZ;
2594 break;
6d2010ae
A
2595 case PERSIST_TIMEOUT:
2596 optval = tp->t_persist_timeout / TCP_RETRANSHZ;
2597 break;
2598 case TCP_RXT_CONNDROPTIME:
39236c6e 2599 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ;
6d2010ae
A
2600 break;
2601 case TCP_RXT_FINDROP:
2602 optval = tp->t_flagsext & TF_RXTFINDROP;
5ba3f43e 2603 break;
fe8ab488
A
2604 case TCP_NOTIMEWAIT:
2605 optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0;
2606 break;
3e170ce0
A
2607 case TCP_FASTOPEN:
2608 if (tp->t_state != TCPS_LISTEN ||
2609 !(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2610 error = ENOTSUP;
2611 break;
2612 }
2613 optval = tfo_enabled(tp);
2614 break;
5ba3f43e 2615 case TCP_FASTOPEN_FORCE_HEURISTICS:
cb323159
A
2616 optval = 0;
2617 break;
2618 case TCP_FASTOPEN_FORCE_ENABLE:
2619 optval = (tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) ? 1 : 0;
5ba3f43e 2620 break;
316670eb
A
2621 case TCP_MEASURE_SND_BW:
2622 optval = tp->t_flagsext & TF_MEASURESNDBW;
2623 break;
6d2010ae
A
2624 case TCP_INFO: {
2625 struct tcp_info ti;
2626
2627 tcp_fill_info(tp, &ti);
2628 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2629 goto done;
316670eb
A
2630 /* NOT REACHED */
2631 }
3e170ce0
A
2632 case TCP_CONNECTION_INFO: {
2633 struct tcp_connection_info tci;
2634 tcp_connection_fill_info(tp, &tci);
2635 error = sooptcopyout(sopt, &tci,
2636 sizeof(struct tcp_connection_info));
2637 goto done;
2638 }
316670eb 2639 case TCP_MEASURE_BW_BURST: {
527f9951 2640 struct tcp_measure_bw_burst out = {};
316670eb 2641 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
0a7de745 2642 tp->t_bwmeas == NULL) {
316670eb
A
2643 error = EINVAL;
2644 break;
2645 }
2646 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
2647 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
2648 error = sooptcopyout(sopt, &out, sizeof(out));
2649 goto done;
2650 }
2651 case TCP_NOTSENT_LOWAT:
2652 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
2653 optval = tp->t_notsent_lowat;
2654 } else {
2655 optval = 0;
2656 }
2657 break;
39236c6e 2658 case TCP_SENDMOREACKS:
0a7de745 2659 if (tp->t_flagsext & TF_NOSTRETCHACK) {
39236c6e 2660 optval = 1;
0a7de745 2661 } else {
39236c6e 2662 optval = 0;
0a7de745 2663 }
39236c6e 2664 break;
fe8ab488 2665 case TCP_DISABLE_BLACKHOLE_DETECTION:
0a7de745 2666 if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) {
fe8ab488 2667 optval = 1;
0a7de745 2668 } else {
fe8ab488 2669 optval = 0;
0a7de745 2670 }
fe8ab488 2671 break;
316670eb 2672 case TCP_PEER_PID: {
0a7de745 2673 pid_t pid;
316670eb 2674 error = tcp_lookup_peer_pid_locked(so, &pid);
0a7de745 2675 if (error == 0) {
316670eb 2676 error = sooptcopyout(sopt, &pid, sizeof(pid));
0a7de745 2677 }
316670eb 2678 goto done;
6d2010ae 2679 }
39236c6e
A
2680 case TCP_ADAPTIVE_READ_TIMEOUT:
2681 optval = tp->t_adaptive_rtimo;
2682 break;
2683 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2684 optval = tp->t_adaptive_wtimo;
2685 break;
2686 case SO_TRAFFIC_MGT_BACKGROUND:
39037602
A
2687 optval = (so->so_flags1 &
2688 SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
39236c6e 2689 break;
39037602
A
2690 case TCP_NOTIFY_ACKNOWLEDGEMENT: {
2691 struct tcp_notify_ack_complete retid;
2692
0a7de745 2693 if (sopt->sopt_valsize != sizeof(retid)) {
39037602
A
2694 error = EINVAL;
2695 break;
2696 }
0a7de745 2697 bzero(&retid, sizeof(retid));
39037602 2698 tcp_get_notify_ack_count(tp, &retid);
0a7de745 2699 if (retid.notify_complete_count > 0) {
39037602 2700 tcp_get_notify_ack_ids(tp, &retid);
0a7de745 2701 }
39037602 2702
0a7de745 2703 error = sooptcopyout(sopt, &retid, sizeof(retid));
39037602
A
2704 goto done;
2705 }
5ba3f43e
A
2706 case TCP_RXT_MINIMUM_TIMEOUT:
2707 optval = tp->t_rxt_minimum_timeout / TCP_RETRANSHZ;
2708 break;
1c79356b
A
2709 default:
2710 error = ENOPROTOOPT;
2711 break;
2712 }
0a7de745 2713 if (error == 0) {
1c79356b 2714 error = sooptcopyout(sopt, &optval, sizeof optval);
0a7de745 2715 }
1c79356b
A
2716 break;
2717 }
6d2010ae 2718done:
0a7de745 2719 return error;
1c79356b
A
2720}
2721
2722/*
2723 * tcp_sendspace and tcp_recvspace are the default send and receive window
2724 * sizes, respectively. These are obsolescent (this information should
2725 * be set by the route).
2726 */
0a7de745
A
/* Default send / receive socket buffer reservations used at attach time. */
u_int32_t       tcp_sendspace = 256 * 1448;
u_int32_t       tcp_recvspace = 384 * 1448;
b0d623f7
A
2729
2730/* During attach, the size of socket buffer allocated is limited to
2731 * sb_max in sbreserve. Disallow setting the tcp send and recv space
2732 * to be more than sb_max because that will cause tcp_attach to fail
2733 * (see radar 5713060)
5ba3f43e 2734 */
b0d623f7
A
2735static int
2736sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
0a7de745 2737 int arg2, struct sysctl_req *req)
5ba3f43e
A
2738{
2739#pragma unused(arg2)
b0d623f7
A
2740 u_int32_t new_value = 0, *space_p = NULL;
2741 int changed = 0, error = 0;
0a7de745 2742 u_quad_t sb_effective_max = (sb_max / (MSIZE + MCLBYTES)) * MCLBYTES;
b0d623f7
A
2743
2744 switch (oidp->oid_number) {
0a7de745
A
2745 case TCPCTL_SENDSPACE:
2746 space_p = &tcp_sendspace;
2747 break;
2748 case TCPCTL_RECVSPACE:
2749 space_p = &tcp_recvspace;
2750 break;
2751 default:
2752 return EINVAL;
b0d623f7
A
2753 }
2754 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
0a7de745 2755 &new_value, &changed);
b0d623f7
A
2756 if (changed) {
2757 if (new_value > 0 && new_value <= sb_effective_max) {
2758 *space_p = new_value;
5ba3f43e 2759 SYSCTL_SKMEM_UPDATE_AT_OFFSET(arg2, new_value);
b0d623f7
A
2760 } else {
2761 error = ERANGE;
2762 }
2763 }
2764 return error;
2765}
2766
5ba3f43e
A
2767#if SYSCTL_SKMEM
2768SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
0a7de745
A
2769 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_sendspace,
2770 offsetof(skmem_sysctl, tcp.sendspace), sysctl_tcp_sospace,
2771 "IU", "Maximum outgoing TCP datagram size");
5ba3f43e 2772SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
0a7de745
A
2773 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recvspace,
2774 offsetof(skmem_sysctl, tcp.recvspace), sysctl_tcp_sospace,
2775 "IU", "Maximum incoming TCP datagram size");
5ba3f43e 2776#else /* SYSCTL_SKMEM */
6d2010ae 2777SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2778 &tcp_sendspace, 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
6d2010ae 2779SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2780 &tcp_recvspace, 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
5ba3f43e 2781#endif /* SYSCTL_SKMEM */
1c79356b
A
2782
2783/*
2784 * Attach TCP protocol to socket, allocating
2785 * internet protocol control block, tcp control block,
2786 * buffer space, and entering LISTEN state if to accept connections.
2d21ac55
A
2787 *
2788 * Returns: 0 Success
2789 * in_pcballoc:ENOBUFS
2790 * in_pcballoc:ENOMEM
2791 * in_pcballoc:??? [IPSEC specific]
2792 * soreserve:ENOBUFS
1c79356b
A
2793 */
2794static int
39037602 2795tcp_attach(struct socket *so, struct proc *p)
1c79356b 2796{
39037602 2797 struct tcpcb *tp;
1c79356b
A
2798 struct inpcb *inp;
2799 int error;
39236c6e 2800 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0;
1c79356b 2801
1c79356b 2802 error = in_pcballoc(so, &tcbinfo, p);
0a7de745
A
2803 if (error) {
2804 return error;
2805 }
55e303ae 2806
1c79356b 2807 inp = sotoinpcb(so);
55e303ae
A
2808
2809 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
316670eb 2810 error = soreserve(so, tcp_sendspace, tcp_recvspace);
0a7de745
A
2811 if (error) {
2812 return error;
2813 }
55e303ae 2814 }
3e170ce0
A
2815
2816 if (so->so_snd.sb_preconn_hiwat == 0) {
39037602 2817 soreserve_preconnect(so, 2048);
3e170ce0
A
2818 }
2819
0a7de745 2820 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
316670eb 2821 so->so_rcv.sb_flags |= SB_AUTOSIZE;
0a7de745
A
2822 }
2823 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) {
316670eb 2824 so->so_snd.sb_flags |= SB_AUTOSIZE;
0a7de745 2825 }
55e303ae 2826
1c79356b
A
2827 if (isipv6) {
2828 inp->inp_vflag |= INP_IPV6;
0a7de745 2829 inp->in6p_hops = -1; /* use kernel default */
f427ee49
A
2830 } else {
2831 inp->inp_vflag |= INP_IPV4;
2832 }
1c79356b 2833 tp = tcp_newtcpcb(inp);
39236c6e 2834 if (tp == NULL) {
0a7de745 2835 int nofd = so->so_state & SS_NOFDREF; /* XXX */
1c79356b 2836
0a7de745 2837 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
0a7de745 2838 if (isipv6) {
1c79356b 2839 in6_pcbdetach(inp);
f427ee49
A
2840 } else {
2841 in_pcbdetach(inp);
2842 }
1c79356b 2843 so->so_state |= nofd;
0a7de745 2844 return ENOBUFS;
1c79356b 2845 }
0a7de745 2846 if (nstat_collect) {
6d2010ae 2847 nstat_tcp_new_pcb(inp);
0a7de745 2848 }
1c79356b 2849 tp->t_state = TCPS_CLOSED;
0a7de745 2850 return 0;
1c79356b
A
2851}
2852
2853/*
2854 * Initiate (or continue) disconnect.
2855 * If embryonic state, just send reset (once).
2856 * If in ``let data drain'' option and linger null, just drop.
2857 * Otherwise (hard), mark socket disconnecting and drop
2858 * current input data; switch states based on user close, and
2859 * send segment to peer (with FIN).
2860 */
2861static struct tcpcb *
39037602 2862tcp_disconnect(struct tcpcb *tp)
1c79356b
A
2863{
2864 struct socket *so = tp->t_inpcb->inp_socket;
2865
0a7de745 2866 if (so->so_rcv.sb_cc != 0 || tp->t_reassqlen != 0) {
5c9f4661 2867 return tcp_drop(tp, 0);
0a7de745 2868 }
5c9f4661 2869
0a7de745 2870 if (tp->t_state < TCPS_ESTABLISHED) {
1c79356b 2871 tp = tcp_close(tp);
0a7de745 2872 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
1c79356b 2873 tp = tcp_drop(tp, 0);
0a7de745 2874 } else {
1c79356b
A
2875 soisdisconnecting(so);
2876 sbflush(&so->so_rcv);
2877 tp = tcp_usrclosed(tp);
39236c6e
A
2878#if MPTCP
2879 /* A reset has been sent but socket exists, do not send FIN */
2880 if ((so->so_flags & SOF_MP_SUBFLOW) &&
0a7de745
A
2881 (tp) && (tp->t_mpflags & TMPF_RESET)) {
2882 return tp;
2883 }
39236c6e 2884#endif
0a7de745 2885 if (tp) {
1c79356b 2886 (void) tcp_output(tp);
0a7de745 2887 }
1c79356b 2888 }
0a7de745 2889 return tp;
1c79356b
A
2890}
2891
2892/*
2893 * User issued close, and wish to trail through shutdown states:
2894 * if never received SYN, just forget it. If got a SYN from peer,
2895 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2896 * If already got a FIN from peer, then almost done; go to LAST_ACK
2897 * state. In all other cases, have already sent FIN to peer (e.g.
2898 * after PRU_SHUTDOWN), and just have to play tedious game waiting
2899 * for peer to send FIN or not respond to keep-alives, etc.
2900 * We can let the user exit from the close as soon as the FIN is acked.
2901 */
2902static struct tcpcb *
39037602 2903tcp_usrclosed(struct tcpcb *tp)
1c79356b 2904{
1c79356b 2905 switch (tp->t_state) {
1c79356b
A
2906 case TCPS_CLOSED:
2907 case TCPS_LISTEN:
a39ff7e2 2908 case TCPS_SYN_SENT:
1c79356b
A
2909 tp = tcp_close(tp);
2910 break;
2911
1c79356b
A
2912 case TCPS_SYN_RECEIVED:
2913 tp->t_flags |= TF_NEEDFIN;
2914 break;
2915
2916 case TCPS_ESTABLISHED:
5ba3f43e 2917 DTRACE_TCP4(state__change, void, NULL,
0a7de745
A
2918 struct inpcb *, tp->t_inpcb,
2919 struct tcpcb *, tp,
2920 int32_t, TCPS_FIN_WAIT_1);
1c79356b 2921 tp->t_state = TCPS_FIN_WAIT_1;
cb323159 2922 TCP_LOG_CONNECTION_SUMMARY(tp);
1c79356b
A
2923 break;
2924
2925 case TCPS_CLOSE_WAIT:
5ba3f43e 2926 DTRACE_TCP4(state__change, void, NULL,
0a7de745
A
2927 struct inpcb *, tp->t_inpcb,
2928 struct tcpcb *, tp,
2929 int32_t, TCPS_LAST_ACK);
1c79356b 2930 tp->t_state = TCPS_LAST_ACK;
cb323159 2931 TCP_LOG_CONNECTION_SUMMARY(tp);
1c79356b
A
2932 break;
2933 }
2934 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
2935 soisdisconnected(tp->t_inpcb->inp_socket);
2936 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
0a7de745 2937 if (tp->t_state == TCPS_FIN_WAIT_2) {
5ba3f43e 2938 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745
A
2939 TCP_CONN_MAXIDLE(tp));
2940 }
1c79356b 2941 }
0a7de745 2942 return tp;
1c79356b
A
2943}
2944
2d21ac55
A
2945void
2946tcp_in_cksum_stats(u_int32_t len)
2947{
39236c6e
A
2948 tcpstat.tcps_rcv_swcsum++;
2949 tcpstat.tcps_rcv_swcsum_bytes += len;
2d21ac55
A
2950}
2951
2952void
2953tcp_out_cksum_stats(u_int32_t len)
2954{
39236c6e
A
2955 tcpstat.tcps_snd_swcsum++;
2956 tcpstat.tcps_snd_swcsum_bytes += len;
2957}
2958
39236c6e
A
2959void
2960tcp_in6_cksum_stats(u_int32_t len)
2961{
2962 tcpstat.tcps_rcv6_swcsum++;
2963 tcpstat.tcps_rcv6_swcsum_bytes += len;
2d21ac55 2964}
39236c6e
A
2965
2966void
2967tcp_out6_cksum_stats(u_int32_t len)
2968{
2969 tcpstat.tcps_snd6_swcsum++;
2970 tcpstat.tcps_snd6_swcsum_bytes += len;
2971}
cb323159
A
2972
2973int
2974tcp_get_mpkl_send_info(struct mbuf *control,
2975 struct so_mpkl_send_info *mpkl_send_info)
2976{
2977 struct cmsghdr *cm;
2978
2979 if (control == NULL || mpkl_send_info == NULL) {
2980 return EINVAL;
2981 }
2982
2983 for (cm = M_FIRST_CMSGHDR(control); cm;
2984 cm = M_NXT_CMSGHDR(control, cm)) {
2985 if (cm->cmsg_len < sizeof(struct cmsghdr) ||
2986 cm->cmsg_len > control->m_len) {
2987 return EINVAL;
2988 }
2989 if (cm->cmsg_level != SOL_SOCKET ||
2990 cm->cmsg_type != SCM_MPKL_SEND_INFO) {
2991 continue;
2992 }
2993 if (cm->cmsg_len != CMSG_LEN(sizeof(struct so_mpkl_send_info))) {
2994 return EINVAL;
2995 }
2996 memcpy(mpkl_send_info, CMSG_DATA(cm),
2997 sizeof(struct so_mpkl_send_info));
2998 return 0;
2999 }
3000 return ENOMSG;
3001}