]> git.saurik.com Git - apple/xnu.git/blame - bsd/netinet/tcp_usrreq.c
xnu-6153.41.3.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_usrreq.c
CommitLineData
1c79356b 1/*
0a7de745 2 * Copyright (c) 2000-2019 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
39037602 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
39037602 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
39037602 17 *
2d21ac55
A
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
39037602 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
9bccf70c 61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
1c79356b
A
62 */
63
1c79356b
A
64
65#include <sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <sys/sysctl.h>
69#include <sys/mbuf.h>
70#if INET6
71#include <sys/domain.h>
72#endif /* INET6 */
5ba3f43e 73#if !CONFIG_EMBEDDED
39236c6e 74#include <sys/kasl.h>
5ba3f43e 75#endif
cb323159 76#include <sys/priv.h>
1c79356b
A
77#include <sys/socket.h>
78#include <sys/socketvar.h>
79#include <sys/protosw.h>
39236c6e 80#include <sys/syslog.h>
1c79356b
A
81
82#include <net/if.h>
83#include <net/route.h>
6d2010ae 84#include <net/ntstat.h>
fe8ab488 85#include <net/content_filter.h>
cb323159 86#include <net/multi_layer_pkt_log.h>
1c79356b
A
87
88#include <netinet/in.h>
89#include <netinet/in_systm.h>
9bccf70c
A
90#if INET6
91#include <netinet/ip6.h>
92#endif
1c79356b 93#include <netinet/in_pcb.h>
9bccf70c
A
94#if INET6
95#include <netinet6/in6_pcb.h>
96#endif
1c79356b
A
97#include <netinet/in_var.h>
98#include <netinet/ip_var.h>
99#if INET6
1c79356b
A
100#include <netinet6/ip6_var.h>
101#endif
102#include <netinet/tcp.h>
103#include <netinet/tcp_fsm.h>
104#include <netinet/tcp_seq.h>
105#include <netinet/tcp_timer.h>
106#include <netinet/tcp_var.h>
107#include <netinet/tcpip.h>
5ba3f43e 108#include <netinet/tcp_cc.h>
cb323159 109#include <netinet/tcp_log.h>
39236c6e 110#include <mach/sdt.h>
1c79356b
A
111#if TCPDEBUG
112#include <netinet/tcp_debug.h>
113#endif
39236c6e
A
114#if MPTCP
115#include <netinet/mptcp_var.h>
116#endif /* MPTCP */
1c79356b
A
117
118#if IPSEC
119#include <netinet6/ipsec.h>
120#endif /*IPSEC*/
121
39236c6e
A
122#if FLOW_DIVERT
123#include <netinet/flow_divert.h>
124#endif /* FLOW_DIVERT */
125
6d2010ae
A
126errno_t tcp_fill_info_for_info_tuple(struct info_tuple *, struct tcp_info *);
127
0a7de745 128int tcp_sysctl_info(struct sysctl_oid *, void *, int, struct sysctl_req *);
3e170ce0
A
129static void tcp_connection_fill_info(struct tcpcb *tp,
130 struct tcp_connection_info *tci);
cb323159 131static int tcp_get_mpkl_send_info(struct mbuf *, struct so_mpkl_send_info *);
6d2010ae 132
1c79356b
A
133/*
134 * TCP protocol interface to socket abstraction.
135 */
0a7de745
A
136static int tcp_attach(struct socket *, struct proc *);
137static int tcp_connect(struct tcpcb *, struct sockaddr *, struct proc *);
1c79356b 138#if INET6
0a7de745
A
139static int tcp6_connect(struct tcpcb *, struct sockaddr *, struct proc *);
140static int tcp6_usr_connect(struct socket *, struct sockaddr *,
141 struct proc *);
1c79356b 142#endif /* INET6 */
39037602
A
143static struct tcpcb *tcp_disconnect(struct tcpcb *);
144static struct tcpcb *tcp_usrclosed(struct tcpcb *);
316670eb
A
145extern void tcp_sbrcv_trim(struct tcpcb *tp, struct sockbuf *sb);
146
#if TCPDEBUG
/*
 * Debug-trace helpers used by the pru_*() entry points in this file.
 * They rely on locals named `tp' and `so' in the calling function.
 *
 *   TCPDEBUG0       declares `ostate', the saved pre-call TCP state.
 *   TCPDEBUG1()     stores tp->t_state into `ostate' (0 when tp is NULL).
 *   TCPDEBUG2(req)  calls tcp_trace(TA_USER, ...) when tp is non-NULL and
 *                   the socket has SO_DEBUG set.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture an `else' belonging to the caller.
 */
#define TCPDEBUG0       int ostate = 0
#define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
#define TCPDEBUG2(req)  do { \
	if (tp && (so->so_options & SO_DEBUG)) { \
	        tcp_trace(TA_USER, ostate, tp, 0, 0, req); \
	} \
} while (0)
#else
/* Tracing compiled out: all three helpers expand to nothing. */
#define TCPDEBUG0
#define TCPDEBUG1()
#define TCPDEBUG2(req)
#endif
157
39236c6e
A
158SYSCTL_PROC(_net_inet_tcp, OID_AUTO, info,
159 CTLFLAG_RW | CTLFLAG_LOCKED | CTLFLAG_ANYBODY | CTLFLAG_KERN,
0a7de745 160 0, 0, tcp_sysctl_info, "S", "TCP info per tuple");
6d2010ae 161
1c79356b
A
162/*
163 * TCP attaches to socket via pru_attach(), reserving space,
164 * and an internet control block.
2d21ac55
A
165 *
166 * Returns: 0 Success
167 * EISCONN
168 * tcp_attach:ENOBUFS
169 * tcp_attach:ENOMEM
170 * tcp_attach:??? [IPSEC specific]
1c79356b
A
171 */
172static int
2d21ac55 173tcp_usr_attach(struct socket *so, __unused int proto, struct proc *p)
1c79356b 174{
1c79356b
A
175 int error;
176 struct inpcb *inp = sotoinpcb(so);
177 struct tcpcb *tp = 0;
178 TCPDEBUG0;
179
180 TCPDEBUG1();
181 if (inp) {
182 error = EISCONN;
183 goto out;
184 }
39037602 185
1c79356b 186 error = tcp_attach(so, p);
0a7de745 187 if (error) {
1c79356b 188 goto out;
0a7de745 189 }
1c79356b 190
0a7de745 191 if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
1c79356b 192 so->so_linger = TCP_LINGERTIME * hz;
0a7de745 193 }
1c79356b
A
194 tp = sototcpcb(so);
195out:
196 TCPDEBUG2(PRU_ATTACH);
1c79356b
A
197 return error;
198}
199
200/*
201 * pru_detach() detaches the TCP protocol from the socket.
202 * If the protocol state is non-embryonic, then can't
203 * do this directly: have to initiate a pru_disconnect(),
204 * which may finish later; embryonic TCB's can just
205 * be discarded here.
206 */
207static int
208tcp_usr_detach(struct socket *so)
209{
1c79356b
A
210 int error = 0;
211 struct inpcb *inp = sotoinpcb(so);
212 struct tcpcb *tp;
213 TCPDEBUG0;
214
91447636 215 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) {
0a7de745 216 return EINVAL; /* XXX */
1c79356b 217 }
5ba3f43e 218 socket_lock_assert_owned(so);
1c79356b
A
219 tp = intotcpcb(inp);
220 /* In case we got disconnected from the peer */
0a7de745 221 if (tp == NULL) {
39236c6e 222 goto out;
0a7de745 223 }
1c79356b 224 TCPDEBUG1();
6d2010ae
A
225
226 calculate_tcp_clock();
227
1c79356b
A
228 tp = tcp_disconnect(tp);
229out:
230 TCPDEBUG2(PRU_DETACH);
1c79356b
A
231 return error;
232}
233
/*
 * COMMON_START(): shared prologue of most pru_*() handlers.
 *
 * Expects locals `inp' and `tp' in the calling function.  It returns
 * EINVAL from the caller when inp is NULL or in INPCB_STATE_DEAD and,
 * when NECP is configured, EPROTOTYPE when
 * necp_socket_should_use_flow_divert() reports true for the socket.
 * Otherwise it loads tp from inp, snapshots the trace state and calls
 * calculate_tcp_clock().
 */
#if NECP
#define COMMON_START()  TCPDEBUG0; \
do { \
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { \
	        return EINVAL; \
	} \
	if (necp_socket_should_use_flow_divert(inp)) { \
	        return EPROTOTYPE; \
	} \
	tp = intotcpcb(inp); \
	TCPDEBUG1(); \
	calculate_tcp_clock(); \
} while (0)
#else /* NECP */
#define COMMON_START()  TCPDEBUG0; \
do { \
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) { \
	        return EINVAL; \
	} \
	tp = intotcpcb(inp); \
	TCPDEBUG1(); \
	calculate_tcp_clock(); \
} while (0)
#endif /* !NECP */

/*
 * COMMON_END(req): shared epilogue.  Defines the `out' label, emits the
 * trace record and returns `error'.  The trailing (unreachable)
 * `goto out' keeps the label referenced in handlers that never jump to it.
 */
#define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
1c79356b
A
257
258
259/*
260 * Give the socket an address.
2d21ac55
A
261 *
262 * Returns: 0 Success
263 * EINVAL Invalid argument [COMMON_START]
264 * EAFNOSUPPORT Address family not supported
265 * in_pcbbind:EADDRNOTAVAIL Address not available.
266 * in_pcbbind:EINVAL Invalid argument
267 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
268 * in_pcbbind:EACCES Permission denied
269 * in_pcbbind:EADDRINUSE Address in use
270 * in_pcbbind:EAGAIN Resource unavailable, try again
271 * in_pcbbind:EPERM Operation not permitted
1c79356b
A
272 */
273static int
274tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
275{
1c79356b
A
276 int error = 0;
277 struct inpcb *inp = sotoinpcb(so);
278 struct tcpcb *tp;
279 struct sockaddr_in *sinp;
280
281 COMMON_START();
282
2d21ac55
A
283 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
284 error = EAFNOSUPPORT;
285 goto out;
286 }
287
1c79356b
A
288 /*
289 * Must check for multicast addresses and disallow binding
290 * to them.
291 */
316670eb 292 sinp = (struct sockaddr_in *)(void *)nam;
1c79356b
A
293 if (sinp->sin_family == AF_INET &&
294 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
295 error = EAFNOSUPPORT;
296 goto out;
297 }
298 error = in_pcbbind(inp, nam, p);
0a7de745 299 if (error) {
1c79356b 300 goto out;
0a7de745 301 }
5ba3f43e
A
302
303#if NECP
304 /* Update NECP client with bind result if not in middle of connect */
305 if ((inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS) &&
0a7de745 306 !uuid_is_null(inp->necp_client_uuid)) {
5ba3f43e
A
307 socket_unlock(so, 0);
308 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
309 socket_lock(so, 0);
310 }
311#endif /* NECP */
312
1c79356b 313 COMMON_END(PRU_BIND);
1c79356b
A
314}
315
#if INET6
/*
 * pru_bind() handler for IPv6 TCP sockets.
 *
 * Rejects address families other than AF_INET6 (or 0) and multicast
 * addresses with EAFNOSUPPORT.  When the socket is not IPV6_V6ONLY:
 *   - an unspecified address leaves the pcb usable for IPv4 as well;
 *   - a v4-mapped address is converted to a sockaddr_in and bound
 *     through in_pcbbind(), after the same multicast check that
 *     tcp_usr_bind() applies to plain IPv4 addresses.
 * Everything else is bound through in6_pcbbind().
 *
 * Returns 0 on success, EINVAL/EPROTOTYPE from COMMON_START,
 * EAFNOSUPPORT as described above, or the error from
 * in_pcbbind()/in6_pcbbind().
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	int error = 0;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6p;

	COMMON_START();

	if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}

	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	sin6p = (struct sockaddr_in6 *)(void *)nam;
	if (sin6p->sin6_family == AF_INET6 &&
	    IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr)) {
			inp->inp_vflag |= INP_IPV4;
		} else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6p);
			/*
			 * A v4-mapped multicast address (::ffff:224.0.0.0/4)
			 * is not IN6_IS_ADDR_MULTICAST, so the embedded IPv4
			 * address has to be checked separately.
			 */
			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
				error = EAFNOSUPPORT;
				goto out;
			}
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			error = in_pcbbind(inp, (struct sockaddr *)&sin, p);
			goto out;
		}
	}
	error = in6_pcbbind(inp, nam, p);
	if (error) {
		goto out;
	}
	COMMON_END(PRU_BIND);
}
#endif /* INET6 */
364
365/*
366 * Prepare to accept connections.
2d21ac55
A
367 *
368 * Returns: 0 Success
369 * EINVAL [COMMON_START]
370 * in_pcbbind:EADDRNOTAVAIL Address not available.
371 * in_pcbbind:EINVAL Invalid argument
372 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
373 * in_pcbbind:EACCES Permission denied
374 * in_pcbbind:EADDRINUSE Address in use
375 * in_pcbbind:EAGAIN Resource unavailable, try again
376 * in_pcbbind:EPERM Operation not permitted
1c79356b
A
377 */
378static int
379tcp_usr_listen(struct socket *so, struct proc *p)
380{
1c79356b
A
381 int error = 0;
382 struct inpcb *inp = sotoinpcb(so);
383 struct tcpcb *tp;
384
385 COMMON_START();
0a7de745 386 if (inp->inp_lport == 0) {
39236c6e 387 error = in_pcbbind(inp, NULL, p);
0a7de745
A
388 }
389 if (error == 0) {
1c79356b 390 tp->t_state = TCPS_LISTEN;
0a7de745 391 }
cb323159 392 TCP_LOG_LISTEN(tp, error);
1c79356b
A
393 COMMON_END(PRU_LISTEN);
394}
395
#if INET6
/*
 * pru_listen() handler for IPv6 TCP sockets.  A pcb without a local
 * port is bound through in6_pcbbind() with a NULL address; before that
 * INP_IPV4 is left set in inp_vflag only when the socket is not
 * IPV6_V6ONLY.  On success the TCP state becomes TCPS_LISTEN.
 */
static int
tcp6_usr_listen(struct socket *so, struct proc *p)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	int error = 0;

	COMMON_START();

	if (inp->inp_lport == 0) {
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			inp->inp_vflag &= ~INP_IPV4;
		} else {
			inp->inp_vflag |= INP_IPV4;
		}
		error = in6_pcbbind(inp, NULL, p);
	}
	if (!error) {
		tp->t_state = TCPS_LISTEN;
	}
	/* Logged for both the success and the failure case. */
	TCP_LOG_LISTEN(tp, error);

	COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */
419
3e170ce0
A
420static int
421tcp_connect_complete(struct socket *so)
422{
423 struct tcpcb *tp = sototcpcb(so);
5ba3f43e 424 struct inpcb *inp = sotoinpcb(so);
3e170ce0
A
425 int error = 0;
426
427 /* TFO delays the tcp_output until later, when the app calls write() */
428 if (so->so_flags1 & SOF1_PRECONNECT_DATA) {
cb323159
A
429 if (!necp_socket_is_allowed_to_send_recv(sotoinpcb(so), NULL, NULL, NULL, NULL)) {
430 TCP_LOG_DROP_NECP(NULL, NULL, tp, true);
0a7de745
A
431 return EHOSTUNREACH;
432 }
3e170ce0
A
433
434 /* Initialize enough state so that we can actually send data */
435 tcp_mss(tp, -1, IFSCOPE_NONE);
436 tp->snd_wnd = tp->t_maxseg;
d9a64523 437 tp->max_sndwnd = tp->snd_wnd;
3e170ce0
A
438 } else {
439 error = tcp_output(tp);
440 }
441
a39ff7e2
A
442#if NECP
443 /* Update NECP client with connected five-tuple */
444 if (error == 0 && !uuid_is_null(inp->necp_client_uuid)) {
445 socket_unlock(so, 0);
446 necp_client_assign_from_socket(so->last_pid, inp->necp_client_uuid, inp);
447 socket_lock(so, 0);
448 }
449#endif /* NECP */
450
0a7de745 451 return error;
3e170ce0
A
452}
453
1c79356b
A
454/*
455 * Initiate connection to peer.
456 * Create a template for use in transmissions on this connection.
457 * Enter SYN_SENT state, and mark socket as connecting.
458 * Start keep-alive timer, and seed output sequence space.
459 * Send initial segment on connection.
460 */
461static int
462tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
463{
1c79356b
A
464 int error = 0;
465 struct inpcb *inp = sotoinpcb(so);
466 struct tcpcb *tp;
467 struct sockaddr_in *sinp;
468
2d21ac55 469 TCPDEBUG0;
39236c6e 470 if (inp == NULL) {
2d21ac55 471 return EINVAL;
39236c6e 472 } else if (inp->inp_state == INPCB_STATE_DEAD) {
2d21ac55
A
473 if (so->so_error) {
474 error = so->so_error;
475 so->so_error = 0;
476 return error;
0a7de745 477 } else {
2d21ac55 478 return EINVAL;
0a7de745 479 }
2d21ac55 480 }
fe8ab488 481#if NECP
cb323159
A
482#if CONTENT_FILTER
483 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
484 if (error != 0) {
485 return error;
486 }
487#endif /* CONTENT_FILTER */
39236c6e 488#if FLOW_DIVERT
cb323159 489 if (necp_socket_should_use_flow_divert(inp)) {
fe8ab488
A
490 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
491 if (fd_ctl_unit > 0) {
492 error = flow_divert_pcb_init(so, fd_ctl_unit);
493 if (error == 0) {
494 error = flow_divert_connect_out(so, nam, p);
39236c6e 495 }
fe8ab488
A
496 } else {
497 error = ENETDOWN;
39236c6e 498 }
3e170ce0 499
39236c6e
A
500 return error;
501 }
502#endif /* FLOW_DIVERT */
fe8ab488 503#endif /* NECP */
2d21ac55
A
504 tp = intotcpcb(inp);
505 TCPDEBUG1();
1c79356b 506
6d2010ae
A
507 calculate_tcp_clock();
508
2d21ac55
A
509 if (nam->sa_family != 0 && nam->sa_family != AF_INET) {
510 error = EAFNOSUPPORT;
511 goto out;
512 }
1c79356b
A
513 /*
514 * Must disallow TCP ``connections'' to multicast addresses.
515 */
316670eb 516 sinp = (struct sockaddr_in *)(void *)nam;
1c79356b
A
517 if (sinp->sin_family == AF_INET
518 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
519 error = EAFNOSUPPORT;
520 goto out;
521 }
522
0a7de745 523 if ((error = tcp_connect(tp, nam, p)) != 0) {
cb323159 524 TCP_LOG_CONNECT(tp, true, error);
1c79356b 525 goto out;
0a7de745 526 }
3e170ce0
A
527
528 error = tcp_connect_complete(so);
529
cb323159
A
530 TCP_LOG_CONNECT(tp, true, error);
531
1c79356b
A
532 COMMON_END(PRU_CONNECT);
533}
534
39236c6e
A
535static int
536tcp_usr_connectx_common(struct socket *so, int af,
813fb2f6 537 struct sockaddr *src, struct sockaddr *dst,
3e170ce0
A
538 struct proc *p, uint32_t ifscope, sae_associd_t aid, sae_connid_t *pcid,
539 uint32_t flags, void *arg, uint32_t arglen, struct uio *auio,
540 user_ssize_t *bytes_written)
39236c6e 541{
5ba3f43e 542#pragma unused(aid, flags, arg, arglen)
39236c6e 543 struct inpcb *inp = sotoinpcb(so);
5ba3f43e 544 int error = 0;
3e170ce0 545 user_ssize_t datalen = 0;
39236c6e 546
0a7de745
A
547 if (inp == NULL) {
548 return EINVAL;
549 }
39236c6e 550
813fb2f6 551 VERIFY(dst != NULL);
39236c6e 552
5ba3f43e
A
553 ASSERT(!(inp->inp_flags2 & INP2_CONNECT_IN_PROGRESS));
554 inp->inp_flags2 |= INP2_CONNECT_IN_PROGRESS;
555
fe8ab488 556#if NECP
813fb2f6 557 inp_update_necp_policy(inp, src, dst, ifscope);
fe8ab488 558#endif /* NECP */
3e170ce0
A
559
560 if ((so->so_flags1 & SOF1_DATA_IDEMPOTENT) &&
0a7de745 561 (tcp_fastopen & TCP_FASTOPEN_CLIENT)) {
3e170ce0 562 sototcpcb(so)->t_flagsext |= TF_FASTOPEN;
0a7de745 563 }
3e170ce0 564
5ba3f43e
A
565 /* bind socket to the specified interface, if requested */
566 if (ifscope != IFSCOPE_NONE &&
0a7de745 567 (error = inp_bindif(inp, ifscope, NULL)) != 0) {
5ba3f43e
A
568 goto done;
569 }
39236c6e 570
5ba3f43e
A
571 /* if source address and/or port is specified, bind to it */
572 if (src != NULL) {
0a7de745 573 error = sobindlock(so, src, 0); /* already locked */
5ba3f43e
A
574 if (error != 0) {
575 goto done;
39236c6e
A
576 }
577 }
578
579 switch (af) {
580 case AF_INET:
813fb2f6 581 error = tcp_usr_connect(so, dst, p);
39236c6e
A
582 break;
583#if INET6
584 case AF_INET6:
813fb2f6 585 error = tcp6_usr_connect(so, dst, p);
39236c6e
A
586 break;
587#endif /* INET6 */
588 default:
589 VERIFY(0);
590 /* NOTREACHED */
591 }
592
5ba3f43e
A
593 if (error != 0) {
594 goto done;
595 }
3e170ce0
A
596
597 /* if there is data, copy it */
598 if (auio != NULL) {
599 socket_unlock(so, 0);
600
601 VERIFY(bytes_written != NULL);
602
603 datalen = uio_resid(auio);
604 error = so->so_proto->pr_usrreqs->pru_sosend(so, NULL,
0a7de745 605 (uio_t)auio, NULL, NULL, 0);
3e170ce0
A
606 socket_lock(so, 0);
607
0a7de745 608 if (error == 0 || error == EWOULDBLOCK) {
3e170ce0 609 *bytes_written = datalen - uio_resid(auio);
0a7de745 610 }
3e170ce0
A
611
612 /*
613 * sosend returns EWOULDBLOCK if it's a non-blocking
614 * socket or a timeout occured (this allows to return
615 * the amount of queued data through sendit()).
616 *
617 * However, connectx() returns EINPROGRESS in case of a
618 * blocking socket. So we change the return value here.
619 */
0a7de745 620 if (error == EWOULDBLOCK) {
3e170ce0 621 error = EINPROGRESS;
0a7de745 622 }
3e170ce0
A
623 }
624
0a7de745 625 if (error == 0 && pcid != NULL) {
3e170ce0 626 *pcid = 1; /* there is only one connection in regular TCP */
0a7de745 627 }
5ba3f43e 628done:
0a7de745 629 if (error && error != EINPROGRESS) {
a39ff7e2 630 so->so_flags1 &= ~SOF1_PRECONNECT_DATA;
0a7de745 631 }
a39ff7e2 632
5ba3f43e 633 inp->inp_flags2 &= ~INP2_CONNECT_IN_PROGRESS;
0a7de745 634 return error;
39236c6e
A
635}
636
637static int
813fb2f6
A
638tcp_usr_connectx(struct socket *so, struct sockaddr *src,
639 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
640 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
641 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
39236c6e 642{
0a7de745
A
643 return tcp_usr_connectx_common(so, AF_INET, src, dst, p, ifscope, aid,
644 pcid, flags, arg, arglen, uio, bytes_written);
39236c6e
A
645}
646
1c79356b
A
647#if INET6
648static int
649tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
650{
1c79356b
A
651 int error = 0;
652 struct inpcb *inp = sotoinpcb(so);
653 struct tcpcb *tp;
654 struct sockaddr_in6 *sin6p;
655
39236c6e
A
656 TCPDEBUG0;
657 if (inp == NULL) {
658 return EINVAL;
659 } else if (inp->inp_state == INPCB_STATE_DEAD) {
660 if (so->so_error) {
661 error = so->so_error;
662 so->so_error = 0;
663 return error;
0a7de745 664 } else {
39236c6e 665 return EINVAL;
0a7de745 666 }
39236c6e 667 }
fe8ab488 668#if NECP
cb323159
A
669#if CONTENT_FILTER
670 error = cfil_sock_attach(so, NULL, nam, CFS_CONNECTION_DIR_OUT);
671 if (error != 0) {
672 return error;
673 }
674#endif /* CONTENT_FILTER */
39236c6e 675#if FLOW_DIVERT
cb323159 676 if (necp_socket_should_use_flow_divert(inp)) {
fe8ab488
A
677 uint32_t fd_ctl_unit = necp_socket_get_flow_divert_control_unit(inp);
678 if (fd_ctl_unit > 0) {
679 error = flow_divert_pcb_init(so, fd_ctl_unit);
680 if (error == 0) {
681 error = flow_divert_connect_out(so, nam, p);
39236c6e 682 }
fe8ab488
A
683 } else {
684 error = ENETDOWN;
39236c6e 685 }
4bd07ac2 686
39236c6e
A
687 return error;
688 }
689#endif /* FLOW_DIVERT */
fe8ab488
A
690#endif /* NECP */
691
39236c6e
A
692 tp = intotcpcb(inp);
693 TCPDEBUG1();
694
695 calculate_tcp_clock();
1c79356b 696
2d21ac55
A
697 if (nam->sa_family != 0 && nam->sa_family != AF_INET6) {
698 error = EAFNOSUPPORT;
699 goto out;
700 }
701
1c79356b
A
702 /*
703 * Must disallow TCP ``connections'' to multicast addresses.
704 */
316670eb 705 sin6p = (struct sockaddr_in6 *)(void *)nam;
1c79356b
A
706 if (sin6p->sin6_family == AF_INET6
707 && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr)) {
708 error = EAFNOSUPPORT;
709 goto out;
710 }
9bccf70c
A
711
712 if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
1c79356b
A
713 struct sockaddr_in sin;
714
0a7de745
A
715 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
716 return EINVAL;
717 }
9bccf70c 718
1c79356b
A
719 in6_sin6_2_sin(&sin, sin6p);
720 inp->inp_vflag |= INP_IPV4;
721 inp->inp_vflag &= ~INP_IPV6;
0a7de745 722 if ((error = tcp_connect(tp, (struct sockaddr *)&sin, p)) != 0) {
cb323159 723 TCP_LOG_CONNECT(tp, true, error);
1c79356b 724 goto out;
0a7de745 725 }
3e170ce0
A
726
727 error = tcp_connect_complete(so);
1c79356b
A
728 goto out;
729 }
9bccf70c
A
730 inp->inp_vflag &= ~INP_IPV4;
731 inp->inp_vflag |= INP_IPV6;
0a7de745 732 if ((error = tcp6_connect(tp, nam, p)) != 0) {
cb323159 733 TCP_LOG_CONNECT(tp, true, error);
1c79356b 734 goto out;
0a7de745 735 }
3e170ce0
A
736
737 error = tcp_connect_complete(so);
cb323159
A
738
739 TCP_LOG_CONNECT(tp, true, error);
740
1c79356b
A
741 COMMON_END(PRU_CONNECT);
742}
39236c6e
A
743
744static int
813fb2f6
A
745tcp6_usr_connectx(struct socket *so, struct sockaddr*src,
746 struct sockaddr *dst, struct proc *p, uint32_t ifscope,
3e170ce0
A
747 sae_associd_t aid, sae_connid_t *pcid, uint32_t flags, void *arg,
748 uint32_t arglen, struct uio *uio, user_ssize_t *bytes_written)
39236c6e 749{
0a7de745
A
750 return tcp_usr_connectx_common(so, AF_INET6, src, dst, p, ifscope, aid,
751 pcid, flags, arg, arglen, uio, bytes_written);
39236c6e 752}
1c79356b
A
753#endif /* INET6 */
754
755/*
756 * Initiate disconnect from peer.
757 * If connection never passed embryonic stage, just drop;
758 * else if don't need to let data drain, then can just drop anyways,
759 * else have to begin TCP shutdown process: mark socket disconnecting,
760 * drain unread data, state switch to reflect user close, and
761 * send segment (e.g. FIN) to peer. Socket will be really disconnected
762 * when peer sends FIN and acks ours.
763 *
764 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
765 */
766static int
767tcp_usr_disconnect(struct socket *so)
768{
1c79356b
A
769 int error = 0;
770 struct inpcb *inp = sotoinpcb(so);
771 struct tcpcb *tp;
39236c6e 772
5ba3f43e 773 socket_lock_assert_owned(so);
1c79356b 774 COMMON_START();
0a7de745
A
775 /* In case we got disconnected from the peer */
776 if (tp == NULL) {
39236c6e 777 goto out;
0a7de745 778 }
1c79356b
A
779 tp = tcp_disconnect(tp);
780 COMMON_END(PRU_DISCONNECT);
781}
782
39236c6e
A
783/*
784 * User-protocol pru_disconnectx callback.
785 */
786static int
3e170ce0 787tcp_usr_disconnectx(struct socket *so, sae_associd_t aid, sae_connid_t cid)
39236c6e
A
788{
789#pragma unused(cid)
0a7de745
A
790 if (aid != SAE_ASSOCID_ANY && aid != SAE_ASSOCID_ALL) {
791 return EINVAL;
792 }
39236c6e 793
0a7de745 794 return tcp_usr_disconnect(so);
39236c6e
A
795}
796
1c79356b
A
797/*
798 * Accept a connection. Essentially all the work is
799 * done at higher levels; just return the address
800 * of the peer, storing through addr.
801 */
802static int
803tcp_usr_accept(struct socket *so, struct sockaddr **nam)
804{
1c79356b
A
805 int error = 0;
806 struct inpcb *inp = sotoinpcb(so);
9bccf70c
A
807 struct tcpcb *tp = NULL;
808 TCPDEBUG0;
1c79356b 809
39236c6e
A
810 in_getpeeraddr(so, nam);
811
9bccf70c
A
812 if (so->so_state & SS_ISDISCONNECTED) {
813 error = ECONNABORTED;
814 goto out;
815 }
0a7de745
A
816 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
817 return EINVAL;
818 }
fe8ab488 819#if NECP
0a7de745
A
820 else if (necp_socket_should_use_flow_divert(inp)) {
821 return EPROTOTYPE;
822 }
cb323159 823
fe8ab488 824#endif /* NECP */
39236c6e 825
9bccf70c
A
826 tp = intotcpcb(inp);
827 TCPDEBUG1();
6d2010ae 828
cb323159
A
829 TCP_LOG_ACCEPT(tp, 0);
830
6d2010ae
A
831 calculate_tcp_clock();
832
1c79356b
A
833 COMMON_END(PRU_ACCEPT);
834}
835
#if INET6
/*
 * pru_accept() handler for IPv6 TCP sockets.
 *
 * Returns ECONNABORTED for a socket already marked SS_ISDISCONNECTED,
 * EINVAL for a missing or dead pcb and EPROTOTYPE (NECP only) for a
 * socket that should use flow divert.  Otherwise the peer address is
 * stored through nam by in6_mapped_peeraddr(), whose result is not
 * examined, and 0 is returned.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp = sotoinpcb(so);
	int error = 0;
	TCPDEBUG0;

	if ((so->so_state & SS_ISDISCONNECTED) != 0) {
		error = ECONNABORTED;
		goto out;
	}
	if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
		return EINVAL;
	}
#if NECP
	if (necp_socket_should_use_flow_divert(inp)) {
		return EPROTOTYPE;
	}
#endif /* NECP */

	tp = intotcpcb(inp);
	TCPDEBUG1();

	TCP_LOG_ACCEPT(tp, 0);

	calculate_tcp_clock();

	in6_mapped_peeraddr(so, nam);

	COMMON_END(PRU_ACCEPT);
}
#endif /* INET6 */
2d21ac55 870
1c79356b
A
871/*
872 * Mark the connection as being incapable of further output.
2d21ac55
A
873 *
874 * Returns: 0 Success
875 * EINVAL [COMMON_START]
876 * tcp_output:EADDRNOTAVAIL
877 * tcp_output:ENOBUFS
878 * tcp_output:EMSGSIZE
879 * tcp_output:EHOSTUNREACH
880 * tcp_output:ENETUNREACH
881 * tcp_output:ENETDOWN
882 * tcp_output:ENOMEM
883 * tcp_output:EACCES
884 * tcp_output:EMSGSIZE
885 * tcp_output:ENOBUFS
886 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
1c79356b
A
887 */
888static int
889tcp_usr_shutdown(struct socket *so)
890{
1c79356b
A
891 int error = 0;
892 struct inpcb *inp = sotoinpcb(so);
893 struct tcpcb *tp;
894
39236c6e 895 TCPDEBUG0;
0a7de745
A
896 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD) {
897 return EINVAL;
898 }
39236c6e 899
1c79356b 900 socantsendmore(so);
39236c6e 901
0a7de745 902 /*
39236c6e
A
903 * In case we got disconnected from the peer, or if this is
904 * a socket that is to be flow-diverted (but not yet).
905 */
906 tp = intotcpcb(inp);
907 TCPDEBUG1();
fe8ab488
A
908
909 if (tp == NULL
910#if NECP
0a7de745 911 || (necp_socket_should_use_flow_divert(inp))
fe8ab488 912#endif /* NECP */
0a7de745
A
913 ) {
914 if (tp != NULL) {
39236c6e 915 error = EPROTOTYPE;
0a7de745 916 }
39236c6e
A
917 goto out;
918 }
919
920 calculate_tcp_clock();
921
1c79356b 922 tp = tcp_usrclosed(tp);
39236c6e
A
923#if MPTCP
924 /* A reset has been sent but socket exists, do not send FIN */
925 if ((so->so_flags & SOF_MP_SUBFLOW) &&
926 (tp) && (tp->t_mpflags & TMPF_RESET)) {
927 goto out;
928 }
929#endif
fe8ab488
A
930#if CONTENT_FILTER
931 /* Don't send a FIN yet */
932 if (tp && !(so->so_state & SS_ISDISCONNECTED) &&
0a7de745 933 cfil_sock_data_pending(&so->so_snd)) {
fe8ab488 934 goto out;
0a7de745 935 }
fe8ab488 936#endif /* CONTENT_FILTER */
0a7de745 937 if (tp) {
1c79356b 938 error = tcp_output(tp);
0a7de745 939 }
1c79356b
A
940 COMMON_END(PRU_SHUTDOWN);
941}
942
943/*
944 * After a receive, possibly send window update to peer.
945 */
946static int
2d21ac55 947tcp_usr_rcvd(struct socket *so, __unused int flags)
1c79356b 948{
1c79356b
A
949 int error = 0;
950 struct inpcb *inp = sotoinpcb(so);
951 struct tcpcb *tp;
952
953 COMMON_START();
0a7de745
A
954 /* In case we got disconnected from the peer */
955 if (tp == NULL) {
39236c6e 956 goto out;
0a7de745 957 }
316670eb
A
958 tcp_sbrcv_trim(tp, &so->so_rcv);
959
3e170ce0
A
960 /*
961 * This tcp_output is solely there to trigger window-updates.
962 * However, we really do not want these window-updates while we
963 * are still in SYN_SENT or SYN_RECEIVED.
964 */
0a7de745 965 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
3e170ce0 966 tcp_output(tp);
0a7de745 967 }
fe8ab488
A
968
969#if CONTENT_FILTER
970 cfil_sock_buf_update(&so->so_rcv);
971#endif /* CONTENT_FILTER */
972
1c79356b
A
973 COMMON_END(PRU_RCVD);
974}
975
976/*
977 * Do a send by putting data in output queue and updating urgent
9bccf70c
A
978 * marker if URG set. Possibly send more data. Unlike the other
979 * pru_*() routines, the mbuf chains are our responsibility. We
980 * must either enqueue them or free them. The other pru_* routines
981 * generally are caller-frees.
2d21ac55
A
982 *
983 * Returns: 0 Success
984 * ECONNRESET
985 * EINVAL
986 * ENOBUFS
987 * tcp_connect:EADDRINUSE Address in use
988 * tcp_connect:EADDRNOTAVAIL Address not available.
989 * tcp_connect:EINVAL Invalid argument
990 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
991 * tcp_connect:EACCES Permission denied
992 * tcp_connect:EAGAIN Resource unavailable, try again
993 * tcp_connect:EPERM Operation not permitted
994 * tcp_output:EADDRNOTAVAIL
995 * tcp_output:ENOBUFS
996 * tcp_output:EMSGSIZE
997 * tcp_output:EHOSTUNREACH
998 * tcp_output:ENETUNREACH
999 * tcp_output:ENETDOWN
1000 * tcp_output:ENOMEM
1001 * tcp_output:EACCES
1002 * tcp_output:EMSGSIZE
1003 * tcp_output:ENOBUFS
1004 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
1005 * tcp6_connect:??? [IPV6 only]
1c79356b
A
1006 */
1007static int
39236c6e 1008tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
0a7de745 1009 struct sockaddr *nam, struct mbuf *control, struct proc *p)
1c79356b 1010{
1c79356b
A
1011 int error = 0;
1012 struct inpcb *inp = sotoinpcb(so);
1013 struct tcpcb *tp;
39236c6e 1014 uint32_t msgpri = MSG_PRI_DEFAULT;
cb323159
A
1015 uint32_t mpkl_len = 0; /* length of mbuf chain */
1016 uint32_t mpkl_seq; /* sequence number where new data is added */
1017 struct so_mpkl_send_info mpkl_send_info = {};
1018
1c79356b
A
1019#if INET6
1020 int isipv6;
9bccf70c
A
1021#endif
1022 TCPDEBUG0;
1c79356b 1023
fe8ab488
A
1024 if (inp == NULL || inp->inp_state == INPCB_STATE_DEAD
1025#if NECP
0a7de745 1026 || (necp_socket_should_use_flow_divert(inp))
fe8ab488 1027#endif /* NECP */
0a7de745 1028 ) {
9bccf70c
A
1029 /*
1030 * OOPS! we lost a race, the TCP session got reset after
1031 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
1032 * network interrupt in the non-splnet() section of sosend().
1033 */
0a7de745 1034 if (m != NULL) {
1c79356b 1035 m_freem(m);
0a7de745 1036 }
39236c6e 1037 if (control != NULL) {
9bccf70c 1038 m_freem(control);
39236c6e
A
1039 control = NULL;
1040 }
fe8ab488 1041
0a7de745
A
1042 if (inp == NULL) {
1043 error = ECONNRESET; /* XXX EPIPE? */
1044 } else {
fe8ab488 1045 error = EPROTOTYPE;
0a7de745 1046 }
9bccf70c
A
1047 tp = NULL;
1048 TCPDEBUG1();
1c79356b
A
1049 goto out;
1050 }
1c79356b
A
1051#if INET6
1052 isipv6 = nam && nam->sa_family == AF_INET6;
1053#endif /* INET6 */
9bccf70c
A
1054 tp = intotcpcb(inp);
1055 TCPDEBUG1();
6d2010ae
A
1056
1057 calculate_tcp_clock();
1058
cb323159
A
1059 if (net_mpklog_enabled) {
1060 mpkl_seq = tp->snd_una + so->so_snd.sb_cc;
1061 if (m) {
1062 mpkl_len = m_length(m);
1063 }
1064 if (so->so_flags1 & SOF1_MPKL_SEND_INFO) {
1065 uuid_copy(mpkl_send_info.mpkl_uuid, so->so_mpkl_send_uuid);
1066 mpkl_send_info.mpkl_proto = so->so_mpkl_send_proto;
1067 }
1068 }
1069
39236c6e
A
1070 if (control != NULL) {
1071 if (so->so_flags & SOF_ENABLE_MSGS) {
1072 /* Get the msg priority from control mbufs */
1073 error = tcp_get_msg_priority(control, &msgpri);
1074 if (error) {
1075 m_freem(control);
0a7de745 1076 if (m != NULL) {
39236c6e 1077 m_freem(m);
0a7de745 1078 }
39236c6e
A
1079 control = NULL;
1080 m = NULL;
1081 goto out;
1082 }
cb323159
A
1083 }
1084 if (control->m_len > 0 && net_mpklog_enabled) {
1085 error = tcp_get_mpkl_send_info(control, &mpkl_send_info);
5ba3f43e 1086 /*
cb323159
A
1087 * Intepretation of the returned code:
1088 * 0: client wants us to use value passed in SCM_MPKL_SEND_INFO
1089 * 1: SCM_MPKL_SEND_INFO was not present
1090 * other: failure
39236c6e 1091 */
cb323159
A
1092 if (error != 0 && error != ENOMSG) {
1093 m_freem(control);
1094 if (m != NULL) {
1095 m_freem(m);
1096 }
1097 control = NULL;
1098 m = NULL;
1099 goto out;
0a7de745 1100 }
9bccf70c 1101 }
cb323159
A
1102 /*
1103 * Silently drop unsupported ancillary data messages
1104 */
1105 m_freem(control);
1106 control = NULL;
9bccf70c 1107 }
39236c6e
A
1108
1109 if (so->so_flags & SOF_ENABLE_MSGS) {
1110 VERIFY(m->m_flags & M_PKTHDR);
1111 m->m_pkthdr.msg_pri = msgpri;
1112 }
1113
1114 /* MPTCP sublow socket buffers must not be compressed */
1115 VERIFY(!(so->so_flags & SOF_MP_SUBFLOW) ||
1116 (so->so_snd.sb_flags & SB_NOCOMPRESS));
1117
0a7de745 1118 if (!(flags & PRUS_OOB) || (so->so_flags1 & SOF1_PRECONNECT_DATA)) {
39236c6e 1119 /* Call msg send if message delivery is enabled */
0a7de745 1120 if (so->so_flags & SOF_ENABLE_MSGS) {
39236c6e 1121 sbappendmsg_snd(&so->so_snd, m);
0a7de745 1122 } else {
39236c6e 1123 sbappendstream(&so->so_snd, m);
0a7de745 1124 }
39236c6e 1125
1c79356b
A
1126 if (nam && tp->t_state < TCPS_SYN_SENT) {
1127 /*
1128 * Do implied connect if not yet connected,
1129 * initialize window to default value, and
1130 * initialize maxseg/maxopd using peer's cached
1131 * MSS.
1132 */
1133#if INET6
0a7de745 1134 if (isipv6) {
1c79356b 1135 error = tcp6_connect(tp, nam, p);
0a7de745 1136 } else
1c79356b 1137#endif /* INET6 */
0a7de745
A
1138 error = tcp_connect(tp, nam, p);
1139 if (error) {
cb323159 1140 TCP_LOG_CONNECT(tp, true, error);
1c79356b 1141 goto out;
0a7de745 1142 }
1c79356b 1143 tp->snd_wnd = TTCP_CLIENT_SND_WND;
d9a64523 1144 tp->max_sndwnd = tp->snd_wnd;
c910b4d9 1145 tcp_mss(tp, -1, IFSCOPE_NONE);
cb323159
A
1146
1147 TCP_LOG_CONNECT(tp, true, error);
1148
1149 /* The sequence number of the data is past the SYN */
1150 mpkl_seq = tp->iss + 1;
1c79356b
A
1151 }
1152
1153 if (flags & PRUS_EOF) {
1154 /*
1155 * Close the send side of the connection after
1156 * the data is sent.
1157 */
1158 socantsendmore(so);
1159 tp = tcp_usrclosed(tp);
1160 }
1161 if (tp != NULL) {
0a7de745 1162 if (flags & PRUS_MORETOCOME) {
1c79356b 1163 tp->t_flags |= TF_MORETOCOME;
0a7de745 1164 }
1c79356b 1165 error = tcp_output(tp);
0a7de745 1166 if (flags & PRUS_MORETOCOME) {
1c79356b 1167 tp->t_flags &= ~TF_MORETOCOME;
0a7de745 1168 }
1c79356b
A
1169 }
1170 } else {
5ba3f43e
A
1171 if (sbspace(&so->so_snd) == 0) {
1172 /* if no space is left in sockbuf,
b0d623f7 1173 * do not try to squeeze in OOB traffic */
1c79356b
A
1174 m_freem(m);
1175 error = ENOBUFS;
1176 goto out;
1177 }
1178 /*
1179 * According to RFC961 (Assigned Protocols),
1180 * the urgent pointer points to the last octet
1181 * of urgent data. We continue, however,
1182 * to consider it to indicate the first octet
1183 * of data past the urgent section.
1184 * Otherwise, snd_up should be one lower.
1185 */
2d21ac55 1186 sbappendstream(&so->so_snd, m);
1c79356b
A
1187 if (nam && tp->t_state < TCPS_SYN_SENT) {
1188 /*
1189 * Do implied connect if not yet connected,
1190 * initialize window to default value, and
1191 * initialize maxseg/maxopd using peer's cached
1192 * MSS.
1193 */
1194#if INET6
0a7de745 1195 if (isipv6) {
1c79356b 1196 error = tcp6_connect(tp, nam, p);
0a7de745 1197 } else
1c79356b
A
1198#endif /* INET6 */
1199 error = tcp_connect(tp, nam, p);
0a7de745 1200 if (error) {
cb323159 1201 TCP_LOG_CONNECT(tp, true, error);
1c79356b 1202 goto out;
0a7de745 1203 }
1c79356b 1204 tp->snd_wnd = TTCP_CLIENT_SND_WND;
d9a64523 1205 tp->max_sndwnd = tp->snd_wnd;
c910b4d9 1206 tcp_mss(tp, -1, IFSCOPE_NONE);
cb323159
A
1207
1208 TCP_LOG_CONNECT(tp, true, error);
1c79356b
A
1209 }
1210 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
fe8ab488 1211 tp->t_flagsext |= TF_FORCE;
1c79356b 1212 error = tcp_output(tp);
fe8ab488 1213 tp->t_flagsext &= ~TF_FORCE;
1c79356b 1214 }
3e170ce0 1215
cb323159
A
1216 if (net_mpklog_enabled && (inp = tp->t_inpcb) != NULL &&
1217 ((inp->inp_last_outifp != NULL &&
1218 (inp->inp_last_outifp->if_xflags & IFXF_MPK_LOG)) ||
1219 (inp->inp_boundifp != NULL &&
1220 (inp->inp_boundifp->if_xflags & IFXF_MPK_LOG)))) {
1221 MPKL_TCP_SEND(tcp_mpkl_log_object,
1222 mpkl_send_info.mpkl_proto, mpkl_send_info.mpkl_uuid,
1223 ntohs(inp->inp_lport), ntohs(inp->inp_fport),
1224 mpkl_seq, mpkl_len,
1225 so->last_pid, so->so_log_seqn++);
1226 }
3e170ce0
A
1227
1228 /*
1229 * We wait for the socket to successfully connect before returning.
1230 * This allows us to signal a timeout to the application.
1231 */
1232 if (so->so_state & SS_ISCONNECTING) {
0a7de745 1233 if (so->so_state & SS_NBIO) {
3e170ce0 1234 error = EWOULDBLOCK;
0a7de745 1235 } else {
3e170ce0 1236 error = sbwait(&so->so_snd);
0a7de745 1237 }
3e170ce0
A
1238 }
1239
5ba3f43e 1240 COMMON_END((flags & PRUS_OOB) ? PRU_SENDOOB :
0a7de745 1241 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1c79356b
A
1242}
1243
1244/*
1245 * Abort the TCP.
1246 */
1247static int
1248tcp_usr_abort(struct socket *so)
1249{
1c79356b
A
1250 int error = 0;
1251 struct inpcb *inp = sotoinpcb(so);
1252 struct tcpcb *tp;
1253
1254 COMMON_START();
0a7de745
A
1255 /* In case we got disconnected from the peer */
1256 if (tp == NULL) {
39236c6e 1257 goto out;
0a7de745 1258 }
1c79356b 1259 tp = tcp_drop(tp, ECONNABORTED);
d190cdc3 1260 VERIFY(so->so_usecount > 0);
91447636 1261 so->so_usecount--;
1c79356b
A
1262 COMMON_END(PRU_ABORT);
1263}
1264
1265/*
1266 * Receive out-of-band data.
2d21ac55
A
1267 *
1268 * Returns: 0 Success
1269 * EINVAL [COMMON_START]
1270 * EINVAL
1271 * EWOULDBLOCK
1c79356b
A
1272 */
1273static int
1274tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1275{
1c79356b
A
1276 int error = 0;
1277 struct inpcb *inp = sotoinpcb(so);
1278 struct tcpcb *tp;
1279
1280 COMMON_START();
1281 if ((so->so_oobmark == 0 &&
0a7de745 1282 (so->so_state & SS_RCVATMARK) == 0) ||
1c79356b
A
1283 so->so_options & SO_OOBINLINE ||
1284 tp->t_oobflags & TCPOOB_HADDATA) {
1285 error = EINVAL;
1286 goto out;
1287 }
1288 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1289 error = EWOULDBLOCK;
1290 goto out;
1291 }
1292 m->m_len = 1;
1293 *mtod(m, caddr_t) = tp->t_iobc;
39037602 1294 so->so_state &= ~SS_RCVATMARK;
0a7de745 1295 if ((flags & MSG_PEEK) == 0) {
1c79356b 1296 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
0a7de745 1297 }
1c79356b
A
1298 COMMON_END(PRU_RCVOOB);
1299}
1300
3e170ce0
A
1301static int
1302tcp_usr_preconnect(struct socket *so)
1303{
490019cf
A
1304 struct inpcb *inp = sotoinpcb(so);
1305 int error = 0;
1306
1307#if NECP
1308 if (necp_socket_should_use_flow_divert(inp)) {
1309 /* May happen, if in tcp_usr_connect we did not had a chance
1310 * to set the usrreqs (due to some error). So, let's get out
1311 * of here.
1312 */
1313 goto out;
1314 }
1315#endif /* NECP */
1316
1317 error = tcp_output(sototcpcb(so));
3e170ce0 1318
5ba3f43e 1319 soclearfastopen(so);
3e170ce0
A
1320
1321 COMMON_END(PRU_PRECONNECT);
1322}
1323
1c79356b
A
1324/* xxx - should be const */
1325struct pr_usrreqs tcp_usrreqs = {
0a7de745
A
1326 .pru_abort = tcp_usr_abort,
1327 .pru_accept = tcp_usr_accept,
1328 .pru_attach = tcp_usr_attach,
1329 .pru_bind = tcp_usr_bind,
1330 .pru_connect = tcp_usr_connect,
1331 .pru_connectx = tcp_usr_connectx,
1332 .pru_control = in_control,
1333 .pru_detach = tcp_usr_detach,
1334 .pru_disconnect = tcp_usr_disconnect,
1335 .pru_disconnectx = tcp_usr_disconnectx,
1336 .pru_listen = tcp_usr_listen,
1337 .pru_peeraddr = in_getpeeraddr,
1338 .pru_rcvd = tcp_usr_rcvd,
1339 .pru_rcvoob = tcp_usr_rcvoob,
1340 .pru_send = tcp_usr_send,
1341 .pru_shutdown = tcp_usr_shutdown,
1342 .pru_sockaddr = in_getsockaddr,
1343 .pru_sosend = sosend,
1344 .pru_soreceive = soreceive,
1345 .pru_preconnect = tcp_usr_preconnect,
1c79356b
A
1346};
1347
#if INET6
/*
 * User-request dispatch table for TCP over IPv6.  Entries that do not
 * depend on the address family are shared with the IPv4 table.
 */
struct pr_usrreqs tcp6_usrreqs = {
	/* socket setup and teardown */
	.pru_attach =           tcp_usr_attach,
	.pru_detach =           tcp_usr_detach,
	.pru_abort =            tcp_usr_abort,

	/* addressing */
	.pru_bind =             tcp6_usr_bind,
	.pru_sockaddr =         in6_mapped_sockaddr,
	.pru_peeraddr =         in6_mapped_peeraddr,
	.pru_control =          in6_control,

	/* connection establishment and release */
	.pru_listen =           tcp6_usr_listen,
	.pru_accept =           tcp6_usr_accept,
	.pru_connect =          tcp6_usr_connect,
	.pru_connectx =         tcp6_usr_connectx,
	.pru_preconnect =       tcp_usr_preconnect,
	.pru_disconnect =       tcp_usr_disconnect,
	.pru_disconnectx =      tcp_usr_disconnectx,
	.pru_shutdown =         tcp_usr_shutdown,

	/* data transfer */
	.pru_send =             tcp_usr_send,
	.pru_sosend =           sosend,
	.pru_rcvd =             tcp_usr_rcvd,
	.pru_rcvoob =           tcp_usr_rcvoob,
	.pru_soreceive =        soreceive,
};
#endif /* INET6 */
1372
1373/*
1374 * Common subroutine to open a TCP connection to remote host specified
1375 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
1376 * port number if needed. Call in_pcbladdr to do the routing and to choose
1377 * a local host address (interface). If there is an existing incarnation
1378 * of the same connection in TIME-WAIT state and if the remote host was
1379 * sending CC options and if the connection duration was < MSL, then
1380 * truncate the previous TIME-WAIT state and proceed.
1381 * Initialize connection parameters and enter SYN-SENT state.
2d21ac55
A
1382 *
1383 * Returns: 0 Success
1384 * EADDRINUSE
1385 * EINVAL
1386 * in_pcbbind:EADDRNOTAVAIL Address not available.
1387 * in_pcbbind:EINVAL Invalid argument
1388 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
1389 * in_pcbbind:EACCES Permission denied
1390 * in_pcbbind:EADDRINUSE Address in use
1391 * in_pcbbind:EAGAIN Resource unavailable, try again
1392 * in_pcbbind:EPERM Operation not permitted
1393 * in_pcbladdr:EINVAL Invalid argument
1394 * in_pcbladdr:EAFNOSUPPORT Address family not supported
1395 * in_pcbladdr:EADDRNOTAVAIL Address not available
1c79356b
A
1396 */
1397static int
39037602 1398tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
1c79356b
A
1399{
1400 struct inpcb *inp = tp->t_inpcb, *oinp;
1401 struct socket *so = inp->inp_socket;
1402 struct tcpcb *otp;
316670eb 1403 struct sockaddr_in *sin = (struct sockaddr_in *)(void *)nam;
39236c6e 1404 struct in_addr laddr;
39236c6e 1405 int error = 0;
316670eb 1406 struct ifnet *outif = NULL;
1c79356b
A
1407
1408 if (inp->inp_lport == 0) {
39236c6e 1409 error = in_pcbbind(inp, NULL, p);
0a7de745 1410 if (error) {
39236c6e 1411 goto done;
0a7de745 1412 }
1c79356b
A
1413 }
1414
1415 /*
1416 * Cannot simply call in_pcbconnect, because there might be an
1417 * earlier incarnation of this same connection still in
1418 * TIME_WAIT state, creating an ADDRINUSE error.
1419 */
39037602 1420 error = in_pcbladdr(inp, nam, &laddr, IFSCOPE_NONE, &outif, 0);
0a7de745 1421 if (error) {
39236c6e 1422 goto done;
0a7de745 1423 }
91447636 1424
5ba3f43e 1425 socket_unlock(inp->inp_socket, 0);
1c79356b
A
1426 oinp = in_pcblookup_hash(inp->inp_pcbinfo,
1427 sin->sin_addr, sin->sin_port,
39236c6e 1428 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr : laddr,
0a7de745 1429 inp->inp_lport, 0, NULL);
91447636 1430
5ba3f43e 1431 socket_lock(inp->inp_socket, 0);
1c79356b 1432 if (oinp) {
0a7de745 1433 if (oinp != inp) { /* 4143933: avoid deadlock if inp == oinp */
5ba3f43e 1434 socket_lock(oinp->inp_socket, 1);
0a7de745 1435 }
91447636 1436 if (in_pcb_checkstate(oinp, WNT_RELEASE, 1) == WNT_STOPUSING) {
0a7de745 1437 if (oinp != inp) {
5ba3f43e 1438 socket_unlock(oinp->inp_socket, 1);
0a7de745 1439 }
91447636
A
1440 goto skip_oinp;
1441 }
1442
1c79356b 1443 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
39236c6e 1444 otp->t_state == TCPS_TIME_WAIT &&
6d2010ae 1445 ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
39236c6e 1446 (otp->t_flags & TF_RCVD_CC)) {
1c79356b 1447 otp = tcp_close(otp);
39236c6e
A
1448 } else {
1449 printf("tcp_connect: inp=0x%llx err=EADDRINUSE\n",
1450 (uint64_t)VM_KERNEL_ADDRPERM(inp));
0a7de745 1451 if (oinp != inp) {
5ba3f43e 1452 socket_unlock(oinp->inp_socket, 1);
0a7de745 1453 }
39236c6e
A
1454 error = EADDRINUSE;
1455 goto done;
91447636 1456 }
0a7de745 1457 if (oinp != inp) {
5ba3f43e 1458 socket_unlock(oinp->inp_socket, 1);
0a7de745 1459 }
1c79356b 1460 }
91447636 1461skip_oinp:
39236c6e
A
1462 if ((inp->inp_laddr.s_addr == INADDR_ANY ? laddr.s_addr :
1463 inp->inp_laddr.s_addr) == sin->sin_addr.s_addr &&
1464 inp->inp_lport == sin->sin_port) {
1465 error = EINVAL;
1466 goto done;
1467 }
1468 if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
91447636
A
1469 /*lock inversion issue, mostly with udp multicast packets */
1470 socket_unlock(inp->inp_socket, 0);
39236c6e 1471 lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
91447636
A
1472 socket_lock(inp->inp_socket, 0);
1473 }
6d2010ae 1474 if (inp->inp_laddr.s_addr == INADDR_ANY) {
39236c6e
A
1475 inp->inp_laddr = laddr;
1476 /* no reference needed */
316670eb 1477 inp->inp_last_outifp = outif;
5ba3f43e 1478
39236c6e 1479 inp->inp_flags |= INP_INADDR_ANY;
6d2010ae 1480 }
1c79356b
A
1481 inp->inp_faddr = sin->sin_addr;
1482 inp->inp_fport = sin->sin_port;
1483 in_pcbrehash(inp);
39236c6e 1484 lck_rw_done(inp->inp_pcbinfo->ipi_lock);
1c79356b 1485
0a7de745 1486 if (inp->inp_flowhash == 0) {
316670eb 1487 inp->inp_flowhash = inp_calc_flowhash(inp);
0a7de745 1488 }
1c79356b 1489
d9a64523 1490 tcp_set_max_rwinscale(tp, so, outif);
2d21ac55 1491
1c79356b
A
1492 soisconnecting(so);
1493 tcpstat.tcps_connattempt++;
1494 tp->t_state = TCPS_SYN_SENT;
39236c6e 1495 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp, TCP_CONN_KEEPINIT(tp));
9bccf70c 1496 tp->iss = tcp_new_isn(tp);
1c79356b 1497 tcp_sendseqinit(tp);
cb323159 1498 tp->t_connect_time = tcp_now;
0a7de745 1499 if (nstat_collect) {
6d2010ae 1500 nstat_route_connect_attempt(inp->inp_route.ro_rt);
0a7de745 1501 }
1c79356b 1502
39236c6e 1503done:
0a7de745 1504 if (outif != NULL) {
39236c6e 1505 ifnet_release(outif);
0a7de745 1506 }
39236c6e 1507
0a7de745 1508 return error;
1c79356b
A
1509}
1510
#if INET6
/*
 * IPv6 counterpart of tcp_connect(): bind a local port if needed, pick
 * the local address with in6_pcbladdr(), make sure the resulting tuple
 * is not in use (closing a qualifying TIME_WAIT incarnation), fill in
 * the pcb and enter SYN-SENT state.
 *
 * Returns 0 on success, EADDRINUSE when the tuple is taken, or the
 * error reported by in6_pcbbind() / in6_pcbladdr().
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct proc *p)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	struct tcpcb *otp;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(void *)nam;
	struct in6_addr addr6;
	int error = 0;
	struct ifnet *outif = NULL;

	/* Pick a local port first if none has been assigned yet */
	if (inp->inp_lport == 0 &&
	    (error = in6_pcbbind(inp, NULL, p)) != 0) {
		goto done;
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 *
	 * in6_pcbladdr() might return an ifp with its reference held
	 * even in the error case, so make sure that it's released
	 * whenever it's non-NULL.
	 */
	error = in6_pcbladdr(inp, nam, &addr6, &outif);
	if (error != 0) {
		goto done;
	}

	/* The hash lookup is done with our own socket unlocked */
	socket_unlock(inp->inp_socket, 0);
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo,
	    &sin6->sin6_addr, sin6->sin6_port,
	    IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)
	    ? &addr6
	    : &inp->in6p_laddr,
	    inp->inp_lport, 0, NULL);
	socket_lock(inp->inp_socket, 0);

	if (oinp != NULL) {
		/*
		 * Only a different pcb that is in TIME_WAIT, younger than
		 * tcp_msl and flagged TF_RCVD_CC may be closed to make
		 * room; anything else means the address is in use.
		 */
		if (!(oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
		    otp->t_state == TCPS_TIME_WAIT &&
		    ((int)(tcp_now - otp->t_starttime)) < tcp_msl &&
		    (otp->t_flags & TF_RCVD_CC))) {
			error = EADDRINUSE;
			goto done;
		}
		otp = tcp_close(otp);
	}

	if (!lck_rw_try_lock_exclusive(inp->inp_pcbinfo->ipi_lock)) {
		/* lock inversion issue, mostly with udp multicast packets */
		socket_unlock(inp->inp_socket, 0);
		lck_rw_lock_exclusive(inp->inp_pcbinfo->ipi_lock);
		socket_lock(inp->inp_socket, 0);
	}
	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		inp->in6p_laddr = addr6;
		inp->in6p_last_outifp = outif;  /* no reference needed */
		inp->in6p_flags |= INP_IN6ADDR_ANY;
	}
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;
	if ((sin6->sin6_flowinfo & IPV6_FLOWINFO_MASK) != 0) {
		inp->inp_flow = sin6->sin6_flowinfo;
	}
	in_pcbrehash(inp);
	lck_rw_done(inp->inp_pcbinfo->ipi_lock);

	if (inp->inp_flowhash == 0) {
		inp->inp_flowhash = inp_calc_flowhash(inp);
	}
	/* update flowinfo - RFC 6437 */
	if (inp->inp_flow == 0 && (inp->in6p_flags & IN6P_AUTOFLOWLABEL)) {
		inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
		inp->inp_flow |=
		    (htonl(inp->inp_flowhash) & IPV6_FLOWLABEL_MASK);
	}

	tcp_set_max_rwinscale(tp, so, outif);

	/* Move the socket and the control block into the connecting state */
	soisconnecting(so);
	tcpstat.tcps_connattempt++;
	tp->t_state = TCPS_SYN_SENT;
	tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
	    TCP_CONN_KEEPINIT(tp));
	tp->iss = tcp_new_isn(tp);
	tcp_sendseqinit(tp);
	tp->t_connect_time = tcp_now;
	if (nstat_collect) {
		nstat_route_connect_attempt(inp->inp_route.ro_rt);
	}

done:
	/* Drop the interface reference on success and on failure alike */
	if (outif != NULL) {
		ifnet_release(outif);
	}

	return error;
}
#endif /* INET6 */
1613
6d2010ae
A
1614/*
1615 * Export TCP internal state information via a struct tcp_info
1616 */
5ba3f43e 1617void
6d2010ae
A
1618tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1619{
316670eb 1620 struct inpcb *inp = tp->t_inpcb;
5ba3f43e 1621
6d2010ae
A
1622 bzero(ti, sizeof(*ti));
1623
1624 ti->tcpi_state = tp->t_state;
4bd07ac2
A
1625 ti->tcpi_flowhash = inp->inp_flowhash;
1626
316670eb 1627 if (tp->t_state > TCPS_LISTEN) {
0a7de745 1628 if (TSTMP_SUPPORTED(tp)) {
6d2010ae 1629 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
0a7de745
A
1630 }
1631 if (SACK_ENABLED(tp)) {
6d2010ae 1632 ti->tcpi_options |= TCPI_OPT_SACK;
0a7de745 1633 }
3e170ce0 1634 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
6d2010ae
A
1635 ti->tcpi_options |= TCPI_OPT_WSCALE;
1636 ti->tcpi_snd_wscale = tp->snd_scale;
1637 ti->tcpi_rcv_wscale = tp->rcv_scale;
1638 }
0a7de745 1639 if (TCP_ECN_ENABLED(tp)) {
4bd07ac2 1640 ti->tcpi_options |= TCPI_OPT_ECN;
0a7de745 1641 }
316670eb
A
1642
1643 /* Are we in retranmission episode */
0a7de745 1644 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
316670eb 1645 ti->tcpi_flags |= TCPI_FLAG_LOSSRECOVERY;
0a7de745 1646 }
316670eb 1647
0a7de745 1648 if (tp->t_flags & TF_STREAMING_ON) {
39037602 1649 ti->tcpi_flags |= TCPI_FLAG_STREAMING_ON;
0a7de745 1650 }
39037602 1651
316670eb 1652 ti->tcpi_rto = tp->t_timer[TCPT_REXMT] ? tp->t_rxtcur : 0;
6d2010ae
A
1653 ti->tcpi_snd_mss = tp->t_maxseg;
1654 ti->tcpi_rcv_mss = tp->t_maxseg;
1655
316670eb
A
1656 ti->tcpi_rttcur = tp->t_rttcur;
1657 ti->tcpi_srtt = tp->t_srtt >> TCP_RTT_SHIFT;
1658 ti->tcpi_rttvar = tp->t_rttvar >> TCP_RTTVAR_SHIFT;
39236c6e 1659 ti->tcpi_rttbest = tp->t_rttbest >> TCP_RTT_SHIFT;
316670eb 1660
6d2010ae
A
1661 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1662 ti->tcpi_snd_cwnd = tp->snd_cwnd;
3e170ce0 1663 ti->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
5ba3f43e 1664
6d2010ae
A
1665 ti->tcpi_rcv_space = tp->rcv_wnd;
1666
1667 ti->tcpi_snd_wnd = tp->snd_wnd;
6d2010ae
A
1668 ti->tcpi_snd_nxt = tp->snd_nxt;
1669 ti->tcpi_rcv_nxt = tp->rcv_nxt;
316670eb
A
1670
1671 /* convert bytes/msec to bits/sec */
1672 if ((tp->t_flagsext & TF_MEASURESNDBW) != 0 &&
0a7de745
A
1673 tp->t_bwmeas != NULL) {
1674 ti->tcpi_snd_bw = (tp->t_bwmeas->bw_sndbw * 8000);
316670eb 1675 }
813fb2f6 1676
316670eb
A
1677 ti->tcpi_last_outif = (tp->t_inpcb->inp_last_outifp == NULL) ? 0 :
1678 tp->t_inpcb->inp_last_outifp->if_index;
1679
1680 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
39236c6e 1681 ti->tcpi_txpackets = inp->inp_stat->txpackets;
316670eb
A
1682 ti->tcpi_txbytes = inp->inp_stat->txbytes;
1683 ti->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
813fb2f6 1684 ti->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
316670eb 1685 ti->tcpi_txunacked = tp->snd_max - tp->snd_una;
813fb2f6 1686
316670eb 1687 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
39236c6e 1688 ti->tcpi_rxpackets = inp->inp_stat->rxpackets;
316670eb
A
1689 ti->tcpi_rxbytes = inp->inp_stat->rxbytes;
1690 ti->tcpi_rxduplicatebytes = tp->t_stat.rxduplicatebytes;
39236c6e
A
1691 ti->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1692
1693 if (tp->t_state > TCPS_LISTEN) {
cb323159 1694 ti->tcpi_synrexmits = tp->t_stat.rxmitsyns;
39236c6e
A
1695 }
1696 ti->tcpi_cell_rxpackets = inp->inp_cstat->rxpackets;
1697 ti->tcpi_cell_rxbytes = inp->inp_cstat->rxbytes;
1698 ti->tcpi_cell_txpackets = inp->inp_cstat->txpackets;
1699 ti->tcpi_cell_txbytes = inp->inp_cstat->txbytes;
1700
1701 ti->tcpi_wifi_rxpackets = inp->inp_wstat->rxpackets;
1702 ti->tcpi_wifi_rxbytes = inp->inp_wstat->rxbytes;
1703 ti->tcpi_wifi_txpackets = inp->inp_wstat->txpackets;
1704 ti->tcpi_wifi_txbytes = inp->inp_wstat->txbytes;
fe8ab488
A
1705
1706 ti->tcpi_wired_rxpackets = inp->inp_Wstat->rxpackets;
1707 ti->tcpi_wired_rxbytes = inp->inp_Wstat->rxbytes;
1708 ti->tcpi_wired_txpackets = inp->inp_Wstat->txpackets;
1709 ti->tcpi_wired_txbytes = inp->inp_Wstat->txbytes;
3e170ce0
A
1710 tcp_get_connectivity_status(tp, &ti->tcpi_connstatus);
1711
1712 ti->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1713 ti->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1714 ti->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1715 ti->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1716
1717 ti->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1718 ti->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1719 ti->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1720 ti->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1721 ti->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
39037602
A
1722 ti->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1723 ti->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1724 ti->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1725 ti->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1726 ti->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
5ba3f43e 1727 ti->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
4bd07ac2
A
1728
1729 ti->tcpi_ecn_client_setup = !!(tp->ecn_flags & TE_SETUPSENT);
1730 ti->tcpi_ecn_server_setup = !!(tp->ecn_flags & TE_SETUPRECEIVED);
1731 ti->tcpi_ecn_success = (tp->ecn_flags & TE_ECN_ON) == TE_ECN_ON ? 1 : 0;
1732 ti->tcpi_ecn_lost_syn = !!(tp->ecn_flags & TE_LOST_SYN);
1733 ti->tcpi_ecn_lost_synack = !!(tp->ecn_flags & TE_LOST_SYNACK);
1734
1735 ti->tcpi_local_peer = !!(tp->t_flags & TF_LOCAL);
1736
1737 if (tp->t_inpcb->inp_last_outifp != NULL) {
0a7de745 1738 if (IFNET_IS_CELLULAR(tp->t_inpcb->inp_last_outifp)) {
4bd07ac2 1739 ti->tcpi_if_cell = 1;
0a7de745
A
1740 }
1741 if (IFNET_IS_WIFI(tp->t_inpcb->inp_last_outifp)) {
4bd07ac2 1742 ti->tcpi_if_wifi = 1;
0a7de745
A
1743 }
1744 if (IFNET_IS_WIRED(tp->t_inpcb->inp_last_outifp)) {
5ba3f43e 1745 ti->tcpi_if_wired = 1;
0a7de745
A
1746 }
1747 if (IFNET_IS_WIFI_INFRA(tp->t_inpcb->inp_last_outifp)) {
5ba3f43e 1748 ti->tcpi_if_wifi_infra = 1;
0a7de745
A
1749 }
1750 if (tp->t_inpcb->inp_last_outifp->if_eflags & IFEF_AWDL) {
5ba3f43e 1751 ti->tcpi_if_wifi_awdl = 1;
0a7de745 1752 }
4bd07ac2 1753 }
0a7de745 1754 if (tp->tcp_cc_index == TCP_CC_ALGO_BACKGROUND_INDEX) {
5ba3f43e 1755 ti->tcpi_snd_background = 1;
0a7de745 1756 }
5ba3f43e 1757 if (tcp_recv_bg == 1 ||
0a7de745 1758 IS_TCP_RECV_BG(tp->t_inpcb->inp_socket)) {
5ba3f43e 1759 ti->tcpi_rcv_background = 1;
0a7de745 1760 }
4bd07ac2
A
1761
1762 ti->tcpi_ecn_recv_ce = tp->t_ecn_recv_ce;
1763 ti->tcpi_ecn_recv_cwr = tp->t_ecn_recv_cwr;
1764
1765 ti->tcpi_rcvoopack = tp->t_rcvoopack;
1766 ti->tcpi_pawsdrop = tp->t_pawsdrop;
1767 ti->tcpi_sack_recovery_episode = tp->t_sack_recovery_episode;
1768 ti->tcpi_reordered_pkts = tp->t_reordered_pkts;
1769 ti->tcpi_dsack_sent = tp->t_dsack_sent;
1770 ti->tcpi_dsack_recvd = tp->t_dsack_recvd;
6d2010ae
A
1771 }
1772}
1773
1774__private_extern__ errno_t
1775tcp_fill_info_for_info_tuple(struct info_tuple *itpl, struct tcp_info *ti)
1776{
1777 struct inpcbinfo *pcbinfo = NULL;
1778 struct inpcb *inp = NULL;
1779 struct socket *so;
1780 struct tcpcb *tp;
5ba3f43e 1781
0a7de745 1782 if (itpl->itpl_proto == IPPROTO_TCP) {
6d2010ae 1783 pcbinfo = &tcbinfo;
0a7de745 1784 } else {
6d2010ae 1785 return EINVAL;
0a7de745 1786 }
5ba3f43e 1787
6d2010ae 1788 if (itpl->itpl_local_sa.sa_family == AF_INET &&
0a7de745 1789 itpl->itpl_remote_sa.sa_family == AF_INET) {
5ba3f43e 1790 inp = in_pcblookup_hash(pcbinfo,
0a7de745
A
1791 itpl->itpl_remote_sin.sin_addr,
1792 itpl->itpl_remote_sin.sin_port,
1793 itpl->itpl_local_sin.sin_addr,
1794 itpl->itpl_local_sin.sin_port,
1795 0, NULL);
6d2010ae 1796 } else if (itpl->itpl_local_sa.sa_family == AF_INET6 &&
0a7de745 1797 itpl->itpl_remote_sa.sa_family == AF_INET6) {
6d2010ae
A
1798 struct in6_addr ina6_local;
1799 struct in6_addr ina6_remote;
5ba3f43e 1800
6d2010ae 1801 ina6_local = itpl->itpl_local_sin6.sin6_addr;
5ba3f43e 1802 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local) &&
0a7de745 1803 itpl->itpl_local_sin6.sin6_scope_id) {
6d2010ae 1804 ina6_local.s6_addr16[1] = htons(itpl->itpl_local_sin6.sin6_scope_id);
0a7de745 1805 }
6d2010ae
A
1806
1807 ina6_remote = itpl->itpl_remote_sin6.sin6_addr;
5ba3f43e 1808 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote) &&
0a7de745 1809 itpl->itpl_remote_sin6.sin6_scope_id) {
6d2010ae 1810 ina6_remote.s6_addr16[1] = htons(itpl->itpl_remote_sin6.sin6_scope_id);
0a7de745 1811 }
5ba3f43e
A
1812
1813 inp = in6_pcblookup_hash(pcbinfo,
0a7de745
A
1814 &ina6_remote,
1815 itpl->itpl_remote_sin6.sin6_port,
1816 &ina6_local,
1817 itpl->itpl_local_sin6.sin6_port,
1818 0, NULL);
39236c6e 1819 } else {
6d2010ae 1820 return EINVAL;
39236c6e 1821 }
0a7de745 1822 if (inp == NULL || (so = inp->inp_socket) == NULL) {
6d2010ae 1823 return ENOENT;
0a7de745 1824 }
6d2010ae
A
1825
1826 socket_lock(so, 0);
1827 if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
1828 socket_unlock(so, 0);
1829 return ENOENT;
1830 }
1831 tp = intotcpcb(inp);
1832
1833 tcp_fill_info(tp, ti);
1834 socket_unlock(so, 0);
1835
1836 return 0;
1837}
1838
3e170ce0
A
1839static void
1840tcp_connection_fill_info(struct tcpcb *tp, struct tcp_connection_info *tci)
1841{
1842 struct inpcb *inp = tp->t_inpcb;
1843
1844 bzero(tci, sizeof(*tci));
1845 tci->tcpi_state = tp->t_state;
1846 if (tp->t_state > TCPS_LISTEN) {
0a7de745 1847 if (TSTMP_SUPPORTED(tp)) {
3e170ce0 1848 tci->tcpi_options |= TCPCI_OPT_TIMESTAMPS;
0a7de745
A
1849 }
1850 if (SACK_ENABLED(tp)) {
3e170ce0 1851 tci->tcpi_options |= TCPCI_OPT_SACK;
0a7de745 1852 }
3e170ce0
A
1853 if (TCP_WINDOW_SCALE_ENABLED(tp)) {
1854 tci->tcpi_options |= TCPCI_OPT_WSCALE;
1855 tci->tcpi_snd_wscale = tp->snd_scale;
1856 tci->tcpi_rcv_wscale = tp->rcv_scale;
1857 }
0a7de745 1858 if (TCP_ECN_ENABLED(tp)) {
3e170ce0 1859 tci->tcpi_options |= TCPCI_OPT_ECN;
0a7de745
A
1860 }
1861 if (IN_FASTRECOVERY(tp) || tp->t_rxtshift > 0) {
3e170ce0 1862 tci->tcpi_flags |= TCPCI_FLAG_LOSSRECOVERY;
0a7de745
A
1863 }
1864 if (tp->t_flagsext & TF_PKTS_REORDERED) {
3e170ce0 1865 tci->tcpi_flags |= TCPCI_FLAG_REORDERING_DETECTED;
0a7de745 1866 }
3e170ce0 1867 tci->tcpi_rto = (tp->t_timer[TCPT_REXMT] > 0) ?
0a7de745 1868 tp->t_rxtcur : 0;
3e170ce0
A
1869 tci->tcpi_maxseg = tp->t_maxseg;
1870 tci->tcpi_snd_ssthresh = tp->snd_ssthresh;
1871 tci->tcpi_snd_cwnd = tp->snd_cwnd;
1872 tci->tcpi_snd_wnd = tp->snd_wnd;
1873 tci->tcpi_snd_sbbytes = inp->inp_socket->so_snd.sb_cc;
1874 tci->tcpi_rcv_wnd = tp->rcv_wnd;
1875 tci->tcpi_rttcur = tp->t_rttcur;
1876 tci->tcpi_srtt = (tp->t_srtt >> TCP_RTT_SHIFT);
1877 tci->tcpi_rttvar = (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
1878 tci->tcpi_txpackets = inp->inp_stat->txpackets;
1879 tci->tcpi_txbytes = inp->inp_stat->txbytes;
1880 tci->tcpi_txretransmitbytes = tp->t_stat.txretransmitbytes;
813fb2f6 1881 tci->tcpi_txretransmitpackets = tp->t_stat.rxmitpkts;
3e170ce0
A
1882 tci->tcpi_rxpackets = inp->inp_stat->rxpackets;
1883 tci->tcpi_rxbytes = inp->inp_stat->rxbytes;
1884 tci->tcpi_rxoutoforderbytes = tp->t_stat.rxoutoforderbytes;
1885
1886 tci->tcpi_tfo_syn_data_rcv = !!(tp->t_tfo_stats & TFO_S_SYNDATA_RCV);
1887 tci->tcpi_tfo_cookie_req_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIEREQ_RECV);
1888 tci->tcpi_tfo_cookie_sent = !!(tp->t_tfo_stats & TFO_S_COOKIE_SENT);
1889 tci->tcpi_tfo_cookie_invalid = !!(tp->t_tfo_stats & TFO_S_COOKIE_INVALID);
1890 tci->tcpi_tfo_cookie_req = !!(tp->t_tfo_stats & TFO_S_COOKIE_REQ);
1891 tci->tcpi_tfo_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_COOKIE_RCV);
1892 tci->tcpi_tfo_syn_data_sent = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_SENT);
1893 tci->tcpi_tfo_syn_data_acked = !!(tp->t_tfo_stats & TFO_S_SYN_DATA_ACKED);
1894 tci->tcpi_tfo_syn_loss = !!(tp->t_tfo_stats & TFO_S_SYN_LOSS);
39037602
A
1895 tci->tcpi_tfo_cookie_wrong = !!(tp->t_tfo_stats & TFO_S_COOKIE_WRONG);
1896 tci->tcpi_tfo_no_cookie_rcv = !!(tp->t_tfo_stats & TFO_S_NO_COOKIE_RCV);
1897 tci->tcpi_tfo_heuristics_disable = !!(tp->t_tfo_stats & TFO_S_HEURISTICS_DISABLE);
1898 tci->tcpi_tfo_send_blackhole = !!(tp->t_tfo_stats & TFO_S_SEND_BLACKHOLE);
1899 tci->tcpi_tfo_recv_blackhole = !!(tp->t_tfo_stats & TFO_S_RECV_BLACKHOLE);
5ba3f43e 1900 tci->tcpi_tfo_onebyte_proxy = !!(tp->t_tfo_stats & TFO_S_ONE_BYTE_PROXY);
3e170ce0
A
1901 }
1902}
1903
6d2010ae 1904
5ba3f43e 1905__private_extern__ int
6d2010ae
A
1906tcp_sysctl_info(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
1907{
1908 int error;
527f9951 1909 struct tcp_info ti = {};
6d2010ae 1910 struct info_tuple itpl;
39236c6e 1911
6d2010ae
A
1912 if (req->newptr == USER_ADDR_NULL) {
1913 return EINVAL;
1914 }
1915 if (req->newlen < sizeof(struct info_tuple)) {
1916 return EINVAL;
1917 }
1918 error = SYSCTL_IN(req, &itpl, sizeof(struct info_tuple));
1919 if (error != 0) {
1920 return error;
1921 }
1922 error = tcp_fill_info_for_info_tuple(&itpl, &ti);
1923 if (error != 0) {
1924 return error;
1925 }
1926 error = SYSCTL_OUT(req, &ti, sizeof(struct tcp_info));
1927 if (error != 0) {
1928 return error;
1929 }
5ba3f43e 1930
6d2010ae
A
1931 return 0;
1932}
1933
316670eb
A
1934static int
1935tcp_lookup_peer_pid_locked(struct socket *so, pid_t *out_pid)
1936{
1937 int error = EHOSTUNREACH;
1938 *out_pid = -1;
0a7de745
A
1939 if ((so->so_state & SS_ISCONNECTED) == 0) {
1940 return ENOTCONN;
1941 }
5ba3f43e 1942
0a7de745
A
1943 struct inpcb *inp = (struct inpcb*)so->so_pcb;
1944 uint16_t lport = inp->inp_lport;
1945 uint16_t fport = inp->inp_fport;
1946 struct inpcb *finp = NULL;
a39ff7e2
A
1947 struct in6_addr laddr6, faddr6;
1948 struct in_addr laddr4, faddr4;
5ba3f43e 1949
316670eb 1950 if (inp->inp_vflag & INP_IPV6) {
a39ff7e2
A
1951 laddr6 = inp->in6p_laddr;
1952 faddr6 = inp->in6p_faddr;
1953 } else if (inp->inp_vflag & INP_IPV4) {
1954 laddr4 = inp->inp_laddr;
1955 faddr4 = inp->inp_faddr;
1956 }
1957
1958 socket_unlock(so, 0);
1959 if (inp->inp_vflag & INP_IPV6) {
316670eb 1960 finp = in6_pcblookup_hash(&tcbinfo, &laddr6, lport, &faddr6, fport, 0, NULL);
316670eb 1961 } else if (inp->inp_vflag & INP_IPV4) {
316670eb 1962 finp = in_pcblookup_hash(&tcbinfo, laddr4, lport, faddr4, fport, 0, NULL);
316670eb 1963 }
5ba3f43e 1964
316670eb
A
1965 if (finp) {
1966 *out_pid = finp->inp_socket->last_pid;
1967 error = 0;
a39ff7e2 1968 in_pcb_checkstate(finp, WNT_RELEASE, 0);
316670eb 1969 }
a39ff7e2 1970 socket_lock(so, 0);
5ba3f43e 1971
316670eb
A
1972 return error;
1973}
1974
39236c6e
A
1975void
1976tcp_getconninfo(struct socket *so, struct conninfo_tcp *tcp_ci)
1977{
1978 (void) tcp_lookup_peer_pid_locked(so, &tcp_ci->tcpci_peer_pid);
1979 tcp_fill_info(sototcpcb(so), &tcp_ci->tcpci_tcp_info);
1980}
1981
cb323159
A
1982void
1983tcp_clear_keep_alive_offload(struct socket *so)
1984{
1985 struct inpcb *inp;
1986 struct ifnet *ifp;
1987
1988 inp = sotoinpcb(so);
1989 if (inp == NULL) {
1990 return;
1991 }
1992
1993 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) == 0) {
1994 return;
1995 }
1996
1997 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
1998 inp->inp_last_outifp;
1999 if (ifp == NULL) {
2000 panic("%s: so %p inp %p ifp NULL",
2001 __func__, so, inp);
2002 }
2003
2004 ifnet_lock_exclusive(ifp);
2005
2006 if (ifp->if_tcp_kao_cnt == 0) {
2007 panic("%s: so %p inp %p ifp %p if_tcp_kao_cnt == 0",
2008 __func__, so, inp, ifp);
2009 }
2010 ifp->if_tcp_kao_cnt--;
2011 inp->inp_flags2 &= ~INP2_KEEPALIVE_OFFLOAD;
2012
2013 ifnet_lock_done(ifp);
2014}
2015
2016static int
2017tcp_set_keep_alive_offload(struct socket *so, struct proc *proc)
2018{
2019 int error = 0;
2020 struct inpcb *inp;
2021 struct ifnet *ifp;
2022
2023 inp = sotoinpcb(so);
2024 if (inp == NULL) {
2025 return ECONNRESET;
2026 }
2027 if ((inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD) != 0) {
2028 return 0;
2029 }
2030
2031 ifp = inp->inp_boundifp != NULL ? inp->inp_boundifp :
2032 inp->inp_last_outifp;
2033 if (ifp == NULL) {
2034 error = ENXIO;
2035 os_log_info(OS_LOG_DEFAULT,
2036 "%s: error %d for proc %s[%u] out ifp is not set\n",
2037 __func__, error,
2038 proc != NULL ? proc->p_comm : "kernel",
2039 proc != NULL ? proc->p_pid : 0);
2040 return ENXIO;
2041 }
2042
2043 error = if_get_tcp_kao_max(ifp);
2044 if (error != 0) {
2045 return error;
2046 }
2047
2048 ifnet_lock_exclusive(ifp);
2049 if (ifp->if_tcp_kao_cnt < ifp->if_tcp_kao_max) {
2050 ifp->if_tcp_kao_cnt++;
2051 inp->inp_flags2 |= INP2_KEEPALIVE_OFFLOAD;
2052 } else {
2053 error = ETOOMANYREFS;
2054 os_log_info(OS_LOG_DEFAULT,
2055 "%s: error %d for proc %s[%u] if_tcp_kao_max %u\n",
2056 __func__, error,
2057 proc != NULL ? proc->p_comm : "kernel",
2058 proc != NULL ? proc->p_pid : 0,
2059 ifp->if_tcp_kao_max);
2060 }
2061 ifnet_lock_done(ifp);
2062
2063 return error;
2064}
2065
1c79356b
A
2066/*
2067 * The new sockopt interface makes it possible for us to block in the
2068 * copyin/out step (if we take a page fault). Taking a page fault at
2069 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
2070 * use TSM, there probably isn't any need for this function to run at
2071 * splnet() any more. This needs more examination.)
2072 */
2073int
39037602 2074tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1c79356b 2075{
0a7de745
A
2076 int error = 0, opt = 0, optval = 0;
2077 struct inpcb *inp;
2078 struct tcpcb *tp;
1c79356b 2079
1c79356b
A
2080 inp = sotoinpcb(so);
2081 if (inp == NULL) {
0a7de745 2082 return ECONNRESET;
1c79356b 2083 }
39236c6e 2084 /* Allow <SOL_SOCKET,SO_FLUSH/SO_TRAFFIC_MGT_BACKGROUND> at this level */
316670eb 2085 if (sopt->sopt_level != IPPROTO_TCP &&
39236c6e
A
2086 !(sopt->sopt_level == SOL_SOCKET && (sopt->sopt_name == SO_FLUSH ||
2087 sopt->sopt_name == SO_TRAFFIC_MGT_BACKGROUND))) {
1c79356b 2088#if INET6
0a7de745 2089 if (SOCK_CHECK_DOM(so, PF_INET6)) {
1c79356b 2090 error = ip6_ctloutput(so, sopt);
0a7de745 2091 } else
1c79356b
A
2092#endif /* INET6 */
2093 error = ip_ctloutput(so, sopt);
0a7de745 2094 return error;
1c79356b
A
2095 }
2096 tp = intotcpcb(inp);
39236c6e 2097 if (tp == NULL) {
0a7de745 2098 return ECONNRESET;
39236c6e 2099 }
1c79356b 2100
6d2010ae
A
2101 calculate_tcp_clock();
2102
1c79356b
A
2103 switch (sopt->sopt_dir) {
2104 case SOPT_SET:
2105 switch (sopt->sopt_name) {
2106 case TCP_NODELAY:
2107 case TCP_NOOPT:
2108 case TCP_NOPUSH:
2109 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2110 sizeof optval);
2111 if (error) {
1c79356b 2112 break;
0a7de745 2113 }
1c79356b
A
2114
2115 switch (sopt->sopt_name) {
2116 case TCP_NODELAY:
2117 opt = TF_NODELAY;
2118 break;
2119 case TCP_NOOPT:
2120 opt = TF_NOOPT;
2121 break;
2122 case TCP_NOPUSH:
2123 opt = TF_NOPUSH;
2124 break;
2125 default:
2126 opt = 0; /* dead code to fool gcc */
2127 break;
2128 }
2129
0a7de745 2130 if (optval) {
1c79356b 2131 tp->t_flags |= opt;
0a7de745 2132 } else {
1c79356b 2133 tp->t_flags &= ~opt;
0a7de745 2134 }
1c79356b 2135 break;
6d2010ae 2136 case TCP_RXT_FINDROP:
fe8ab488 2137 case TCP_NOTIMEWAIT:
6d2010ae 2138 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2139 sizeof optval);
2140 if (error) {
6d2010ae 2141 break;
0a7de745 2142 }
fe8ab488
A
2143 switch (sopt->sopt_name) {
2144 case TCP_RXT_FINDROP:
2145 opt = TF_RXTFINDROP;
2146 break;
2147 case TCP_NOTIMEWAIT:
2148 opt = TF_NOTIMEWAIT;
2149 break;
2150 default:
2151 opt = 0;
2152 break;
2153 }
0a7de745 2154 if (optval) {
6d2010ae 2155 tp->t_flagsext |= opt;
0a7de745 2156 } else {
6d2010ae 2157 tp->t_flagsext &= ~opt;
0a7de745 2158 }
6d2010ae 2159 break;
316670eb
A
2160 case TCP_MEASURE_SND_BW:
2161 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2162 sizeof optval);
2163 if (error) {
316670eb 2164 break;
0a7de745 2165 }
316670eb
A
2166 opt = TF_MEASURESNDBW;
2167 if (optval) {
2168 if (tp->t_bwmeas == NULL) {
2169 tp->t_bwmeas = tcp_bwmeas_alloc(tp);
2170 if (tp->t_bwmeas == NULL) {
2171 error = ENOMEM;
2172 break;
2173 }
2174 }
2175 tp->t_flagsext |= opt;
2176 } else {
2177 tp->t_flagsext &= ~opt;
2178 /* Reset snd bw measurement state */
2179 tp->t_flagsext &= ~(TF_BWMEAS_INPROGRESS);
2180 if (tp->t_bwmeas != NULL) {
2181 tcp_bwmeas_free(tp);
2182 }
2183 }
2184 break;
2185 case TCP_MEASURE_BW_BURST: {
2186 struct tcp_measure_bw_burst in;
2187 uint32_t minpkts, maxpkts;
2188 bzero(&in, sizeof(in));
2189
2190 error = sooptcopyin(sopt, &in, sizeof(in),
0a7de745
A
2191 sizeof(in));
2192 if (error) {
316670eb 2193 break;
0a7de745 2194 }
316670eb 2195 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
0a7de745 2196 tp->t_bwmeas == NULL) {
316670eb
A
2197 error = EINVAL;
2198 break;
2199 }
5ba3f43e 2200 minpkts = (in.min_burst_size != 0) ? in.min_burst_size :
0a7de745 2201 tp->t_bwmeas->bw_minsizepkts;
316670eb 2202 maxpkts = (in.max_burst_size != 0) ? in.max_burst_size :
0a7de745 2203 tp->t_bwmeas->bw_maxsizepkts;
316670eb
A
2204 if (minpkts > maxpkts) {
2205 error = EINVAL;
2206 break;
2207 }
2208 tp->t_bwmeas->bw_minsizepkts = minpkts;
2209 tp->t_bwmeas->bw_maxsizepkts = maxpkts;
2210 tp->t_bwmeas->bw_minsize = (minpkts * tp->t_maxseg);
2211 tp->t_bwmeas->bw_maxsize = (maxpkts * tp->t_maxseg);
2212 break;
2213 }
1c79356b
A
2214 case TCP_MAXSEG:
2215 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2216 sizeof optval);
2217 if (error) {
1c79356b 2218 break;
0a7de745 2219 }
1c79356b 2220
e5568f75 2221 if (optval > 0 && optval <= tp->t_maxseg &&
0a7de745 2222 optval + 40 >= tcp_minmss) {
1c79356b 2223 tp->t_maxseg = optval;
0a7de745 2224 } else {
1c79356b 2225 error = EINVAL;
0a7de745 2226 }
1c79356b
A
2227 break;
2228
2d21ac55
A
2229 case TCP_KEEPALIVE:
2230 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2231 sizeof optval);
2232 if (error) {
2d21ac55 2233 break;
0a7de745
A
2234 }
2235 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
2d21ac55 2236 error = EINVAL;
39236c6e 2237 } else {
2d21ac55 2238 tp->t_keepidle = optval * TCP_RETRANSHZ;
39236c6e 2239 /* reset the timer to new value */
5ba3f43e 2240 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
0a7de745 2241 TCP_CONN_KEEPIDLE(tp));
6d2010ae 2242 tcp_check_timer_state(tp);
2d21ac55 2243 }
0a7de745 2244 break;
b0d623f7
A
2245
2246 case TCP_CONNECTIONTIMEOUT:
2247 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2248 sizeof optval);
2249 if (error) {
b0d623f7 2250 break;
0a7de745
A
2251 }
2252 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
b0d623f7 2253 error = EINVAL;
39236c6e 2254 } else {
b0d623f7 2255 tp->t_keepinit = optval * TCP_RETRANSHZ;
39236c6e 2256 if (tp->t_state == TCPS_SYN_RECEIVED ||
0a7de745 2257 tp->t_state == TCPS_SYN_SENT) {
39236c6e 2258 tp->t_timer[TCPT_KEEP] = OFFSET_FROM_START(tp,
0a7de745 2259 TCP_CONN_KEEPINIT(tp));
39236c6e
A
2260 tcp_check_timer_state(tp);
2261 }
2262 }
2263 break;
2264
2265 case TCP_KEEPINTVL:
2266 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2267 sizeof(optval));
2268 if (error) {
39236c6e 2269 break;
0a7de745
A
2270 }
2271 if (optval < 0 || optval > UINT32_MAX / TCP_RETRANSHZ) {
39236c6e
A
2272 error = EINVAL;
2273 } else {
2274 tp->t_keepintvl = optval * TCP_RETRANSHZ;
2275 if (tp->t_state == TCPS_FIN_WAIT_2 &&
0a7de745 2276 TCP_CONN_MAXIDLE(tp) > 0) {
39236c6e 2277 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745 2278 TCP_CONN_MAXIDLE(tp));
39236c6e
A
2279 tcp_check_timer_state(tp);
2280 }
2281 }
2282 break;
2283
2284 case TCP_KEEPCNT:
2285 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2286 sizeof(optval));
2287 if (error) {
39236c6e 2288 break;
0a7de745 2289 }
39236c6e
A
2290 if (optval < 0 || optval > INT32_MAX) {
2291 error = EINVAL;
2292 } else {
2293 tp->t_keepcnt = optval;
2294 if (tp->t_state == TCPS_FIN_WAIT_2 &&
0a7de745 2295 TCP_CONN_MAXIDLE(tp) > 0) {
39236c6e 2296 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745 2297 TCP_CONN_MAXIDLE(tp));
39236c6e
A
2298 tcp_check_timer_state(tp);
2299 }
2300 }
b0d623f7 2301 break;
316670eb 2302
39037602 2303 case TCP_KEEPALIVE_OFFLOAD:
cb323159
A
2304 if ((error = priv_check_cred(kauth_cred_get(),
2305 PRIV_NETINET_TCP_KA_OFFLOAD, 0)) != 0) {
2306 break;
2307 }
39037602 2308 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2309 sizeof(optval));
2310 if (error) {
39037602 2311 break;
0a7de745 2312 }
39037602
A
2313 if (optval < 0 || optval > INT32_MAX) {
2314 error = EINVAL;
2315 break;
5ba3f43e 2316 }
0a7de745 2317 if (optval != 0) {
cb323159
A
2318 error = tcp_set_keep_alive_offload(so,
2319 sopt->sopt_p);
0a7de745 2320 } else {
cb323159 2321 tcp_clear_keep_alive_offload(so);
0a7de745 2322 }
39037602
A
2323 break;
2324
6d2010ae
A
2325 case PERSIST_TIMEOUT:
2326 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2327 sizeof optval);
2328 if (error) {
6d2010ae 2329 break;
0a7de745
A
2330 }
2331 if (optval < 0) {
6d2010ae 2332 error = EINVAL;
0a7de745 2333 } else {
6d2010ae 2334 tp->t_persist_timeout = optval * TCP_RETRANSHZ;
0a7de745 2335 }
6d2010ae
A
2336 break;
2337 case TCP_RXT_CONNDROPTIME:
2338 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2339 sizeof(optval));
2340 if (error) {
6d2010ae 2341 break;
0a7de745
A
2342 }
2343 if (optval < 0) {
6d2010ae 2344 error = EINVAL;
0a7de745 2345 } else {
39236c6e 2346 tp->t_rxt_conndroptime = optval * TCP_RETRANSHZ;
0a7de745 2347 }
6d2010ae 2348 break;
316670eb
A
2349 case TCP_NOTSENT_LOWAT:
2350 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2351 sizeof(optval));
2352 if (error) {
316670eb 2353 break;
0a7de745 2354 }
316670eb
A
2355 if (optval < 0) {
2356 error = EINVAL;
2357 break;
2358 } else {
2359 if (optval == 0) {
2360 so->so_flags &= ~(SOF_NOTSENT_LOWAT);
2361 tp->t_notsent_lowat = 0;
5ba3f43e 2362 } else {
316670eb
A
2363 so->so_flags |= SOF_NOTSENT_LOWAT;
2364 tp->t_notsent_lowat = optval;
2365 }
2366 }
2367 break;
39236c6e 2368 case TCP_ADAPTIVE_READ_TIMEOUT:
0a7de745 2369 error = sooptcopyin(sopt, &optval, sizeof(optval),
3e170ce0 2370 sizeof(optval));
0a7de745 2371 if (error) {
39236c6e 2372 break;
0a7de745 2373 }
5ba3f43e 2374 if (optval < 0 ||
3e170ce0 2375 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
39236c6e
A
2376 error = EINVAL;
2377 break;
2378 } else if (optval == 0) {
2379 tp->t_adaptive_rtimo = 0;
2380 tcp_keepalive_reset(tp);
5ba3f43e 2381
0a7de745 2382 if (tp->t_mpsub) {
5ba3f43e 2383 mptcp_reset_keepalive(tp);
0a7de745 2384 }
39236c6e
A
2385 } else {
2386 tp->t_adaptive_rtimo = optval;
2387 }
2388 break;
2389 case TCP_ADAPTIVE_WRITE_TIMEOUT:
0a7de745
A
2390 error = sooptcopyin(sopt, &optval, sizeof(optval),
2391 sizeof(optval));
2392 if (error) {
39236c6e 2393 break;
0a7de745 2394 }
5ba3f43e 2395 if (optval < 0 ||
3e170ce0 2396 optval > TCP_ADAPTIVE_TIMEOUT_MAX) {
39236c6e
A
2397 error = EINVAL;
2398 break;
2399 } else {
2400 tp->t_adaptive_wtimo = optval;
2401 }
2402 break;
2403 case TCP_ENABLE_MSGS:
2404 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2405 sizeof(optval));
2406 if (error) {
39236c6e 2407 break;
0a7de745 2408 }
39236c6e
A
2409 if (optval < 0 || optval > 1) {
2410 error = EINVAL;
2411 } else if (optval == 1) {
2412 /*
2413 * Check if messages option is already
2414 * enabled, if so return.
2415 */
2416 if (so->so_flags & SOF_ENABLE_MSGS) {
2417 VERIFY(so->so_msg_state != NULL);
2418 break;
2419 }
316670eb 2420
39236c6e 2421 /*
39037602 2422 * allocate memory for storing message
39236c6e
A
2423 * related state
2424 */
2425 VERIFY(so->so_msg_state == NULL);
39037602 2426 MALLOC(so->so_msg_state,
0a7de745
A
2427 struct msg_state *,
2428 sizeof(struct msg_state),
2429 M_TEMP, M_WAITOK | M_ZERO);
39236c6e
A
2430 if (so->so_msg_state == NULL) {
2431 error = ENOMEM;
2432 break;
2433 }
2434
2435 /* Enable message delivery */
2436 so->so_flags |= SOF_ENABLE_MSGS;
2437 } else {
39037602
A
2438 /*
2439 * Can't disable message delivery on socket
2440 * because of restrictions imposed by
39236c6e
A
2441 * encoding/decoding
2442 */
2443 error = EINVAL;
2444 }
2445 break;
2446 case TCP_SENDMOREACKS:
2447 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2448 sizeof(optval));
2449 if (error) {
39236c6e 2450 break;
0a7de745 2451 }
39236c6e
A
2452 if (optval < 0 || optval > 1) {
2453 error = EINVAL;
2454 } else if (optval == 0) {
2455 tp->t_flagsext &= ~(TF_NOSTRETCHACK);
2456 } else {
2457 tp->t_flagsext |= TF_NOSTRETCHACK;
2458 }
2459 break;
fe8ab488
A
2460 case TCP_DISABLE_BLACKHOLE_DETECTION:
2461 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2462 sizeof(optval));
2463 if (error) {
fe8ab488 2464 break;
0a7de745 2465 }
fe8ab488
A
2466 if (optval < 0 || optval > 1) {
2467 error = EINVAL;
2468 } else if (optval == 0) {
2469 tp->t_flagsext &= ~TF_NOBLACKHOLE_DETECTION;
2470 } else {
2471 tp->t_flagsext |= TF_NOBLACKHOLE_DETECTION;
2472 if ((tp->t_flags & TF_BLACKHOLE) &&
0a7de745 2473 tp->t_pmtud_saved_maxopd > 0) {
fe8ab488 2474 tcp_pmtud_revert_segment_size(tp);
0a7de745 2475 }
fe8ab488
A
2476 }
2477 break;
3e170ce0
A
2478 case TCP_FASTOPEN:
2479 if (!(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2480 error = ENOTSUP;
2481 break;
2482 }
2483
2484 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745
A
2485 sizeof(optval));
2486 if (error) {
3e170ce0 2487 break;
0a7de745 2488 }
3e170ce0
A
2489 if (optval < 0 || optval > 1) {
2490 error = EINVAL;
2491 break;
2492 }
2493 if (tp->t_state != TCPS_LISTEN) {
2494 error = EINVAL;
2495 break;
2496 }
0a7de745 2497 if (optval) {
3e170ce0 2498 tp->t_flagsext |= TF_FASTOPEN;
0a7de745 2499 } else {
3e170ce0 2500 tcp_disable_tfo(tp);
0a7de745 2501 }
3e170ce0 2502 break;
5ba3f43e 2503 case TCP_FASTOPEN_FORCE_HEURISTICS:
cb323159
A
2504
2505 break;
2506 case TCP_FASTOPEN_FORCE_ENABLE:
5ba3f43e 2507 error = sooptcopyin(sopt, &optval, sizeof(optval),
0a7de745 2508 sizeof(optval));
5ba3f43e 2509
0a7de745 2510 if (error) {
5ba3f43e 2511 break;
0a7de745 2512 }
5ba3f43e
A
2513 if (optval < 0 || optval > 1) {
2514 error = EINVAL;
2515 break;
2516 }
2517
2518 if (tp->t_state != TCPS_CLOSED) {
2519 error = EINVAL;
2520 break;
2521 }
0a7de745 2522 if (optval) {
cb323159 2523 tp->t_flagsext |= TF_FASTOPEN_FORCE_ENABLE;
0a7de745 2524 } else {
cb323159 2525 tp->t_flagsext &= ~TF_FASTOPEN_FORCE_ENABLE;
0a7de745 2526 }
5ba3f43e
A
2527
2528 break;
4bd07ac2
A
2529 case TCP_ENABLE_ECN:
2530 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2531 sizeof optval);
2532 if (error) {
4bd07ac2 2533 break;
0a7de745 2534 }
4bd07ac2
A
2535 if (optval) {
2536 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2537 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2538 } else {
2539 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
5ba3f43e 2540 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
4bd07ac2
A
2541 }
2542 break;
2543 case TCP_ECN_MODE:
2544 error = sooptcopyin(sopt, &optval, sizeof optval,
0a7de745
A
2545 sizeof optval);
2546 if (error) {
4bd07ac2 2547 break;
0a7de745 2548 }
4bd07ac2
A
2549 if (optval == ECN_MODE_DEFAULT) {
2550 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2551 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2552 } else if (optval == ECN_MODE_ENABLE) {
2553 tp->ecn_flags |= TE_ECN_MODE_ENABLE;
2554 tp->ecn_flags &= ~TE_ECN_MODE_DISABLE;
2555 } else if (optval == ECN_MODE_DISABLE) {
2556 tp->ecn_flags &= ~TE_ECN_MODE_ENABLE;
2557 tp->ecn_flags |= TE_ECN_MODE_DISABLE;
2558 } else {
2559 error = EINVAL;
2560 }
2561 break;
39037602
A
2562 case TCP_NOTIFY_ACKNOWLEDGEMENT:
2563 error = sooptcopyin(sopt, &optval,
2564 sizeof(optval), sizeof(optval));
0a7de745 2565 if (error) {
39037602 2566 break;
0a7de745 2567 }
39037602
A
2568 if (optval <= 0) {
2569 error = EINVAL;
2570 break;
2571 }
2572 if (tp->t_notify_ack_count >= TCP_MAX_NOTIFY_ACK) {
2573 error = ETOOMANYREFS;
2574 break;
2575 }
2576
2577 /*
2578 * validate that the given marker id is not
2579 * a duplicate to avoid ambiguity
2580 */
2581 if ((error = tcp_notify_ack_id_valid(tp, so,
2582 optval)) != 0) {
2583 break;
2584 }
2585 error = tcp_add_notify_ack_marker(tp, optval);
2586 break;
316670eb 2587 case SO_FLUSH:
0a7de745
A
2588 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2589 sizeof(optval))) != 0) {
316670eb 2590 break;
0a7de745 2591 }
316670eb
A
2592
2593 error = inp_flush(inp, optval);
2594 break;
2595
39236c6e 2596 case SO_TRAFFIC_MGT_BACKGROUND:
0a7de745
A
2597 if ((error = sooptcopyin(sopt, &optval, sizeof(optval),
2598 sizeof(optval))) != 0) {
39236c6e 2599 break;
0a7de745 2600 }
39236c6e
A
2601
2602 if (optval) {
2603 socket_set_traffic_mgt_flags_locked(so,
2604 TRAFFIC_MGT_SO_BACKGROUND);
2605 } else {
2606 socket_clear_traffic_mgt_flags_locked(so,
2607 TRAFFIC_MGT_SO_BACKGROUND);
2608 }
2609 break;
5ba3f43e
A
2610 case TCP_RXT_MINIMUM_TIMEOUT:
2611 error = sooptcopyin(sopt, &optval, sizeof(optval),
2612 sizeof(optval));
0a7de745 2613 if (error) {
5ba3f43e 2614 break;
0a7de745 2615 }
5ba3f43e
A
2616 if (optval < 0) {
2617 error = EINVAL;
2618 break;
2619 }
2620 if (optval == 0) {
2621 tp->t_rxt_minimum_timeout = 0;
2622 } else {
2623 tp->t_rxt_minimum_timeout = min(optval,
2624 TCP_RXT_MINIMUM_TIMEOUT_LIMIT);
2625 /* convert to milliseconds */
2626 tp->t_rxt_minimum_timeout *= TCP_RETRANSHZ;
2627 }
2628 break;
1c79356b
A
2629 default:
2630 error = ENOPROTOOPT;
2631 break;
2632 }
2633 break;
2634
2635 case SOPT_GET:
2636 switch (sopt->sopt_name) {
2637 case TCP_NODELAY:
2638 optval = tp->t_flags & TF_NODELAY;
2639 break;
2640 case TCP_MAXSEG:
2641 optval = tp->t_maxseg;
2642 break;
55e303ae 2643 case TCP_KEEPALIVE:
0a7de745 2644 if (tp->t_keepidle > 0) {
39037602 2645 optval = tp->t_keepidle / TCP_RETRANSHZ;
0a7de745 2646 } else {
39037602 2647 optval = tcp_keepidle / TCP_RETRANSHZ;
0a7de745 2648 }
55e303ae 2649 break;
39236c6e 2650 case TCP_KEEPINTVL:
0a7de745 2651 if (tp->t_keepintvl > 0) {
39037602 2652 optval = tp->t_keepintvl / TCP_RETRANSHZ;
0a7de745 2653 } else {
39037602 2654 optval = tcp_keepintvl / TCP_RETRANSHZ;
0a7de745 2655 }
39236c6e
A
2656 break;
2657 case TCP_KEEPCNT:
0a7de745 2658 if (tp->t_keepcnt > 0) {
39037602 2659 optval = tp->t_keepcnt;
0a7de745 2660 } else {
39037602 2661 optval = tcp_keepcnt;
0a7de745 2662 }
39037602
A
2663 break;
2664 case TCP_KEEPALIVE_OFFLOAD:
2665 optval = !!(inp->inp_flags2 & INP2_KEEPALIVE_OFFLOAD);
39236c6e 2666 break;
1c79356b
A
2667 case TCP_NOOPT:
2668 optval = tp->t_flags & TF_NOOPT;
2669 break;
2670 case TCP_NOPUSH:
2671 optval = tp->t_flags & TF_NOPUSH;
2672 break;
fe8ab488 2673 case TCP_ENABLE_ECN:
4bd07ac2
A
2674 optval = (tp->ecn_flags & TE_ECN_MODE_ENABLE) ? 1 : 0;
2675 break;
2676 case TCP_ECN_MODE:
0a7de745 2677 if (tp->ecn_flags & TE_ECN_MODE_ENABLE) {
4bd07ac2 2678 optval = ECN_MODE_ENABLE;
0a7de745 2679 } else if (tp->ecn_flags & TE_ECN_MODE_DISABLE) {
4bd07ac2 2680 optval = ECN_MODE_DISABLE;
0a7de745 2681 } else {
4bd07ac2 2682 optval = ECN_MODE_DEFAULT;
0a7de745 2683 }
fe8ab488 2684 break;
b0d623f7
A
2685 case TCP_CONNECTIONTIMEOUT:
2686 optval = tp->t_keepinit / TCP_RETRANSHZ;
2687 break;
6d2010ae
A
2688 case PERSIST_TIMEOUT:
2689 optval = tp->t_persist_timeout / TCP_RETRANSHZ;
2690 break;
2691 case TCP_RXT_CONNDROPTIME:
39236c6e 2692 optval = tp->t_rxt_conndroptime / TCP_RETRANSHZ;
6d2010ae
A
2693 break;
2694 case TCP_RXT_FINDROP:
2695 optval = tp->t_flagsext & TF_RXTFINDROP;
5ba3f43e 2696 break;
fe8ab488
A
2697 case TCP_NOTIMEWAIT:
2698 optval = (tp->t_flagsext & TF_NOTIMEWAIT) ? 1 : 0;
2699 break;
3e170ce0
A
2700 case TCP_FASTOPEN:
2701 if (tp->t_state != TCPS_LISTEN ||
2702 !(tcp_fastopen & TCP_FASTOPEN_SERVER)) {
2703 error = ENOTSUP;
2704 break;
2705 }
2706 optval = tfo_enabled(tp);
2707 break;
5ba3f43e 2708 case TCP_FASTOPEN_FORCE_HEURISTICS:
cb323159
A
2709 optval = 0;
2710 break;
2711 case TCP_FASTOPEN_FORCE_ENABLE:
2712 optval = (tp->t_flagsext & TF_FASTOPEN_FORCE_ENABLE) ? 1 : 0;
5ba3f43e 2713 break;
316670eb
A
2714 case TCP_MEASURE_SND_BW:
2715 optval = tp->t_flagsext & TF_MEASURESNDBW;
2716 break;
6d2010ae
A
2717 case TCP_INFO: {
2718 struct tcp_info ti;
2719
2720 tcp_fill_info(tp, &ti);
2721 error = sooptcopyout(sopt, &ti, sizeof(struct tcp_info));
2722 goto done;
316670eb
A
2723 /* NOT REACHED */
2724 }
3e170ce0
A
2725 case TCP_CONNECTION_INFO: {
2726 struct tcp_connection_info tci;
2727 tcp_connection_fill_info(tp, &tci);
2728 error = sooptcopyout(sopt, &tci,
2729 sizeof(struct tcp_connection_info));
2730 goto done;
2731 }
316670eb 2732 case TCP_MEASURE_BW_BURST: {
527f9951 2733 struct tcp_measure_bw_burst out = {};
316670eb 2734 if ((tp->t_flagsext & TF_MEASURESNDBW) == 0 ||
0a7de745 2735 tp->t_bwmeas == NULL) {
316670eb
A
2736 error = EINVAL;
2737 break;
2738 }
2739 out.min_burst_size = tp->t_bwmeas->bw_minsizepkts;
2740 out.max_burst_size = tp->t_bwmeas->bw_maxsizepkts;
2741 error = sooptcopyout(sopt, &out, sizeof(out));
2742 goto done;
2743 }
2744 case TCP_NOTSENT_LOWAT:
2745 if ((so->so_flags & SOF_NOTSENT_LOWAT) != 0) {
2746 optval = tp->t_notsent_lowat;
2747 } else {
2748 optval = 0;
2749 }
2750 break;
39236c6e
A
2751
2752 case TCP_ENABLE_MSGS:
2753 if (so->so_flags & SOF_ENABLE_MSGS) {
2754 optval = 1;
2755 } else {
2756 optval = 0;
2757 }
2758 break;
2759 case TCP_SENDMOREACKS:
0a7de745 2760 if (tp->t_flagsext & TF_NOSTRETCHACK) {
39236c6e 2761 optval = 1;
0a7de745 2762 } else {
39236c6e 2763 optval = 0;
0a7de745 2764 }
39236c6e 2765 break;
fe8ab488 2766 case TCP_DISABLE_BLACKHOLE_DETECTION:
0a7de745 2767 if (tp->t_flagsext & TF_NOBLACKHOLE_DETECTION) {
fe8ab488 2768 optval = 1;
0a7de745 2769 } else {
fe8ab488 2770 optval = 0;
0a7de745 2771 }
fe8ab488 2772 break;
316670eb 2773 case TCP_PEER_PID: {
0a7de745 2774 pid_t pid;
316670eb 2775 error = tcp_lookup_peer_pid_locked(so, &pid);
0a7de745 2776 if (error == 0) {
316670eb 2777 error = sooptcopyout(sopt, &pid, sizeof(pid));
0a7de745 2778 }
316670eb 2779 goto done;
6d2010ae 2780 }
39236c6e
A
2781 case TCP_ADAPTIVE_READ_TIMEOUT:
2782 optval = tp->t_adaptive_rtimo;
2783 break;
2784 case TCP_ADAPTIVE_WRITE_TIMEOUT:
2785 optval = tp->t_adaptive_wtimo;
2786 break;
2787 case SO_TRAFFIC_MGT_BACKGROUND:
39037602
A
2788 optval = (so->so_flags1 &
2789 SOF1_TRAFFIC_MGT_SO_BACKGROUND) ? 1 : 0;
39236c6e 2790 break;
39037602
A
2791 case TCP_NOTIFY_ACKNOWLEDGEMENT: {
2792 struct tcp_notify_ack_complete retid;
2793
0a7de745 2794 if (sopt->sopt_valsize != sizeof(retid)) {
39037602
A
2795 error = EINVAL;
2796 break;
2797 }
0a7de745 2798 bzero(&retid, sizeof(retid));
39037602 2799 tcp_get_notify_ack_count(tp, &retid);
0a7de745 2800 if (retid.notify_complete_count > 0) {
39037602 2801 tcp_get_notify_ack_ids(tp, &retid);
0a7de745 2802 }
39037602 2803
0a7de745 2804 error = sooptcopyout(sopt, &retid, sizeof(retid));
39037602
A
2805 goto done;
2806 }
5ba3f43e
A
2807 case TCP_RXT_MINIMUM_TIMEOUT:
2808 optval = tp->t_rxt_minimum_timeout / TCP_RETRANSHZ;
2809 break;
1c79356b
A
2810 default:
2811 error = ENOPROTOOPT;
2812 break;
2813 }
0a7de745 2814 if (error == 0) {
1c79356b 2815 error = sooptcopyout(sopt, &optval, sizeof optval);
0a7de745 2816 }
1c79356b
A
2817 break;
2818 }
6d2010ae 2819done:
0a7de745 2820 return error;
1c79356b
A
2821}
2822
2823/*
2824 * tcp_sendspace and tcp_recvspace are the default send and receive window
2825 * sizes, respectively. These are obsolescent (this information should
2826 * be set by the route).
2827 */
0a7de745
A
/*
 * Default buffer reservations that tcp_attach() passes to soreserve(),
 * written as <count> * 1448 bytes (1448 is presumably a typical segment
 * payload size -- TODO confirm).
 */
u_int32_t tcp_sendspace = 256 * 1448;   /* 370688 bytes */
u_int32_t tcp_recvspace = 384 * 1448;   /* 556032 bytes */
b0d623f7
A
2830
2831/* During attach, the size of socket buffer allocated is limited to
2832 * sb_max in sbreserve. Disallow setting the tcp send and recv space
2833 * to be more than sb_max because that will cause tcp_attach to fail
2834 * (see radar 5713060)
5ba3f43e 2835 */
b0d623f7
A
2836static int
2837sysctl_tcp_sospace(struct sysctl_oid *oidp, __unused void *arg1,
0a7de745 2838 int arg2, struct sysctl_req *req)
5ba3f43e
A
2839{
2840#pragma unused(arg2)
b0d623f7
A
2841 u_int32_t new_value = 0, *space_p = NULL;
2842 int changed = 0, error = 0;
0a7de745 2843 u_quad_t sb_effective_max = (sb_max / (MSIZE + MCLBYTES)) * MCLBYTES;
b0d623f7
A
2844
2845 switch (oidp->oid_number) {
0a7de745
A
2846 case TCPCTL_SENDSPACE:
2847 space_p = &tcp_sendspace;
2848 break;
2849 case TCPCTL_RECVSPACE:
2850 space_p = &tcp_recvspace;
2851 break;
2852 default:
2853 return EINVAL;
b0d623f7
A
2854 }
2855 error = sysctl_io_number(req, *space_p, sizeof(u_int32_t),
0a7de745 2856 &new_value, &changed);
b0d623f7
A
2857 if (changed) {
2858 if (new_value > 0 && new_value <= sb_effective_max) {
2859 *space_p = new_value;
5ba3f43e 2860 SYSCTL_SKMEM_UPDATE_AT_OFFSET(arg2, new_value);
b0d623f7
A
2861 } else {
2862 error = ERANGE;
2863 }
2864 }
2865 return error;
2866}
2867
5ba3f43e
A
2868#if SYSCTL_SKMEM
2869SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace,
0a7de745
A
2870 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_sendspace,
2871 offsetof(skmem_sysctl, tcp.sendspace), sysctl_tcp_sospace,
2872 "IU", "Maximum outgoing TCP datagram size");
5ba3f43e 2873SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace,
0a7de745
A
2874 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_recvspace,
2875 offsetof(skmem_sysctl, tcp.recvspace), sysctl_tcp_sospace,
2876 "IU", "Maximum incoming TCP datagram size");
5ba3f43e 2877#else /* SYSCTL_SKMEM */
6d2010ae 2878SYSCTL_PROC(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2879 &tcp_sendspace, 0, &sysctl_tcp_sospace, "IU", "Maximum outgoing TCP datagram size");
6d2010ae 2880SYSCTL_PROC(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
0a7de745 2881 &tcp_recvspace, 0, &sysctl_tcp_sospace, "IU", "Maximum incoming TCP datagram size");
5ba3f43e 2882#endif /* SYSCTL_SKMEM */
1c79356b
A
2883
2884/*
2885 * Attach TCP protocol to socket, allocating
2886 * internet protocol control block, tcp control block,
2887 * bufer space, and entering LISTEN state if to accept connections.
2d21ac55
A
2888 *
2889 * Returns: 0 Success
2890 * in_pcballoc:ENOBUFS
2891 * in_pcballoc:ENOMEM
2892 * in_pcballoc:??? [IPSEC specific]
2893 * soreserve:ENOBUFS
1c79356b
A
2894 */
2895static int
39037602 2896tcp_attach(struct socket *so, struct proc *p)
1c79356b 2897{
39037602 2898 struct tcpcb *tp;
1c79356b
A
2899 struct inpcb *inp;
2900 int error;
2901#if INET6
39236c6e 2902 int isipv6 = SOCK_CHECK_DOM(so, PF_INET6) != 0;
9bccf70c 2903#endif
1c79356b 2904
1c79356b 2905 error = in_pcballoc(so, &tcbinfo, p);
0a7de745
A
2906 if (error) {
2907 return error;
2908 }
55e303ae 2909
1c79356b 2910 inp = sotoinpcb(so);
55e303ae
A
2911
2912 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
316670eb 2913 error = soreserve(so, tcp_sendspace, tcp_recvspace);
0a7de745
A
2914 if (error) {
2915 return error;
2916 }
55e303ae 2917 }
3e170ce0
A
2918
2919 if (so->so_snd.sb_preconn_hiwat == 0) {
39037602 2920 soreserve_preconnect(so, 2048);
3e170ce0
A
2921 }
2922
0a7de745 2923 if ((so->so_rcv.sb_flags & SB_USRSIZE) == 0) {
316670eb 2924 so->so_rcv.sb_flags |= SB_AUTOSIZE;
0a7de745
A
2925 }
2926 if ((so->so_snd.sb_flags & SB_USRSIZE) == 0) {
316670eb 2927 so->so_snd.sb_flags |= SB_AUTOSIZE;
0a7de745 2928 }
55e303ae 2929
1c79356b
A
2930#if INET6
2931 if (isipv6) {
2932 inp->inp_vflag |= INP_IPV6;
0a7de745
A
2933 inp->in6p_hops = -1; /* use kernel default */
2934 } else
1c79356b
A
2935#endif /* INET6 */
2936 inp->inp_vflag |= INP_IPV4;
2937 tp = tcp_newtcpcb(inp);
39236c6e 2938 if (tp == NULL) {
0a7de745 2939 int nofd = so->so_state & SS_NOFDREF; /* XXX */
1c79356b 2940
0a7de745 2941 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
1c79356b 2942#if INET6
0a7de745 2943 if (isipv6) {
1c79356b 2944 in6_pcbdetach(inp);
0a7de745 2945 } else
1c79356b
A
2946#endif /* INET6 */
2947 in_pcbdetach(inp);
2948 so->so_state |= nofd;
0a7de745 2949 return ENOBUFS;
1c79356b 2950 }
0a7de745 2951 if (nstat_collect) {
6d2010ae 2952 nstat_tcp_new_pcb(inp);
0a7de745 2953 }
1c79356b 2954 tp->t_state = TCPS_CLOSED;
0a7de745 2955 return 0;
1c79356b
A
2956}
2957
2958/*
2959 * Initiate (or continue) disconnect.
2960 * If embryonic state, just send reset (once).
2961 * If in ``let data drain'' option and linger null, just drop.
2962 * Otherwise (hard), mark socket disconnecting and drop
2963 * current input data; switch states based on user close, and
2964 * send segment to peer (with FIN).
2965 */
2966static struct tcpcb *
39037602 2967tcp_disconnect(struct tcpcb *tp)
1c79356b
A
2968{
2969 struct socket *so = tp->t_inpcb->inp_socket;
2970
0a7de745 2971 if (so->so_rcv.sb_cc != 0 || tp->t_reassqlen != 0) {
5c9f4661 2972 return tcp_drop(tp, 0);
0a7de745 2973 }
5c9f4661 2974
0a7de745 2975 if (tp->t_state < TCPS_ESTABLISHED) {
1c79356b 2976 tp = tcp_close(tp);
0a7de745 2977 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
1c79356b 2978 tp = tcp_drop(tp, 0);
0a7de745 2979 } else {
1c79356b
A
2980 soisdisconnecting(so);
2981 sbflush(&so->so_rcv);
2982 tp = tcp_usrclosed(tp);
39236c6e
A
2983#if MPTCP
2984 /* A reset has been sent but socket exists, do not send FIN */
2985 if ((so->so_flags & SOF_MP_SUBFLOW) &&
0a7de745
A
2986 (tp) && (tp->t_mpflags & TMPF_RESET)) {
2987 return tp;
2988 }
39236c6e 2989#endif
0a7de745 2990 if (tp) {
1c79356b 2991 (void) tcp_output(tp);
0a7de745 2992 }
1c79356b 2993 }
0a7de745 2994 return tp;
1c79356b
A
2995}
2996
2997/*
2998 * User issued close, and wish to trail through shutdown states:
2999 * if never received SYN, just forget it. If got a SYN from peer,
3000 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
3001 * If already got a FIN from peer, then almost done; go to LAST_ACK
3002 * state. In all other cases, have already sent FIN to peer (e.g.
3003 * after PRU_SHUTDOWN), and just have to play tedious game waiting
3004 * for peer to send FIN or not respond to keep-alives, etc.
3005 * We can let the user exit from the close as soon as the FIN is acked.
3006 */
3007static struct tcpcb *
39037602 3008tcp_usrclosed(struct tcpcb *tp)
1c79356b 3009{
1c79356b 3010 switch (tp->t_state) {
1c79356b
A
3011 case TCPS_CLOSED:
3012 case TCPS_LISTEN:
a39ff7e2 3013 case TCPS_SYN_SENT:
1c79356b
A
3014 tp = tcp_close(tp);
3015 break;
3016
1c79356b
A
3017 case TCPS_SYN_RECEIVED:
3018 tp->t_flags |= TF_NEEDFIN;
3019 break;
3020
3021 case TCPS_ESTABLISHED:
5ba3f43e 3022 DTRACE_TCP4(state__change, void, NULL,
0a7de745
A
3023 struct inpcb *, tp->t_inpcb,
3024 struct tcpcb *, tp,
3025 int32_t, TCPS_FIN_WAIT_1);
1c79356b 3026 tp->t_state = TCPS_FIN_WAIT_1;
cb323159 3027 TCP_LOG_CONNECTION_SUMMARY(tp);
1c79356b
A
3028 break;
3029
3030 case TCPS_CLOSE_WAIT:
5ba3f43e 3031 DTRACE_TCP4(state__change, void, NULL,
0a7de745
A
3032 struct inpcb *, tp->t_inpcb,
3033 struct tcpcb *, tp,
3034 int32_t, TCPS_LAST_ACK);
1c79356b 3035 tp->t_state = TCPS_LAST_ACK;
cb323159 3036 TCP_LOG_CONNECTION_SUMMARY(tp);
1c79356b
A
3037 break;
3038 }
3039 if (tp && tp->t_state >= TCPS_FIN_WAIT_2) {
3040 soisdisconnected(tp->t_inpcb->inp_socket);
3041 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
0a7de745 3042 if (tp->t_state == TCPS_FIN_WAIT_2) {
5ba3f43e 3043 tp->t_timer[TCPT_2MSL] = OFFSET_FROM_START(tp,
0a7de745
A
3044 TCP_CONN_MAXIDLE(tp));
3045 }
1c79356b 3046 }
0a7de745 3047 return tp;
1c79356b
A
3048}
3049
2d21ac55
A
3050void
3051tcp_in_cksum_stats(u_int32_t len)
3052{
39236c6e
A
3053 tcpstat.tcps_rcv_swcsum++;
3054 tcpstat.tcps_rcv_swcsum_bytes += len;
2d21ac55
A
3055}
3056
3057void
3058tcp_out_cksum_stats(u_int32_t len)
3059{
39236c6e
A
3060 tcpstat.tcps_snd_swcsum++;
3061 tcpstat.tcps_snd_swcsum_bytes += len;
3062}
3063
3064#if INET6
3065void
3066tcp_in6_cksum_stats(u_int32_t len)
3067{
3068 tcpstat.tcps_rcv6_swcsum++;
3069 tcpstat.tcps_rcv6_swcsum_bytes += len;
2d21ac55 3070}
39236c6e
A
3071
3072void
3073tcp_out6_cksum_stats(u_int32_t len)
3074{
3075 tcpstat.tcps_snd6_swcsum++;
3076 tcpstat.tcps_snd6_swcsum_bytes += len;
3077}
cb323159 3078#endif /* INET6 */
39236c6e 3079
5ba3f43e 3080/*
39236c6e
A
3081 * When messages are enabled on a TCP socket, the message priority
3082 * is sent as a control message. This function will extract it.
3083 */
3084int
5ba3f43e 3085tcp_get_msg_priority(struct mbuf *control, uint32_t *msgpri)
39236c6e
A
3086{
3087 struct cmsghdr *cm;
cb323159 3088
0a7de745
A
3089 if (control == NULL) {
3090 return EINVAL;
3091 }
39236c6e 3092
0a7de745
A
3093 for (cm = M_FIRST_CMSGHDR(control);
3094 is_cmsg_valid(control, cm);
3095 cm = M_NXT_CMSGHDR(control, cm)) {
39236c6e 3096 if (cm->cmsg_level == SOL_SOCKET &&
0a7de745
A
3097 cm->cmsg_type == SCM_MSG_PRIORITY) {
3098 if (cm->cmsg_len != CMSG_LEN(sizeof(uint32_t))) {
3099 return EINVAL;
3100 }
3101 *msgpri = *(uint32_t *)(void *)CMSG_DATA(cm);
3102 if (*msgpri < MSG_PRI_MIN || *msgpri > MSG_PRI_MAX) {
3103 return EINVAL;
3104 }
39236c6e
A
3105 break;
3106 }
3107 }
0a7de745 3108 return 0;
39236c6e 3109}
cb323159
A
3110
3111int
3112tcp_get_mpkl_send_info(struct mbuf *control,
3113 struct so_mpkl_send_info *mpkl_send_info)
3114{
3115 struct cmsghdr *cm;
3116
3117 if (control == NULL || mpkl_send_info == NULL) {
3118 return EINVAL;
3119 }
3120
3121 for (cm = M_FIRST_CMSGHDR(control); cm;
3122 cm = M_NXT_CMSGHDR(control, cm)) {
3123 if (cm->cmsg_len < sizeof(struct cmsghdr) ||
3124 cm->cmsg_len > control->m_len) {
3125 return EINVAL;
3126 }
3127 if (cm->cmsg_level != SOL_SOCKET ||
3128 cm->cmsg_type != SCM_MPKL_SEND_INFO) {
3129 continue;
3130 }
3131 if (cm->cmsg_len != CMSG_LEN(sizeof(struct so_mpkl_send_info))) {
3132 return EINVAL;
3133 }
3134 memcpy(mpkl_send_info, CMSG_DATA(cm),
3135 sizeof(struct so_mpkl_send_info));
3136 return 0;
3137 }
3138 return ENOMSG;
3139}