2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 * Copyright (c) 1982, 1986, 1988, 1993
30 * The Regents of the University of California. All rights reserved.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
61 * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $
65 #include <sys/param.h>
66 #include <sys/systm.h>
67 #include <sys/kernel.h>
68 #include <sys/sysctl.h>
71 #include <sys/domain.h>
73 #include <sys/socket.h>
74 #include <sys/socketvar.h>
75 #include <sys/protosw.h>
78 #include <net/route.h>
79 #include <net/ntstat.h>
81 #include <netinet/in.h>
82 #include <netinet/in_systm.h>
84 #include <netinet/ip6.h>
86 #include <netinet/in_pcb.h>
88 #include <netinet6/in6_pcb.h>
90 #include <netinet/in_var.h>
91 #include <netinet/ip_var.h>
93 #include <netinet6/ip6_var.h>
95 #include <netinet/tcp.h>
96 #include <netinet/tcp_fsm.h>
97 #include <netinet/tcp_seq.h>
98 #include <netinet/tcp_timer.h>
99 #include <netinet/tcp_var.h>
100 #include <netinet/tcpip.h>
102 #include <netinet/tcp_debug.h>
106 #include <netinet6/ipsec.h>
109 void tcp_fill_info(struct tcpcb
*, struct tcp_info
*);
110 errno_t
tcp_fill_info_for_info_tuple(struct info_tuple
*, struct tcp_info
*);
112 int tcp_sysctl_info(struct sysctl_oid
*, void *, int , struct sysctl_req
*);
115 * TCP protocol interface to socket abstraction.
117 extern char *tcpstates
[]; /* XXX ??? */
119 static int tcp_attach(struct socket
*, struct proc
*);
120 static int tcp_connect(struct tcpcb
*, struct sockaddr
*, struct proc
*);
122 static int tcp6_connect(struct tcpcb
*, struct sockaddr
*, struct proc
*);
124 static struct tcpcb
*
125 tcp_disconnect(struct tcpcb
*);
126 static struct tcpcb
*
127 tcp_usrclosed(struct tcpcb
*);
129 static u_int32_t tcps_in_sw_cksum
;
130 SYSCTL_UINT(_net_inet_tcp
, OID_AUTO
, in_sw_cksum
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
131 &tcps_in_sw_cksum
, 0,
132 "Number of received packets checksummed in software");
134 static u_int64_t tcps_in_sw_cksum_bytes
;
135 SYSCTL_QUAD(_net_inet_tcp
, OID_AUTO
, in_sw_cksum_bytes
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
136 &tcps_in_sw_cksum_bytes
,
137 "Amount of received data checksummed in software");
139 static u_int32_t tcps_out_sw_cksum
;
140 SYSCTL_UINT(_net_inet_tcp
, OID_AUTO
, out_sw_cksum
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
141 &tcps_out_sw_cksum
, 0,
142 "Number of transmitted packets checksummed in software");
144 static u_int64_t tcps_out_sw_cksum_bytes
;
145 SYSCTL_QUAD(_net_inet_tcp
, OID_AUTO
, out_sw_cksum_bytes
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
146 &tcps_out_sw_cksum_bytes
,
147 "Amount of transmitted data checksummed in software");
149 extern uint32_t tcp_autorcvbuf_max
;
151 extern void tcp_sbrcv_trim(struct tcpcb
*tp
, struct sockbuf
*sb
);
154 #define TCPDEBUG0 int ostate = 0
155 #define TCPDEBUG1() ostate = tp ? tp->t_state : 0
156 #define TCPDEBUG2(req) if (tp && (so->so_options & SO_DEBUG)) \
157 tcp_trace(TA_USER, ostate, tp, 0, 0, req)
161 #define TCPDEBUG2(req)
164 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, info
, CTLFLAG_RW
| CTLFLAG_LOCKED
| CTLFLAG_ANYBODY
,
165 0 , 0, tcp_sysctl_info
, "S", "TCP info per tuple");
168 * TCP attaches to socket via pru_attach(), reserving space,
169 * and an internet control block.
175 * tcp_attach:??? [IPSEC specific]
178 tcp_usr_attach(struct socket
*so
, __unused
int proto
, struct proc
*p
)
181 struct inpcb
*inp
= sotoinpcb(so
);
182 struct tcpcb
*tp
= 0;
191 error
= tcp_attach(so
, p
);
195 if ((so
->so_options
& SO_LINGER
) && so
->so_linger
== 0)
196 so
->so_linger
= TCP_LINGERTIME
* hz
;
199 TCPDEBUG2(PRU_ATTACH
);
/*
 * pru_detach() detaches the TCP protocol from the socket.
 * If the protocol state is non-embryonic, then we can't
 * do this directly: we have to initiate a pru_disconnect(),
 * which may finish later; embryonic TCBs can just
 * be discarded here.
 */
211 tcp_usr_detach(struct socket
*so
)
214 struct inpcb
*inp
= sotoinpcb(so
);
218 if (inp
== 0 || (inp
->inp_state
== INPCB_STATE_DEAD
)) {
219 return EINVAL
; /* XXX */
221 lck_mtx_assert(&((struct inpcb
*)so
->so_pcb
)->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
);
223 /* In case we got disconnected from the peer */
228 calculate_tcp_clock();
230 tp
= tcp_disconnect(tp
);
232 TCPDEBUG2(PRU_DETACH
);
236 #define COMMON_START() TCPDEBUG0; \
238 if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \
241 tp = intotcpcb(inp); \
243 calculate_tcp_clock(); \
246 #define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out
250 * Give the socket an address.
253 * EINVAL Invalid argument [COMMON_START]
254 * EAFNOSUPPORT Address family not supported
255 * in_pcbbind:EADDRNOTAVAIL Address not available.
256 * in_pcbbind:EINVAL Invalid argument
257 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
258 * in_pcbbind:EACCES Permission denied
259 * in_pcbbind:EADDRINUSE Address in use
260 * in_pcbbind:EAGAIN Resource unavailable, try again
261 * in_pcbbind:EPERM Operation not permitted
264 tcp_usr_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
267 struct inpcb
*inp
= sotoinpcb(so
);
269 struct sockaddr_in
*sinp
;
273 if (nam
->sa_family
!= 0 && nam
->sa_family
!= AF_INET
) {
274 error
= EAFNOSUPPORT
;
	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
282 sinp
= (struct sockaddr_in
*)(void *)nam
;
283 if (sinp
->sin_family
== AF_INET
&&
284 IN_MULTICAST(ntohl(sinp
->sin_addr
.s_addr
))) {
285 error
= EAFNOSUPPORT
;
288 error
= in_pcbbind(inp
, nam
, p
);
291 COMMON_END(PRU_BIND
);
297 tcp6_usr_bind(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
300 struct inpcb
*inp
= sotoinpcb(so
);
302 struct sockaddr_in6
*sin6p
;
306 if (nam
->sa_family
!= 0 && nam
->sa_family
!= AF_INET6
) {
307 error
= EAFNOSUPPORT
;
	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
315 sin6p
= (struct sockaddr_in6
*)(void *)nam
;
316 if (sin6p
->sin6_family
== AF_INET6
&&
317 IN6_IS_ADDR_MULTICAST(&sin6p
->sin6_addr
)) {
318 error
= EAFNOSUPPORT
;
321 inp
->inp_vflag
&= ~INP_IPV4
;
322 inp
->inp_vflag
|= INP_IPV6
;
323 if ((inp
->inp_flags
& IN6P_IPV6_V6ONLY
) == 0) {
324 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p
->sin6_addr
))
325 inp
->inp_vflag
|= INP_IPV4
;
326 else if (IN6_IS_ADDR_V4MAPPED(&sin6p
->sin6_addr
)) {
327 struct sockaddr_in sin
;
329 in6_sin6_2_sin(&sin
, sin6p
);
330 inp
->inp_vflag
|= INP_IPV4
;
331 inp
->inp_vflag
&= ~INP_IPV6
;
332 error
= in_pcbbind(inp
, (struct sockaddr
*)&sin
, p
);
336 error
= in6_pcbbind(inp
, nam
, p
);
339 COMMON_END(PRU_BIND
);
344 * Prepare to accept connections.
347 * EINVAL [COMMON_START]
348 * in_pcbbind:EADDRNOTAVAIL Address not available.
349 * in_pcbbind:EINVAL Invalid argument
350 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
351 * in_pcbbind:EACCES Permission denied
352 * in_pcbbind:EADDRINUSE Address in use
353 * in_pcbbind:EAGAIN Resource unavailable, try again
354 * in_pcbbind:EPERM Operation not permitted
357 tcp_usr_listen(struct socket
*so
, struct proc
*p
)
360 struct inpcb
*inp
= sotoinpcb(so
);
364 if (inp
->inp_lport
== 0)
365 error
= in_pcbbind(inp
, (struct sockaddr
*)0, p
);
367 tp
->t_state
= TCPS_LISTEN
;
368 COMMON_END(PRU_LISTEN
);
373 tcp6_usr_listen(struct socket
*so
, struct proc
*p
)
376 struct inpcb
*inp
= sotoinpcb(so
);
380 if (inp
->inp_lport
== 0) {
381 inp
->inp_vflag
&= ~INP_IPV4
;
382 if ((inp
->inp_flags
& IN6P_IPV6_V6ONLY
) == 0)
383 inp
->inp_vflag
|= INP_IPV4
;
384 error
= in6_pcbbind(inp
, (struct sockaddr
*)0, p
);
387 tp
->t_state
= TCPS_LISTEN
;
388 COMMON_END(PRU_LISTEN
);
393 * Initiate connection to peer.
394 * Create a template for use in transmissions on this connection.
395 * Enter SYN_SENT state, and mark socket as connecting.
396 * Start keep-alive timer, and seed output sequence space.
397 * Send initial segment on connection.
400 tcp_usr_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
403 struct inpcb
*inp
= sotoinpcb(so
);
405 struct sockaddr_in
*sinp
;
410 else if (inp
->inp_state
== INPCB_STATE_DEAD
) {
412 error
= so
->so_error
;
421 calculate_tcp_clock();
423 if (nam
->sa_family
!= 0 && nam
->sa_family
!= AF_INET
) {
424 error
= EAFNOSUPPORT
;
428 * Must disallow TCP ``connections'' to multicast addresses.
430 sinp
= (struct sockaddr_in
*)(void *)nam
;
431 if (sinp
->sin_family
== AF_INET
432 && IN_MULTICAST(ntohl(sinp
->sin_addr
.s_addr
))) {
433 error
= EAFNOSUPPORT
;
438 if ((error
= tcp_connect(tp
, nam
, p
)) != 0)
440 error
= tcp_output(tp
);
441 COMMON_END(PRU_CONNECT
);
446 tcp6_usr_connect(struct socket
*so
, struct sockaddr
*nam
, struct proc
*p
)
449 struct inpcb
*inp
= sotoinpcb(so
);
451 struct sockaddr_in6
*sin6p
;
455 if (nam
->sa_family
!= 0 && nam
->sa_family
!= AF_INET6
) {
456 error
= EAFNOSUPPORT
;
461 * Must disallow TCP ``connections'' to multicast addresses.
463 sin6p
= (struct sockaddr_in6
*)(void *)nam
;
464 if (sin6p
->sin6_family
== AF_INET6
465 && IN6_IS_ADDR_MULTICAST(&sin6p
->sin6_addr
)) {
466 error
= EAFNOSUPPORT
;
470 if (IN6_IS_ADDR_V4MAPPED(&sin6p
->sin6_addr
)) {
471 struct sockaddr_in sin
;
473 if ((inp
->inp_flags
& IN6P_IPV6_V6ONLY
) != 0)
476 in6_sin6_2_sin(&sin
, sin6p
);
477 inp
->inp_vflag
|= INP_IPV4
;
478 inp
->inp_vflag
&= ~INP_IPV6
;
479 if ((error
= tcp_connect(tp
, (struct sockaddr
*)&sin
, p
)) != 0)
481 error
= tcp_output(tp
);
484 inp
->inp_vflag
&= ~INP_IPV4
;
485 inp
->inp_vflag
|= INP_IPV6
;
486 if ((error
= tcp6_connect(tp
, nam
, p
)) != 0)
488 error
= tcp_output(tp
);
491 COMMON_END(PRU_CONNECT
);
/*
 * Initiate disconnect from peer.
 * If the connection never passed the embryonic stage, just drop it;
 * else if we don't need to let data drain, we can just drop it anyway;
 * else we have to begin the TCP shutdown process: mark the socket
 * disconnecting, drain unread data, switch state to reflect the user
 * close, and send a segment (e.g. FIN) to the peer.  The socket will be
 * really disconnected when the peer sends a FIN and acks ours.
 *
 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
 */
507 tcp_usr_disconnect(struct socket
*so
)
510 struct inpcb
*inp
= sotoinpcb(so
);
513 lck_mtx_assert(&((struct inpcb
*)so
->so_pcb
)->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
);
515 /* In case we got disconnected from the peer */
518 tp
= tcp_disconnect(tp
);
519 COMMON_END(PRU_DISCONNECT
);
523 * Accept a connection. Essentially all the work is
524 * done at higher levels; just return the address
525 * of the peer, storing through addr.
528 tcp_usr_accept(struct socket
*so
, struct sockaddr
**nam
)
531 struct inpcb
*inp
= sotoinpcb(so
);
532 struct tcpcb
*tp
= NULL
;
535 in_setpeeraddr(so
, nam
);
537 if (so
->so_state
& SS_ISDISCONNECTED
) {
538 error
= ECONNABORTED
;
541 if (inp
== 0 || (inp
->inp_state
== INPCB_STATE_DEAD
)) {
547 calculate_tcp_clock();
549 COMMON_END(PRU_ACCEPT
);
554 tcp6_usr_accept(struct socket
*so
, struct sockaddr
**nam
)
557 struct inpcb
*inp
= sotoinpcb(so
);
558 struct tcpcb
*tp
= NULL
;
561 if (so
->so_state
& SS_ISDISCONNECTED
) {
562 error
= ECONNABORTED
;
565 if (inp
== 0 || (inp
->inp_state
== INPCB_STATE_DEAD
)) {
571 calculate_tcp_clock();
573 in6_mapped_peeraddr(so
, nam
);
574 COMMON_END(PRU_ACCEPT
);
579 * Mark the connection as being incapable of further output.
582 * EINVAL [COMMON_START]
583 * tcp_output:EADDRNOTAVAIL
585 * tcp_output:EMSGSIZE
586 * tcp_output:EHOSTUNREACH
587 * tcp_output:ENETUNREACH
588 * tcp_output:ENETDOWN
591 * tcp_output:EMSGSIZE
593 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
596 tcp_usr_shutdown(struct socket
*so
)
599 struct inpcb
*inp
= sotoinpcb(so
);
604 /* In case we got disconnected from the peer */
607 tp
= tcp_usrclosed(tp
);
609 error
= tcp_output(tp
);
610 COMMON_END(PRU_SHUTDOWN
);
614 * After a receive, possibly send window update to peer.
617 tcp_usr_rcvd(struct socket
*so
, __unused
int flags
)
620 struct inpcb
*inp
= sotoinpcb(so
);
624 /* In case we got disconnected from the peer */
627 tcp_sbrcv_trim(tp
, &so
->so_rcv
);
630 COMMON_END(PRU_RCVD
);
634 * Do a send by putting data in output queue and updating urgent
635 * marker if URG set. Possibly send more data. Unlike the other
636 * pru_*() routines, the mbuf chains are our responsibility. We
637 * must either enqueue them or free them. The other pru_* routines
638 * generally are caller-frees.
644 * tcp_connect:EADDRINUSE Address in use
645 * tcp_connect:EADDRNOTAVAIL Address not available.
646 * tcp_connect:EINVAL Invalid argument
647 * tcp_connect:EAFNOSUPPORT Address family not supported [notdef]
648 * tcp_connect:EACCES Permission denied
649 * tcp_connect:EAGAIN Resource unavailable, try again
650 * tcp_connect:EPERM Operation not permitted
651 * tcp_output:EADDRNOTAVAIL
653 * tcp_output:EMSGSIZE
654 * tcp_output:EHOSTUNREACH
655 * tcp_output:ENETUNREACH
656 * tcp_output:ENETDOWN
659 * tcp_output:EMSGSIZE
661 * tcp_output:??? [ignorable: mostly IPSEC/firewall/DLIL]
662 * tcp6_connect:??? [IPV6 only]
665 tcp_usr_send(struct socket
*so
, int flags
, struct mbuf
*m
,
666 struct sockaddr
*nam
, struct mbuf
*control
, struct proc
*p
)
669 struct inpcb
*inp
= sotoinpcb(so
);
676 if (inp
== NULL
|| inp
->inp_state
== INPCB_STATE_DEAD
) {
678 * OOPS! we lost a race, the TCP session got reset after
679 * we checked SS_CANTSENDMORE, eg: while doing uiomove or a
680 * network interrupt in the non-splnet() section of sosend().
686 error
= ECONNRESET
; /* XXX EPIPE? */
692 isipv6
= nam
&& nam
->sa_family
== AF_INET6
;
697 calculate_tcp_clock();
700 /* TCP doesn't do control messages (rights, creds, etc) */
701 if (control
->m_len
) {
708 m_freem(control
); /* empty control, just free it */
710 if(!(flags
& PRUS_OOB
)) {
711 sbappendstream(&so
->so_snd
, m
);
712 if (nam
&& tp
->t_state
< TCPS_SYN_SENT
) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
721 error
= tcp6_connect(tp
, nam
, p
);
724 error
= tcp_connect(tp
, nam
, p
);
727 tp
->snd_wnd
= TTCP_CLIENT_SND_WND
;
728 tcp_mss(tp
, -1, IFSCOPE_NONE
);
731 if (flags
& PRUS_EOF
) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
737 tp
= tcp_usrclosed(tp
);
740 if (flags
& PRUS_MORETOCOME
)
741 tp
->t_flags
|= TF_MORETOCOME
;
742 error
= tcp_output(tp
);
743 if (flags
& PRUS_MORETOCOME
)
744 tp
->t_flags
&= ~TF_MORETOCOME
;
747 if (sbspace(&so
->so_snd
) == 0) {
748 /* if no space is left in sockbuf,
749 * do not try to squeeze in OOB traffic */
755 * According to RFC961 (Assigned Protocols),
756 * the urgent pointer points to the last octet
757 * of urgent data. We continue, however,
758 * to consider it to indicate the first octet
759 * of data past the urgent section.
760 * Otherwise, snd_up should be one lower.
762 sbappendstream(&so
->so_snd
, m
);
763 if (nam
&& tp
->t_state
< TCPS_SYN_SENT
) {
			/*
			 * Do implied connect if not yet connected,
			 * initialize window to default value, and
			 * initialize maxseg/maxopd using peer's cached
			 * MSS.
			 */
772 error
= tcp6_connect(tp
, nam
, p
);
775 error
= tcp_connect(tp
, nam
, p
);
778 tp
->snd_wnd
= TTCP_CLIENT_SND_WND
;
779 tcp_mss(tp
, -1, IFSCOPE_NONE
);
781 tp
->snd_up
= tp
->snd_una
+ so
->so_snd
.sb_cc
;
783 error
= tcp_output(tp
);
786 COMMON_END((flags
& PRUS_OOB
) ? PRU_SENDOOB
:
787 ((flags
& PRUS_EOF
) ? PRU_SEND_EOF
: PRU_SEND
));
794 tcp_usr_abort(struct socket
*so
)
797 struct inpcb
*inp
= sotoinpcb(so
);
801 /* In case we got disconnected from the peer */
804 tp
= tcp_drop(tp
, ECONNABORTED
);
806 COMMON_END(PRU_ABORT
);
810 * Receive out-of-band data.
813 * EINVAL [COMMON_START]
818 tcp_usr_rcvoob(struct socket
*so
, struct mbuf
*m
, int flags
)
821 struct inpcb
*inp
= sotoinpcb(so
);
825 if ((so
->so_oobmark
== 0 &&
826 (so
->so_state
& SS_RCVATMARK
) == 0) ||
827 so
->so_options
& SO_OOBINLINE
||
828 tp
->t_oobflags
& TCPOOB_HADDATA
) {
832 if ((tp
->t_oobflags
& TCPOOB_HAVEDATA
) == 0) {
837 *mtod(m
, caddr_t
) = tp
->t_iobc
;
838 if ((flags
& MSG_PEEK
) == 0)
839 tp
->t_oobflags
^= (TCPOOB_HAVEDATA
| TCPOOB_HADDATA
);
840 COMMON_END(PRU_RCVOOB
);
843 /* xxx - should be const */
844 struct pr_usrreqs tcp_usrreqs
= {
845 tcp_usr_abort
, tcp_usr_accept
, tcp_usr_attach
, tcp_usr_bind
,
846 tcp_usr_connect
, pru_connect2_notsupp
, in_control
, tcp_usr_detach
,
847 tcp_usr_disconnect
, tcp_usr_listen
, in_setpeeraddr
, tcp_usr_rcvd
,
848 tcp_usr_rcvoob
, tcp_usr_send
, pru_sense_null
, tcp_usr_shutdown
,
849 in_setsockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
853 struct pr_usrreqs tcp6_usrreqs
= {
854 tcp_usr_abort
, tcp6_usr_accept
, tcp_usr_attach
, tcp6_usr_bind
,
855 tcp6_usr_connect
, pru_connect2_notsupp
, in6_control
, tcp_usr_detach
,
856 tcp_usr_disconnect
, tcp6_usr_listen
, in6_mapped_peeraddr
, tcp_usr_rcvd
,
857 tcp_usr_rcvoob
, tcp_usr_send
, pru_sense_null
, tcp_usr_shutdown
,
858 in6_mapped_sockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
863 * Common subroutine to open a TCP connection to remote host specified
864 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
865 * port number if needed. Call in_pcbladdr to do the routing and to choose
866 * a local host address (interface). If there is an existing incarnation
867 * of the same connection in TIME-WAIT state and if the remote host was
868 * sending CC options and if the connection duration was < MSL, then
869 * truncate the previous TIME-WAIT state and proceed.
870 * Initialize connection parameters and enter SYN-SENT state.
875 * in_pcbbind:EADDRNOTAVAIL Address not available.
876 * in_pcbbind:EINVAL Invalid argument
877 * in_pcbbind:EAFNOSUPPORT Address family not supported [notdef]
878 * in_pcbbind:EACCES Permission denied
879 * in_pcbbind:EADDRINUSE Address in use
880 * in_pcbbind:EAGAIN Resource unavailable, try again
881 * in_pcbbind:EPERM Operation not permitted
882 * in_pcbladdr:EINVAL Invalid argument
883 * in_pcbladdr:EAFNOSUPPORT Address family not supported
884 * in_pcbladdr:EADDRNOTAVAIL Address not available
887 tcp_connect(tp
, nam
, p
)
888 register struct tcpcb
*tp
;
889 struct sockaddr
*nam
;
892 struct inpcb
*inp
= tp
->t_inpcb
, *oinp
;
893 struct socket
*so
= inp
->inp_socket
;
895 struct sockaddr_in
*sin
= (struct sockaddr_in
*)(void *)nam
;
896 struct sockaddr_in ifaddr
;
897 struct rmxp_tao
*taop
;
898 struct rmxp_tao tao_noncached
;
900 struct ifnet
*outif
= NULL
;
902 if (inp
->inp_lport
== 0) {
903 error
= in_pcbbind(inp
, (struct sockaddr
*)0, p
);
909 * Cannot simply call in_pcbconnect, because there might be an
910 * earlier incarnation of this same connection still in
911 * TIME_WAIT state, creating an ADDRINUSE error.
913 error
= in_pcbladdr(inp
, nam
, &ifaddr
, &outif
);
917 tcp_unlock(inp
->inp_socket
, 0, 0);
918 oinp
= in_pcblookup_hash(inp
->inp_pcbinfo
,
919 sin
->sin_addr
, sin
->sin_port
,
920 inp
->inp_laddr
.s_addr
!= INADDR_ANY
? inp
->inp_laddr
922 inp
->inp_lport
, 0, NULL
);
924 tcp_lock(inp
->inp_socket
, 0, 0);
926 if (oinp
!= inp
) /* 4143933: avoid deadlock if inp == oinp */
927 tcp_lock(oinp
->inp_socket
, 1, 0);
928 if (in_pcb_checkstate(oinp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) {
930 tcp_unlock(oinp
->inp_socket
, 1, 0);
934 if (oinp
!= inp
&& (otp
= intotcpcb(oinp
)) != NULL
&&
935 otp
->t_state
== TCPS_TIME_WAIT
&&
936 ((int)(tcp_now
- otp
->t_starttime
)) < tcp_msl
&&
937 (otp
->t_flags
& TF_RCVD_CC
))
938 otp
= tcp_close(otp
);
940 printf("tcp_connect: inp=%p err=EADDRINUSE\n", inp
);
942 tcp_unlock(oinp
->inp_socket
, 1, 0);
946 tcp_unlock(oinp
->inp_socket
, 1, 0);
949 if ((inp
->inp_laddr
.s_addr
== INADDR_ANY
? ifaddr
.sin_addr
.s_addr
:
950 inp
->inp_laddr
.s_addr
) == sin
->sin_addr
.s_addr
&&
951 inp
->inp_lport
== sin
->sin_port
)
953 if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) {
954 /*lock inversion issue, mostly with udp multicast packets */
955 socket_unlock(inp
->inp_socket
, 0);
956 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
);
957 socket_lock(inp
->inp_socket
, 0);
959 if (inp
->inp_laddr
.s_addr
== INADDR_ANY
) {
960 inp
->inp_laddr
= ifaddr
.sin_addr
;
961 inp
->inp_last_outifp
= outif
;
963 inp
->inp_faddr
= sin
->sin_addr
;
964 inp
->inp_fport
= sin
->sin_port
;
966 lck_rw_done(inp
->inp_pcbinfo
->mtx
);
968 if (inp
->inp_flowhash
== 0)
969 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
971 tcp_set_max_rwinscale(tp
, so
);
974 tcpstat
.tcps_connattempt
++;
975 tp
->t_state
= TCPS_SYN_SENT
;
976 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
,
977 tp
->t_keepinit
? tp
->t_keepinit
: tcp_keepinit
);
978 tp
->iss
= tcp_new_isn(tp
);
981 nstat_route_connect_attempt(inp
->inp_route
.ro_rt
);
984 * Generate a CC value for this connection and
985 * check whether CC or CCnew should be used.
987 if ((taop
= tcp_gettaocache(tp
->t_inpcb
)) == NULL
) {
988 taop
= &tao_noncached
;
989 bzero(taop
, sizeof(*taop
));
992 tp
->cc_send
= CC_INC(tcp_ccgen
);
993 if (taop
->tao_ccsent
!= 0 &&
994 CC_GEQ(tp
->cc_send
, taop
->tao_ccsent
)) {
995 taop
->tao_ccsent
= tp
->cc_send
;
997 taop
->tao_ccsent
= 0;
998 tp
->t_flags
|= TF_SENDCCNEW
;
1006 tcp6_connect(tp
, nam
, p
)
1007 register struct tcpcb
*tp
;
1008 struct sockaddr
*nam
;
1011 struct inpcb
*inp
= tp
->t_inpcb
, *oinp
;
1012 struct socket
*so
= inp
->inp_socket
;
1014 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)(void *)nam
;
1015 struct in6_addr addr6
;
1016 struct rmxp_tao
*taop
;
1017 struct rmxp_tao tao_noncached
;
1019 struct ifnet
*outif
= NULL
;
1021 if (inp
->inp_lport
== 0) {
1022 error
= in6_pcbbind(inp
, (struct sockaddr
*)0, p
);
1028 * Cannot simply call in_pcbconnect, because there might be an
1029 * earlier incarnation of this same connection still in
1030 * TIME_WAIT state, creating an ADDRINUSE error.
1032 * in6_pcbladdr() might return an ifp with its reference held
1033 * even in the error case, so make sure that it's released
1034 * whenever it's non-NULL.
1036 error
= in6_pcbladdr(inp
, nam
, &addr6
, &outif
);
1039 tcp_unlock(inp
->inp_socket
, 0, 0);
1040 oinp
= in6_pcblookup_hash(inp
->inp_pcbinfo
,
1041 &sin6
->sin6_addr
, sin6
->sin6_port
,
1042 IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_laddr
)
1045 inp
->inp_lport
, 0, NULL
);
1046 tcp_lock(inp
->inp_socket
, 0, 0);
1048 if (oinp
!= inp
&& (otp
= intotcpcb(oinp
)) != NULL
&&
1049 otp
->t_state
== TCPS_TIME_WAIT
&&
1050 ((int)(tcp_now
- otp
->t_starttime
)) < tcp_msl
&&
1051 (otp
->t_flags
& TF_RCVD_CC
)) {
1052 otp
= tcp_close(otp
);
1058 if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) {
1059 /*lock inversion issue, mostly with udp multicast packets */
1060 socket_unlock(inp
->inp_socket
, 0);
1061 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
);
1062 socket_lock(inp
->inp_socket
, 0);
1064 if (IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_laddr
)) {
1065 inp
->in6p_laddr
= addr6
;
1066 inp
->in6p_last_outifp
= outif
; /* no reference needed */
1068 inp
->in6p_faddr
= sin6
->sin6_addr
;
1069 inp
->inp_fport
= sin6
->sin6_port
;
1070 if ((sin6
->sin6_flowinfo
& IPV6_FLOWINFO_MASK
) != 0)
1071 inp
->in6p_flowinfo
= sin6
->sin6_flowinfo
;
1073 lck_rw_done(inp
->inp_pcbinfo
->mtx
);
1075 if (inp
->inp_flowhash
== 0)
1076 inp
->inp_flowhash
= inp_calc_flowhash(inp
);
1078 tcp_set_max_rwinscale(tp
, so
);
1081 tcpstat
.tcps_connattempt
++;
1082 tp
->t_state
= TCPS_SYN_SENT
;
1083 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
,
1084 tp
->t_keepinit
? tp
->t_keepinit
: tcp_keepinit
);
1085 tp
->iss
= tcp_new_isn(tp
);
1086 tcp_sendseqinit(tp
);
1088 nstat_route_connect_attempt(inp
->inp_route
.ro_rt
);
1091 * Generate a CC value for this connection and
1092 * check whether CC or CCnew should be used.
1094 if ((taop
= tcp_gettaocache(tp
->t_inpcb
)) == NULL
) {
1095 taop
= &tao_noncached
;
1096 bzero(taop
, sizeof(*taop
));
1099 tp
->cc_send
= CC_INC(tcp_ccgen
);
1100 if (taop
->tao_ccsent
!= 0 &&
1101 CC_GEQ(tp
->cc_send
, taop
->tao_ccsent
)) {
1102 taop
->tao_ccsent
= tp
->cc_send
;
1104 taop
->tao_ccsent
= 0;
1105 tp
->t_flags
|= TF_SENDCCNEW
;
1110 ifnet_release(outif
);
1117 * Export TCP internal state information via a struct tcp_info
1119 __private_extern__
void
1120 tcp_fill_info(struct tcpcb
*tp
, struct tcp_info
*ti
)
1122 struct inpcb
*inp
= tp
->t_inpcb
;
1124 bzero(ti
, sizeof(*ti
));
1126 ti
->tcpi_state
= tp
->t_state
;
1128 if (tp
->t_state
> TCPS_LISTEN
) {
1129 if ((tp
->t_flags
& TF_REQ_TSTMP
) && (tp
->t_flags
& TF_RCVD_TSTMP
))
1130 ti
->tcpi_options
|= TCPI_OPT_TIMESTAMPS
;
1131 if (tp
->t_flags
& TF_SACK_PERMIT
)
1132 ti
->tcpi_options
|= TCPI_OPT_SACK
;
1133 if ((tp
->t_flags
& TF_REQ_SCALE
) && (tp
->t_flags
& TF_RCVD_SCALE
)) {
1134 ti
->tcpi_options
|= TCPI_OPT_WSCALE
;
1135 ti
->tcpi_snd_wscale
= tp
->snd_scale
;
1136 ti
->tcpi_rcv_wscale
= tp
->rcv_scale
;
1139 /* Are we in retranmission episode */
1140 if (tp
->snd_max
!= tp
->snd_nxt
)
1141 ti
->tcpi_flags
|= TCPI_FLAG_LOSSRECOVERY
;
1143 ti
->tcpi_flags
&= ~TCPI_FLAG_LOSSRECOVERY
;
1145 ti
->tcpi_rto
= tp
->t_timer
[TCPT_REXMT
] ? tp
->t_rxtcur
: 0;
1146 ti
->tcpi_snd_mss
= tp
->t_maxseg
;
1147 ti
->tcpi_rcv_mss
= tp
->t_maxseg
;
1149 ti
->tcpi_rttcur
= tp
->t_rttcur
;
1150 ti
->tcpi_srtt
= tp
->t_srtt
>> TCP_RTT_SHIFT
;
1151 ti
->tcpi_rttvar
= tp
->t_rttvar
>> TCP_RTTVAR_SHIFT
;
1153 ti
->tcpi_snd_ssthresh
= tp
->snd_ssthresh
;
1154 ti
->tcpi_snd_cwnd
= tp
->snd_cwnd
;
1155 ti
->tcpi_snd_sbbytes
= tp
->t_inpcb
->inp_socket
->so_snd
.sb_cc
;
1157 ti
->tcpi_rcv_space
= tp
->rcv_wnd
;
1159 ti
->tcpi_snd_wnd
= tp
->snd_wnd
;
1160 ti
->tcpi_snd_nxt
= tp
->snd_nxt
;
1161 ti
->tcpi_rcv_nxt
= tp
->rcv_nxt
;
1163 /* convert bytes/msec to bits/sec */
1164 if ((tp
->t_flagsext
& TF_MEASURESNDBW
) != 0 &&
1165 tp
->t_bwmeas
!= NULL
) {
1166 ti
->tcpi_snd_bw
= (tp
->t_bwmeas
->bw_sndbw
* 8000);
1169 ti
->tcpi_last_outif
= (tp
->t_inpcb
->inp_last_outifp
== NULL
) ? 0 :
1170 tp
->t_inpcb
->inp_last_outifp
->if_index
;
1172 //atomic_get_64(ti->tcpi_txbytes, &inp->inp_stat->txbytes);
1173 ti
->tcpi_txbytes
= inp
->inp_stat
->txbytes
;
1174 ti
->tcpi_txretransmitbytes
= tp
->t_stat
.txretransmitbytes
;
1175 ti
->tcpi_txunacked
= tp
->snd_max
- tp
->snd_una
;
1177 //atomic_get_64(ti->tcpi_rxbytes, &inp->inp_stat->rxbytes);
1178 ti
->tcpi_rxbytes
= inp
->inp_stat
->rxbytes
;
1179 ti
->tcpi_rxduplicatebytes
= tp
->t_stat
.rxduplicatebytes
;
1183 __private_extern__ errno_t
1184 tcp_fill_info_for_info_tuple(struct info_tuple
*itpl
, struct tcp_info
*ti
)
1186 struct inpcbinfo
*pcbinfo
= NULL
;
1187 struct inpcb
*inp
= NULL
;
1191 if (itpl
->itpl_proto
== IPPROTO_TCP
)
1196 if (itpl
->itpl_local_sa
.sa_family
== AF_INET
&&
1197 itpl
->itpl_remote_sa
.sa_family
== AF_INET
) {
1198 inp
= in_pcblookup_hash(pcbinfo
,
1199 itpl
->itpl_remote_sin
.sin_addr
,
1200 itpl
->itpl_remote_sin
.sin_port
,
1201 itpl
->itpl_local_sin
.sin_addr
,
1202 itpl
->itpl_local_sin
.sin_port
,
1204 } else if (itpl
->itpl_local_sa
.sa_family
== AF_INET6
&&
1205 itpl
->itpl_remote_sa
.sa_family
== AF_INET6
) {
1206 struct in6_addr ina6_local
;
1207 struct in6_addr ina6_remote
;
1209 ina6_local
= itpl
->itpl_local_sin6
.sin6_addr
;
1210 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_local
) && itpl
->itpl_local_sin6
.sin6_scope_id
)
1211 ina6_local
.s6_addr16
[1] = htons(itpl
->itpl_local_sin6
.sin6_scope_id
);
1213 ina6_remote
= itpl
->itpl_remote_sin6
.sin6_addr
;
1214 if (IN6_IS_SCOPE_LINKLOCAL(&ina6_remote
) && itpl
->itpl_remote_sin6
.sin6_scope_id
)
1215 ina6_remote
.s6_addr16
[1] = htons(itpl
->itpl_remote_sin6
.sin6_scope_id
);
1217 inp
= in6_pcblookup_hash(pcbinfo
,
1219 itpl
->itpl_remote_sin6
.sin6_port
,
1221 itpl
->itpl_local_sin6
.sin6_port
,
1225 if (inp
== NULL
|| (so
= inp
->inp_socket
) == NULL
)
1229 if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) {
1230 socket_unlock(so
, 0);
1233 tp
= intotcpcb(inp
);
1235 tcp_fill_info(tp
, ti
);
1236 socket_unlock(so
, 0);
/*
 * tcp_sysctl_info: sysctl handler that takes a struct info_tuple from
 * the request and returns the struct tcp_info for the connection it
 * names.
 *
 * Visible steps: check req->newptr against USER_ADDR_NULL and
 * req->newlen against sizeof(struct info_tuple); copy the tuple in with
 * SYSCTL_IN(); call tcp_fill_info_for_info_tuple(); copy the result out
 * with SYSCTL_OUT().  oidp, arg1 and arg2 are marked __unused.
 *
 * NOTE(review): the error handling between the steps and the
 * declarations of `error` and `ti` are not visible in this view.
 */
1242 __private_extern__
int
1243 tcp_sysctl_info(__unused
struct sysctl_oid
*oidp
, __unused
void *arg1
, __unused
int arg2
, struct sysctl_req
*req
)
1247 struct info_tuple itpl
;
/* Input validation: new data must be supplied ... */
1249 if (req
->newptr
== USER_ADDR_NULL
) {
/* ... and must be at least as large as one info_tuple. */
1252 if (req
->newlen
< sizeof(struct info_tuple
)) {
1255 error
= SYSCTL_IN(req
, &itpl
, sizeof(struct info_tuple
));
1259 error
= tcp_fill_info_for_info_tuple(&itpl
, &ti
);
1263 error
= SYSCTL_OUT(req
, &ti
, sizeof(struct tcp_info
));
/*
 * tcp_lookup_peer_pid_locked: report, through *out_pid, the last_pid of
 * the socket found by looking up this connection's addresses in tcbinfo.
 *
 * Returns ENOTCONN when the socket is not in SS_ISCONNECTED state.
 * `error` starts as EHOSTUNREACH; NOTE(review): the success assignment
 * and final return are not visible in this view.
 *
 * The lookup is given this socket's LOCAL address/port first and its
 * FOREIGN address/port second -- the reverse of the remote-then-local
 * order used by tcp_fill_info_for_info_tuple() -- so it presumably finds
 * the pcb at the other end of a same-host connection (confirm).
 *
 * NOTE(review): the socket is unlocked before each lookup and the
 * matching re-lock is not visible here; the `_locked` suffix suggests
 * the caller holds the socket lock, so confirm it is re-taken.
 */
1272 tcp_lookup_peer_pid_locked(struct socket
*so
, pid_t
*out_pid
)
1274 int error
= EHOSTUNREACH
;
1276 if ((so
->so_state
& SS_ISCONNECTED
) == 0) return ENOTCONN
;
1278 struct inpcb
*inp
= (struct inpcb
*)so
->so_pcb
;
1279 uint16_t lport
= inp
->inp_lport
;
1280 uint16_t fport
= inp
->inp_fport
;
1281 struct inpcb
*finp
= NULL
;
/* Addresses are copied to locals before the socket is unlocked. */
1283 if (inp
->inp_vflag
& INP_IPV6
) {
1284 struct in6_addr laddr6
= inp
->in6p_laddr
;
1285 struct in6_addr faddr6
= inp
->in6p_faddr
;
1286 socket_unlock(so
, 0);
1287 finp
= in6_pcblookup_hash(&tcbinfo
, &laddr6
, lport
, &faddr6
, fport
, 0, NULL
);
1289 } else if (inp
->inp_vflag
& INP_IPV4
) {
1290 struct in_addr laddr4
= inp
->inp_laddr
;
1291 struct in_addr faddr4
= inp
->inp_faddr
;
1292 socket_unlock(so
, 0);
1293 finp
= in_pcblookup_hash(&tcbinfo
, laddr4
, lport
, faddr4
, fport
, 0, NULL
);
/*
 * Report the pid recorded on the found socket, then release the pcb.
 * NOTE(review): the NULL check on finp guarding these lines is not
 * visible in this view.
 */
1298 *out_pid
= finp
->inp_socket
->last_pid
;
1300 in_pcb_checkstate(finp
, WNT_RELEASE
, 0);
1307 * The new sockopt interface makes it possible for us to block in the
1308 * copyin/out step (if we take a page fault). Taking a page fault at
1309 * splnet() is probably a Bad Thing. (Since sockets and pcbs both now
1310 * use TSM, there probably isn't any need for this function to run at
1311 * splnet() any more. This needs more examination.)
/*
 * tcp_ctloutput: socket-option handler for TCP sockets.
 *
 * As visible here: a request whose level is not IPPROTO_TCP (other than
 * SOL_SOCKET/SO_FLUSH) is handed to ip6_ctloutput() or ip_ctloutput().
 * ECONNRESET is returned on two early paths; NOTE(review): their
 * guarding conditions are not visible in this view (presumably a missing
 * inpcb and a missing tcpcb).  The rest switches on sopt->sopt_dir and
 * then on sopt->sopt_name: values being set are read with sooptcopyin(),
 * values being fetched are returned with sooptcopyout().  ENOPROTOOPT is
 * assigned once in each direction, presumably in the default cases.
 *
 * NOTE(review): many case labels, break statements and if/else lines are
 * missing from this view; comments below describe only what is visible.
 */
1314 tcp_ctloutput(so
, sopt
)
1316 struct sockopt
*sopt
;
1318 int error
, opt
, optval
;
1323 inp
= sotoinpcb(so
);
1325 return (ECONNRESET
);
1327 /* Allow <SOL_SOCKET,SO_FLUSH> at this level */
1328 if (sopt
->sopt_level
!= IPPROTO_TCP
&&
1329 !(sopt
->sopt_level
== SOL_SOCKET
&& sopt
->sopt_name
== SO_FLUSH
)) {
/* Not a TCP-level option: delegate to the IPv6 or IPv4 handler. */
1331 if (INP_CHECK_SOCKAF(so
, AF_INET6
))
1332 error
= ip6_ctloutput(so
, sopt
);
1335 error
= ip_ctloutput(so
, sopt
);
1338 tp
= intotcpcb(inp
);
1340 return (ECONNRESET
);
1343 calculate_tcp_clock();
1345 switch (sopt
->sopt_dir
) {
/* Write side (presumably SOPT_SET; the label is not visible). */
1347 switch (sopt
->sopt_name
) {
/*
 * First group of options (labels not visible): an int is copied in, the
 * inner switch selects a flag bit into `opt`, and that bit is applied
 * to tp->t_flags.
 */
1351 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1356 switch (sopt
->sopt_name
) {
1367 opt
= 0; /* dead code to fool gcc */
1374 tp
->t_flags
&= ~opt
;
/* TCP_RXT_FINDROP: set or clear TF_RXTFINDROP in t_flagsext. */
1376 case TCP_RXT_FINDROP
:
1377 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1381 opt
= TF_RXTFINDROP
;
1383 tp
->t_flagsext
|= opt
;
1385 tp
->t_flagsext
&= ~opt
;
/*
 * TCP_MEASURE_SND_BW: t_bwmeas is allocated (when NULL) before
 * TF_MEASURESNDBW is set; after the flag is cleared,
 * TF_BWMEAS_INPROGRESS is cleared too and t_bwmeas is freed.
 * NOTE(review): the error assigned when allocation fails is not visible.
 */
1387 case TCP_MEASURE_SND_BW
:
1388 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1392 opt
= TF_MEASURESNDBW
;
1394 if (tp
->t_bwmeas
== NULL
) {
1395 tp
->t_bwmeas
= tcp_bwmeas_alloc(tp
);
1396 if (tp
->t_bwmeas
== NULL
) {
1401 tp
->t_flagsext
|= opt
;
1403 tp
->t_flagsext
&= ~opt
;
1404 /* Reset snd bw measurement state */
1405 tp
->t_flagsext
&= ~(TF_BWMEAS_INPROGRESS
);
1406 if (tp
->t_bwmeas
!= NULL
) {
1407 tcp_bwmeas_free(tp
);
/*
 * TCP_MEASURE_BW_BURST: update the burst limits.  Measurement must be
 * enabled (TF_MEASURESNDBW set and t_bwmeas non-NULL).  A zero
 * min_burst_size / max_burst_size in the request keeps the current
 * value; min greater than max is rejected.  bw_minsize / bw_maxsize are
 * set to the packet counts multiplied by t_maxseg.
 * NOTE(review): the error values on the rejecting paths are not visible.
 */
1411 case TCP_MEASURE_BW_BURST
: {
1412 struct tcp_measure_bw_burst in
;
1413 uint32_t minpkts
, maxpkts
;
1414 bzero(&in
, sizeof(in
));
1416 error
= sooptcopyin(sopt
, &in
, sizeof(in
),
1420 if ((tp
->t_flagsext
& TF_MEASURESNDBW
) == 0 ||
1421 tp
->t_bwmeas
== NULL
) {
1425 minpkts
= (in
.min_burst_size
!= 0) ? in
.min_burst_size
:
1426 tp
->t_bwmeas
->bw_minsizepkts
;
1427 maxpkts
= (in
.max_burst_size
!= 0) ? in
.max_burst_size
:
1428 tp
->t_bwmeas
->bw_maxsizepkts
;
1429 if (minpkts
> maxpkts
) {
1433 tp
->t_bwmeas
->bw_minsizepkts
= minpkts
;
1434 tp
->t_bwmeas
->bw_maxsizepkts
= maxpkts
;
1435 tp
->t_bwmeas
->bw_minsize
= (minpkts
* tp
->t_maxseg
);
1436 tp
->t_bwmeas
->bw_maxsize
= (maxpkts
* tp
->t_maxseg
);
/*
 * Next option (label not visible): the value replaces t_maxseg only when
 * 0 < optval <= current t_maxseg and optval + 40 >= tcp_minmss, so the
 * segment size can be lowered but not raised.
 */
1440 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1445 if (optval
> 0 && optval
<= tp
->t_maxseg
&&
1446 optval
+ 40 >= tcp_minmss
)
1447 tp
->t_maxseg
= optval
;
/*
 * Next option (label not visible): t_keepidle is stored as
 * optval * TCP_RETRANSHZ and the TCPT_KEEP timer is re-armed from
 * TCP_KEEPIDLE(tp).
 */
1453 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1460 tp
->t_keepidle
= optval
* TCP_RETRANSHZ
;
1461 tp
->t_timer
[TCPT_KEEP
] = OFFSET_FROM_START(tp
,
1462 TCP_KEEPIDLE(tp
)); /* reset the timer to new value */
1463 tcp_check_timer_state(tp
);
/*
 * The three timeout options below are stored as optval * TCP_RETRANSHZ;
 * the read side divides by TCP_RETRANSHZ again.
 * NOTE(review): any range checks on optval are not visible in this view.
 */
1467 case TCP_CONNECTIONTIMEOUT
:
1468 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1475 tp
->t_keepinit
= optval
* TCP_RETRANSHZ
;
1478 case PERSIST_TIMEOUT
:
1479 error
= sooptcopyin(sopt
, &optval
, sizeof optval
,
1486 tp
->t_persist_timeout
= optval
* TCP_RETRANSHZ
;
1488 case TCP_RXT_CONNDROPTIME
:
1489 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1496 tp
->rxt_conndroptime
= optval
* TCP_RETRANSHZ
;
/*
 * TCP_NOTSENT_LOWAT: either clears SOF_NOTSENT_LOWAT and zeroes
 * t_notsent_lowat, or sets the flag and stores optval.
 * NOTE(review): the condition choosing between the two is not visible.
 */
1498 case TCP_NOTSENT_LOWAT
:
1499 error
= sooptcopyin(sopt
, &optval
, sizeof(optval
),
1508 so
->so_flags
&= ~(SOF_NOTSENT_LOWAT
);
1509 tp
->t_notsent_lowat
= 0;
1511 so
->so_flags
|= SOF_NOTSENT_LOWAT
;
1512 tp
->t_notsent_lowat
= optval
;
/*
 * The copied-in value is handed to inp_flush(); presumably this is the
 * SO_FLUSH case admitted by the level check at the top (label not
 * visible).
 */
1518 if ((error
= sooptcopyin(sopt
, &optval
, sizeof (optval
),
1519 sizeof (optval
))) != 0)
1522 error
= inp_flush(inp
, optval
);
1526 error
= ENOPROTOOPT
;
/*
 * Read side (presumably SOPT_GET; the label is not visible): most cases
 * load `optval`, which is copied out at the end; a few copy out their
 * own structure directly.
 */
1532 switch (sopt
->sopt_name
) {
1534 optval
= tp
->t_flags
& TF_NODELAY
;
1537 optval
= tp
->t_maxseg
;
1540 optval
= tp
->t_keepidle
/ TCP_RETRANSHZ
;
1543 optval
= tp
->t_flags
& TF_NOOPT
;
1546 optval
= tp
->t_flags
& TF_NOPUSH
;
1548 case TCP_CONNECTIONTIMEOUT
:
1549 optval
= tp
->t_keepinit
/ TCP_RETRANSHZ
;
1551 case PERSIST_TIMEOUT
:
1552 optval
= tp
->t_persist_timeout
/ TCP_RETRANSHZ
;
1554 case TCP_RXT_CONNDROPTIME
:
1555 optval
= tp
->rxt_conndroptime
/ TCP_RETRANSHZ
;
1557 case TCP_RXT_FINDROP
:
1558 optval
= tp
->t_flagsext
& TF_RXTFINDROP
;
1560 case TCP_MEASURE_SND_BW
:
1561 optval
= tp
->t_flagsext
& TF_MEASURESNDBW
;
/* A full struct tcp_info is filled and copied out directly. */
1566 tcp_fill_info(tp
, &ti
);
1567 error
= sooptcopyout(sopt
, &ti
, sizeof(struct tcp_info
));
/* TCP_MEASURE_BW_BURST: current burst limits; requires measurement to be enabled. */
1571 case TCP_MEASURE_BW_BURST
: {
1572 struct tcp_measure_bw_burst out
;
1573 if ((tp
->t_flagsext
& TF_MEASURESNDBW
) == 0 ||
1574 tp
->t_bwmeas
== NULL
) {
1578 out
.min_burst_size
= tp
->t_bwmeas
->bw_minsizepkts
;
1579 out
.max_burst_size
= tp
->t_bwmeas
->bw_maxsizepkts
;
1580 error
= sooptcopyout(sopt
, &out
, sizeof(out
));
/* TCP_NOTSENT_LOWAT: the stored value is reported when SOF_NOTSENT_LOWAT is set. */
1583 case TCP_NOTSENT_LOWAT
:
1584 if ((so
->so_flags
& SOF_NOTSENT_LOWAT
) != 0) {
1585 optval
= tp
->t_notsent_lowat
;
/* TCP_PEER_PID: the pid found by tcp_lookup_peer_pid_locked() is copied out directly. */
1590 case TCP_PEER_PID
: {
1592 error
= tcp_lookup_peer_pid_locked(so
, &pid
);
1594 error
= sooptcopyout(sopt
, &pid
, sizeof(pid
));
1598 error
= ENOPROTOOPT
;
/* Common copy-out of `optval` for the simple integer options. */
1602 error
= sooptcopyout(sopt
, &optval
, sizeof optval
);
1610 * tcp_sendspace and tcp_recvspace are the default send and receive window
1611 * sizes, respectively. These are obsolescent (this information should
1612 * be set by the route).
/*
 * Initial values are 1448*256 (send) and 1448*384 (receive).  Both are
 * passed to soreserve() when a socket is attached and can be changed at
 * run time through the sendspace/recvspace sysctls handled by
 * sysctl_tcp_sospace().
 */
1614 u_int32_t tcp_sendspace
= 1448*256;
1615 u_int32_t tcp_recvspace
= 1448*384;
1617 /* During attach, the size of socket buffer allocated is limited to
1618 * sb_max in sbreserve. Disallow setting the tcp send and recv space
1619 * to be more than sb_max because that will cause tcp_attach to fail
1620 * (see radar 5713060)
/*
 * sysctl_tcp_sospace: shared handler for the sendspace and recvspace
 * sysctls.
 *
 * oidp->oid_number selects which global is targeted (TCPCTL_SENDSPACE ->
 * tcp_sendspace, TCPCTL_RECVSPACE -> tcp_recvspace).  sysctl_io_number()
 * performs the read/write against a local copy; the global is updated
 * only when the new value is > 0 and <= sb_effective_max.
 *
 * NOTE(review): the default case of the switch, the test on `changed`,
 * the error assigned to out-of-range values and the final return are not
 * visible in this view.
 */
1623 sysctl_tcp_sospace(struct sysctl_oid
*oidp
, __unused
void *arg1
,
1624 __unused
int arg2
, struct sysctl_req
*req
) {
1625 u_int32_t new_value
= 0, *space_p
= NULL
;
1626 int changed
= 0, error
= 0;
/* Upper bound for the new value: (sb_max / (MSIZE + MCLBYTES)) * MCLBYTES. */
1627 u_quad_t sb_effective_max
= (sb_max
/ (MSIZE
+MCLBYTES
)) * MCLBYTES
;
1629 switch (oidp
->oid_number
) {
1630 case TCPCTL_SENDSPACE
:
1631 space_p
= &tcp_sendspace
;
1633 case TCPCTL_RECVSPACE
:
1634 space_p
= &tcp_recvspace
;
1639 error
= sysctl_io_number(req
, *space_p
, sizeof(u_int32_t
),
1640 &new_value
, &changed
);
/* Accept only a positive value that does not exceed the effective maximum. */
1642 if (new_value
> 0 && new_value
<= sb_effective_max
) {
1643 *space_p
= new_value
;
/*
 * Register the sendspace and recvspace entries under _net_inet_tcp.
 * Both are read/write (CTLFLAG_RW) and use sysctl_tcp_sospace() as the
 * handler.
 * NOTE(review): the description strings say "datagram size", whereas
 * this file describes tcp_sendspace/tcp_recvspace as the default send
 * and receive window sizes -- consider rewording the descriptions.
 */
1651 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_SENDSPACE
, sendspace
, CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
1652 &tcp_sendspace
, 0, &sysctl_tcp_sospace
, "IU", "Maximum outgoing TCP datagram size");
1653 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_RECVSPACE
, recvspace
, CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
1654 &tcp_recvspace
, 0, &sysctl_tcp_sospace
, "IU", "Maximum incoming TCP datagram size");
1658 * Attach TCP protocol to socket, allocating
1659 * internet protocol control block, tcp control block,
1660 * buffer space, and entering LISTEN state if to accept connections.
1662 * Returns: 0 Success
1663 * in_pcballoc:ENOBUFS
1664 * in_pcballoc:ENOMEM
1665 * in_pcballoc:??? [IPSEC specific]
/*
 * Body of the attach routine described in the preceding comment.
 * NOTE(review): the function's name/parameter line is not visible in
 * this view (presumably tcp_attach) -- confirm.
 *
 * Visible steps: allocate the inpcb with in_pcballoc() on tcbinfo;
 * reserve socket buffers with soreserve(so, tcp_sendspace,
 * tcp_recvspace) when either high-water mark is still 0; turn on
 * SB_AUTOSIZE for each buffer that does not have SB_USRSIZE; tag the pcb
 * INP_IPV6 (with in6p_hops = -1) or INP_IPV4; create the tcpcb with
 * tcp_newtcpcb(); notify nstat when nstat_collect is set; leave the
 * connection in TCPS_CLOSED.
 */
1673 register struct tcpcb
*tp
;
1677 int isipv6
= INP_CHECK_SOCKAF(so
, AF_INET6
) != 0;
1680 error
= in_pcballoc(so
, &tcbinfo
, p
);
1684 inp
= sotoinpcb(so
);
/* Only reserve default buffer space if none has been reserved yet. */
1686 if (so
->so_snd
.sb_hiwat
== 0 || so
->so_rcv
.sb_hiwat
== 0) {
1687 error
= soreserve(so
, tcp_sendspace
, tcp_recvspace
);
/* Auto-sizing is enabled per buffer unless SB_USRSIZE is set on it. */
1691 if ((so
->so_rcv
.sb_flags
& SB_USRSIZE
) == 0)
1692 so
->so_rcv
.sb_flags
|= SB_AUTOSIZE
;
1693 if ((so
->so_snd
.sb_flags
& SB_USRSIZE
) == 0)
1694 so
->so_snd
.sb_flags
|= SB_AUTOSIZE
;
/* NOTE(review): the condition selecting IPv6 vs IPv4 is not visible; presumably isipv6. */
1698 inp
->inp_vflag
|= INP_IPV6
;
1699 inp
->in6p_hops
= -1; /* use kernel default */
1703 inp
->inp_vflag
|= INP_IPV4
;
1704 tp
= tcp_newtcpcb(inp
);
/*
 * SS_NOFDREF is saved, cleared, and later OR-ed back into so_state.
 * NOTE(review): the code between is not visible; presumably this is the
 * tcp_newtcpcb() failure path.
 */
1706 int nofd
= so
->so_state
& SS_NOFDREF
; /* XXX */
1708 so
->so_state
&= ~SS_NOFDREF
; /* don't free the socket yet */
1715 so
->so_state
|= nofd
;
1718 if (nstat_collect
) {
1719 nstat_tcp_new_pcb(inp
);
1721 tp
->t_state
= TCPS_CLOSED
;
1726 * Initiate (or continue) disconnect.
1727 * If embryonic state, just send reset (once).
1728 * If in ``let data drain'' option and linger null, just drop.
1729 * Otherwise (hard), mark socket disconnecting and drop
1730 * current input data; switch states based on user close, and
1731 * send segment to peer (with FIN).
/*
 * Disconnect routine described in the preceding comment.
 * NOTE(review): the function's name line is not visible in this view
 * (presumably tcp_disconnect) -- confirm.
 *
 * Visible cases, in order:
 *   - state below TCPS_ESTABLISHED (the action taken is not visible);
 *   - SO_LINGER set with so_linger == 0: tcp_drop(tp, 0);
 *   - otherwise: soisdisconnecting(), flush the receive buffer,
 *     tcp_usrclosed(), then tcp_output().
 * NOTE(review): tp is reassigned from tcp_usrclosed() before
 * tcp_output(tp); any NULL guard on that call is not visible here.
 */
1733 static struct tcpcb
*
1735 register struct tcpcb
*tp
;
1737 struct socket
*so
= tp
->t_inpcb
->inp_socket
;
1739 if (tp
->t_state
< TCPS_ESTABLISHED
)
1741 else if ((so
->so_options
& SO_LINGER
) && so
->so_linger
== 0)
1742 tp
= tcp_drop(tp
, 0);
1744 soisdisconnecting(so
);
1745 sbflush(&so
->so_rcv
);
1746 tp
= tcp_usrclosed(tp
);
1748 (void) tcp_output(tp
);
1754 * User issued close, and wish to trail through shutdown states:
1755 * if never received SYN, just forget it. If got a SYN from peer,
1756 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1757 * If already got a FIN from peer, then almost done; go to LAST_ACK
1758 * state. In all other cases, have already sent FIN to peer (e.g.
1759 * after PRU_SHUTDOWN), and just have to play tedious game waiting
1760 * for peer to send FIN or not respond to keep-alives, etc.
1761 * We can let the user exit from the close as soon as the FIN is acked.
/*
 * User-close state transition described in the preceding comment.
 * NOTE(review): the function's name line is not visible in this view
 * (presumably tcp_usrclosed, which the disconnect routine calls).
 *
 * Visible transitions on tp->t_state:
 *   - a first group of states (labels not visible) -> TCPS_CLOSED;
 *   - TCPS_SYN_RECEIVED: set TF_NEEDFIN in t_flags;
 *   - TCPS_ESTABLISHED  -> TCPS_FIN_WAIT_1;
 *   - TCPS_CLOSE_WAIT   -> TCPS_LAST_ACK.
 * Afterwards, if tp is non-NULL and the state is TCPS_FIN_WAIT_2 or
 * later, the socket is marked disconnected.
 */
1763 static struct tcpcb
*
1765 register struct tcpcb
*tp
;
1768 switch (tp
->t_state
) {
1772 tp
->t_state
= TCPS_CLOSED
;
1777 case TCPS_SYN_RECEIVED
:
1778 tp
->t_flags
|= TF_NEEDFIN
;
1781 case TCPS_ESTABLISHED
:
1782 tp
->t_state
= TCPS_FIN_WAIT_1
;
1785 case TCPS_CLOSE_WAIT
:
1786 tp
->t_state
= TCPS_LAST_ACK
;
/* The `tp &&` test implies tp may have become NULL in the switch above. */
1789 if (tp
&& tp
->t_state
>= TCPS_FIN_WAIT_2
) {
1790 soisdisconnected(tp
->t_inpcb
->inp_socket
);
1791 /* To prevent the connection hanging in FIN_WAIT_2 forever. */
1792 if (tp
->t_state
== TCPS_FIN_WAIT_2
)
1793 tp
->t_timer
[TCPT_2MSL
] = OFFSET_FROM_START(tp
, tcp_maxidle
);
/*
 * tcp_in_cksum_stats: add `len` to the tcps_in_sw_cksum_bytes counter.
 * NOTE(review): tcp_out_cksum_stats() also increments a per-call counter
 * (tcps_out_sw_cksum); the matching inbound increment is not visible in
 * this view -- confirm it is present.
 */
1799 tcp_in_cksum_stats(u_int32_t len
)
1802 tcps_in_sw_cksum_bytes
+= len
;
/*
 * tcp_out_cksum_stats: increment the tcps_out_sw_cksum counter by one
 * and add `len` to tcps_out_sw_cksum_bytes.
 */
1806 tcp_out_cksum_stats(u_int32_t len
)
1808 tcps_out_sw_cksum
++;
1809 tcps_out_sw_cksum_bytes
+= len
;