]>
git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/tcp_usrreq.c
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1982, 1986, 1988, 1993 
  30  *      The Regents of the University of California.  All rights reserved. 
  32  * Redistribution and use in source and binary forms, with or without 
  33  * modification, are permitted provided that the following conditions 
  35  * 1. Redistributions of source code must retain the above copyright 
  36  *    notice, this list of conditions and the following disclaimer. 
  37  * 2. Redistributions in binary form must reproduce the above copyright 
  38  *    notice, this list of conditions and the following disclaimer in the 
  39  *    documentation and/or other materials provided with the distribution. 
  40  * 3. All advertising materials mentioning features or use of this software 
  41  *    must display the following acknowledgement: 
  42  *      This product includes software developed by the University of 
  43  *      California, Berkeley and its contributors. 
  44  * 4. Neither the name of the University nor the names of its contributors 
  45  *    may be used to endorse or promote products derived from this software 
  46  *    without specific prior written permission. 
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  60  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94 
  61  * $FreeBSD: src/sys/netinet/tcp_usrreq.c,v 1.51.2.9 2001/08/22 00:59:12 silby Exp $ 
  65 #include <sys/param.h> 
  66 #include <sys/systm.h> 
  67 #include <sys/kernel.h> 
  68 #include <sys/sysctl.h> 
  71 #include <sys/domain.h> 
  73 #include <sys/socket.h> 
  74 #include <sys/socketvar.h> 
  75 #include <sys/protosw.h> 
  78 #include <net/route.h> 
  80 #include <netinet/in.h> 
  81 #include <netinet/in_systm.h> 
  83 #include <netinet/ip6.h> 
  85 #include <netinet/in_pcb.h> 
  87 #include <netinet6/in6_pcb.h> 
  89 #include <netinet/in_var.h> 
  90 #include <netinet/ip_var.h> 
  92 #include <netinet6/ip6_var.h> 
  94 #include <netinet/tcp.h> 
  95 #include <netinet/tcp_fsm.h> 
  96 #include <netinet/tcp_seq.h> 
  97 #include <netinet/tcp_timer.h> 
  98 #include <netinet/tcp_var.h> 
  99 #include <netinet/tcpip.h> 
 101 #include <netinet/tcp_debug.h> 
 105 #include <netinet6/ipsec.h> 
 109  * TCP protocol interface to socket abstraction. 
 111 extern  char *tcpstates
[];      /* XXX ??? */ 
 113 static int      tcp_attach(struct socket 
*, struct proc 
*); 
 114 static int      tcp_connect(struct tcpcb 
*, struct sockaddr 
*, struct proc 
*); 
 116 static int      tcp6_connect(struct tcpcb 
*, struct sockaddr 
*, struct proc 
*); 
 118 static struct tcpcb 
* 
 119                 tcp_disconnect(struct tcpcb 
*); 
 120 static struct tcpcb 
* 
 121                 tcp_usrclosed(struct tcpcb 
*); 
 124 #define TCPDEBUG0       int ostate = 0 
 125 #define TCPDEBUG1()     ostate = tp ? tp->t_state : 0 
 126 #define TCPDEBUG2(req)  if (tp && (so->so_options & SO_DEBUG)) \ 
 127                                 tcp_trace(TA_USER, ostate, tp, 0, 0, req) 
 131 #define TCPDEBUG2(req) 
 135  * TCP attaches to socket via pru_attach(), reserving space, 
 136  * and an internet control block. 
 139 tcp_usr_attach(struct socket 
*so
, int proto
, struct proc 
*p
) 
 142         struct inpcb 
*inp 
= sotoinpcb(so
); 
 143         struct tcpcb 
*tp 
= 0; 
 152         error 
= tcp_attach(so
, p
); 
 156         if ((so
->so_options 
& SO_LINGER
) && so
->so_linger 
== 0) 
 157                 so
->so_linger 
= TCP_LINGERTIME 
* hz
; 
 160         TCPDEBUG2(PRU_ATTACH
); 
 165  * pru_detach() detaches the TCP protocol from the socket. 
 166  * If the protocol state is non-embryonic, then can't 
 167  * do this directly: have to initiate a pru_disconnect(), 
 168  * which may finish later; embryonic TCB's can just 
 172 tcp_usr_detach(struct socket 
*so
) 
 175         struct inpcb 
*inp 
= sotoinpcb(so
); 
 179         if (inp 
== 0 || (inp
->inp_state 
== INPCB_STATE_DEAD
)) { 
 180                 return EINVAL
;  /* XXX */ 
 183         lck_mtx_assert(((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
); 
 186         /* In case we got disconnected from the peer */ 
 190         tp 
= tcp_disconnect(tp
); 
 192         TCPDEBUG2(PRU_DETACH
); 
 196 #define COMMON_START()  TCPDEBUG0; \ 
 198                                      if (inp == 0 || (inp->inp_state == INPCB_STATE_DEAD)) { \ 
 201                                      tp = intotcpcb(inp); \ 
 205 #define COMMON_END(req) out: TCPDEBUG2(req); return error; goto out 
 209  * Give the socket an address. 
 212 tcp_usr_bind(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 215         struct inpcb 
*inp 
= sotoinpcb(so
); 
 217         struct sockaddr_in 
*sinp
; 
 222          * Must check for multicast addresses and disallow binding 
 225         sinp 
= (struct sockaddr_in 
*)nam
; 
 226         if (sinp
->sin_family 
== AF_INET 
&& 
 227             IN_MULTICAST(ntohl(sinp
->sin_addr
.s_addr
))) { 
 228                 error 
= EAFNOSUPPORT
; 
 231         error 
= in_pcbbind(inp
, nam
, p
); 
 234         COMMON_END(PRU_BIND
); 
 240 tcp6_usr_bind(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 243         struct inpcb 
*inp 
= sotoinpcb(so
); 
 245         struct sockaddr_in6 
*sin6p
; 
 250          * Must check for multicast addresses and disallow binding 
 253         sin6p 
= (struct sockaddr_in6 
*)nam
; 
 254         if (sin6p
->sin6_family 
== AF_INET6 
&& 
 255             IN6_IS_ADDR_MULTICAST(&sin6p
->sin6_addr
)) { 
 256                 error 
= EAFNOSUPPORT
; 
 259         inp
->inp_vflag 
&= ~INP_IPV4
; 
 260         inp
->inp_vflag 
|= INP_IPV6
; 
 261         if ((inp
->inp_flags 
& IN6P_IPV6_V6ONLY
) == 0) { 
 262                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6p
->sin6_addr
)) 
 263                         inp
->inp_vflag 
|= INP_IPV4
; 
 264                 else if (IN6_IS_ADDR_V4MAPPED(&sin6p
->sin6_addr
)) { 
 265                         struct sockaddr_in sin
; 
 267                         in6_sin6_2_sin(&sin
, sin6p
); 
 268                         inp
->inp_vflag 
|= INP_IPV4
; 
 269                         inp
->inp_vflag 
&= ~INP_IPV6
; 
 270                         error 
= in_pcbbind(inp
, (struct sockaddr 
*)&sin
, p
); 
 274         error 
= in6_pcbbind(inp
, nam
, p
); 
 277         COMMON_END(PRU_BIND
); 
 282  * Prepare to accept connections. 
 285 tcp_usr_listen(struct socket 
*so
, struct proc 
*p
) 
 288         struct inpcb 
*inp 
= sotoinpcb(so
); 
 292         if (inp
->inp_lport 
== 0) 
 293                 error 
= in_pcbbind(inp
, (struct sockaddr 
*)0, p
); 
 295                 tp
->t_state 
= TCPS_LISTEN
; 
 296         COMMON_END(PRU_LISTEN
); 
 301 tcp6_usr_listen(struct socket 
*so
, struct proc 
*p
) 
 304         struct inpcb 
*inp 
= sotoinpcb(so
); 
 308         if (inp
->inp_lport 
== 0) { 
 309                 inp
->inp_vflag 
&= ~INP_IPV4
; 
 310                 if ((inp
->inp_flags 
& IN6P_IPV6_V6ONLY
) == 0) 
 311                         inp
->inp_vflag 
|= INP_IPV4
; 
 312                 error 
= in6_pcbbind(inp
, (struct sockaddr 
*)0, p
); 
 315                 tp
->t_state 
= TCPS_LISTEN
; 
 316         COMMON_END(PRU_LISTEN
); 
 321  * Initiate connection to peer. 
 322  * Create a template for use in transmissions on this connection. 
 323  * Enter SYN_SENT state, and mark socket as connecting. 
 324  * Start keep-alive timer, and seed output sequence space. 
 325  * Send initial segment on connection. 
 328 tcp_usr_connect(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 331         struct inpcb 
*inp 
= sotoinpcb(so
); 
 333         struct sockaddr_in 
*sinp
; 
 338          * Must disallow TCP ``connections'' to multicast addresses. 
 340         sinp 
= (struct sockaddr_in 
*)nam
; 
 341         if (sinp
->sin_family 
== AF_INET
 
 342             && IN_MULTICAST(ntohl(sinp
->sin_addr
.s_addr
))) { 
 343                 error 
= EAFNOSUPPORT
; 
 348         prison_remote_ip(p
, 0, &sinp
->sin_addr
.s_addr
); 
 351         if ((error 
= tcp_connect(tp
, nam
, p
)) != 0) 
 353         error 
= tcp_output(tp
); 
 354         COMMON_END(PRU_CONNECT
); 
 359 tcp6_usr_connect(struct socket 
*so
, struct sockaddr 
*nam
, struct proc 
*p
) 
 362         struct inpcb 
*inp 
= sotoinpcb(so
); 
 364         struct sockaddr_in6 
*sin6p
; 
 369          * Must disallow TCP ``connections'' to multicast addresses. 
 371         sin6p 
= (struct sockaddr_in6 
*)nam
; 
 372         if (sin6p
->sin6_family 
== AF_INET6
 
 373             && IN6_IS_ADDR_MULTICAST(&sin6p
->sin6_addr
)) { 
 374                 error 
= EAFNOSUPPORT
; 
 378         if (IN6_IS_ADDR_V4MAPPED(&sin6p
->sin6_addr
)) { 
 379                 struct sockaddr_in sin
; 
 381                 if ((inp
->inp_flags 
& IN6P_IPV6_V6ONLY
) != 0) 
 384                 in6_sin6_2_sin(&sin
, sin6p
); 
 385                 inp
->inp_vflag 
|= INP_IPV4
; 
 386                 inp
->inp_vflag 
&= ~INP_IPV6
; 
 387                 if ((error 
= tcp_connect(tp
, (struct sockaddr 
*)&sin
, p
)) != 0) 
 389                 error 
= tcp_output(tp
); 
 392         inp
->inp_vflag 
&= ~INP_IPV4
; 
 393         inp
->inp_vflag 
|= INP_IPV6
; 
 394         if ((error 
= tcp6_connect(tp
, nam
, p
)) != 0) 
 396         error 
= tcp_output(tp
); 
 399         COMMON_END(PRU_CONNECT
); 
 404  * Initiate disconnect from peer. 
 405  * If connection never passed embryonic stage, just drop; 
 406  * else if don't need to let data drain, then can just drop anyways, 
 407  * else have to begin TCP shutdown process: mark socket disconnecting, 
 408  * drain unread data, state switch to reflect user close, and 
 409  * send segment (e.g. FIN) to peer.  Socket will be really disconnected 
 410  * when peer sends FIN and acks ours. 
 412  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB. 
 415 tcp_usr_disconnect(struct socket 
*so
) 
 418         struct inpcb 
*inp 
= sotoinpcb(so
); 
 422         lck_mtx_assert(((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
); 
 425         /* In case we got disconnected from the peer */ 
 428         tp 
= tcp_disconnect(tp
); 
 429         COMMON_END(PRU_DISCONNECT
); 
 433  * Accept a connection.  Essentially all the work is 
 434  * done at higher levels; just return the address 
 435  * of the peer, storing through addr. 
 438 tcp_usr_accept(struct socket 
*so
, struct sockaddr 
**nam
) 
 441         struct inpcb 
*inp 
= sotoinpcb(so
); 
 442         struct tcpcb 
*tp 
= NULL
; 
 445         if (so
->so_state 
& SS_ISDISCONNECTED
) { 
 446                 error 
= ECONNABORTED
; 
 449         if (inp 
== 0 || (inp
->inp_state 
== INPCB_STATE_DEAD
)) { 
 454         in_setpeeraddr(so
, nam
); 
 455         COMMON_END(PRU_ACCEPT
); 
 460 tcp6_usr_accept(struct socket 
*so
, struct sockaddr 
**nam
) 
 463         struct inpcb 
*inp 
= sotoinpcb(so
); 
 464         struct tcpcb 
*tp 
= NULL
; 
 467         if (so
->so_state 
& SS_ISDISCONNECTED
) { 
 468                 error 
= ECONNABORTED
; 
 471         if (inp 
== 0 || (inp
->inp_state 
== INPCB_STATE_DEAD
)) { 
 476         in6_mapped_peeraddr(so
, nam
); 
 477         COMMON_END(PRU_ACCEPT
); 
 481  * Mark the connection as being incapable of further output. 
 484 tcp_usr_shutdown(struct socket 
*so
) 
 487         struct inpcb 
*inp 
= sotoinpcb(so
); 
 492         /* In case we got disconnected from the peer */ 
 495         tp 
= tcp_usrclosed(tp
); 
 497                 error 
= tcp_output(tp
); 
 498         COMMON_END(PRU_SHUTDOWN
); 
 502  * After a receive, possibly send window update to peer. 
 505 tcp_usr_rcvd(struct socket 
*so
, int flags
) 
 508         struct inpcb 
*inp 
= sotoinpcb(so
); 
 512         /* In case we got disconnected from the peer */ 
 516         COMMON_END(PRU_RCVD
); 
 520  * Do a send by putting data in output queue and updating urgent 
 521  * marker if URG set.  Possibly send more data.  Unlike the other 
 522  * pru_*() routines, the mbuf chains are our responsibility.  We 
 523  * must either enqueue them or free them.  The other pru_* routines 
 524  * generally are caller-frees. 
 527 tcp_usr_send(struct socket 
*so
, int flags
, struct mbuf 
*m
,  
 528              struct sockaddr 
*nam
, struct mbuf 
*control
, struct proc 
*p
) 
 531         struct inpcb 
*inp 
= sotoinpcb(so
); 
 538         if (inp 
== NULL 
|| inp
->inp_state 
== INPCB_STATE_DEAD
) { 
 540                  * OOPS! we lost a race, the TCP session got reset after 
 541                  * we checked SS_CANTSENDMORE, eg: while doing uiomove or a 
 542                  * network interrupt in the non-splnet() section of sosend(). 
 548                 error 
= ECONNRESET
;     /* XXX EPIPE? */ 
 554         isipv6 
= nam 
&& nam
->sa_family 
== AF_INET6
; 
 559                 /* TCP doesn't do control messages (rights, creds, etc) */ 
 560                 if (control
->m_len
) { 
 567                 m_freem(control
);       /* empty control, just free it */ 
 569         if(!(flags 
& PRUS_OOB
)) { 
 570                 sbappend(&so
->so_snd
, m
); 
 571                 if (nam 
&& tp
->t_state 
< TCPS_SYN_SENT
) { 
 573                          * Do implied connect if not yet connected, 
 574                          * initialize window to default value, and 
 575                          * initialize maxseg/maxopd using peer's cached 
 580                                 error 
= tcp6_connect(tp
, nam
, p
); 
 583                         error 
= tcp_connect(tp
, nam
, p
); 
 586                         tp
->snd_wnd 
= TTCP_CLIENT_SND_WND
; 
 590                 if (flags 
& PRUS_EOF
) { 
 592                          * Close the send side of the connection after 
 596                         tp 
= tcp_usrclosed(tp
); 
 599                         if (flags 
& PRUS_MORETOCOME
) 
 600                                 tp
->t_flags 
|= TF_MORETOCOME
; 
 601                         error 
= tcp_output(tp
); 
 602                         if (flags 
& PRUS_MORETOCOME
) 
 603                                 tp
->t_flags 
&= ~TF_MORETOCOME
; 
 606                 if (sbspace(&so
->so_snd
) < -512) { 
 612                  * According to RFC961 (Assigned Protocols), 
 613                  * the urgent pointer points to the last octet 
 614                  * of urgent data.  We continue, however, 
 615                  * to consider it to indicate the first octet 
 616                  * of data past the urgent section. 
 617                  * Otherwise, snd_up should be one lower. 
 619                 sbappend(&so
->so_snd
, m
); 
 620                 if (nam 
&& tp
->t_state 
< TCPS_SYN_SENT
) { 
 622                          * Do implied connect if not yet connected, 
 623                          * initialize window to default value, and 
 624                          * initialize maxseg/maxopd using peer's cached 
 629                                 error 
= tcp6_connect(tp
, nam
, p
); 
 632                         error 
= tcp_connect(tp
, nam
, p
); 
 635                         tp
->snd_wnd 
= TTCP_CLIENT_SND_WND
; 
 638                 tp
->snd_up 
= tp
->snd_una 
+ so
->so_snd
.sb_cc
; 
 640                 error 
= tcp_output(tp
); 
 643         COMMON_END((flags 
& PRUS_OOB
) ? PRU_SENDOOB 
:  
 644                    ((flags 
& PRUS_EOF
) ? PRU_SEND_EOF 
: PRU_SEND
)); 
 651 tcp_usr_abort(struct socket 
*so
) 
 654         struct inpcb 
*inp 
= sotoinpcb(so
); 
 658         /* In case we got disconnected from the peer */ 
 661         tp 
= tcp_drop(tp
, ECONNABORTED
); 
 663         COMMON_END(PRU_ABORT
); 
 667  * Receive out-of-band data. 
 670 tcp_usr_rcvoob(struct socket 
*so
, struct mbuf 
*m
, int flags
) 
 673         struct inpcb 
*inp 
= sotoinpcb(so
); 
 677         if ((so
->so_oobmark 
== 0 && 
 678              (so
->so_state 
& SS_RCVATMARK
) == 0) || 
 679             so
->so_options 
& SO_OOBINLINE 
|| 
 680             tp
->t_oobflags 
& TCPOOB_HADDATA
) { 
 684         if ((tp
->t_oobflags 
& TCPOOB_HAVEDATA
) == 0) { 
 689         *mtod(m
, caddr_t
) = tp
->t_iobc
; 
 690         if ((flags 
& MSG_PEEK
) == 0) 
 691                 tp
->t_oobflags 
^= (TCPOOB_HAVEDATA 
| TCPOOB_HADDATA
); 
 692         COMMON_END(PRU_RCVOOB
); 
 695 /* xxx - should be const */ 
 696 struct pr_usrreqs tcp_usrreqs 
= { 
 697         tcp_usr_abort
, tcp_usr_accept
, tcp_usr_attach
, tcp_usr_bind
, 
 698         tcp_usr_connect
, pru_connect2_notsupp
, in_control
, tcp_usr_detach
, 
 699         tcp_usr_disconnect
, tcp_usr_listen
, in_setpeeraddr
, tcp_usr_rcvd
, 
 700         tcp_usr_rcvoob
, tcp_usr_send
, pru_sense_null
, tcp_usr_shutdown
, 
 701         in_setsockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
 
 705 struct pr_usrreqs tcp6_usrreqs 
= { 
 706         tcp_usr_abort
, tcp6_usr_accept
, tcp_usr_attach
, tcp6_usr_bind
, 
 707         tcp6_usr_connect
, pru_connect2_notsupp
, in6_control
, tcp_usr_detach
, 
 708         tcp_usr_disconnect
, tcp6_usr_listen
, in6_mapped_peeraddr
, tcp_usr_rcvd
, 
 709         tcp_usr_rcvoob
, tcp_usr_send
, pru_sense_null
, tcp_usr_shutdown
, 
 710         in6_mapped_sockaddr
, sosend
, soreceive
, pru_sopoll_notsupp
 
 715  * Common subroutine to open a TCP connection to remote host specified 
 716  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local 
 717  * port number if needed.  Call in_pcbladdr to do the routing and to choose 
 718  * a local host address (interface).  If there is an existing incarnation 
 719  * of the same connection in TIME-WAIT state and if the remote host was 
 720  * sending CC options and if the connection duration was < MSL, then 
 721  * truncate the previous TIME-WAIT state and proceed. 
 722  * Initialize connection parameters and enter SYN-SENT state. 
 725 tcp_connect(tp
, nam
, p
) 
 726         register struct tcpcb 
*tp
; 
 727         struct sockaddr 
*nam
; 
 730         struct inpcb 
*inp 
= tp
->t_inpcb
, *oinp
; 
 731         struct socket 
*so 
= inp
->inp_socket
; 
 733         struct sockaddr_in 
*sin 
= (struct sockaddr_in 
*)nam
; 
 734         struct sockaddr_in 
*ifaddr
; 
 735         struct rmxp_tao 
*taop
; 
 736         struct rmxp_tao tao_noncached
; 
 739         if (inp
->inp_lport 
== 0) { 
 740                 error 
= in_pcbbind(inp
, (struct sockaddr 
*)0, p
); 
 746          * Cannot simply call in_pcbconnect, because there might be an 
 747          * earlier incarnation of this same connection still in 
 748          * TIME_WAIT state, creating an ADDRINUSE error. 
 750         error 
= in_pcbladdr(inp
, nam
, &ifaddr
); 
 754         tcp_unlock(inp
->inp_socket
, 0, 0); 
 755         oinp 
= in_pcblookup_hash(inp
->inp_pcbinfo
, 
 756             sin
->sin_addr
, sin
->sin_port
, 
 757             inp
->inp_laddr
.s_addr 
!= INADDR_ANY 
? inp
->inp_laddr
 
 759             inp
->inp_lport
,  0, NULL
); 
 761         tcp_lock(inp
->inp_socket
, 0, 0); 
 763                 if (oinp 
!= inp
) /* 4143933: avoid deadlock if inp == oinp */ 
 764                         tcp_lock(oinp
->inp_socket
, 1, 0); 
 765                 if (in_pcb_checkstate(oinp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) { 
 767                                 tcp_unlock(oinp
->inp_socket
, 1, 0); 
 771                 if (oinp 
!= inp 
&& (otp 
= intotcpcb(oinp
)) != NULL 
&& 
 772                 otp
->t_state 
== TCPS_TIME_WAIT 
&& 
 773                     otp
->t_starttime 
< tcp_msl 
&& 
 774                     (otp
->t_flags 
& TF_RCVD_CC
)) 
 775                         otp 
= tcp_close(otp
); 
 777                         printf("tcp_connect: inp=%x err=EADDRINUSE\n", inp
); 
 779                                 tcp_unlock(oinp
->inp_socket
, 1, 0); 
 783                         tcp_unlock(oinp
->inp_socket
, 1, 0); 
 786         if ((inp
->inp_laddr
.s_addr 
== INADDR_ANY 
? ifaddr
->sin_addr
.s_addr 
: 
 787                  inp
->inp_laddr
.s_addr
) == sin
->sin_addr
.s_addr 
&& 
 788             inp
->inp_lport 
== sin
->sin_port
) 
 790         if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) { 
 791                 /*lock inversion issue, mostly with udp multicast packets */ 
 792                 socket_unlock(inp
->inp_socket
, 0); 
 793                 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
); 
 794                 socket_lock(inp
->inp_socket
, 0); 
 796         if (inp
->inp_laddr
.s_addr 
== INADDR_ANY
) 
 797                 inp
->inp_laddr 
= ifaddr
->sin_addr
; 
 798         inp
->inp_faddr 
= sin
->sin_addr
; 
 799         inp
->inp_fport 
= sin
->sin_port
; 
 801         lck_rw_done(inp
->inp_pcbinfo
->mtx
); 
 803         /* Compute window scaling to request.  */ 
 804         while (tp
->request_r_scale 
< TCP_MAX_WINSHIFT 
&& 
 805             (TCP_MAXWIN 
<< tp
->request_r_scale
) < so
->so_rcv
.sb_hiwat
) 
 806                 tp
->request_r_scale
++; 
 809         tcpstat
.tcps_connattempt
++; 
 810         tp
->t_state 
= TCPS_SYN_SENT
; 
 811         tp
->t_timer
[TCPT_KEEP
] = tcp_keepinit
; 
 812         tp
->iss 
= tcp_new_isn(tp
); 
 816          * Generate a CC value for this connection and 
 817          * check whether CC or CCnew should be used. 
 819         if ((taop 
= tcp_gettaocache(tp
->t_inpcb
)) == NULL
) { 
 820                 taop 
= &tao_noncached
; 
 821                 bzero(taop
, sizeof(*taop
)); 
 824         tp
->cc_send 
= CC_INC(tcp_ccgen
); 
 825         if (taop
->tao_ccsent 
!= 0 && 
 826             CC_GEQ(tp
->cc_send
, taop
->tao_ccsent
)) { 
 827                 taop
->tao_ccsent 
= tp
->cc_send
; 
 829                 taop
->tao_ccsent 
= 0; 
 830                 tp
->t_flags 
|= TF_SENDCCNEW
; 
 838 tcp6_connect(tp
, nam
, p
) 
 839         register struct tcpcb 
*tp
; 
 840         struct sockaddr 
*nam
; 
 843         struct inpcb 
*inp 
= tp
->t_inpcb
, *oinp
; 
 844         struct socket 
*so 
= inp
->inp_socket
; 
 846         struct sockaddr_in6 
*sin6 
= (struct sockaddr_in6 
*)nam
; 
 847         struct in6_addr addr6
; 
 848         struct rmxp_tao 
*taop
; 
 849         struct rmxp_tao tao_noncached
; 
 852         if (inp
->inp_lport 
== 0) { 
 853                 error 
= in6_pcbbind(inp
, (struct sockaddr 
*)0, p
); 
 859          * Cannot simply call in_pcbconnect, because there might be an 
 860          * earlier incarnation of this same connection still in 
 861          * TIME_WAIT state, creating an ADDRINUSE error. 
 863         error 
= in6_pcbladdr(inp
, nam
, &addr6
); 
 866         tcp_unlock(inp
->inp_socket
, 0, 0); 
 867         oinp 
= in6_pcblookup_hash(inp
->inp_pcbinfo
, 
 868                                   &sin6
->sin6_addr
, sin6
->sin6_port
, 
 869                                   IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_laddr
) 
 872                                   inp
->inp_lport
,  0, NULL
); 
 873         tcp_lock(inp
->inp_socket
, 0, 0); 
 875                 if (oinp 
!= inp 
&& (otp 
= intotcpcb(oinp
)) != NULL 
&& 
 876                     otp
->t_state 
== TCPS_TIME_WAIT 
&& 
 877                     otp
->t_starttime 
< tcp_msl 
&& 
 878                     (otp
->t_flags 
& TF_RCVD_CC
)) 
 879                         otp 
= tcp_close(otp
); 
 883         if (!lck_rw_try_lock_exclusive(inp
->inp_pcbinfo
->mtx
)) { 
 884                 /*lock inversion issue, mostly with udp multicast packets */ 
 885                 socket_unlock(inp
->inp_socket
, 0); 
 886                 lck_rw_lock_exclusive(inp
->inp_pcbinfo
->mtx
); 
 887                 socket_lock(inp
->inp_socket
, 0); 
 889         if (IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_laddr
)) 
 890                 inp
->in6p_laddr 
= addr6
; 
 891         inp
->in6p_faddr 
= sin6
->sin6_addr
; 
 892         inp
->inp_fport 
= sin6
->sin6_port
; 
 893         if ((sin6
->sin6_flowinfo 
& IPV6_FLOWINFO_MASK
) != NULL
) 
 894                 inp
->in6p_flowinfo 
= sin6
->sin6_flowinfo
; 
 896         lck_rw_done(inp
->inp_pcbinfo
->mtx
); 
 898         /* Compute window scaling to request.  */ 
 899         while (tp
->request_r_scale 
< TCP_MAX_WINSHIFT 
&& 
 900             (TCP_MAXWIN 
<< tp
->request_r_scale
) < so
->so_rcv
.sb_hiwat
) 
 901                 tp
->request_r_scale
++; 
 904         tcpstat
.tcps_connattempt
++; 
 905         tp
->t_state 
= TCPS_SYN_SENT
; 
 906         tp
->t_timer
[TCPT_KEEP
] = tcp_keepinit
; 
 907         tp
->iss 
= tcp_new_isn(tp
); 
 911          * Generate a CC value for this connection and 
 912          * check whether CC or CCnew should be used. 
 914         if ((taop 
= tcp_gettaocache(tp
->t_inpcb
)) == NULL
) { 
 915                 taop 
= &tao_noncached
; 
 916                 bzero(taop
, sizeof(*taop
)); 
 919         tp
->cc_send 
= CC_INC(tcp_ccgen
); 
 920         if (taop
->tao_ccsent 
!= 0 && 
 921             CC_GEQ(tp
->cc_send
, taop
->tao_ccsent
)) { 
 922                 taop
->tao_ccsent 
= tp
->cc_send
; 
 924                 taop
->tao_ccsent 
= 0; 
 925                 tp
->t_flags 
|= TF_SENDCCNEW
; 
 933  * The new sockopt interface makes it possible for us to block in the 
 934  * copyin/out step (if we take a page fault).  Taking a page fault at 
 935  * splnet() is probably a Bad Thing.  (Since sockets and pcbs both now 
 936  * use TSM, there probably isn't any need for this function to run at 
 937  * splnet() any more.  This needs more examination.) 
 940 tcp_ctloutput(so
, sopt
) 
 942         struct sockopt 
*sopt
; 
 944         int     error
, opt
, optval
; 
 953         if (sopt
->sopt_level 
!= IPPROTO_TCP
) { 
 955                 if (INP_CHECK_SOCKAF(so
, AF_INET6
)) 
 956                         error 
= ip6_ctloutput(so
, sopt
); 
 959                 error 
= ip_ctloutput(so
, sopt
); 
 967         switch (sopt
->sopt_dir
) { 
 969                 switch (sopt
->sopt_name
) { 
 973                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
 978                         switch (sopt
->sopt_name
) { 
 989                                 opt 
= 0; /* dead code to fool gcc */ 
1000                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
1005                         if (optval 
> 0 && optval 
<= tp
->t_maxseg 
&& 
1006                             optval 
+ 40 >= tcp_minmss
) 
1007                                 tp
->t_maxseg 
= optval
; 
1013                         error 
= sooptcopyin(sopt
, &optval
, sizeof optval
, 
1020                                 tp
->t_keepidle 
= optval 
* PR_SLOWHZ
; 
1024                         error 
= ENOPROTOOPT
; 
1030                 switch (sopt
->sopt_name
) { 
1032                         optval 
= tp
->t_flags 
& TF_NODELAY
; 
1035                         optval 
= tp
->t_maxseg
; 
1038                         optval 
= tp
->t_keepidle 
/ PR_SLOWHZ
; 
1041                         optval 
= tp
->t_flags 
& TF_NOOPT
; 
1044                         optval 
= tp
->t_flags 
& TF_NOPUSH
; 
1047                         error 
= ENOPROTOOPT
; 
1051                         error 
= sooptcopyout(sopt
, &optval
, sizeof optval
); 
1058  * tcp_sendspace and tcp_recvspace are the default send and receive window 
1059  * sizes, respectively.  These are obsolescent (this information should 
1060  * be set by the route). 
1062 u_long  tcp_sendspace 
= 1024*16; 
1063 SYSCTL_INT(_net_inet_tcp
, TCPCTL_SENDSPACE
, sendspace
, CTLFLAG_RW
,  
1064     &tcp_sendspace 
, 0, "Maximum outgoing TCP datagram size"); 
1065 u_long  tcp_recvspace 
= 1024*16; 
1066 SYSCTL_INT(_net_inet_tcp
, TCPCTL_RECVSPACE
, recvspace
, CTLFLAG_RW
,  
1067     &tcp_recvspace 
, 0, "Maximum incoming TCP datagram size"); 
1069 __private_extern__ 
int  tcp_sockthreshold 
= 256; 
1070 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, sockthreshold
, CTLFLAG_RW
,  
1071     &tcp_sockthreshold 
, 0, "TCP Socket size increased if less than threshold"); 
1073 #define TCP_INCREASED_SPACE     65535   /* Automatically increase tcp send/rcv space to this value */ 
1075  * Attach TCP protocol to socket, allocating 
1076  * internet protocol control block, tcp control block, 
 1077  * buffer space, and entering LISTEN state if to accept connections. 
1084         register struct tcpcb 
*tp
; 
1088         int isipv6 
= INP_CHECK_SOCKAF(so
, AF_INET6
) != NULL
; 
1091         error 
= in_pcballoc(so
, &tcbinfo
, p
); 
1095         inp 
= sotoinpcb(so
); 
1097         if (so
->so_snd
.sb_hiwat 
== 0 || so
->so_rcv
.sb_hiwat 
== 0) { 
1099                  * The goal is to let clients have large send/rcv default windows (TCP_INCREASED_SPACE) 
1100                  * while not hogging mbuf space for servers. This is done by watching a threshold 
1101                  * of tcpcbs in use and bumping the default send and rcvspace only if under that threshold. 
1102                  * The theory being that busy servers have a lot more active tcpcbs and don't want the potential 
1103                  * memory penalty of having much larger sockbuffs. The sysctl allows to fine tune that threshold value.          */ 
1105                 if (inp
->inp_pcbinfo
->ipi_count 
< tcp_sockthreshold
) 
1106                         error 
= soreserve(so
, MAX(TCP_INCREASED_SPACE
, tcp_sendspace
), MAX(TCP_INCREASED_SPACE
,tcp_recvspace
)); 
1108                         error 
= soreserve(so
, tcp_sendspace
, tcp_recvspace
); 
1115                 inp
->inp_vflag 
|= INP_IPV6
; 
1116                 inp
->in6p_hops 
= -1;    /* use kernel default */ 
1120         inp
->inp_vflag 
|= INP_IPV4
; 
1121         tp 
= tcp_newtcpcb(inp
); 
1123                 int nofd 
= so
->so_state 
& SS_NOFDREF
;   /* XXX */ 
1125                 so
->so_state 
&= ~SS_NOFDREF
;    /* don't free the socket yet */ 
1132                 so
->so_state 
|= nofd
; 
1135         tp
->t_state 
= TCPS_CLOSED
; 
1140  * Initiate (or continue) disconnect. 
1141  * If embryonic state, just send reset (once). 
1142  * If in ``let data drain'' option and linger null, just drop. 
1143  * Otherwise (hard), mark socket disconnecting and drop 
1144  * current input data; switch states based on user close, and 
1145  * send segment to peer (with FIN). 
1147 static struct tcpcb 
* 
1149         register struct tcpcb 
*tp
; 
1151         struct socket 
*so 
= tp
->t_inpcb
->inp_socket
; 
1153         if (tp
->t_state 
< TCPS_ESTABLISHED
) 
1155         else if ((so
->so_options 
& SO_LINGER
) && so
->so_linger 
== 0) 
1156                 tp 
= tcp_drop(tp
, 0); 
1158                 soisdisconnecting(so
); 
1159                 sbflush(&so
->so_rcv
); 
1160                 tp 
= tcp_usrclosed(tp
); 
1162                         (void) tcp_output(tp
); 
1168  * User issued close, and wish to trail through shutdown states: 
1169  * if never received SYN, just forget it.  If got a SYN from peer, 
1170  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN. 
1171  * If already got a FIN from peer, then almost done; go to LAST_ACK 
1172  * state.  In all other cases, have already sent FIN to peer (e.g. 
1173  * after PRU_SHUTDOWN), and just have to play tedious game waiting 
1174  * for peer to send FIN or not respond to keep-alives, etc. 
1175  * We can let the user exit from the close as soon as the FIN is acked. 
1177 static struct tcpcb 
* 
1179         register struct tcpcb 
*tp
; 
1182         switch (tp
->t_state
) { 
1186                 tp
->t_state 
= TCPS_CLOSED
; 
1191         case TCPS_SYN_RECEIVED
: 
1192                 tp
->t_flags 
|= TF_NEEDFIN
; 
1195         case TCPS_ESTABLISHED
: 
1196                 tp
->t_state 
= TCPS_FIN_WAIT_1
; 
1199         case TCPS_CLOSE_WAIT
: 
1200                 tp
->t_state 
= TCPS_LAST_ACK
; 
1203         if (tp 
&& tp
->t_state 
>= TCPS_FIN_WAIT_2
) { 
1204                 soisdisconnected(tp
->t_inpcb
->inp_socket
); 
1205                 /* To prevent the connection hanging in FIN_WAIT_2 forever. */ 
1206                 if (tp
->t_state 
== TCPS_FIN_WAIT_2
) 
1207                         tp
->t_timer
[TCPT_2MSL
] = tcp_maxidle
;