2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * The contents of this file constitute Original Code as defined in and 
   7  * are subject to the Apple Public Source License Version 1.1 (the 
   8  * "License").  You may not use this file except in compliance with the 
   9  * License.  Please obtain a copy of the License at 
  10  * http://www.apple.com/publicsource and read it before using this file. 
  12  * This Original Code and all software distributed under the License are 
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the 
  17  * License for the specific language governing rights and limitations 
  20  * @APPLE_LICENSE_HEADER_END@ 
  23  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 
  24  *      The Regents of the University of California.  All rights reserved. 
  26  * Redistribution and use in source and binary forms, with or without 
  27  * modification, are permitted provided that the following conditions 
  29  * 1. Redistributions of source code must retain the above copyright 
  30  *    notice, this list of conditions and the following disclaimer. 
  31  * 2. Redistributions in binary form must reproduce the above copyright 
  32  *    notice, this list of conditions and the following disclaimer in the 
  33  *    documentation and/or other materials provided with the distribution. 
  34  * 3. All advertising materials mentioning features or use of this software 
  35  *    must display the following acknowledgement: 
  36  *      This product includes software developed by the University of 
  37  *      California, Berkeley and its contributors. 
  38  * 4. Neither the name of the University nor the names of its contributors 
  39  *    may be used to endorse or promote products derived from this software 
  40  *    without specific prior written permission. 
  42  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  43  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  44  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  45  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  46  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  47  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  48  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  49  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  50  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  51  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  54  *      @(#)tcp_subr.c  8.2 (Berkeley) 5/24/95 
  55  * $FreeBSD: src/sys/netinet/tcp_subr.c,v 1.73.2.22 2001/08/22 00:59:12 silby Exp $ 
  59 #include <sys/param.h> 
  60 #include <sys/systm.h> 
  61 #include <sys/callout.h> 
  62 #include <sys/kernel.h> 
  63 #include <sys/sysctl.h> 
  64 #include <sys/malloc.h> 
  67 #include <sys/domain.h> 
  70 #include <sys/kauth.h> 
  71 #include <sys/socket.h> 
  72 #include <sys/socketvar.h> 
  73 #include <sys/protosw.h> 
  74 #include <sys/random.h> 
  75 #include <sys/syslog.h> 
  76 #include <kern/locks.h> 
  80 #include <net/route.h> 
  84 #include <netinet/in.h> 
  85 #include <netinet/in_systm.h> 
  86 #include <netinet/ip.h> 
  88 #include <netinet/ip6.h> 
  90 #include <netinet/in_pcb.h> 
  92 #include <netinet6/in6_pcb.h> 
  94 #include <netinet/in_var.h> 
  95 #include <netinet/ip_var.h> 
  97 #include <netinet6/ip6_var.h> 
  99 #include <netinet/tcp.h> 
 100 #include <netinet/tcp_fsm.h> 
 101 #include <netinet/tcp_seq.h> 
 102 #include <netinet/tcp_timer.h> 
 103 #include <netinet/tcp_var.h> 
 105 #include <netinet6/tcp6_var.h> 
 107 #include <netinet/tcpip.h> 
 109 #include <netinet/tcp_debug.h> 
 111 #include <netinet6/ip6protosw.h> 
 114 #include <netinet6/ipsec.h> 
 116 #include <netinet6/ipsec6.h> 
 121 #include <sys/kdebug.h> 
 123 #define DBG_FNC_TCP_CLOSE       NETDBG_CODE(DBG_NETTCP, ((5 << 8) | 2)) 
 125 extern int tcp_lq_overflow
; 
 127 /* temporary: for testing */ 
 129 extern int ipsec_bypass
; 
 130 extern lck_mtx_t 
*sadb_mutex
; 
 133 int     tcp_mssdflt 
= TCP_MSS
; 
 134 SYSCTL_INT(_net_inet_tcp
, TCPCTL_MSSDFLT
, mssdflt
, CTLFLAG_RW
,  
 135     &tcp_mssdflt 
, 0, "Default TCP Maximum Segment Size"); 
 138 int     tcp_v6mssdflt 
= TCP6_MSS
; 
 139 SYSCTL_INT(_net_inet_tcp
, TCPCTL_V6MSSDFLT
, v6mssdflt
, 
 140         CTLFLAG_RW
, &tcp_v6mssdflt 
, 0, 
 141         "Default TCP Maximum Segment Size for IPv6"); 
 145  * Minimum MSS we accept and use. This prevents DoS attacks where 
 146  * we are forced to a ridiculously low MSS like 20 and send hundreds 
 147  * of packets instead of one. The effect scales with the available 
 148  * bandwidth and quickly saturates the CPU and network interface 
 149  * with packet generation and sending. Set to zero to disable MINMSS 
 150  * checking. This setting prevents us from sending too small packets. 
 152 int     tcp_minmss 
= TCP_MINMSS
; 
 153 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, minmss
, CTLFLAG_RW
, 
 154     &tcp_minmss 
, 0, "Minmum TCP Maximum Segment Size"); 
 157  * Number of TCP segments per second we accept from remote host 
 158  * before we start to calculate average segment size. If average 
 159  * segment size drops below the minimum TCP MSS we assume a DoS 
 160  * attack and reset+drop the connection. Take care not to set this 
 161  * value so small that it kills interactive connections 
 162  * (telnet, SSH), which send many small packets. 
 164 #ifdef FIX_WORKAROUND_FOR_3894301 
 165 __private_extern__ 
int     tcp_minmssoverload 
= TCP_MINMSSOVERLOAD
; 
 167 __private_extern__ 
int     tcp_minmssoverload 
= 0; 
 169 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, minmssoverload
, CTLFLAG_RW
, 
 170     &tcp_minmssoverload 
, 0, "Number of TCP Segments per Second allowed to" 
 171     "be under the MINMSS Size"); 
 173 static int      tcp_do_rfc1323 
= 1; 
 174 SYSCTL_INT(_net_inet_tcp
, TCPCTL_DO_RFC1323
, rfc1323
, CTLFLAG_RW
,  
 175     &tcp_do_rfc1323 
, 0, "Enable rfc1323 (high performance TCP) extensions"); 
 177 static int      tcp_do_rfc1644 
= 0; 
 178 SYSCTL_INT(_net_inet_tcp
, TCPCTL_DO_RFC1644
, rfc1644
, CTLFLAG_RW
,  
 179     &tcp_do_rfc1644 
, 0, "Enable rfc1644 (TTCP) extensions"); 
 181 static int      tcp_tcbhashsize 
= 0; 
 182 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, tcbhashsize
, CTLFLAG_RD
, 
 183      &tcp_tcbhashsize
, 0, "Size of TCP control-block hashtable"); 
 185 static int      do_tcpdrain 
= 0; 
 186 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, do_tcpdrain
, CTLFLAG_RW
, &do_tcpdrain
, 0, 
 187      "Enable tcp_drain routine for extra help when low on mbufs"); 
 189 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
,  
 190     &tcbinfo
.ipi_count
, 0, "Number of active PCBs"); 
 192 static int      icmp_may_rst 
= 1; 
 193 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, icmp_may_rst
, CTLFLAG_RW
, &icmp_may_rst
, 0,  
 194     "Certain ICMP unreachable messages may abort connections in SYN_SENT"); 
 196 static int      tcp_strict_rfc1948 
= 0; 
 197 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, strict_rfc1948
, CTLFLAG_RW
, 
 198     &tcp_strict_rfc1948
, 0, "Determines if RFC1948 is followed exactly"); 
 200 static int      tcp_isn_reseed_interval 
= 0; 
 201 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, isn_reseed_interval
, CTLFLAG_RW
, 
 202     &tcp_isn_reseed_interval
, 0, "Seconds between reseeding of ISN secret"); 
 204 static void     tcp_cleartaocache(void); 
 205 static void     tcp_notify(struct inpcb 
*, int); 
 208  * Target size of TCP PCB hash tables. Must be a power of two. 
 210  * Note that this can be overridden by the kernel environment 
 211  * variable net.inet.tcp.tcbhashsize 
 214 #define TCBHASHSIZE     4096 
 218  * This is the actual shape of what we allocate using the zone 
 219  * allocator.  Doing it this way allows us to protect both structures 
 220  * using the same generation count, and also eliminates the overhead 
 221  * of allocating tcpcbs separately.  By hiding the structure here, 
 222  * we avoid changing most of the rest of the code (although it needs 
 223  * to be changed, eventually, for greater efficiency). 
 226 #define ALIGNM1         (ALIGNMENT - 1) 
 230                 char    align
[(sizeof(struct inpcb
) + ALIGNM1
) & ~ALIGNM1
]; 
 234         struct  callout inp_tp_rexmt
, inp_tp_persist
, inp_tp_keep
, inp_tp_2msl
; 
 235         struct  callout inp_tp_delack
; 
 241 static struct tcpcb dummy_tcb
; 
 244 extern struct   inpcbhead       time_wait_slots
[]; 
 245 extern int              cur_tw_slot
; 
 246 extern u_long           
*delack_bitmask
; 
 247 extern u_long  route_generation
; 
 250 int  get_inpcb_str_size() 
 252         return sizeof(struct inpcb
); 
 256 int  get_tcp_str_size() 
 258         return sizeof(struct tcpcb
); 
 261 int     tcp_freeq(struct tcpcb 
*tp
); 
 270         int hashsize 
= TCBHASHSIZE
; 
 273         struct inpcbinfo 
*pcbinfo
; 
 278         tcp_delacktime 
= TCPTV_DELACK
; 
 279         tcp_keepinit 
= TCPTV_KEEP_INIT
; 
 280         tcp_keepidle 
= TCPTV_KEEP_IDLE
; 
 281         tcp_keepintvl 
= TCPTV_KEEPINTVL
; 
 282         tcp_maxpersistidle 
= TCPTV_KEEP_IDLE
; 
 284         read_random(&tcp_now
, sizeof(tcp_now
)); 
 285         tcp_now  
= tcp_now 
& 0x7fffffff; /* Starts tcp internal 500ms clock at a random value */ 
 289         tcbinfo
.listhead 
= &tcb
; 
 292         TUNABLE_INT_FETCH("net.inet.tcp.tcbhashsize", &hashsize
); 
 294         if (!powerof2(hashsize
)) { 
 295                 printf("WARNING: TCB hash size not a power of 2\n"); 
 296                 hashsize 
= 512; /* safe default */ 
 298         tcp_tcbhashsize 
= hashsize
; 
 299         tcbinfo
.hashsize 
= hashsize
; 
 300         tcbinfo
.hashbase 
= hashinit(hashsize
, M_PCB
, &tcbinfo
.hashmask
); 
 301         tcbinfo
.porthashbase 
= hashinit(hashsize
, M_PCB
, 
 302                                         &tcbinfo
.porthashmask
); 
 304         str_size 
= (vm_size_t
) sizeof(struct inp_tp
); 
 305         tcbinfo
.ipi_zone 
= (void *) zinit(str_size
, 120000*str_size
, 8192, "tcpcb"); 
 307         tcbinfo
.ipi_zone 
= zinit("tcpcb", sizeof(struct inp_tp
), maxsockets
, 
 311         tcp_reass_maxseg 
= nmbclusters 
/ 16; 
 313         TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", 
 318 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) 
 320 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr)) 
 322         if (max_protohdr 
< TCP_MINPROTOHDR
) 
 323                 max_protohdr 
= TCP_MINPROTOHDR
; 
 324         if (max_linkhdr 
+ TCP_MINPROTOHDR 
> MHLEN
) 
 326 #undef TCP_MINPROTOHDR 
 327         dummy_tcb
.t_state 
= TCP_NSTATES
; 
 328         dummy_tcb
.t_flags 
= 0; 
 329         tcbinfo
.dummy_cb 
= (caddr_t
) &dummy_tcb
; 
 332          * allocate lock group attribute and group for tcp pcb mutexes 
 334      pcbinfo
->mtx_grp_attr 
= lck_grp_attr_alloc_init(); 
 335         lck_grp_attr_setdefault(pcbinfo
->mtx_grp_attr
); 
 336         pcbinfo
->mtx_grp 
= lck_grp_alloc_init("tcppcb", pcbinfo
->mtx_grp_attr
); 
 339          * allocate the lock attribute for tcp pcb mutexes 
 341         pcbinfo
->mtx_attr 
= lck_attr_alloc_init(); 
 342         lck_attr_setdefault(pcbinfo
->mtx_attr
); 
 344         if ((pcbinfo
->mtx 
= lck_rw_alloc_init(pcbinfo
->mtx_grp
, pcbinfo
->mtx_attr
)) == NULL
) { 
 345                 printf("tcp_init: mutex not alloced!\n"); 
 346                 return; /* pretty much dead if this fails... */ 
 350         in_pcb_nat_init(&tcbinfo
, AF_INET
, IPPROTO_TCP
, SOCK_STREAM
); 
 352         delack_bitmask 
= _MALLOC((4 * hashsize
)/32, M_PCB
, M_WAITOK
); 
 353         if (delack_bitmask 
== 0)  
 354              panic("Delack Memory"); 
 356         for (i
=0; i 
< (tcbinfo
.hashsize 
/ 32); i
++) 
 357                  delack_bitmask
[i
] = 0; 
 359         for (i
=0; i 
< N_TIME_WAIT_SLOTS
; i
++) { 
 360              LIST_INIT(&time_wait_slots
[i
]); 
 365  * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. 
 366  * tcp_template used to store this data in mbufs, but we now recopy it out 
 367  * of the tcpcb each time to conserve mbufs. 
 370 tcp_fillheaders(tp
, ip_ptr
, tcp_ptr
) 
 375         struct inpcb 
*inp 
= tp
->t_inpcb
; 
 376         struct tcphdr 
*tcp_hdr 
= (struct tcphdr 
*)tcp_ptr
; 
 379         if ((inp
->inp_vflag 
& INP_IPV6
) != 0) { 
 382                 ip6 
= (struct ip6_hdr 
*)ip_ptr
; 
 383                 ip6
->ip6_flow 
= (ip6
->ip6_flow 
& ~IPV6_FLOWINFO_MASK
) | 
 384                         (inp
->in6p_flowinfo 
& IPV6_FLOWINFO_MASK
); 
 385                 ip6
->ip6_vfc 
= (ip6
->ip6_vfc 
& ~IPV6_VERSION_MASK
) | 
 386                         (IPV6_VERSION 
& IPV6_VERSION_MASK
); 
 387                 ip6
->ip6_nxt 
= IPPROTO_TCP
; 
 388                 ip6
->ip6_plen 
= sizeof(struct tcphdr
); 
 389                 ip6
->ip6_src 
= inp
->in6p_laddr
; 
 390                 ip6
->ip6_dst 
= inp
->in6p_faddr
; 
 395         struct ip 
*ip 
= (struct ip 
*) ip_ptr
; 
 397         ip
->ip_vhl 
= IP_VHL_BORING
; 
 404         ip
->ip_p 
= IPPROTO_TCP
; 
 405         ip
->ip_src 
= inp
->inp_laddr
; 
 406         ip
->ip_dst 
= inp
->inp_faddr
; 
 407         tcp_hdr
->th_sum 
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
, 
 408                 htons(sizeof(struct tcphdr
) + IPPROTO_TCP
)); 
 411         tcp_hdr
->th_sport 
= inp
->inp_lport
; 
 412         tcp_hdr
->th_dport 
= inp
->inp_fport
; 
 417         tcp_hdr
->th_flags 
= 0; 
 423  * Create template to be used to send tcp packets on a connection. 
 424  * Allocates an mbuf and fills in a skeletal tcp/ip header.  The only 
 425  * use for this function is in keepalives, which use tcp_respond. 
 434         m 
= m_get(M_DONTWAIT
, MT_HEADER
); 
 437         m
->m_len 
= sizeof(struct tcptemp
); 
 438         n 
= mtod(m
, struct tcptemp 
*); 
 440         tcp_fillheaders(tp
, (void *)&n
->tt_ipgen
, (void *)&n
->tt_t
); 
 445  * Send a single message to the TCP at address specified by 
 446  * the given TCP/IP header.  If m == 0, then we make a copy 
 447  * of the tcpiphdr at ti and send directly to the addressed host. 
 448  * This is used to force keep alive messages out using the TCP 
 449  * template for a connection.  If flags are given then we send 
 450  * a message back to the TCP which originated the * segment ti, 
 451  * and discard the mbuf containing it and any other attached mbufs. 
 453  * In any case the ack and sequence number of the transmitted 
 454  * segment are as specified by the parameters. 
 456  * NOTE: If m != NULL, then ti must point to *inside* the mbuf. 
 459 tcp_respond(tp
, ipgen
, th
, m
, ack
, seq
, flags
) 
 462         register struct tcphdr 
*th
; 
 463         register struct mbuf 
*m
; 
 469         struct route 
*ro 
= 0; 
 474         struct route_in6 
*ro6 
= 0; 
 475         struct route_in6 sro6
; 
 482         isipv6 
= IP_VHL_V(((struct ip 
*)ipgen
)->ip_vhl
) == 6; 
 488                 if (!(flags 
& TH_RST
)) { 
 489                         win 
= sbspace(&tp
->t_inpcb
->inp_socket
->so_rcv
); 
 490                         if (win 
> (long)TCP_MAXWIN 
<< tp
->rcv_scale
) 
 491                                 win 
= (long)TCP_MAXWIN 
<< tp
->rcv_scale
; 
 495                         ro6 
= &tp
->t_inpcb
->in6p_route
; 
 498                 ro 
= &tp
->t_inpcb
->inp_route
; 
 503                         bzero(ro6
, sizeof *ro6
); 
 508                         bzero(ro
, sizeof *ro
); 
 512                 m 
= m_gethdr(M_DONTWAIT
, MT_HEADER
); 
 516                 m
->m_data 
+= max_linkhdr
; 
 519                         bcopy((caddr_t
)ip6
, mtod(m
, caddr_t
),  
 520                               sizeof(struct ip6_hdr
)); 
 521                         ip6 
= mtod(m
, struct ip6_hdr 
*); 
 522                         nth 
= (struct tcphdr 
*)(ip6 
+ 1); 
 526                         bcopy((caddr_t
)ip
, mtod(m
, caddr_t
), sizeof(struct ip
)); 
 527                         ip 
= mtod(m
, struct ip 
*); 
 528                         nth 
= (struct tcphdr 
*)(ip 
+ 1); 
 530                 bcopy((caddr_t
)th
, (caddr_t
)nth
, sizeof(struct tcphdr
)); 
 535                 m
->m_data 
= (caddr_t
)ipgen
; 
 536                 /* m_len is set later */ 
 538 #define xchg(a,b,type) { type t; t=a; a=b; b=t; } 
 541                         xchg(ip6
->ip6_dst
, ip6
->ip6_src
, struct in6_addr
); 
 542                         nth 
= (struct tcphdr 
*)(ip6 
+ 1); 
 546                 xchg(ip
->ip_dst
.s_addr
, ip
->ip_src
.s_addr
, n_long
); 
 547                 nth 
= (struct tcphdr 
*)(ip 
+ 1); 
 551                          * this is usually a case when an extension header 
 552                          * exists between the IPv6 header and the 
 555                         nth
->th_sport 
= th
->th_sport
; 
 556                         nth
->th_dport 
= th
->th_dport
; 
 558                 xchg(nth
->th_dport
, nth
->th_sport
, n_short
); 
 563                 ip6
->ip6_plen 
= htons((u_short
)(sizeof (struct tcphdr
) + 
 565                 tlen 
+= sizeof (struct ip6_hdr
) + sizeof (struct tcphdr
); 
 569         tlen 
+= sizeof (struct tcpiphdr
); 
 571         ip
->ip_ttl 
= ip_defttl
; 
 574         m
->m_pkthdr
.len 
= tlen
; 
 575         m
->m_pkthdr
.rcvif 
= 0; 
 576         nth
->th_seq 
= htonl(seq
); 
 577         nth
->th_ack 
= htonl(ack
); 
 579         nth
->th_off 
= sizeof (struct tcphdr
) >> 2; 
 580         nth
->th_flags 
= flags
; 
 582                 nth
->th_win 
= htons((u_short
) (win 
>> tp
->rcv_scale
)); 
 584                 nth
->th_win 
= htons((u_short
)win
); 
 589                 nth
->th_sum 
= in6_cksum(m
, IPPROTO_TCP
, 
 590                                         sizeof(struct ip6_hdr
), 
 591                                         tlen 
- sizeof(struct ip6_hdr
)); 
 592                 ip6
->ip6_hlim 
= in6_selecthlim(tp 
? tp
->t_inpcb 
: NULL
, 
 599                 nth
->th_sum 
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
, 
 600                 htons((u_short
)(tlen 
- sizeof(struct ip
) + ip
->ip_p
))); 
 601                 m
->m_pkthdr
.csum_flags 
= CSUM_TCP
; 
 602                 m
->m_pkthdr
.csum_data 
= offsetof(struct tcphdr
, th_sum
); 
 605         if (tp 
== NULL 
|| (tp
->t_inpcb
->inp_socket
->so_options 
& SO_DEBUG
)) 
 606                 tcp_trace(TA_OUTPUT
, 0, tp
, mtod(m
, void *), th
, 0); 
 609         if (ipsec_bypass 
== 0 && ipsec_setsocket(m
, tp 
? tp
->t_inpcb
->inp_socket 
: NULL
) != 0) { 
 616                 (void)ip6_output(m
, NULL
, ro6
, ipflags
, NULL
, NULL
, 0); 
 617                 if (ro6 
== &sro6 
&& ro6
->ro_rt
) { 
 624                 (void) ip_output_list(m
, 0, NULL
, ro
, ipflags
, NULL
); 
 625                 if (ro 
== &sro 
&& ro
->ro_rt
) { 
 633  * Create a new TCP control block, making an 
 634  * empty reassembly queue and hooking it to the argument 
 635  * protocol control block.  The `inp' parameter must have 
 636  * come from the zone allocator set up in tcp_init(). 
 643         register struct tcpcb 
*tp
; 
 644         register struct socket 
*so 
= inp
->inp_socket
;    
 646         int isipv6 
= (inp
->inp_vflag 
& INP_IPV6
) != 0; 
 649         if (so
->cached_in_sock_layer 
== 0) { 
 650              it 
= (struct inp_tp 
*)inp
; 
 654              tp 
= (struct tcpcb 
*) inp
->inp_saved_ppcb
; 
 656         bzero((char *) tp
, sizeof(struct tcpcb
)); 
 657         LIST_INIT(&tp
->t_segq
); 
 658         tp
->t_maxseg 
= tp
->t_maxopd 
= 
 660                 isipv6 
? tcp_v6mssdflt 
: 
 665         /* Set up our timeouts. */ 
 666         callout_init(tp
->tt_rexmt 
= &it
->inp_tp_rexmt
); 
 667         callout_init(tp
->tt_persist 
= &it
->inp_tp_persist
); 
 668         callout_init(tp
->tt_keep 
= &it
->inp_tp_keep
); 
 669         callout_init(tp
->tt_2msl 
= &it
->inp_tp_2msl
); 
 670         callout_init(tp
->tt_delack 
= &it
->inp_tp_delack
); 
 674                 tp
->t_flags 
= (TF_REQ_SCALE
|TF_REQ_TSTMP
); 
 676                 tp
->t_flags 
|= TF_REQ_CC
; 
 677         tp
->t_inpcb 
= inp
;      /* XXX */ 
 679          * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no 
 680          * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives 
 681          * reasonable initial retransmit time. 
 683         tp
->t_srtt 
= TCPTV_SRTTBASE
; 
 684         tp
->t_rttvar 
= ((TCPTV_RTOBASE 
- TCPTV_SRTTBASE
) << TCP_RTTVAR_SHIFT
) / 4; 
 685         tp
->t_rttmin 
= TCPTV_MIN
; 
 686         tp
->t_rxtcur 
= TCPTV_RTOBASE
; 
 687         tp
->snd_cwnd 
= TCP_MAXWIN 
<< TCP_MAX_WINSHIFT
; 
 688         tp
->snd_ssthresh 
= TCP_MAXWIN 
<< TCP_MAX_WINSHIFT
; 
 690          * IPv4 TTL initialization is necessary for an IPv6 socket as well, 
 691          * because the socket may be bound to an IPv6 wildcard address, 
 692          * which may match an IPv4-mapped IPv6 address. 
 694         inp
->inp_ip_ttl 
= ip_defttl
; 
 695         inp
->inp_ppcb 
= (caddr_t
)tp
; 
 696         return (tp
);            /* XXX */ 
 700  * Drop a TCP connection, reporting 
 701  * the specified error.  If connection is synchronized, 
 702  * then send a RST to peer. 
 706         register struct tcpcb 
*tp
; 
 709         struct socket 
*so 
= tp
->t_inpcb
->inp_socket
; 
 714         case TCPS_ESTABLISHED
: 
 715         case TCPS_FIN_WAIT_1
: 
 717         case TCPS_CLOSE_WAIT
: 
 723         if (TCPS_HAVERCVDSYN(tp
->t_state
)) { 
 724                 tp
->t_state 
= TCPS_CLOSED
; 
 725                 (void) tcp_output(tp
); 
 726                 tcpstat
.tcps_drops
++; 
 728                 tcpstat
.tcps_conndrops
++; 
 729         if (errno 
== ETIMEDOUT 
&& tp
->t_softerror
) 
 730                 errno 
= tp
->t_softerror
; 
 731         so
->so_error 
= errno
; 
 732         return (tcp_close(tp
)); 
 736  * Close a TCP control block: 
 737  *      discard all space held by the tcp 
 738  *      discard internet protocol block 
 739  *      wake up any sleepers 
 743         register struct tcpcb 
*tp
; 
 745         register struct tseg_qent 
*q
; 
 746         struct inpcb 
*inp 
= tp
->t_inpcb
; 
 747         struct socket 
*so 
= inp
->inp_socket
; 
 749         int isipv6 
= (inp
->inp_vflag 
& INP_IPV6
) != 0; 
 751         register struct rtentry 
*rt
; 
 754         if ( inp
->inp_ppcb 
== NULL
) /* tcp_close was called previously, bail */ 
 759          * Make sure that all of our timers are stopped before we 
 762         callout_stop(tp
->tt_rexmt
); 
 763         callout_stop(tp
->tt_persist
); 
 764         callout_stop(tp
->tt_keep
); 
 765         callout_stop(tp
->tt_2msl
); 
 766         callout_stop(tp
->tt_delack
); 
 768         /* Clear the timers before we delete the PCB. */ 
 771                 for (i 
= 0; i 
< TCPT_NTIMERS
; i
++) { 
 777         KERNEL_DEBUG(DBG_FNC_TCP_CLOSE 
| DBG_FUNC_START
, tp
,0,0,0,0); 
 780         case TCPS_ESTABLISHED
: 
 781         case TCPS_FIN_WAIT_1
: 
 783         case TCPS_CLOSE_WAIT
: 
 790          * If we got enough samples through the srtt filter, 
 791          * save the rtt and rttvar in the routing entry. 
 792          * 'Enough' is arbitrarily defined as the 16 samples. 
 793          * 16 samples is enough for the srtt filter to converge 
 794          * to within 5% of the correct value; fewer samples and 
 795          * we could save a very bogus rtt. 
 797          * Don't update the default route's characteristics and don't 
 798          * update anything that the user "locked". 
 800         if (tp
->t_rttupdated 
>= 16) { 
 801                 register u_long i 
= 0; 
 804                         struct sockaddr_in6 
*sin6
; 
 806                         if ((rt 
= inp
->in6p_route
.ro_rt
) == NULL
) 
 808                         sin6 
= (struct sockaddr_in6 
*)rt_key(rt
); 
 809                         if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
)) 
 814                 rt 
= inp
->inp_route
.ro_rt
;       
 816                     ((struct sockaddr_in 
*)rt_key(rt
))->sin_addr
.s_addr
 
 817                     == INADDR_ANY 
|| rt
->generation_id 
!= route_generation
) { 
 818                         if (tp
->t_state 
>= TCPS_CLOSE_WAIT
) 
 819                                 tp
->t_state 
= TCPS_CLOSING
; 
 824                 if ((rt
->rt_rmx
.rmx_locks 
& RTV_RTT
) == 0) { 
 826                             (RTM_RTTUNIT 
/ (PR_SLOWHZ 
* TCP_RTT_SCALE
)); 
 827                         if (rt
->rt_rmx
.rmx_rtt 
&& i
) 
 829                                  * filter this update to half the old & half 
 830                                  * the new values, converting scale. 
 831                                  * See route.h and tcp_var.h for a 
 832                                  * description of the scaling constants. 
 835                                     (rt
->rt_rmx
.rmx_rtt 
+ i
) / 2; 
 837                                 rt
->rt_rmx
.rmx_rtt 
= i
; 
 838                         tcpstat
.tcps_cachedrtt
++; 
 840                 if ((rt
->rt_rmx
.rmx_locks 
& RTV_RTTVAR
) == 0) { 
 842                             (RTM_RTTUNIT 
/ (PR_SLOWHZ 
* TCP_RTTVAR_SCALE
)); 
 843                         if (rt
->rt_rmx
.rmx_rttvar 
&& i
) 
 844                                 rt
->rt_rmx
.rmx_rttvar 
= 
 845                                     (rt
->rt_rmx
.rmx_rttvar 
+ i
) / 2; 
 847                                 rt
->rt_rmx
.rmx_rttvar 
= i
; 
 848                         tcpstat
.tcps_cachedrttvar
++; 
 851                  * The old comment here said: 
 852                  * update the pipelimit (ssthresh) if it has been updated 
 853                  * already or if a pipesize was specified & the threshhold 
 854                  * got below half the pipesize.  I.e., wait for bad news 
 855                  * before we start updating, then update on both good 
 858                  * But we want to save the ssthresh even if no pipesize is 
 859                  * specified explicitly in the route, because such 
 860                  * connections still have an implicit pipesize specified 
 861                  * by the global tcp_sendspace.  In the absence of a reliable 
 862                  * way to calculate the pipesize, it will have to do. 
 864                 i 
= tp
->snd_ssthresh
; 
 865                 if (rt
->rt_rmx
.rmx_sendpipe 
!= 0) 
 866                         dosavessthresh 
= (i 
< rt
->rt_rmx
.rmx_sendpipe 
/ 2); 
 868                         dosavessthresh 
= (i 
< so
->so_snd
.sb_hiwat 
/ 2); 
 869                 if (((rt
->rt_rmx
.rmx_locks 
& RTV_SSTHRESH
) == 0 && 
 870                      i 
!= 0 && rt
->rt_rmx
.rmx_ssthresh 
!= 0) 
 873                          * convert the limit from user data bytes to 
 874                          * packets then to packet data bytes. 
 876                         i 
= (i 
+ tp
->t_maxseg 
/ 2) / tp
->t_maxseg
; 
 879                         i 
*= (u_long
)(tp
->t_maxseg 
+ 
 881                                       (isipv6 
? sizeof (struct ip6_hdr
) + 
 882                                                sizeof (struct tcphdr
) : 
 884                                        sizeof (struct tcpiphdr
) 
 889                         if (rt
->rt_rmx
.rmx_ssthresh
) 
 890                                 rt
->rt_rmx
.rmx_ssthresh 
= 
 891                                     (rt
->rt_rmx
.rmx_ssthresh 
+ i
) / 2; 
 893                                 rt
->rt_rmx
.rmx_ssthresh 
= i
; 
 894                         tcpstat
.tcps_cachedssthresh
++; 
 897         rt 
= inp
->inp_route
.ro_rt
; 
 900                  * mark route for deletion if no information is 
 903                 if ((tp
->t_flags 
& TF_LQ_OVERFLOW
) && tcp_lq_overflow 
&&  
 904                     ((rt
->rt_rmx
.rmx_locks 
& RTV_RTT
) == 0)){ 
 905                         if (rt
->rt_rmx
.rmx_rtt 
== 0) 
 906                                 rt
->rt_flags 
|= RTF_DELCLONE
; 
 910         /* free the reassembly queue, if any */ 
 911         (void) tcp_freeq(tp
); 
 914         if (so
->cached_in_sock_layer
) 
 915             inp
->inp_saved_ppcb 
= (caddr_t
) tp
; 
 918         soisdisconnected(so
); 
 920         if (INP_CHECK_SOCKAF(so
, AF_INET6
)) 
 925         tcpstat
.tcps_closed
++; 
 926         KERNEL_DEBUG(DBG_FNC_TCP_CLOSE 
| DBG_FUNC_END
, tcpstat
.tcps_closed
,0,0,0,0); 
 927         return ((struct tcpcb 
*)0); 
 935         register struct tseg_qent 
*q
; 
 938         while((q 
= LIST_FIRST(&tp
->t_segq
)) != NULL
) { 
 939                 LIST_REMOVE(q
, tqe_q
); 
 952  * ###LD 05/19/04: tcp_drain is disabled because of a locking issue — it can deadlock against tcbinfo.mtx 
 958                 struct tseg_qent 
*te
; 
 961          * Walk the tcpbs, if existing, and flush the reassembly queue, 
 963          * XXX: The "Net/3" implementation doesn't imply that the TCP 
 964          *      reassembly queue should be flushed, but in a situation 
 965          *      where we're really low on mbufs, this is potentially 
 968                 lck_rw_lock_exclusive(tcbinfo
.mtx
); 
 969                 for (inpb 
= LIST_FIRST(tcbinfo
.listhead
); inpb
; 
 970                         inpb 
= LIST_NEXT(inpb
, inp_list
)) { 
 971                                 if ((tcpb 
= intotcpcb(inpb
))) { 
 972                                         while ((te 
= LIST_FIRST(&tcpb
->t_segq
)) 
 974                                         LIST_REMOVE(te
, tqe_q
); 
 981                 lck_rw_done(tcbinfo
.mtx
); 
 987  * Notify a tcp user of an asynchronous error; 
 988  * store error as soft error, but wake up user 
 989  * (for now, won't do anything until can select for soft error). 
 991  * Do not wake up user since there currently is no mechanism for 
 992  * reporting soft errors (yet - a kqueue filter may be added). 
 995 tcp_notify(inp
, error
) 
1001         if (inp 
== NULL 
|| (inp
->inp_state 
== INPCB_STATE_DEAD
))  
1002                 return; /* pcb is gone already */ 
1004         tp 
= (struct tcpcb 
*)inp
->inp_ppcb
; 
1007          * Ignore some errors if we are hooked up. 
1008          * If connection hasn't completed, has retransmitted several times, 
1009          * and receives a second error, give up now.  This is better 
1010          * than waiting a long time to establish a connection that 
1011          * can never complete. 
1013         if (tp
->t_state 
== TCPS_ESTABLISHED 
&& 
1014              (error 
== EHOSTUNREACH 
|| error 
== ENETUNREACH 
|| 
1015               error 
== EHOSTDOWN
)) { 
1017         } else if (tp
->t_state 
< TCPS_ESTABLISHED 
&& tp
->t_rxtshift 
> 3 && 
1019                 tcp_drop(tp
, error
); 
1021                 tp
->t_softerror 
= error
; 
1023         wakeup((caddr_t
) &so
->so_timeo
); 
1030 tcp_pcblist SYSCTL_HANDLER_ARGS
 
1033         struct inpcb 
*inp
, **inp_list
; 
1038          * The process of preparing the TCB list is too time-consuming and 
1039          * resource-intensive to repeat twice on every request. 
1041         lck_rw_lock_shared(tcbinfo
.mtx
); 
1042         if (req
->oldptr 
== USER_ADDR_NULL
) { 
1043                 n 
= tcbinfo
.ipi_count
; 
1044                 req
->oldidx 
= 2 * (sizeof xig
) 
1045                         + (n 
+ n
/8) * sizeof(struct xtcpcb
); 
1046                 lck_rw_done(tcbinfo
.mtx
); 
1050         if (req
->newptr 
!= USER_ADDR_NULL
) { 
1051                 lck_rw_done(tcbinfo
.mtx
); 
1056          * OK, now we're committed to doing something. 
1058         gencnt 
= tcbinfo
.ipi_gencnt
; 
1059         n 
= tcbinfo
.ipi_count
; 
1061         bzero(&xig
, sizeof(xig
)); 
1062         xig
.xig_len 
= sizeof xig
; 
1064         xig
.xig_gen 
= gencnt
; 
1065         xig
.xig_sogen 
= so_gencnt
; 
1066         error 
= SYSCTL_OUT(req
, &xig
, sizeof xig
); 
1068                 lck_rw_done(tcbinfo
.mtx
); 
1072          * We are done if there is no pcb 
1075             lck_rw_done(tcbinfo
.mtx
); 
1079         inp_list 
= _MALLOC(n 
* sizeof *inp_list
, M_TEMP
, M_WAITOK
); 
1080         if (inp_list 
== 0) { 
1081                 lck_rw_done(tcbinfo
.mtx
); 
1085         for (inp 
= LIST_FIRST(tcbinfo
.listhead
), i 
= 0; inp 
&& i 
< n
; 
1086              inp 
= LIST_NEXT(inp
, inp_list
)) { 
1088                 if (inp
->inp_gencnt 
<= gencnt 
&& inp
->inp_state 
!= INPCB_STATE_DEAD
) 
1090                 if (inp
->inp_gencnt 
<= gencnt 
&& !prison_xinpcb(req
->p
, inp
)) 
1092                         inp_list
[i
++] = inp
; 
1097         for (i 
= 0; i 
< n
; i
++) { 
1099                 if (inp
->inp_gencnt 
<= gencnt 
&& inp
->inp_state 
!= INPCB_STATE_DEAD
) { 
1103                         bzero(&xt
, sizeof(xt
)); 
1104                         xt
.xt_len 
= sizeof xt
; 
1105                         /* XXX should avoid extra copy */ 
1106                         inpcb_to_compat(inp
, &xt
.xt_inp
); 
1107                         inp_ppcb 
= inp
->inp_ppcb
; 
1108                         if (inp_ppcb 
!= NULL
) { 
1109                                 bcopy(inp_ppcb
, &xt
.xt_tp
, sizeof xt
.xt_tp
); 
1112                                 bzero((char *) &xt
.xt_tp
, sizeof xt
.xt_tp
); 
1113                         if (inp
->inp_socket
) 
1114                                 sotoxsocket(inp
->inp_socket
, &xt
.xt_socket
); 
1115                         error 
= SYSCTL_OUT(req
, &xt
, sizeof xt
); 
1120                  * Give the user an updated idea of our state. 
1121                  * If the generation differs from what we told 
1122                  * her before, she knows that something happened 
1123                  * while we were processing this request, and it 
1124                  * might be necessary to retry. 
1126                 bzero(&xig
, sizeof(xig
)); 
1127                 xig
.xig_len 
= sizeof xig
; 
1128                 xig
.xig_gen 
= tcbinfo
.ipi_gencnt
; 
1129                 xig
.xig_sogen 
= so_gencnt
; 
1130                 xig
.xig_count 
= tcbinfo
.ipi_count
; 
1131                 error 
= SYSCTL_OUT(req
, &xig
, sizeof xig
); 
1133         FREE(inp_list
, M_TEMP
); 
1134         lck_rw_done(tcbinfo
.mtx
); 
1138 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_PCBLIST
, pcblist
, CTLFLAG_RD
, 0, 0, 
1139             tcp_pcblist
, "S,xtcpcb", "List of active TCP connections"); 
1143 tcp_getcred(SYSCTL_HANDLER_ARGS
) 
1145         struct sockaddr_in addrs
[2]; 
1149         error 
= suser(req
->p
); 
1152         error 
= SYSCTL_IN(req
, addrs
, sizeof(addrs
)); 
1156         inp 
= in_pcblookup_hash(&tcbinfo
, addrs
[1].sin_addr
, addrs
[1].sin_port
, 
1157             addrs
[0].sin_addr
, addrs
[0].sin_port
, 0, NULL
); 
1158         if (inp 
== NULL 
|| inp
->inp_socket 
== NULL
) { 
1162         error 
= SYSCTL_OUT(req
, inp
->inp_socket
->so_cred
, sizeof(*(kauth_cred_t
)0); 
1168 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, getcred
, CTLTYPE_OPAQUE
|CTLFLAG_RW
, 
1169     0, 0, tcp_getcred
, "S,ucred", "Get the ucred of a TCP connection"); 
1173 tcp6_getcred(SYSCTL_HANDLER_ARGS
) 
1175         struct sockaddr_in6 addrs
[2]; 
1177         int error
, s
, mapped 
= 0; 
1179         error 
= suser(req
->p
); 
1182         error 
= SYSCTL_IN(req
, addrs
, sizeof(addrs
)); 
1185         if (IN6_IS_ADDR_V4MAPPED(&addrs
[0].sin6_addr
)) { 
1186                 if (IN6_IS_ADDR_V4MAPPED(&addrs
[1].sin6_addr
)) 
1193                 inp 
= in_pcblookup_hash(&tcbinfo
, 
1194                         *(struct in_addr 
*)&addrs
[1].sin6_addr
.s6_addr
[12], 
1196                         *(struct in_addr 
*)&addrs
[0].sin6_addr
.s6_addr
[12], 
1200                 inp 
= in6_pcblookup_hash(&tcbinfo
, &addrs
[1].sin6_addr
, 
1202                                  &addrs
[0].sin6_addr
, addrs
[0].sin6_port
, 
1204         if (inp 
== NULL 
|| inp
->inp_socket 
== NULL
) { 
1208         error 
= SYSCTL_OUT(req
, inp
->inp_socket
->so_cred
,  
1209                            sizeof(*(kauth_cred_t
)0); 
1215 SYSCTL_PROC(_net_inet6_tcp6
, OID_AUTO
, getcred
, CTLTYPE_OPAQUE
|CTLFLAG_RW
, 
1217             tcp6_getcred
, "S,ucred", "Get the ucred of a TCP6 connection"); 
1219 #endif /* __APPLE__*/ 
1222 tcp_ctlinput(cmd
, sa
, vip
) 
1224         struct sockaddr 
*sa
; 
1227         struct ip 
*ip 
= vip
; 
1229         struct in_addr faddr
; 
1232         void (*notify
)(struct inpcb 
*, int) = tcp_notify
; 
1236         faddr 
= ((struct sockaddr_in 
*)sa
)->sin_addr
; 
1237         if (sa
->sa_family 
!= AF_INET 
|| faddr
.s_addr 
== INADDR_ANY
) 
1240         if (cmd 
== PRC_QUENCH
) 
1241                 notify 
= tcp_quench
; 
1242         else if (icmp_may_rst 
&& (cmd 
== PRC_UNREACH_ADMIN_PROHIB 
|| 
1243                 cmd 
== PRC_UNREACH_PORT
) && ip
) 
1244                 notify 
= tcp_drop_syn_sent
; 
1245         else if (cmd 
== PRC_MSGSIZE
) 
1246                 notify 
= tcp_mtudisc
; 
1247         else if (PRC_IS_REDIRECT(cmd
)) { 
1249                 notify 
= in_rtchange
; 
1250         } else if (cmd 
== PRC_HOSTDEAD
) 
1252         else if ((unsigned)cmd 
> PRC_NCMDS 
|| inetctlerrmap
[cmd
] == 0) 
1255                 th 
= (struct tcphdr 
*)((caddr_t
)ip 
 
1256                                        + (IP_VHL_HL(ip
->ip_vhl
) << 2)); 
1257                 inp 
= in_pcblookup_hash(&tcbinfo
, faddr
, th
->th_dport
, 
1258                     ip
->ip_src
, th
->th_sport
, 0, NULL
); 
1259                 if (inp 
!= NULL 
&& inp
->inp_socket 
!= NULL
) { 
1260                         tcp_lock(inp
->inp_socket
, 1, 0); 
1261                         if (in_pcb_checkstate(inp
, WNT_RELEASE
, 1) == WNT_STOPUSING
) { 
1262                                 tcp_unlock(inp
->inp_socket
, 1, 0); 
1265                         icmp_seq 
= htonl(th
->th_seq
); 
1266                         tp 
= intotcpcb(inp
); 
1267                         if (SEQ_GEQ(icmp_seq
, tp
->snd_una
) && 
1268                             SEQ_LT(icmp_seq
, tp
->snd_max
)) 
1269                                 (*notify
)(inp
, inetctlerrmap
[cmd
]); 
1270                         tcp_unlock(inp
->inp_socket
, 1, 0); 
1273                 in_pcbnotifyall(&tcbinfo
, faddr
, inetctlerrmap
[cmd
], notify
); 
1278 tcp6_ctlinput(cmd
, sa
, d
) 
1280         struct sockaddr 
*sa
; 
1284         void (*notify
)(struct inpcb 
*, int) = tcp_notify
; 
1285         struct ip6_hdr 
*ip6
; 
1287         struct ip6ctlparam 
*ip6cp 
= NULL
; 
1288         const struct sockaddr_in6 
*sa6_src 
= NULL
; 
1290         struct tcp_portonly 
{ 
1295         if (sa
->sa_family 
!= AF_INET6 
|| 
1296             sa
->sa_len 
!= sizeof(struct sockaddr_in6
)) 
1299         if (cmd 
== PRC_QUENCH
) 
1300                 notify 
= tcp_quench
; 
1301         else if (cmd 
== PRC_MSGSIZE
) 
1302                 notify 
= tcp_mtudisc
; 
1303         else if (!PRC_IS_REDIRECT(cmd
) && 
1304                  ((unsigned)cmd 
> PRC_NCMDS 
|| inet6ctlerrmap
[cmd
] == 0)) 
1307         /* if the parameter is from icmp6, decode it. */ 
1309                 ip6cp 
= (struct ip6ctlparam 
*)d
; 
1311                 ip6 
= ip6cp
->ip6c_ip6
; 
1312                 off 
= ip6cp
->ip6c_off
; 
1313                 sa6_src 
= ip6cp
->ip6c_src
; 
1317                 off 
= 0;        /* fool gcc */ 
1323                  * XXX: We assume that when IPV6 is non NULL, 
1324                  * M and OFF are valid. 
1327                 /* check if we can safely examine src and dst ports */ 
1328                 if (m
->m_pkthdr
.len 
< off 
+ sizeof(*thp
)) 
1331                 bzero(&th
, sizeof(th
)); 
1332                 m_copydata(m
, off
, sizeof(*thp
), (caddr_t
)&th
); 
1334                 in6_pcbnotify(&tcbinfo
, sa
, th
.th_dport
, 
1335                     (struct sockaddr 
*)ip6cp
->ip6c_src
, 
1336                     th
.th_sport
, cmd
, notify
); 
1338                 in6_pcbnotify(&tcbinfo
, sa
, 0, (struct sockaddr 
*)sa6_src
, 
1345  * Following is where TCP initial sequence number generation occurs. 
1347  * There are two places where we must use initial sequence numbers: 
1348  * 1.  In SYN-ACK packets. 
1349  * 2.  In SYN packets. 
1351  * The ISNs in SYN-ACK packets have no monotonicity requirement,  
1352  * and should be as unpredictable as possible to avoid the possibility 
1353  * of spoofing and/or connection hijacking.  To satisfy this 
1354  * requirement, SYN-ACK ISNs are generated via the arc4random() 
1355  * function.  If exact RFC 1948 compliance is requested via sysctl, 
1356  * these ISNs will be generated just like those in SYN packets. 
1358  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling 
1359  * depends on this property.  In addition, these ISNs should be 
1360  * unguessable so as to prevent connection hijacking.  To satisfy 
1361  * the requirements of this situation, the algorithm outlined in 
1362  * RFC 1948 is used to generate sequence numbers. 
1364  * For more information on the theory of operation, please see 
1367  * Implementation details: 
 1369  * Time is based off the system timer, and is corrected so that it 
 1370  * increases by one megabyte per second.  This allows for proper 
 1371  * recycling on high speed LANs while still leaving over an hour before rollover. 
1374  * Two sysctls control the generation of ISNs: 
1376  * net.inet.tcp.isn_reseed_interval controls the number of seconds 
1377  * between seeding of isn_secret.  This is normally set to zero, 
1378  * as reseeding should not be necessary. 
1380  * net.inet.tcp.strict_rfc1948 controls whether RFC 1948 is followed 
1381  * strictly.  When strict compliance is requested, reseeding is 
1382  * disabled and SYN-ACKs will be generated in the same manner as 
1383  * SYNs.  Strict mode is disabled by default. 
1387 #define ISN_BYTES_PER_SECOND 1048576 
1389 u_char isn_secret
[32]; 
1390 int isn_last_reseed
; 
1397         u_int32_t md5_buffer
[4]; 
1399         struct timeval time
; 
1401         /* Use arc4random for SYN-ACKs when not in exact RFC1948 mode. */ 
1402         if (((tp
->t_state 
== TCPS_LISTEN
) || (tp
->t_state 
== TCPS_TIME_WAIT
)) 
1403            && tcp_strict_rfc1948 
== 0) 
1407                 return arc4random(); 
1410         /* Seed if this is the first use, reseed if requested. */ 
1411         if ((isn_last_reseed 
== 0) || 
1412             ((tcp_strict_rfc1948 
== 0) && (tcp_isn_reseed_interval 
> 0) && 
1413              (((u_int
)isn_last_reseed 
+ (u_int
)tcp_isn_reseed_interval
*hz
) 
1414                 < (u_int
)time
.tv_sec
))) { 
1416                 read_random(&isn_secret
, sizeof(isn_secret
)); 
1418                 read_random_unlimited(&isn_secret
, sizeof(isn_secret
)); 
1420                 isn_last_reseed 
= time
.tv_sec
; 
1423         /* Compute the md5 hash and return the ISN. */ 
1425         MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->inp_fport
, sizeof(u_short
)); 
1426         MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->inp_lport
, sizeof(u_short
)); 
1428         if ((tp
->t_inpcb
->inp_vflag 
& INP_IPV6
) != 0) { 
1429                 MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->in6p_faddr
, 
1430                           sizeof(struct in6_addr
)); 
1431                 MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->in6p_laddr
, 
1432                           sizeof(struct in6_addr
)); 
1436                 MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->inp_faddr
, 
1437                           sizeof(struct in_addr
)); 
1438                 MD5Update(&isn_ctx
, (u_char 
*) &tp
->t_inpcb
->inp_laddr
, 
1439                           sizeof(struct in_addr
)); 
1441         MD5Update(&isn_ctx
, (u_char 
*) &isn_secret
, sizeof(isn_secret
)); 
1442         MD5Final((u_char 
*) &md5_buffer
, &isn_ctx
); 
1443         new_isn 
= (tcp_seq
) md5_buffer
[0]; 
1444         new_isn 
+= time
.tv_sec 
* (ISN_BYTES_PER_SECOND 
/ hz
); 
1449  * When a source quench is received, close congestion window 
1450  * to one segment.  We will gradually open it again as we proceed. 
1453 tcp_quench(inp
, errno
) 
1457         struct tcpcb 
*tp 
= intotcpcb(inp
); 
1460                 tp
->snd_cwnd 
= tp
->t_maxseg
; 
1464  * When a specific ICMP unreachable message is received and the 
1465  * connection state is SYN-SENT, drop the connection.  This behavior 
1466  * is controlled by the icmp_may_rst sysctl. 
1469 tcp_drop_syn_sent(inp
, errno
) 
1473         struct tcpcb 
*tp 
= intotcpcb(inp
); 
1475         if (tp 
&& tp
->t_state 
== TCPS_SYN_SENT
) 
1476                 tcp_drop(tp
, errno
); 
1480  * When `need fragmentation' ICMP is received, update our idea of the MSS 
1481  * based on the new value in the route.  Also nudge TCP to send something, 
1482  * since we know the packet we just sent was dropped. 
1483  * This duplicates some code in the tcp_mss() function in tcp_input.c. 
1486 tcp_mtudisc(inp
, errno
) 
1490         struct tcpcb 
*tp 
= intotcpcb(inp
); 
1492         struct rmxp_tao 
*taop
; 
1493         struct socket 
*so 
= inp
->inp_socket
; 
1497         int isipv6 
= (tp
->t_inpcb
->inp_vflag 
& INP_IPV6
) != 0; 
1503                         rt 
= tcp_rtlookup6(inp
); 
1506                 rt 
= tcp_rtlookup(inp
); 
1507                 if (!rt 
|| !rt
->rt_rmx
.rmx_mtu
) { 
1508                         tp
->t_maxopd 
= tp
->t_maxseg 
= 
1510                                 isipv6 
? tcp_v6mssdflt 
: 
1515                 taop 
= rmx_taop(rt
->rt_rmx
); 
1516                 offered 
= taop
->tao_mssopt
; 
1517                 mss 
= rt
->rt_rmx
.rmx_mtu 
- 
1520                          sizeof(struct ip6_hdr
) + sizeof(struct tcphdr
) : 
1522                          sizeof(struct tcpiphdr
) 
1529                         mss 
= min(mss
, offered
); 
1531                  * XXX - The above conditional probably violates the TCP 
1532                  * spec.  The problem is that, since we don't know the 
1533                  * other end's MSS, we are supposed to use a conservative 
1534                  * default.  But, if we do that, then MTU discovery will 
1535                  * never actually take place, because the conservative 
1536                  * default is much less than the MTUs typically seen 
1537                  * on the Internet today.  For the moment, we'll sweep 
1538                  * this under the carpet. 
1540                  * The conservative default might not actually be a problem 
1541                  * if the only case this occurs is when sending an initial 
1542                  * SYN with options and data to a host we've never talked 
1543                  * to before.  Then, they will reply with an MSS value which 
1544                  * will get recorded and the new parameters should get 
1545                  * recomputed.  For Further Study. 
1547                 if (tp
->t_maxopd 
<= mss
) 
1551                 if ((tp
->t_flags 
& (TF_REQ_TSTMP
|TF_NOOPT
)) == TF_REQ_TSTMP 
&& 
1552                     (tp
->t_flags 
& TF_RCVD_TSTMP
) == TF_RCVD_TSTMP
) 
1553                         mss 
-= TCPOLEN_TSTAMP_APPA
; 
1554                 if ((tp
->t_flags 
& (TF_REQ_CC
|TF_NOOPT
)) == TF_REQ_CC 
&& 
1555                     (tp
->t_flags 
& TF_RCVD_CC
) == TF_RCVD_CC
) 
1556                         mss 
-= TCPOLEN_CC_APPA
; 
1558                 if (so
->so_snd
.sb_hiwat 
< mss
) 
1559                         mss 
= so
->so_snd
.sb_hiwat
; 
1563                 tcpstat
.tcps_mturesent
++; 
1565                 tp
->snd_nxt 
= tp
->snd_una
; 
1571  * Look-up the routing entry to the peer of this inpcb.  If no route 
1572  * is found and it cannot be allocated the return NULL.  This routine 
1573  * is called by TCP routines that access the rmx structure and by tcp_mss 
1574  * to get the interface MTU. 
1583         ro 
= &inp
->inp_route
; 
1587         if (rt 
== NULL 
|| !(rt
->rt_flags 
& RTF_UP
) || rt
->generation_id 
!= route_generation
) { 
1588                 /* No route yet, so try to acquire one */ 
1589                 if (inp
->inp_faddr
.s_addr 
!= INADDR_ANY
) { 
1590                         ro
->ro_dst
.sa_family 
= AF_INET
; 
1591                         ro
->ro_dst
.sa_len 
= sizeof(struct sockaddr_in
); 
1592                         ((struct sockaddr_in 
*) &ro
->ro_dst
)->sin_addr 
= 
1606         struct route_in6 
*ro6
; 
1609         ro6 
= &inp
->in6p_route
; 
1611         if (rt 
== NULL 
|| !(rt
->rt_flags 
& RTF_UP
)) { 
1612                 /* No route yet, so try to acquire one */ 
1613                 if (!IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_faddr
)) { 
1614                         struct sockaddr_in6 
*dst6
; 
1616                         dst6 
= (struct sockaddr_in6 
*)&ro6
->ro_dst
; 
1617                         dst6
->sin6_family 
= AF_INET6
; 
1618                         dst6
->sin6_len 
= sizeof(*dst6
); 
1619                         dst6
->sin6_addr 
= inp
->in6p_faddr
; 
1620                         rtalloc((struct route 
*)ro6
); 
1629 /* compute ESP/AH header size for TCP, including outer IP header. */ 
1631 ipsec_hdrsiz_tcp(tp
) 
1639         struct ip6_hdr 
*ip6 
= NULL
; 
1643         if ((tp 
== NULL
) || ((inp 
= tp
->t_inpcb
) == NULL
)) 
1645         MGETHDR(m
, M_DONTWAIT
, MT_DATA
); 
1649         lck_mtx_lock(sadb_mutex
); 
1651         if ((inp
->inp_vflag 
& INP_IPV6
) != 0) { 
1652                 ip6 
= mtod(m
, struct ip6_hdr 
*); 
1653                 th 
= (struct tcphdr 
*)(ip6 
+ 1); 
1654                 m
->m_pkthdr
.len 
= m
->m_len 
= 
1655                         sizeof(struct ip6_hdr
) + sizeof(struct tcphdr
); 
1656                 tcp_fillheaders(tp
, ip6
, th
); 
1657                 hdrsiz 
= ipsec6_hdrsiz(m
, IPSEC_DIR_OUTBOUND
, inp
); 
1661         ip 
= mtod(m
, struct ip 
*); 
1662         th 
= (struct tcphdr 
*)(ip 
+ 1); 
1663         m
->m_pkthdr
.len 
= m
->m_len 
= sizeof(struct tcpiphdr
); 
1664         tcp_fillheaders(tp
, ip
, th
); 
1665         hdrsiz 
= ipsec4_hdrsiz(m
, IPSEC_DIR_OUTBOUND
, inp
); 
1667         lck_mtx_unlock(sadb_mutex
); 
1674  * Return a pointer to the cached information about the remote host. 
1675  * The cached information is stored in the protocol specific part of 
1676  * the route metrics. 
1679 tcp_gettaocache(inp
) 
1685         if ((inp
->inp_vflag 
& INP_IPV6
) != 0) 
1686                 rt 
= tcp_rtlookup6(inp
); 
1689         rt 
= tcp_rtlookup(inp
); 
1691         /* Make sure this is a host route and is up. */ 
1693             (rt
->rt_flags 
& (RTF_UP
|RTF_HOST
)) != (RTF_UP
|RTF_HOST
)) 
1696         return rmx_taop(rt
->rt_rmx
); 
1700  * Clear all the TAO cache entries, called from tcp_init. 
 1703  * This routine is just an empty one, because we assume that the 
 1704  * routing tables are initialized at the same time as TCP, so there is 
 1705  * nothing in the cache left over. 
1713 tcp_lock(so
, refcount
, lr
) 
1721                 __asm__ 
volatile("mflr %0" : "=r" (lr_saved
)); 
1727                 lck_mtx_lock(((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
); 
1730                 panic("tcp_lock: so=%x NO PCB! lr=%x\n", so
, lr_saved
); 
1731                 lck_mtx_lock(so
->so_proto
->pr_domain
->dom_mtx
); 
1734         if (so
->so_usecount 
< 0) 
1735                 panic("tcp_lock: so=%x so_pcb=%x lr=%x ref=%x\n", 
1736                 so
, so
->so_pcb
, lr_saved
, so
->so_usecount
); 
1740         so
->reserved3 
= (void *)lr_saved
; 
1745 tcp_unlock(so
, refcount
, lr
) 
1753                 __asm__ 
volatile("mflr %0" : "=r" (lr_saved
)); 
1758 #ifdef MORE_TCPLOCK_DEBUG 
1759         printf("tcp_unlock: so=%x sopcb=%x lock=%x ref=%x lr=%x\n",  
1760                 so
, so
->so_pcb
, ((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
, so
->so_usecount
, lr_saved
); 
1765         if (so
->so_usecount 
< 0) 
1766                 panic("tcp_unlock: so=%x usecount=%x\n", so
, so
->so_usecount
);   
1767         if (so
->so_pcb 
== NULL
) { 
1768                 panic("tcp_unlock: so=%x NO PCB usecount=%x lr=%x\n", so
, so
->so_usecount
, lr_saved
); 
1769                 lck_mtx_unlock(so
->so_proto
->pr_domain
->dom_mtx
); 
1772                 lck_mtx_assert(((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
, LCK_MTX_ASSERT_OWNED
); 
1773                 lck_mtx_unlock(((struct inpcb 
*)so
->so_pcb
)->inpcb_mtx
); 
1775         so
->reserved4 
= (void *)lr_saved
; 
1780 tcp_getlock(so
, locktype
) 
1784         struct inpcb 
*inp 
= sotoinpcb(so
); 
1787                 if (so
->so_usecount 
< 0) 
1788                         panic("tcp_getlock: so=%x usecount=%x\n", so
, so
->so_usecount
);  
1789                 return(inp
->inpcb_mtx
); 
1792                 panic("tcp_getlock: so=%x NULL so_pcb\n", so
); 
1793                 return (so
->so_proto
->pr_domain
->dom_mtx
);