/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)tcp_input.c	8.12 (Berkeley) 5/24/95
 * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.16 2001/08/22 00:59:12 silby Exp $
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public Source
 * License, Version 2.0.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/proc.h>		/* for proc0 declaration */
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>

#include <kern/cpu_number.h>	/* before tcp_seq.h, for tcp_random18() */

#include <machine/endian.h>

#include <net/if_types.h>
#include <net/route.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>	/* for ICMP_BANDLIM */
#include <netinet/in_var.h>
#include <netinet/icmp_var.h>	/* for ICMP_BANDLIM */
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
#if TCPDEBUG
#include <netinet/tcp_debug.h>
u_char tcp_saveipgen[40]; /* the size must be of max ip header, now IPv6 */
struct tcphdr tcp_savetcp;
#endif /* TCPDEBUG */
#include <netinet6/ipsec.h>
#include <netinet6/ipsec6.h>
#include <netkey/key.h>

#if CONFIG_MACF_NET || CONFIG_MACF_SOCKET
#include <security/mac_framework.h>
#endif /* CONFIG_MACF_NET || CONFIG_MACF_SOCKET */

#include <sys/kdebug.h>
MALLOC_DEFINE(M_TSEGQ, "tseg_qent", "TCP segment queue entry");

#define DBG_LAYER_BEG		NETDBG_CODE(DBG_NETTCP, 0)
#define DBG_LAYER_END		NETDBG_CODE(DBG_NETTCP, 2)
#define DBG_FNC_TCP_INPUT	NETDBG_CODE(DBG_NETTCP, (3 << 8))
#define DBG_FNC_TCP_NEWCONN	NETDBG_CODE(DBG_NETTCP, (7 << 8))
static int	tcprexmtthresh = 2;

extern int ipsec_bypass;

struct	tcpstat tcpstat;

static int log_in_vain = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_in_vain, CTLFLAG_RW,
    &log_in_vain, 0, "Log all incoming TCP connections");

static int blackhole = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, blackhole, CTLFLAG_RW,
    &blackhole, 0, "Do not send RST when dropping refused connections");

int tcp_delack_enabled = 3;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_RW,
    &tcp_delack_enabled, 0,
    "Delay ACK to try and piggyback it onto a data packet");

int tcp_lq_overflow = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
    &tcp_lq_overflow, 0,
    "Listen Queue Overflow");
static int drop_synfin = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
    &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");

SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
    "TCP Segment Reassembly Queue");
__private_extern__ int tcp_reass_maxseg = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RW,
    &tcp_reass_maxseg, 0,
    "Global maximum number of TCP Segments in Reassembly Queue");

__private_extern__ int tcp_reass_qsize = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
    &tcp_reass_qsize, 0,
    "Global number of TCP Segments currently in Reassembly Queue");

static int tcp_reass_overflows = 0;
SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
    &tcp_reass_overflows, 0,
    "Global number of TCP Segment Reassembly Queue Overflows");
__private_extern__ int slowlink_wsize = 8192;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW,
    &slowlink_wsize, 0, "Maximum advertised window size for slowlink");

static int maxseg_unacked = 8;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, maxseg_unacked, CTLFLAG_RW,
    &maxseg_unacked, 0, "Maximum number of outstanding segments left unacked");

static int	tcp_do_rfc3465 = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_RW,
    &tcp_do_rfc3465, 0, "");

static int	tcp_do_rfc3465_lim2 = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465_lim2, CTLFLAG_RW,
    &tcp_do_rfc3465_lim2, 0, "Appropriate bytes counting w/ L=2*SMSS");
#if CONFIG_IFEF_NOWINDOWSCALE
int tcp_obey_ifef_nowindowscale = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, obey_ifef_nowindowscale, CTLFLAG_RW,
    &tcp_obey_ifef_nowindowscale, 0, "");
#endif /* CONFIG_IFEF_NOWINDOWSCALE */

extern int tcp_TCPTV_MIN;

struct inpcbhead tcb;
#define	tcb6	tcb  /* for KAME src sync over BSD*'s */
struct inpcbinfo tcbinfo;
static void	 tcp_dooptions(struct tcpcb *,
    u_char *, int, struct tcphdr *, struct tcpopt *, unsigned int);
static void	 tcp_pulloutofband(struct socket *,
    struct tcphdr *, struct mbuf *, int);
static int	 tcp_reass(struct tcpcb *, struct tcphdr *, int *,
    struct mbuf *);
static void	tcp_xmit_timer(struct tcpcb *, int);
static inline unsigned int tcp_maxmtu(struct rtentry *);
static inline unsigned int tcp_maxmtu6(struct rtentry *);
/* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */
#define ND6_HINT(tp) \
do { \
	if ((tp) && (tp)->t_inpcb && \
	    ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \
	    (tp)->t_inpcb->in6p_route.ro_rt) \
		nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \
} while (0)
extern u_int32_t	*delack_bitmask;

extern void	add_to_time_wait(struct tcpcb *);
extern void	postevent(struct socket *, struct sockbuf *, int);

extern	void	ipfwsyslog( int level, const char *format,...);
extern int ChkAddressOK( __uint32_t dstaddr, __uint32_t srcaddr );
extern int fw_verbose;
__private_extern__ int tcp_sockthreshold;
__private_extern__ int tcp_win_scale;
#define log_in_vain_log( a ) {						\
	if ( (log_in_vain == 3 ) && (fw_verbose == 2)) {	/* Apple logging, log to ipfw.log */ \
		ipfwsyslog a ;						\
	} else {							\
		log a ;							\
	}								\
}
#else
#define log_in_vain_log( a ) { log a; }
#endif
/*
 * Indicate whether this ack should be delayed.
 * We can delay the ack if:
 *  - delayed acks are enabled (set to 1) and
 *      - our last ack wasn't a 0-sized window.  We never want to delay
 *        the ack that opens up a 0-sized window.
 *  - delayed acks are enabled (set to 2, "more compatible") and
 *      - our last ack wasn't a 0-sized window.
 *      - the peer hasn't sent us a TH_PUSH data packet (this solves 3649245).
 *        If it did, take that as a clue that we need to ACK without delay;
 *        this helps higher-level protocols that won't send us more data
 *        even though the window is open, because their last "segment"
 *        hasn't been ACKed yet.
 *  - delayed acks are enabled (set to 3, "streaming detection") and
 *      - we receive more than "maxseg_unacked" full packets per second
 *        on this socket, and
 *      - we don't have more than "maxseg_unacked" segments delayed so far.
 *      - if those criteria aren't met, it acts like "2", allowing faster
 *        acking while browsing, for example.
 */
#define DELAY_ACK(tp) \
	(((tcp_delack_enabled == 1) && ((tp->t_flags & TF_RXWIN0SENT) == 0)) || \
	 (((tcp_delack_enabled == 2) && (tp->t_flags & TF_RXWIN0SENT) == 0) && \
	   ((thflags & TH_PUSH) == 0) && ((tp->t_flags & TF_DELACK) == 0)) || \
	 (((tcp_delack_enabled == 3) && (tp->t_flags & TF_RXWIN0SENT) == 0) && \
	   (tp->t_rcvtime == 0) && ((thflags & TH_PUSH) == 0) && \
	   (((tp->t_unacksegs == 0)) || \
	   ((tp->rcv_byps > (maxseg_unacked * tp->t_maxseg)) && (tp->t_unacksegs < maxseg_unacked)))))
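/*
 * Note: DELAY_ACK() only decides whether the pending ACK may be held back
 * (TF_DELACK) instead of being flagged for immediate transmission
 * (TF_ACKNOW); the delayed-ack timer still bounds how long a held-back
 * ACK can be deferred before it is sent.
 */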
static int tcp_dropdropablreq(struct socket *head);
static void tcp_newreno_partial_ack(struct tcpcb *tp, struct tcphdr *th);
static int
tcp_reass(tp, th, tlenp, m)
	register struct tcpcb *tp;
	register struct tcphdr *th;
	int *tlenp;
	struct mbuf *m;
{
	struct tseg_qent *q;
	struct tseg_qent *p = NULL;
	struct tseg_qent *nq;
	struct tseg_qent *te = NULL;
	struct socket *so = tp->t_inpcb->inp_socket;
	int flags;
	int dowakeup = 0;
	/*
	 * Call with th==0 after becoming established to
	 * force pre-ESTABLISHED data up to the user socket.
	 */
	if (th == 0)
		goto present;

	/*
	 * Limit the number of segments in the reassembly queue to prevent
	 * holding on to too many segments (and thus running out of mbufs).
	 * Make sure to let the missing segment through which caused this
	 * queue.  Always keep one global queue entry spare to be able to
	 * process the missing segment.
	 */
	if (th->th_seq != tp->rcv_nxt &&
	    tcp_reass_qsize + 1 >= tcp_reass_maxseg) {
		tcp_reass_overflows++;
		tcpstat.tcps_rcvmemdrop++;
	/* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
	MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
	       M_NOWAIT);
	if (te == NULL) {
		tcpstat.tcps_rcvmemdrop++;
	/*
	 * Find a segment which begins after this one does.
	 */
	LIST_FOREACH(q, &tp->t_segq, tqe_q) {
		if (SEQ_GT(q->tqe_th->th_seq, th->th_seq))
			break;
		p = q;
	}
	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (p != NULL) {
		register int i;
		/* conversion to int (in i) handles seq wraparound */
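		/*
		 * i is the number of bytes at the front of the incoming
		 * segment that the preceding entry p already covers:
		 * i <= 0 means no overlap, 0 < i < *tlenp means i bytes
		 * must be trimmed from the front, and i >= *tlenp means
		 * the segment is a complete duplicate.
		 */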
		i = p->tqe_th->th_seq + p->tqe_len - th->th_seq;
		if (i > 0) {
			if (i >= *tlenp) {
				tcpstat.tcps_rcvduppack++;
				tcpstat.tcps_rcvdupbyte += *tlenp;
				/*
				 * Try to present any queued data
				 * at the left window edge to the user.
				 * This is needed after the 3-WHS
				 * completes.
				 */
				goto present;	/* ??? */
			}
		}
	}
	tcpstat.tcps_rcvoopack++;
	tcpstat.tcps_rcvoobyte += *tlenp;
	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (q) {
		register int i = (th->th_seq + *tlenp) - q->tqe_th->th_seq;
		if (i <= 0)
			break;
		if (i < q->tqe_len) {
			q->tqe_th->th_seq += i;
			q->tqe_len -= i;
			m_adj(q->tqe_m, i);
			break;
		}

		nq = LIST_NEXT(q, tqe_q);
		LIST_REMOVE(q, tqe_q);
		q = nq;
	}
	/* Insert the new segment queue entry into place. */
	te->tqe_len = *tlenp;

	if (p == NULL) {
		LIST_INSERT_HEAD(&tp->t_segq, te, tqe_q);
	} else {
		LIST_INSERT_AFTER(p, te, tqe_q);
	}
present:
	/*
	 * Present data to user, advancing rcv_nxt through
	 * completed sequence space.
	 */
	if (!TCPS_HAVEESTABLISHED(tp->t_state))
		return (0);
	q = LIST_FIRST(&tp->t_segq);
	if (!q || q->tqe_th->th_seq != tp->rcv_nxt)
		return (0);
	do {
		tp->rcv_nxt += q->tqe_len;
		flags = q->tqe_th->th_flags & TH_FIN;
		nq = LIST_NEXT(q, tqe_q);
		LIST_REMOVE(q, tqe_q);
		if (so->so_state & SS_CANTRCVMORE)
			m_freem(q->tqe_m);
		else {
			if (sbappendstream(&so->so_rcv, q->tqe_m))
				dowakeup = 1;
		}
		q = nq;
	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {

		KERNEL_DEBUG(DBG_LAYER_BEG,
		     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
		     (((tp->t_inpcb->in6p_laddr.s6_addr16[0] & 0xffff) << 16) |
		      (tp->t_inpcb->in6p_faddr.s6_addr16[0] & 0xffff)),
		     0, 0, 0);
	} else {
		KERNEL_DEBUG(DBG_LAYER_BEG,
		     ((tp->t_inpcb->inp_fport << 16) | tp->t_inpcb->inp_lport),
		     (((tp->t_inpcb->inp_laddr.s_addr & 0xffff) << 16) |
		      (tp->t_inpcb->inp_faddr.s_addr & 0xffff)),
		     0, 0, 0);
	}

	if (dowakeup)
		sorwakeup(so); /* done with socket lock held */
	return (flags);
}
/*
 * Reduce congestion window.
 */
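/*
 * Note: this is the fast-recovery entry point: ssthresh is recomputed from
 * the current flight (min(snd_wnd, snd_cwnd)), recovery runs until snd_max
 * is acked, the retransmit timer is cleared, and TE_SENDCWR asks the output
 * path to signal the window reduction (CWR) to an ECN-capable peer.
 */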
static void
tcp_reduce_congestion_window(
	struct tcpcb	*tp)
{
	u_int win;

	win = min(tp->snd_wnd, tp->snd_cwnd) /
		2 / tp->t_maxseg;
	if (win < 2)
		win = 2;
	tp->snd_ssthresh = win * tp->t_maxseg;
	ENTER_FASTRECOVERY(tp);
	tp->snd_recover = tp->snd_max;
	tp->t_timer[TCPT_REXMT] = 0;
	tp->ecn_flags |= TE_SENDCWR;
	tp->snd_cwnd = tp->snd_ssthresh +
		 tp->t_maxseg * tcprexmtthresh;
}
/*
 * TCP input routine, follows pages 65-76 of the
 * protocol specification dated September, 1981 very closely.
 */
int
tcp6_input(struct mbuf **mp, int *offp)
{
	register struct mbuf *m = *mp;
	struct in6_ifaddr *ia6;

	IP6_EXTHDR_CHECK(m, *offp, sizeof(struct tcphdr), return IPPROTO_DONE);

	/*
	 * draft-itojun-ipv6-tcp-to-anycast
	 * better place to put this in?
	 */
	ia6 = ip6_getdstifaddr(m);
	if (ia6 && (ia6->ia6_flags & IN6_IFF_ANYCAST)) {
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		icmp6_error(m, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR,
			    (caddr_t)&ip6->ip6_dst - (caddr_t)ip6);
		return (IPPROTO_DONE);
	}

	tcp_input(m, *offp);
	return (IPPROTO_DONE);
}
void
tcp_input(m, off0)
	struct mbuf *m;
	int off0;
{
	register struct tcphdr *th;
	register struct ip *ip = NULL;
	register struct ipovly *ipov;
	register struct inpcb *inp;
	u_char *optp = NULL;
	int optlen = 0;
	int len, tlen, off;
	int drop_hdrlen;
	register struct tcpcb *tp = 0;
	register int thflags;
	struct socket *so = 0;
	int todrop, acked, ourfinisacked, needoutput = 0;
	struct in_addr laddr;
	struct in6_addr laddr6;
	u_int32_t tiwin;
	struct tcpopt to;		/* options in this segment */
	struct sockaddr_in *next_hop = NULL;
	short ostate = 0;
	struct m_tag *fwd_tag;
	u_char ip_ecn = IPTOS_ECN_NOTECT;
	unsigned int ifscope;
	/*
	 * Record the interface where this segment arrived; this does not
	 * affect normal data output (for non-detached TCP) as it provides a
	 * hint about which route and interface to use for sending in the
	 * absence of a PCB, when scoped routing (and thus source interface
	 * selection) is enabled.
	 */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL)
		ifscope = m->m_pkthdr.rcvif->if_index;
	else
		ifscope = IFSCOPE_NONE;
	/* Grab info from PACKET_TAG_IPFORWARD tag prepended to the chain. */
	if (!SLIST_EMPTY(&m->m_pkthdr.tags)) {
		fwd_tag = m_tag_locate(m, KERNEL_MODULE_TAG_ID,
		    KERNEL_TAG_TYPE_IPFORWARD, NULL);
	} else {
		fwd_tag = NULL;
	}
	if (fwd_tag != NULL) {
		struct ip_fwd_tag *ipfwd_tag = (struct ip_fwd_tag *)(fwd_tag+1);

		next_hop = ipfwd_tag->next_hop;
		m_tag_delete(m, fwd_tag);
	}
	struct ip6_hdr *ip6 = NULL;
	int isipv6;
	int rstreason; /* For badport_bandlim accounting purposes */
	struct proc *proc0 = current_proc();

	KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_START,0,0,0,0,0);

	isipv6 = (mtod(m, struct ip *)->ip_v == 6) ? 1 : 0;
	bzero((char *)&to, sizeof(to));

	tcpstat.tcps_rcvtotal++;
	if (isipv6) {
		/* IP6_EXTHDR_CHECK() is already done at tcp6_input() */
		ip6 = mtod(m, struct ip6_hdr *);
		tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0;
		if (in6_cksum(m, IPPROTO_TCP, off0, tlen)) {
			tcpstat.tcps_rcvbadsum++;
			goto dropnosock;
		}
		th = (struct tcphdr *)((caddr_t)ip6 + off0);

		KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
		     (((ip6->ip6_src.s6_addr16[0]) << 16) | (ip6->ip6_dst.s6_addr16[0])),
		     th->th_seq, th->th_ack, th->th_win);
		/*
		 * Be proactive about unspecified IPv6 addresses in the source.
		 * As we use all-zero to indicate an unbound/unconnected pcb,
		 * an unspecified IPv6 address can be used to confuse us.
		 *
		 * Note that packets with an unspecified IPv6 destination are
		 * already dropped in ip6_input.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
			/* XXX stat */
			goto dropnosock;
		}
	} else {
	/*
	 * Get IP and TCP header together in first mbuf.
	 * Note: IP leaves IP header in first mbuf.
	 */
	if (off0 > sizeof (struct ip)) {
		ip_stripoptions(m, (struct mbuf *)0);
		off0 = sizeof(struct ip);
		if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16)
			m->m_pkthdr.csum_flags = 0; /* invalidate hwcksuming */
	}
	if (m->m_len < sizeof (struct tcpiphdr)) {
		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
			tcpstat.tcps_rcvshort++;
			return;
		}
	}
	ip = mtod(m, struct ip *);
	ipov = (struct ipovly *)ip;
	th = (struct tcphdr *)((caddr_t)ip + off0);
	tlen = ip->ip_len;
	KERNEL_DEBUG(DBG_LAYER_BEG, ((th->th_dport << 16) | th->th_sport),
	     (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
	     th->th_seq, th->th_ack, th->th_win);
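	/*
	 * Checksum handling below: the driver may have left a partial sum in
	 * m_pkthdr.csum_data (with or without the pseudo-header included), or
	 * we may have to checksum the whole segment in software.  In the
	 * cases where a pseudo-header sum is built here, the IP overlay bytes
	 * (ipov->ih_x1) are saved in b[], zeroed so that the IP header doubles
	 * as the TCP pseudo-header, and restored after the sum is computed.
	 */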
	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
		if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
			u_short pseudo;
			char b[9];

			*(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
			*(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
			*(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];

			bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
			ipov->ih_len = (u_short)tlen;

#if BYTE_ORDER != BIG_ENDIAN
			HTONS(ipov->ih_len);
#endif

			pseudo = in_cksum(m, sizeof (struct ip));

			*(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
			*(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
			*(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];

			th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
		} else {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				th->th_sum = m->m_pkthdr.csum_data;
			else
				th->th_sum = in_pseudo(ip->ip_src.s_addr,
					ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
					ip->ip_len + IPPROTO_TCP));
		}
		th->th_sum ^= 0xffff;
	} else {
		char b[9];
		/*
		 * Checksum extended TCP header and data.
		 */
		*(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
		*(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
		*(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];

		len = sizeof (struct ip) + tlen;
		bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
		ipov->ih_len = (u_short)tlen;

#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ipov->ih_len);
#endif

		th->th_sum = in_cksum(m, len);

		*(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
		*(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
		*(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];

		tcp_in_cksum_stats(len);
	}
	if (th->th_sum) {
		tcpstat.tcps_rcvbadsum++;
		goto dropnosock;
	}
	/* Re-initialization for later version check */
	ip->ip_v = IPVERSION;

	ip_ecn = (ip->ip_tos & IPTOS_ECN_MASK);
	}
	/*
	 * Check that the TCP offset makes sense,
	 * pull out TCP options and adjust length.		XXX
	 */
	off = th->th_off << 2;
	if (off < sizeof (struct tcphdr) || off > tlen) {
		tcpstat.tcps_rcvbadoff++;
		goto dropnosock;
	}
	tlen -= off;	/* tlen is used instead of ti->ti_len */
	if (off > sizeof (struct tcphdr)) {
		if (isipv6) {
			IP6_EXTHDR_CHECK(m, off0, off, return);
			ip6 = mtod(m, struct ip6_hdr *);
			th = (struct tcphdr *)((caddr_t)ip6 + off0);
		} else {
			if (m->m_len < sizeof(struct ip) + off) {
				if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
					tcpstat.tcps_rcvshort++;
					return;
				}
				ip = mtod(m, struct ip *);
				ipov = (struct ipovly *)ip;
				th = (struct tcphdr *)((caddr_t)ip + off0);
			}
		}
		optlen = off - sizeof (struct tcphdr);
		optp = (u_char *)(th + 1);
		/*
		 * Do quick retrieval of timestamp options ("options
		 * prediction?").  If timestamp is the only option and it's
		 * formatted as recommended in RFC 1323 appendix A, we
		 * quickly get the values now and not bother calling
		 * tcp_dooptions(), etc.
		 */
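		/*
		 * The fast-path match below expects the RFC 1323 appendix A
		 * layout: NOP, NOP, TIMESTAMP, length 10 packed into one
		 * 32-bit word (TCPOPT_TSTAMP_HDR), followed by the 4-byte
		 * TSval at optp + 4 and the 4-byte TSecr at optp + 8.
		 */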
		if ((optlen == TCPOLEN_TSTAMP_APPA ||
			(optlen > TCPOLEN_TSTAMP_APPA &&
			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
			*(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
			(th->th_flags & TH_SYN) == 0) {
			to.to_flags |= TOF_TS;
			to.to_tsval = ntohl(*(u_int32_t *)(optp + 4));
			to.to_tsecr = ntohl(*(u_int32_t *)(optp + 8));
			optp = NULL;	/* we've parsed the options */
		}
	}
	thflags = th->th_flags;
	/*
	 * If the drop_synfin option is enabled, drop all packets with
	 * both the SYN and FIN bits set. This prevents e.g. nmap from
	 * identifying the TCP/IP stack.
	 *
	 * This is a violation of the TCP specification.
	 */
	if (drop_synfin && (thflags & (TH_SYN|TH_FIN)) == (TH_SYN|TH_FIN))
		goto dropnosock;

	/*
	 * Convert TCP protocol specific fields to host format.
	 */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHL(th->th_seq);
	NTOHL(th->th_ack);
	NTOHS(th->th_win);
	NTOHS(th->th_urp);
#endif

	/*
	 * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options,
	 * until after ip6_savecontrol() is called and before other functions
	 * which don't want those proto headers.
	 * Because ip6_savecontrol() is going to parse the mbuf to
	 * search for data to be passed up to user-land, it wants mbuf
	 * parameters to be unchanged.
	 */
	drop_hdrlen = off0 + off;
	/*
	 * Locate pcb for segment.
	 */
#if IPFIREWALL_FORWARD
	if (next_hop != NULL
	    && isipv6 == 0 /* IPv6 support is not yet */
	    ) {
		/*
		 * Diverted. Pretend to be the destination.
		 * already got one like this?
		 */
		inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
			ip->ip_dst, th->th_dport, 0, m->m_pkthdr.rcvif);
		if (!inp) {
			/*
			 * No, then it's new. Try to find the ambushing socket.
			 */
			if (!next_hop->sin_port) {
				inp = in_pcblookup_hash(&tcbinfo, ip->ip_src,
				    th->th_sport, next_hop->sin_addr,
				    th->th_dport, 1, m->m_pkthdr.rcvif);
			} else {
				inp = in_pcblookup_hash(&tcbinfo,
				    ip->ip_src, th->th_sport,
				    next_hop->sin_addr,
				    ntohs(next_hop->sin_port), 1,
				    m->m_pkthdr.rcvif);
			}
		}
	} else
#endif	/* IPFIREWALL_FORWARD */
	{
	if (isipv6)
		inp = in6_pcblookup_hash(&tcbinfo, &ip6->ip6_src, th->th_sport,
					 &ip6->ip6_dst, th->th_dport, 1,
					 m->m_pkthdr.rcvif);
	else
	inp = in_pcblookup_hash(&tcbinfo, ip->ip_src, th->th_sport,
	    ip->ip_dst, th->th_dport, 1, m->m_pkthdr.rcvif);
	}
	/*
	 * Use the interface scope information from the PCB for outbound
	 * segments.  If the PCB isn't present and scoped routing is
	 * enabled, tcp_respond will use the scope of the interface where
	 * the segment arrived.
	 */
	if (inp != NULL && (inp->inp_flags & INP_BOUND_IF))
		ifscope = inp->inp_boundif;
	if (ipsec_bypass == 0)  {
		if (isipv6) {
			if (inp != NULL && ipsec6_in_reject_so(m, inp->inp_socket)) {
				IPSEC_STAT_INCREMENT(ipsec6stat.in_polvio);
				if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING)
					inp = NULL;	// pretend we didn't find it
				goto dropnosock;
			}
		} else
		if (inp != NULL && ipsec4_in_reject_so(m, inp->inp_socket)) {
			IPSEC_STAT_INCREMENT(ipsecstat.in_polvio);
			if (in_pcb_checkstate(inp, WNT_RELEASE, 0) == WNT_STOPUSING)
				inp = NULL;	// pretend we didn't find it
			goto dropnosock;
		}
	}
	/*
	 * If the state is CLOSED (i.e., TCB does not exist) then
	 * all data in the incoming segment is discarded.
	 * If the TCB exists but is in CLOSED state, it is embryonic,
	 * but should either do a listen or a connect soon.
	 */
	if (inp == NULL) {
		if (log_in_vain) {
#if INET6
			char dbuf[MAX_IPv6_STR_LEN], sbuf[MAX_IPv6_STR_LEN];
#else /* INET6 */
			char dbuf[MAX_IPv4_STR_LEN], sbuf[MAX_IPv4_STR_LEN];
#endif /* INET6 */
			if (isipv6) {
				inet_ntop(AF_INET6, &ip6->ip6_dst, dbuf, sizeof(dbuf));
				inet_ntop(AF_INET6, &ip6->ip6_src, sbuf, sizeof(sbuf));
			} else {
				inet_ntop(AF_INET, &ip->ip_dst, dbuf, sizeof(dbuf));
				inet_ntop(AF_INET, &ip->ip_src, sbuf, sizeof(sbuf));
			}
			switch (log_in_vain) {
			case 1:
				if (thflags & TH_SYN)
					log(LOG_INFO,
						"Connection attempt to TCP %s:%d from %s:%d\n",
						dbuf, ntohs(th->th_dport),
						sbuf,
						ntohs(th->th_sport));
				break;
			case 2:
				log(LOG_INFO,
					"Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n",
					dbuf, ntohs(th->th_dport), sbuf,
					ntohs(th->th_sport), thflags);
				break;
			case 3:
				if ((thflags & TH_SYN) &&
					!(m->m_flags & (M_BCAST | M_MCAST)) &&
#if INET6
					((isipv6 && !IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &ip6->ip6_src)) ||
					 (!isipv6 && ip->ip_dst.s_addr != ip->ip_src.s_addr))
#else
					ip->ip_dst.s_addr != ip->ip_src.s_addr
#endif
					)
					log_in_vain_log((LOG_INFO,
						"Stealth Mode connection attempt to TCP %s:%d from %s:%d\n",
						dbuf, ntohs(th->th_dport),
						sbuf,
						ntohs(th->th_sport)));
				break;
			}
		}
		if (blackhole) {
			if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_type != IFT_LOOP)
				switch (blackhole) {
				case 1:
					if (thflags & TH_SYN)
						goto dropnosock;
					break;
				case 2:
					goto dropnosock;
				default:
					goto dropnosock;
				}
		}
		rstreason = BANDLIM_RST_CLOSEDPORT;
		goto dropwithresetnosock;
	}
	so = inp->inp_socket;
	if (so == NULL) {
		/* This case shouldn't happen, as the socket shouldn't be null
		 * if inp_state isn't set to INPCB_STATE_DEAD.
		 * But just in case, we pretend we didn't find the socket if we hit this case,
		 * as this isn't cause for a panic (the socket might be leaked however)...
		 */
		inp = NULL;	// pretend we didn't find it
		printf("tcp_input: no more socket for inp=%x. This shouldn't happen\n", inp);
		goto dropnosock;
	}
	tcp_lock(so, 1, (void *)2);
	if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
		tcp_unlock(so, 1, (void *)2);
		inp = NULL;	// pretend we didn't find it
		goto dropnosock;
	}

	tp = intotcpcb(inp);
	if (tp == 0) {
		rstreason = BANDLIM_RST_CLOSEDPORT;
		goto dropwithreset;
	}
	if (tp->t_state == TCPS_CLOSED)
		goto drop;
	/* Unscale the window into a 32-bit value. */
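	/*
	 * Window scaling (RFC 1323) never applies to the window field of a
	 * SYN segment, so the shift is only applied to non-SYN segments;
	 * for a SYN the raw 16-bit value is used instead.
	 */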
	if ((thflags & TH_SYN) == 0)
		tiwin = th->th_win << tp->snd_scale;
	else
		tiwin = th->th_win;

#if CONFIG_MACF_NET
	if (mac_inpcb_check_deliver(inp, m, AF_INET, SOCK_STREAM))
		goto drop;
#endif
	/* Radar 7377561: Avoid processing packets while closing a listen socket */
	if (tp->t_state == TCPS_LISTEN && (so->so_options & SO_ACCEPTCONN) == 0)
		goto drop;

	if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
#if TCPDEBUG
		if (so->so_options & SO_DEBUG) {
			ostate = tp->t_state;
			if (isipv6)
				bcopy((char *)ip6, (char *)tcp_saveipgen,
				      sizeof(*ip6));
			else
				bcopy((char *)ip, (char *)tcp_saveipgen, sizeof(*ip));
			tcp_savetcp = *th;
		}
#endif
		if (so->so_options & SO_ACCEPTCONN) {
			register struct tcpcb *tp0 = tp;
			struct socket *so2;
			struct socket *oso;
			struct sockaddr_storage from;
			struct inpcb *oinp = sotoinpcb(so);
			unsigned int head_ifscope;

			/* Get listener's bound-to-interface, if any */
			head_ifscope = (inp->inp_flags & INP_BOUND_IF) ?
			    inp->inp_boundif : IFSCOPE_NONE;
			/*
			 * If the state is LISTEN then ignore segment if it contains an RST.
			 * If the segment contains an ACK then it is bad and send a RST.
			 * If it does not contain a SYN then it is not interesting; drop it.
			 * If it is from this socket, drop it, it must be forged.
			 */
			if ((thflags & (TH_RST|TH_ACK|TH_SYN)) != TH_SYN) {
				if (thflags & TH_RST) {
					goto drop;
				}
				if (thflags & TH_ACK) {
					tcpstat.tcps_badsyn++;
					rstreason = BANDLIM_RST_OPENPORT;
					goto dropwithreset;
				}

				/* We come here if there is no SYN set */
				tcpstat.tcps_badsyn++;
				goto drop;
			}
			KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_START,0,0,0,0,0);
			if (th->th_dport == th->th_sport) {
				if (isipv6) {
					if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
							       &ip6->ip6_src))
						goto drop;
				} else
				if (ip->ip_dst.s_addr == ip->ip_src.s_addr)
					goto drop;
			}
			/*
			 * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN.
			 * in_broadcast() should never return true on a received
			 * packet with M_BCAST not set.
			 *
			 * Packets with a multicast source address should also
			 * be discarded.
			 */
			if (m->m_flags & (M_BCAST|M_MCAST))
				goto drop;
			if (isipv6) {
				if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
					IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
					goto drop;
			} else
			if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
				IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
				ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
				in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
				goto drop;
			/*
			 * If deprecated addresses are forbidden, we do not
			 * accept a SYN to a deprecated interface address, to
			 * prevent any new inbound connection from getting
			 * established.
			 * When we do not accept the SYN, we send a TCP RST
			 * with the deprecated source address (instead of
			 * dropping it).  We accept this compromise as it is
			 * much better for the peer to receive a RST, and the
			 * RST will be the final packet for the exchange.
			 *
			 * If we do not forbid deprecated addresses, we accept
			 * the SYN packet.  RFC2462 does not suggest dropping a
			 * SYN in this case.
			 * If we decipher RFC2462 5.5.4, it reads like this:
			 * 1. use of a deprecated addr with existing
			 *    communication is okay - "SHOULD continue to be
			 *    used"
			 * 2. use of it with new communication:
			 *   (2a) "SHOULD NOT be used if alternate address
			 *        with sufficient scope is available"
			 *   (2b) nothing mentioned otherwise.
			 * Here we fall into the (2b) case as we have no choice
			 * in our source address selection - we must obey the
			 * peer.
			 *
			 * The wording in RFC2462 is confusing, and there are
			 * multiple description texts for deprecated address
			 * handling - worse, they are not exactly the same.
			 * I believe 5.5.4 is the best one, so we follow 5.5.4.
			 */
			if (isipv6 && !ip6_use_deprecated) {
				struct in6_ifaddr *ia6;

				if ((ia6 = ip6_getdstifaddr(m)) &&
				    (ia6->ia6_flags & IN6_IFF_DEPRECATED)) {
					rstreason = BANDLIM_RST_OPENPORT;
					goto dropwithreset;
				}
			}
			if (so->so_filt) {
				if (isipv6) {
					struct sockaddr_in6	*sin6 = (struct sockaddr_in6*)&from;

					sin6->sin6_len = sizeof(*sin6);
					sin6->sin6_family = AF_INET6;
					sin6->sin6_port = th->th_sport;
					sin6->sin6_flowinfo = 0;
					sin6->sin6_addr = ip6->ip6_src;
					sin6->sin6_scope_id = 0;
				} else {
					struct sockaddr_in *sin = (struct sockaddr_in*)&from;

					sin->sin_len = sizeof(*sin);
					sin->sin_family = AF_INET;
					sin->sin_port = th->th_sport;
					sin->sin_addr = ip->ip_src;
				}
				so2 = sonewconn(so, 0, (struct sockaddr*)&from);
			} else {
				so2 = sonewconn(so, 0, NULL);
			}
			if (so2 == 0) {
				tcpstat.tcps_listendrop++;
				if (tcp_dropdropablreq(so)) {
					if (so->so_filt)
						so2 = sonewconn(so, 0, (struct sockaddr*)&from);
					else
						so2 = sonewconn(so, 0, NULL);
				}
				if (!so2)
					goto drop;
			}
			/* Point "inp" and "tp" in tandem to new socket */
			inp = (struct inpcb *)so2->so_pcb;
			tp = intotcpcb(inp);

			oso = so;
			tcp_unlock(so, 0, 0); /* Unlock but keep a reference on listener for now */

			so = so2;
			tcp_lock(so, 1, 0);
			/*
			 * Mark socket as temporary until we're
			 * committed to keeping it.  The code at
			 * ``drop'' and ``dropwithreset'' checks the
			 * flag dropsocket to see if the temporary
			 * socket created here should be discarded.
			 * We mark the socket as discardable until
			 * we're committed to it below in TCPS_LISTEN.
			 * There are some error conditions in which we
			 * have to drop the temporary socket.
			 */
			/*
			 * Inherit INP_BOUND_IF from listener; testing if
			 * head_ifscope is non-zero is sufficient, since it
			 * can only be set to a non-zero value earlier if
			 * the listener has such a flag set.
			 */
#if INET6
			if (head_ifscope != IFSCOPE_NONE && !isipv6) {
#else
			if (head_ifscope != IFSCOPE_NONE) {
#endif /* INET6 */
				inp->inp_flags |= INP_BOUND_IF;
				inp->inp_boundif = head_ifscope;
			}
			if (isipv6)
				inp->in6p_laddr = ip6->ip6_dst;
			else {
				inp->inp_vflag &= ~INP_IPV6;
				inp->inp_vflag |= INP_IPV4;
				inp->inp_laddr = ip->ip_dst;
			}
			inp->inp_lport = th->th_dport;
			if (in_pcbinshash(inp, 0) != 0) {
				/*
				 * Undo the assignments above if we failed to
				 * put the PCB on the hash lists.
				 */
				if (isipv6)
					inp->in6p_laddr = in6addr_any;
				else
					inp->inp_laddr.s_addr = INADDR_ANY;
				inp->inp_lport = 0;
				tcp_lock(oso, 0, 0);	/* release ref on parent */
				tcp_unlock(oso, 1, 0);
				goto drop;
			}
			if (isipv6) {
				/*
				 * Inherit socket options from the listening
				 * socket.
				 * Note that in6p_inputopts are not (and even
				 * should not be) copied, since they store
				 * previously received options and are used to
				 * detect if each new option is different than
				 * the previous one and hence should be passed
				 * up to the user.
				 * If we copied in6p_inputopts, a user would
				 * not be able to receive options just after
				 * calling the accept system call.
				 */
				inp->inp_flags |=
					oinp->inp_flags & INP_CONTROLOPTS;
				if (oinp->in6p_outputopts)
					inp->in6p_outputopts =
						ip6_copypktopts(oinp->in6p_outputopts,
								M_NOWAIT);
			} else
			inp->inp_options = ip_srcroute();
			tcp_lock(oso, 0, 0);
			/* copy old policy into new socket's */
			if (sotoinpcb(oso)->inp_sp)
			{
				int error = 0;
				/* Is it a security hole here to silently fail to copy the policy? */
				if (inp->inp_sp != NULL)
					error = ipsec_init_policy(so, &inp->inp_sp);
				if (error != 0 || ipsec_copy_policy(sotoinpcb(oso)->inp_sp, inp->inp_sp))
					printf("tcp_input: could not copy policy\n");
			}
			/* inherit states from the listener */
			tp->t_state = TCPS_LISTEN;
			tp->t_flags |= tp0->t_flags & (TF_NOPUSH|TF_NOOPT|TF_NODELAY);
			tp->t_keepinit = tp0->t_keepinit;
			tp->t_inpcb->inp_ip_ttl = tp0->t_inpcb->inp_ip_ttl;

			/* now drop the reference on the listener */
			tcp_unlock(oso, 1, 0);
			/* Compute proper scaling value from buffer space */
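			/*
			 * Below: on a lightly loaded system (fewer PCBs than
			 * tcp_sockthreshold) the new connection gets at least
			 * the precomputed tcp_win_scale and a larger receive
			 * buffer; otherwise the loop simply picks the smallest
			 * shift whose scaled maximum window
			 * (TCP_MAXWIN << scale) covers the current receive
			 * buffer size.
			 */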
			if (inp->inp_pcbinfo->ipi_count < tcp_sockthreshold) {
				tp->request_r_scale = max(tcp_win_scale, tp->request_r_scale);
				so->so_rcv.sb_hiwat = imin(TCP_MAXWIN << tp->request_r_scale, (sb_max / (MSIZE+MCLBYTES)) * MCLBYTES);
			}
			else {
				while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
				    TCP_MAXWIN << tp->request_r_scale <
				    so->so_rcv.sb_hiwat)
					tp->request_r_scale++;
			}

			KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN | DBG_FUNC_END,0,0,0,0,0);
		}
	}
	lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
	/*
	 * This is the second part of the MSS DoS prevention code (after
	 * minmss on the sending side) and it deals with too many too small
	 * tcp packets in a too short timeframe (1 second).
	 *
	 * For every full second we count the number of received packets
	 * and bytes. If we get a lot of packets per second for this connection
	 * (tcp_minmssoverload) we take a closer look at it and compute the
	 * average packet size for the past second. If that is less than
	 * tcp_minmss we get too many packets with a very small payload, which
	 * is not good and burdens our system (and every packet generates
	 * a wakeup to the process connected to our socket). We can reasonably
	 * expect this to be a small-packet DoS attack meant to exhaust our CPU
	 * cycles.
	 *
	 * Care has to be taken for the minimum packet overload value. This
	 * value defines the minimum number of packets per second before we
	 * start to worry. It must not be set too low, to avoid killing, for
	 * example, interactive connections with many small packets, like
	 * telnet or SSH sessions.
	 *
	 * Setting either tcp_minmssoverload or tcp_minmss to "0" disables
	 * this check.
	 *
	 * Account for the packet if it is a payload packet; skip over ACKs, etc.
	 *
	 * The packet-per-second count is done all the time and is also used
	 * by "DELAY_ACK" to detect streaming situations.
	 */
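	/*
	 * In the block below, rcv_pps and rcv_byps accumulate packets and
	 * bytes for the current one-second window (bounded by rcv_reset);
	 * the connection is dropped (ECONNRESET) when the average payload
	 * per packet, rcv_byps / rcv_pps, stays under tcp_minmss while
	 * rcv_pps exceeds tcp_minmssoverload.
	 */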
	if (tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
		if (tp->rcv_reset > tcp_now) {
			tp->rcv_pps++;
			tp->rcv_byps += tlen + off;
			if (tp->rcv_byps > tp->rcv_maxbyps)
				tp->rcv_maxbyps = tp->rcv_byps;
			/*
			 * Setting either tcp_minmssoverload or tcp_minmss
			 * to "0" disables the check.
			 */
			if (tcp_minmss && tcp_minmssoverload && tp->rcv_pps > tcp_minmssoverload) {
				if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) {
					char	ipstrbuf[MAX_IPv6_STR_LEN];
					printf("too many small tcp packets from "
					       "%s:%u, av. %ubyte/packet, "
					       "dropping connection\n",
						isipv6 ?
						inet_ntop(AF_INET6, &inp->in6p_faddr, ipstrbuf,
							  sizeof(ipstrbuf)) :
						inet_ntop(AF_INET, &inp->inp_faddr, ipstrbuf,
							  sizeof(ipstrbuf)),
						ntohs(inp->inp_fport),
						tp->rcv_byps / tp->rcv_pps);
					tp = tcp_drop(tp, ECONNRESET);
/*					tcpstat.tcps_minmssdrops++; */
					goto drop;
				}
			}
		} else {
			tp->rcv_reset = tcp_now + TCP_RETRANSHZ;
			tp->rcv_pps = 1;
			tp->rcv_byps = tlen + off;
		}
	}
#if TRAFFIC_MGT
	if (so->so_traffic_mgt_flags & TRAFFIC_MGT_SO_BG_REGULATE) {
		tcpstat.tcps_bg_rcvtotal++;

		/* Take snapshots of the packets received;
		 * the tcpcb should have been initialized to 0 when allocated,
		 * so if a snapshot is 0 this is the first time we're doing this.
		 */
		if (!tp->tot_recv_snapshot) {
			tp->tot_recv_snapshot = tcpstat.tcps_rcvtotal;
		}
		if (!tp->bg_recv_snapshot) {
			tp->bg_recv_snapshot = tcpstat.tcps_bg_rcvtotal;
		}
	}
#endif /* TRAFFIC_MGT */
	/*
	 * Explicit Congestion Notification - Flag that we need to send ECE if
	 *	+ The IP Congestion experienced flag was set.
	 *	+ Socket is in established state
	 *	+ We negotiated ECN in the TCP setup
	 *	+ This isn't a pure ack (tlen > 0)
	 *	+ The data is in the valid window
	 *
	 *	TE_SENDECE will be cleared when we receive a packet with TH_CWR set.
	 */
	if (ip_ecn == IPTOS_ECN_CE && tp->t_state == TCPS_ESTABLISHED &&
		(tp->ecn_flags & (TE_SETUPSENT | TE_SETUPRECEIVED)) ==
		 (TE_SETUPSENT | TE_SETUPRECEIVED) && tlen > 0 &&
		SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
		SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) {
		tp->ecn_flags |= TE_SENDECE;
	}
	/*
	 * Clear TE_SENDECE if TH_CWR is set. This is harmless, so we don't
	 * bother doing extensive checks for state and whatnot.
	 */
	if ((thflags & TH_CWR) == TH_CWR) {
		tp->ecn_flags &= ~TE_SENDECE;
	}
	/*
	 * Segment received on connection.
	 * Reset idle time and keep-alive timer.
	 */
	if (TCPS_HAVEESTABLISHED(tp->t_state))
		tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
	/*
	 * Process options if not in LISTEN state,
	 * else do it below (after getting remote address).
	 */
	if (tp->t_state != TCPS_LISTEN && optp)
		tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
	if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) {
		if (to.to_flags & TOF_SCALE) {
			tp->t_flags |= TF_RCVD_SCALE;
			tp->requested_s_scale = to.to_requested_s_scale;
			tp->snd_wnd = th->th_win << tp->snd_scale;
			tiwin = tp->snd_wnd;
		}
		if (to.to_flags & TOF_TS) {
			tp->t_flags |= TF_RCVD_TSTMP;
			tp->ts_recent = to.to_tsval;
			tp->ts_recent_age = tcp_now;
		}
		if (to.to_flags & TOF_MSS)
			tcp_mss(tp, to.to_mss, ifscope);
		if (tp->sack_enable) {
			if (!(to.to_flags & TOF_SACK))
				tp->sack_enable = 0;
			else
				tp->t_flags |= TF_SACK_PERMIT;
		}
	}
	/*
	 * Header prediction: check for the two common cases
	 * of a uni-directional data xfer.  If the packet has
	 * no control flags, is in-sequence, the window didn't
	 * change and we're not retransmitting, it's a
	 * candidate.  If the length is zero and the ack moved
	 * forward, we're the sender side of the xfer.  Just
	 * free the data acked & wake any higher level process
	 * that was blocked waiting for space.  If the length
	 * is non-zero and the ack didn't move, we're the
	 * receiver side.  If we're getting packets in-order
	 * (the reassembly queue is empty), add the data to
	 * the socket buffer and note that we need a delayed ack.
	 * Make sure that the hidden state-flags are also off.
	 * Since we check for TCPS_ESTABLISHED above, it can only
	 * be TH_NEEDSYN.
	 */
	if (tp->t_state == TCPS_ESTABLISHED &&
	    (thflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK|TH_ECE)) == TH_ACK &&
	    ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
	    ((to.to_flags & TOF_TS) == 0 ||
	     TSTMP_GEQ(to.to_tsval, tp->ts_recent)) &&
	    th->th_seq == tp->rcv_nxt &&
	    tiwin && tiwin == tp->snd_wnd &&
	    tp->snd_nxt == tp->snd_max) {
		/*
		 * If last ACK falls within this segment's sequence numbers,
		 * record the timestamp.
		 * NOTE that the test is modified according to the latest
		 * proposal of the tcplw@cray.com list (Braden 1993/04/26).
		 */
		if ((to.to_flags & TOF_TS) != 0 &&
		   SEQ_LEQ(th->th_seq, tp->last_ack_sent)) {
			tp->ts_recent_age = tcp_now;
			tp->ts_recent = to.to_tsval;
		}

		/* Force acknowledgment if we received a FIN */

		if (thflags & TH_FIN)
			tp->t_flags |= TF_ACKNOW;
			if (SEQ_GT(th->th_ack, tp->snd_una) &&
			    SEQ_LEQ(th->th_ack, tp->snd_max) &&
			    tp->snd_cwnd >= tp->snd_ssthresh &&
			    ((!tcp_do_newreno && !tp->sack_enable &&
			      tp->t_dupacks < tcprexmtthresh) ||
			     ((tcp_do_newreno || tp->sack_enable) &&
			      !IN_FASTRECOVERY(tp) && to.to_nsacks == 0 &&
			      TAILQ_EMPTY(&tp->snd_holes)))) {
				/*
				 * this is a pure ack for outstanding data.
				 */
				++tcpstat.tcps_predack;
				/*
				 * "bad retransmit" recovery
				 */
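				/*
				 * If the very first retransmission turned out
				 * to be spurious (the ACK arrived while still
				 * inside t_badrxtwin), roll cwnd, ssthresh and
				 * snd_recover back to the values saved before
				 * the retransmit and resume sending from
				 * snd_max.
				 */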
				if (tp->t_rxtshift == 1 &&
				    tcp_now < tp->t_badrxtwin) {
					++tcpstat.tcps_sndrexmitbad;
					tp->snd_cwnd = tp->snd_cwnd_prev;
					tp->snd_ssthresh =
					    tp->snd_ssthresh_prev;
					tp->snd_recover = tp->snd_recover_prev;
					if (tp->t_flags & TF_WASFRECOVERY)
						ENTER_FASTRECOVERY(tp);
					tp->snd_nxt = tp->snd_max;
					tp->t_badrxtwin = 0;
				}
1526                                  * Recalculate the transmit timer / rtt. 
1528                                  * Some boxes send broken timestamp replies 
1529                                  * during the SYN+ACK phase, ignore 
1530                                  * timestamps of 0 or we could calculate a 
1531                                  * huge RTT and blow up the retransmit timer. 
1533                                 if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0)) { /* Makes sure we already have a TS */
1534                                         if (!tp->t_rttlow ||
1535                                             tp->t_rttlow > tcp_now - to.to_tsecr)
1536                                                 tp->t_rttlow = tcp_now - to.to_tsecr;
1538                                             tcp_now - to.to_tsecr);
1539                                 } else if (tp->t_rtttime &&
1540                                             SEQ_GT(th->th_ack, tp->t_rtseq)) {
1541                                         if (!tp->t_rttlow ||
1542                                             tp->t_rttlow > tcp_now - tp->t_rtttime)
1543                                                 tp->t_rttlow = tcp_now - tp->t_rtttime;
1544                                         tcp_xmit_timer(tp, tp->t_rtttime);
1546                                 acked = th->th_ack - tp->snd_una;
1547                                 tcpstat.tcps_rcvackpack++;
1548                                 tcpstat.tcps_rcvackbyte += acked;
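                                /*
                                 * The RTT sample, roughly: with a valid
                                 * timestamp echo it is tcp_now - to.to_tsecr,
                                 * otherwise it comes from the timed segment;
                                 * t_rttlow tracks the smallest sample seen.
                                 * "acked" above is simply the number of bytes
                                 * this ACK newly acknowledges
                                 * (th_ack - snd_una).
                                 */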
1550                                  * Grow the congestion window, if the 
1551                                  * connection is cwnd bound. 
1553                                 if (tp->snd_cwnd < tp->snd_wnd) {
1554                                         tp->t_bytes_acked += acked;
1555                                         if (tp->t_bytes_acked > tp->snd_cwnd) {
1556                                                 tp->t_bytes_acked -= tp->snd_cwnd;
1557                                                 tp->snd_cwnd += tp->t_maxseg;
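                                        /*
                                         * For illustration: with a t_maxseg of
                                         * 1460, snd_cwnd grows by one segment
                                         * only after a full cwnd worth of bytes
                                         * has been acked on this fast path,
                                         * i.e. roughly once per RTT while the
                                         * transfer is cwnd bound.
                                         */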
1560                                 sbdrop(&so->so_snd, acked);
1561                                 if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
1562                                     SEQ_LEQ(th->th_ack, tp->snd_recover))
1563                                         tp->snd_recover = th->th_ack - 1;
1564                                 tp->snd_una = th->th_ack;
1566                                  * pull snd_wl2 up to prevent seq wrap relative
1569                                 tp->snd_wl2 = th->th_ack;
1572                                 ND6_HINT(tp); /* some progress has been done */
1575                                  * If all outstanding data are acked, stop 
1576                                  * retransmit timer, otherwise restart timer 
1577                                  * using current (possibly backed-off) value. 
1578                                  * If process is waiting for space, 
1579                                  * wakeup/selwakeup/signal.  If data 
1580                                  * are ready to send, let tcp_output 
1581                                  * decide between more output or persist. 
1583                                 if (tp->snd_una == tp->snd_max)
1584                                         tp->t_timer[TCPT_REXMT] = 0;
1585                                 else if (tp->t_timer[TCPT_PERSIST] == 0)
1586                                         tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
1588                                 sowwakeup(so); /* has to be done with socket lock held */
1589                                 if ((so->so_snd.sb_cc) || (tp->t_flags & TF_ACKNOW)) {
1590                                         tp->t_unacksegs = 0;
1591                                         (void) tcp_output(tp);
1593                                 tcp_unlock(so, 1, 0);
1594                                 KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
1597                 } else if (th->th_ack == tp->snd_una &&
1598                     LIST_EMPTY(&tp->t_segq) &&
1599                     tlen <= tcp_sbspace(tp)) {
1601                          * this is a pure, in-sequence data packet 
1602                          * with nothing on the reassembly queue and 
1603                          * we have enough buffer space to take it. 
1605                         /* Clean receiver SACK report if present */ 
1606                         if (tp->sack_enable && tp->rcv_numsacks)
1607                                 tcp_clean_sackreport(tp);
1608                         ++tcpstat.tcps_preddat;
1609                         tp->rcv_nxt += tlen;
1611                          * Pull snd_wl1 up to prevent seq wrap relative to
1614                         tp->snd_wl1 = th->th_seq;
1616                          * Pull rcv_up up to prevent seq wrap relative to
1619                         tp->rcv_up = tp->rcv_nxt;
1620                         tcpstat.tcps_rcvpack++;
1621                         tcpstat.tcps_rcvbyte += tlen;
1622                         ND6_HINT(tp);   /* some progress has been done */
1624                          * Add data to socket buffer. 
1626                         m_adj(m, drop_hdrlen);  /* delayed header drop */
1627                         if (sbappendstream(&so->so_rcv, m))
1631                                 KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport),
1632                                         (((ip6->ip6_src.s6_addr16[0]) << 16) | (ip6->ip6_dst.s6_addr16[0])),
1633                                         th->th_seq, th->th_ack, th->th_win);
1638                                 KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport),
1639                                         (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
1640                                         th->th_seq, th->th_ack, th->th_win);
1642                         if (DELAY_ACK(tp))  {
1643                                 tp->t_flags |= TF_DELACK;
1646                                 tp->t_unacksegs = 0;
1647                                 tp->t_flags |= TF_ACKNOW;
1650                         tcp_unlock(so, 1, 0);
1651                         KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
1657          * Calculate amount of space in receive window, 
1658          * and then do TCP input processing. 
1659          * Receive window is amount of space in rcv queue, 
1660          * but not less than advertised window. 
1663         lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1667         win = tcp_sbspace(tp);
1671         else {  /* clip rcv window to 4K for modems */
1672                 if (tp->t_flags & TF_SLOWLINK && slowlink_wsize > 0)
1673                         win = min(win, slowlink_wsize);
1675         tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
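        /*
         * For illustration: if the socket buffer currently has 32 KB free but
         * the window already advertised extends 40 KB past rcv_nxt, rcv_wnd
         * stays at 40 KB (rcv_adv - rcv_nxt) so the offered window never
         * shrinks; the sbspace value only matters when it is larger.
         */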
1678         switch (tp->t_state) {
1681          * Initialize tp->rcv_nxt, and tp->irs, select an initial 
1682          * tp->iss, and send a segment: 
1683          *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 
1684          * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 
1685          * Fill in remote peer address fields if not previously specified. 
1686          * Enter SYN_RECEIVED state, and process any other fields of this 
1687          * segment in this state. 
1690                 register struct sockaddr_in *sin;
1692                 register struct sockaddr_in6 *sin6;
1696                 lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1700                         MALLOC(sin6, struct sockaddr_in6 *, sizeof *sin6,
1701                                M_SONAME, M_NOWAIT);
1704                         bzero(sin6, sizeof(*sin6));
1705                         sin6->sin6_family = AF_INET6;
1706                         sin6->sin6_len = sizeof(*sin6);
1707                         sin6->sin6_addr = ip6->ip6_src;
1708                         sin6->sin6_port = th->th_sport;
1709                         laddr6 = inp->in6p_laddr;
1710                         if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
1711                                 inp->in6p_laddr = ip6->ip6_dst;
1712                         if (in6_pcbconnect(inp, (struct sockaddr *)sin6,
1714                                 inp->in6p_laddr = laddr6;
1715                                 FREE(sin6, M_SONAME);
1718                         FREE(sin6, M_SONAME);
1723                         lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
1725                         MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
1729                         sin->sin_family = AF_INET;
1730                         sin->sin_len = sizeof(*sin);
1731                         sin->sin_addr = ip->ip_src;
1732                         sin->sin_port = th->th_sport;
1733                         bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
1734                         laddr = inp->inp_laddr;
1735                         if (inp->inp_laddr.s_addr == INADDR_ANY)
1736                                 inp->inp_laddr = ip->ip_dst;
1737                         if (in_pcbconnect(inp, (struct sockaddr *)sin, proc0)) {
1738                                 inp->inp_laddr = laddr;
1739                                 FREE(sin, M_SONAME);
1742                         FREE(sin, M_SONAME);
1745                 tcp_dooptions(tp, optp, optlen, th, &to, ifscope);
1747                 if (tp->sack_enable) {
1748                         if (!(to.to_flags & TOF_SACK))
1749                                 tp->sack_enable = 0;
1751                                 tp->t_flags |= TF_SACK_PERMIT;
1757                         tp->iss = tcp_new_isn(tp);
1759                 tp->irs = th->th_seq;
1760                 tcp_sendseqinit(tp);
1762                 tp->snd_recover = tp->snd_una;
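                /*
                 * tcp_sendseqinit() seeds the send sequence space from the
                 * chosen ISS (roughly snd_una = snd_nxt = snd_max = iss), and
                 * snd_recover is pulled up to snd_una so later NewReno/SACK
                 * recovery tests start from a consistent point.
                 */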
1764                  * Initialization of the tcpcb for transaction; 
1765                  *   set SND.WND = SEG.WND, 
1766                  *   initialize CCsend and CCrecv. 
1768                 tp->snd_wnd = tiwin;    /* initial send-window */
1769                 tp->t_flags |= TF_ACKNOW;
1770                 tp->t_unacksegs = 0;
1771                 tp->t_state = TCPS_SYN_RECEIVED;
1772                 tp->t_timer[TCPT_KEEP] = tp->t_keepinit ? tp->t_keepinit : tcp_keepinit;
1773                 dropsocket = 0;         /* committed to socket */
1774                 tcpstat.tcps_accepts++;
1775                 if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE | TH_CWR)) {
1777                         tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT);
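                /*
                 * A SYN carrying both ECE and CWR is an ECN-setup SYN
                 * (RFC 3168), so the passive side notes that the peer
                 * negotiated ECN and that its own data packets may be sent
                 * with the ECT codepoint.
                 */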
1779 #if CONFIG_IFEF_NOWINDOWSCALE 
1780                 if (tcp_obey_ifef_nowindowscale && m->m_pkthdr.rcvif != NULL &&
1781                     (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE)) {
1782                         /* Window scaling is not enabled on this interface */
1783                         tp->t_flags &= ~TF_REQ_SCALE;
1790          * If the state is SYN_RECEIVED: 
1791          *      if seg contains an ACK, but not for our SYN/ACK, send a RST. 
1793         case TCPS_SYN_RECEIVED:
1794                 if ((thflags & TH_ACK) &&
1795                     (SEQ_LEQ(th->th_ack, tp->snd_una) ||
1796                      SEQ_GT(th->th_ack, tp->snd_max))) {
1797                                 rstreason = BANDLIM_RST_OPENPORT;
1803          * If the state is SYN_SENT: 
1804          *      if seg contains an ACK, but not for our SYN, drop the input. 
1805          *      if seg contains a RST, then drop the connection. 
1806          *      if seg does not contain SYN, then drop it. 
1807          * Otherwise this is an acceptable SYN segment 
1808          *      initialize tp->rcv_nxt and tp->irs 
1809          *      if seg contains ack then advance tp->snd_una 
1810          *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state 
1811          *      arrange for segment to be acked (eventually) 
1812          *      continue processing rest of data/controls, beginning with URG 
1815                 if ((thflags & TH_ACK) &&
1816                     (SEQ_LEQ(th->th_ack, tp->iss) ||
1817                      SEQ_GT(th->th_ack, tp->snd_max))) {
1818                         rstreason = BANDLIM_UNLIMITED;
1821                 if (thflags & TH_RST) {
1822                         if ((thflags & TH_ACK) != 0) {
1823                                 tp = tcp_drop(tp, ECONNREFUSED);
1824                                 postevent(so, 0, EV_RESET);
1828                 if ((thflags & TH_SYN) == 0)
1830                 tp->snd_wnd = th->th_win;       /* initial send window */
1832                 tp->irs = th->th_seq;
1834                 if (thflags & TH_ACK) {
1835                         tcpstat.tcps_connects++;
1837                         if ((thflags & (TH_ECE | TH_CWR)) == (TH_ECE)) {
1838                                 /* ECN-setup SYN-ACK */ 
1839                                 tp->ecn_flags |= TE_SETUPRECEIVED;
1842                                 /* non-ECN-setup SYN-ACK */ 
1843                                 tp->ecn_flags &= ~TE_SENDIPECT;
1846 #if CONFIG_MACF_NET && CONFIG_MACF_SOCKET 
1847                         /* XXXMAC: recursive lock: SOCK_LOCK(so); */ 
1848                         mac_socketpeer_label_associate_mbuf(m, so);
1849                         /* XXXMAC: SOCK_UNLOCK(so); */ 
1851                         /* Do window scaling on this connection? */ 
1852                         if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
1853                                 (TF_RCVD_SCALE|TF_REQ_SCALE)) {
1854                                 tp->snd_scale = tp->requested_s_scale;
1855                                 tp->rcv_scale = tp->request_r_scale;
1857                         tp->rcv_adv += tp->rcv_wnd;
1858                         tp->snd_una++;          /* SYN is acked */
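                        /*
                         * Window scaling takes effect only when both sides
                         * requested it; snd_scale/rcv_scale are shift counts,
                         * e.g. a received window field of 0x4000 with
                         * snd_scale 2 is treated as 0x10000 bytes once the
                         * scale is applied to later (non-SYN) segments.
                         */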
1860                          * If there's data, delay ACK; if there's also a FIN 
1861                          * ACKNOW will be turned on later. 
1863                         if (DELAY_ACK(tp) && tlen != 0) {
1864                                 tp->t_flags |= TF_DELACK;
1868                                 tp->t_flags |= TF_ACKNOW;
1869                                 tp->t_unacksegs = 0;
1872                          * Received <SYN,ACK> in SYN_SENT[*] state. 
1874                          *      SYN_SENT  --> ESTABLISHED 
1875                          *      SYN_SENT* --> FIN_WAIT_1 
1877                         tp->t_starttime = 0;
1878                         if (tp->t_flags & TF_NEEDFIN) {
1879                                 tp->t_state = TCPS_FIN_WAIT_1;
1880                                 tp->t_flags &= ~TF_NEEDFIN;
1883                                 tp->t_state = TCPS_ESTABLISHED;
1884                                 tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
1886                         /* soisconnected may lead to socket_unlock in case of upcalls, 
1887                          * make sure this is done when everything is setup. 
1892                  *  Received initial SYN in SYN-SENT[*] state => simul- 
1893                  *  taneous open.  If segment contains CC option and there is 
1894                  *  a cached CC, apply TAO test; if it succeeds, connection is 
1895                  *  half-synchronized.  Otherwise, do 3-way handshake: 
1896                  *        SYN-SENT -> SYN-RECEIVED 
1897                  *        SYN-SENT* -> SYN-RECEIVED* 
1899                         tp->t_flags |= TF_ACKNOW;
1900                         tp->t_timer[TCPT_REXMT] = 0;
1901                         tp->t_state = TCPS_SYN_RECEIVED;
1907                  * Advance th->th_seq to correspond to first data byte. 
1908                  * If data, trim to stay within window, 
1909                  * dropping FIN if necessary. 
1912                 if (tlen > tp->rcv_wnd) {
1913                         todrop = tlen - tp->rcv_wnd;
1917                         tcpstat.tcps_rcvpackafterwin++;
1918                         tcpstat.tcps_rcvbyteafterwin += todrop;
1920                 tp->snd_wl1 = th->th_seq - 1;
1921                 tp->rcv_up = th->th_seq;
1923                  *  Client side of transaction: already sent SYN and data. 
1924                  *  If the remote host used T/TCP to validate the SYN, 
1925                  *  our data will be ACK'd; if so, enter normal data segment 
1926                  *  processing in the middle of step 5, ack processing. 
1927                  *  Otherwise, goto step 6. 
1929                 if (thflags & TH_ACK)
1933          * If the state is LAST_ACK or CLOSING or TIME_WAIT: 
1934          *      do normal processing. 
1936          * NB: Leftover from RFC1644 T/TCP.  Cases to be reused later. 
1940         case TCPS_TIME_WAIT:
1941                 break;  /* continue normal processing */ 
1943         /* Received a SYN while connection is already established. 
1944          * This is a "half open connection and other anomalies" described 
1945          * in RFC793 page 34; send an ACK so the remote end resets the connection
1946          * or recovers by adjusting its sequence numbering
1948         case TCPS_ESTABLISHED:
1949                 if (thflags & TH_SYN)
1955          * States other than LISTEN or SYN_SENT. 
1956          * First check the RST flag and sequence number since reset segments 
1957          * are exempt from the timestamp and connection count tests.  This 
1958          * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix 
1959          * below which allowed reset segments in half the sequence space 
1960          * to fall though and be processed (which gives forged reset 
1961          * segments with a random sequence number a 50 percent chance of 
1962          * killing a connection). 
1963          * Then check timestamp, if present. 
1964          * Then check the connection count, if present. 
1965          * Then check that at least some bytes of segment are within 
1966          * receive window.  If segment begins before rcv_nxt, 
1967          * drop leading data (and SYN); if nothing left, just ack. 
1970          * If the RST bit is set, check the sequence number to see 
1971          * if this is a valid reset segment. 
1973          *   In all states except SYN-SENT, all reset (RST) segments 
1974          *   are validated by checking their SEQ-fields.  A reset is 
1975          *   valid if its sequence number is in the window. 
1976          * Note: this does not take into account delayed ACKs, so 
1977          *   we should test against last_ack_sent instead of rcv_nxt. 
1978          *   The sequence number in the reset segment is normally an 
1979          *   echo of our outgoing acknowledgement numbers, but some hosts
1980          *   send a reset with the sequence number at the rightmost edge 
1981          *   of our receive window, and we have to handle this case. 
1982          * Note 2: Paul Watson's paper "Slipping in the Window" has shown 
1983          *   that brute force RST attacks are possible.  To combat this, 
1984          *   we use a much stricter check while in the ESTABLISHED state, 
1985          *   only accepting RSTs where the sequence number is equal to 
1986          *   last_ack_sent.  In all other states (the states in which a 
1987          *   RST is more likely), the more permissive check is used. 
1988          * If we have multiple segments in flight, the initial reset
1989          * segment sequence numbers will be to the left of last_ack_sent, 
1990          * but they will eventually catch up. 
1991          * In any case, it never made sense to trim reset segments to 
1992          * fit the receive window since RFC 1122 says: 
1993          *   4.2.2.12  RST Segment: RFC-793 Section 3.4 
1995          *    A TCP SHOULD allow a received RST segment to include data. 
1998          *         It has been suggested that a RST segment could contain 
1999          *         ASCII text that encoded and explained the cause of the 
2000          *         RST.  No standard has yet been established for such 
2003          * If the reset segment passes the sequence number test examine 
2005          *    SYN_RECEIVED STATE: 
2006          *      If passive open, return to LISTEN state. 
2007          *      If active open, inform user that connection was refused. 
2008          *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT STATES: 
2009          *      Inform user that connection was reset, and close tcb. 
2010          *    CLOSING, LAST_ACK STATES: 
2013          *      Drop the segment - see Stevens, vol. 2, p. 964 and 
2016          *      Radar 4803931: Allows for the case where we ACKed the FIN but 
2017          *                     there is already a RST in flight from the peer. 
2018          *                     In that case, accept the RST for non-established 
2019          *                     state if it's one off from last_ack_sent. 
2022         if (thflags & TH_RST) {
2023                 if ((SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
2024                     SEQ_LT(th->th_seq, tp->last_ack_sent + tp->rcv_wnd)) ||
2025                     (tp->rcv_wnd == 0 &&
2026                         ((tp->last_ack_sent == th->th_seq) || ((tp->last_ack_sent -1) == th->th_seq)))) {
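                /*
                 * Per the note above: for ESTABLISHED connections only a RST
                 * whose sequence number exactly equals last_ack_sent is
                 * honored (see the switch below), while the other states
                 * accept any in-window RST; a zero window additionally allows
                 * last_ack_sent - 1 to cover a RST racing our ACK of a FIN.
                 */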
2027                         switch (tp->t_state) {
2029                         case TCPS_SYN_RECEIVED:
2030                                 so->so_error = ECONNREFUSED;
2033                         case TCPS_ESTABLISHED:
2034                                 if (tp->last_ack_sent != th->th_seq) {
2035                                         tcpstat.tcps_badrst++;
2038                         case TCPS_FIN_WAIT_1:
2039                         case TCPS_CLOSE_WAIT:
2043                         case TCPS_FIN_WAIT_2:
2044                                 so->so_error = ECONNRESET;
2046                                 postevent(so, 0, EV_RESET);
2047                                 tp->t_state = TCPS_CLOSED;
2048                                 tcpstat.tcps_drops++;
2057                         case TCPS_TIME_WAIT:
2065         lck_mtx_assert(((struct inpcb *)so->so_pcb)->inpcb_mtx, LCK_MTX_ASSERT_OWNED);
2069          * RFC 1323 PAWS: If we have a timestamp reply on this segment 
2070          * and it's less than ts_recent, drop it. 
2072         if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
2073             TSTMP_LT(to.to_tsval, tp->ts_recent)) {
2075                 /* Check to see if ts_recent is over 24 days old.  */
2076                 if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
2078                          * Invalidate ts_recent.  If this segment updates 
2079                          * ts_recent, the age will be reset later and ts_recent 
2080                          * will get a valid value.  If it does not, setting 
2081                          * ts_recent to zero will at least satisfy the 
2082                          * requirement that zero be placed in the timestamp 
2083                          * echo reply when ts_recent isn't valid.  The 
2084                          * age isn't reset until we get a valid ts_recent 
2085                          * because we don't want out-of-order segments to be 
2086                          * dropped when ts_recent is old. 
2090                         tcpstat.tcps_rcvduppack++;
2091                         tcpstat.tcps_rcvdupbyte += tlen;
2092                         tcpstat.tcps_pawsdrop++;
2100          * In the SYN-RECEIVED state, validate that the packet belongs to 
2101          * this connection before trimming the data to fit the receive 
2102          * window.  Check the sequence number versus IRS since we know 
2103          * the sequence numbers haven't wrapped.  This is a partial fix 
2104          * for the "LAND" DoS attack. 
2106         if (tp->t_state == TCPS_SYN_RECEIVED && SEQ_LT(th->th_seq, tp->irs)) {
2107                 rstreason = BANDLIM_RST_OPENPORT;
2111         todrop = tp->rcv_nxt - th->th_seq;
2113                 if (thflags & TH_SYN) {
2123                  * Following if statement from Stevens, vol. 2, p. 960.
2126                     || (todrop == tlen && (thflags & TH_FIN) == 0)) {
2128                          * Any valid FIN must be to the left of the window.
2129                          * At this point the FIN must be a duplicate or out
2130                          * of sequence; drop it.
2135                          * Send an ACK to resynchronize and drop any data.
2136                          * But keep on processing for RST or ACK.
2138                         tp->t_flags |= TF_ACKNOW;
2139                         tp->t_unacksegs = 0;
2141                         tcpstat.tcps_rcvduppack++;
2142                         tcpstat.tcps_rcvdupbyte += todrop;
2144                         tcpstat.tcps_rcvpartduppack++;
2145                         tcpstat.tcps_rcvpartdupbyte += todrop;
2147                 drop_hdrlen += todrop;  /* drop from the top afterwards */
2148                 th->th_seq += todrop;
2150                 if (th->th_urp > todrop)
2151                         th->th_urp -= todrop;
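                /*
                 * For illustration: if rcv_nxt is 1000 and a segment arrives
                 * with th_seq 900 and 300 bytes of data, todrop is 100, so the
                 * first 100 already-received bytes are skipped by advancing
                 * drop_hdrlen and th_seq (and the urgent offset, if any)
                 * before the remaining 200 new bytes are processed.
                 */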
2159          * If new data are received on a connection after the 
2160          * user processes are gone, then RST the other end. 
2162         if ((so->so_state & SS_NOFDREF) &&
2163             tp->t_state > TCPS_CLOSE_WAIT && tlen) {
2165                 tcpstat.tcps_rcvafterclose++;
2166                 rstreason = BANDLIM_UNLIMITED;
2171          * If segment ends after window, drop trailing data 
2172          * (and PUSH and FIN); if nothing left, just ACK. 
2174         todrop = (th->th_seq+tlen) - (tp->rcv_nxt+tp->rcv_wnd);
2176                 tcpstat.tcps_rcvpackafterwin++;
2177                 if (todrop >= tlen) {
2178                         tcpstat.tcps_rcvbyteafterwin += tlen;
2180                          * If a new connection request is received
2181                          * while in TIME_WAIT, drop the old connection
2182                          * and start over if the sequence numbers
2183                          * are above the previous ones.
2185                         if (thflags & TH_SYN &&
2186                             tp->t_state == TCPS_TIME_WAIT &&
2187                             SEQ_GT(th->th_seq, tp->rcv_nxt)) {
2188                                 iss = tcp_new_isn(tp);
2190                                 tcp_unlock(so, 1, 0);
2194                          * If window is closed can only take segments at
2195                          * window edge, and have to drop data and PUSH from
2196                          * incoming segments.  Continue processing, but
2197                          * remember to ack.  Otherwise, drop segment
2200                         if (tp->rcv_wnd == 0 && th->th_seq == tp->rcv_nxt) {
2201                                 tp->t_flags |= TF_ACKNOW;
2202                                 tp->t_unacksegs = 0;
2203                                 tcpstat.tcps_rcvwinprobe++;
2207                         tcpstat.tcps_rcvbyteafterwin += todrop;
2210                 thflags &= ~(TH_PUSH|TH_FIN);
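        /*
         * For illustration: with rcv_nxt 1000 and rcv_wnd 500 the right edge
         * of the window is 1500; a segment with th_seq 1300 and tlen 400
         * overshoots by todrop = 200, so those trailing 200 bytes (and any
         * PUSH/FIN) are dropped while the in-window part is kept.
         */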
2214          * If last ACK falls within this segment's sequence numbers, 
2215          * record its timestamp. 
2217          * 1) That the test incorporates suggestions from the latest 
2218          *    proposal of the tcplw@cray.com list (Braden 1993/04/26). 
2219          * 2) That updating only on newer timestamps interferes with 
2220          *    our earlier PAWS tests, so this check should be solely 
2221          *    predicated on the sequence space of this segment. 
2222          * 3) That we modify the segment boundary check to be  
2223          *        Last.ACK.Sent <= SEG.SEQ + SEG.Len   
2224          *    instead of RFC1323's 
2225          *        Last.ACK.Sent < SEG.SEQ + SEG.Len, 
2226          *    This modified check allows us to overcome RFC1323's 
2227          *    limitations as described in Stevens TCP/IP Illustrated 
2228          *    Vol. 2 p.869. In such cases, we can still calculate the 
2229          *    RTT correctly when RCV.NXT == Last.ACK.Sent. 
2231         if ((to.to_flags & TOF_TS) != 0 &&
2232             SEQ_LEQ(th->th_seq, tp->last_ack_sent) &&
2233             SEQ_LEQ(tp->last_ack_sent, th->th_seq + tlen +
2234                 ((thflags & (TH_SYN|TH_FIN)) != 0))) {
2235                 tp->ts_recent_age = tcp_now;
2236                 tp->ts_recent = to.to_tsval;
2240          * If a SYN is in the window, then this is an 
2241          * error and we send an RST and drop the connection. 
2243         if (thflags & TH_SYN) {
2244                 tp = tcp_drop(tp, ECONNRESET);
2245                 rstreason = BANDLIM_UNLIMITED;
2246                 postevent(so, 0, EV_RESET);
2251          * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN 
2252          * flag is on (half-synchronized state), then queue data for 
2253          * later processing; else drop segment and return. 
2255         if ((thflags & TH_ACK) == 0) {
2256                 if (tp->t_state == TCPS_SYN_RECEIVED ||
2257                     (tp->t_flags & TF_NEEDSYN))
2259                 else if (tp->t_flags & TF_ACKNOW)
2268         switch (tp->t_state) {
2271          * In SYN_RECEIVED state, the ack ACKs our SYN, so enter 
2272          * ESTABLISHED state and continue processing. 
2273          * The ACK was checked above. 
2275         case TCPS_SYN_RECEIVED:
2277                 tcpstat.tcps_connects++;
2279                 /* Do window scaling? */
2280                 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
2281                         (TF_RCVD_SCALE|TF_REQ_SCALE)) {
2282                         tp->snd_scale = tp->requested_s_scale;
2283                         tp->rcv_scale = tp->request_r_scale;
2287                  *      SYN-RECEIVED  -> ESTABLISHED 
2288                  *      SYN-RECEIVED* -> FIN-WAIT-1 
2290                 tp->t_starttime = 0;
2291                 if (tp->t_flags & TF_NEEDFIN) {
2292                         tp->t_state = TCPS_FIN_WAIT_1;
2293                         tp->t_flags &= ~TF_NEEDFIN;
2295                         tp->t_state = TCPS_ESTABLISHED;
2296                         tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
2299                  * If segment contains data or ACK, will call tcp_reass()
2300                  * later; if not, do so now to pass queued data to user.
2302                 if (tlen == 0 && (thflags & TH_FIN) == 0)
2303                         (void) tcp_reass(tp, (struct tcphdr *)0, &tlen,
2305                 tp->snd_wl1 = th->th_seq - 1;
2309                 /* soisconnected may lead to socket_unlock in case of upcalls, 
2310                  * make sure this is done when everything is setup. 
2315          * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 
2316          * ACKs.  If the ack is in the range 
2317          *      tp->snd_una < th->th_ack <= tp->snd_max 
2318          * then advance tp->snd_una to th->th_ack and drop 
2319          * data from the retransmission queue.  If this ACK reflects 
2320          * more up to date window information we update our window information. 
2322         case TCPS_ESTABLISHED:
2323         case TCPS_FIN_WAIT_1:
2324         case TCPS_FIN_WAIT_2:
2325         case TCPS_CLOSE_WAIT:
2328         case TCPS_TIME_WAIT:
2329                 if (SEQ_GT(th->th_ack, tp->snd_max)) {
2330                         tcpstat.tcps_rcvacktoomuch++;
2333                 if (tp->sack_enable &&
2334                     (to.to_nsacks > 0 || !TAILQ_EMPTY(&tp->snd_holes)))
2335                         tcp_sack_doack(tp, &to, th->th_ack);
2336                 if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
2337                         if (tlen == 0 && tiwin == tp->snd_wnd) {
2338                                 tcpstat.tcps_rcvdupack++;
2340                                  * If we have outstanding data (other than 
2341                                  * a window probe), this is a completely 
2342                                  * duplicate ack (ie, window info didn't 
2343                                  * change), the ack is the biggest we've 
2344                                  * seen and we've seen exactly our rexmt 
2345                                  * threshold of them, assume a packet
2346                                  * has been dropped and retransmit it. 
2347                                  * Kludge snd_nxt & the congestion 
2348                                  * window so we send only this one 
2351                                  * We know we're losing at the current 
2352                                  * window size so do congestion avoidance 
2353                                  * (set ssthresh to half the current window 
2354                                  * and pull our congestion window back to 
2355                                  * the new ssthresh). 
2357                                  * Dup acks mean that packets have left the 
2358                                  * network (they're now cached at the receiver) 
2359                                  * so bump cwnd by the amount in the receiver 
2360                                  * to keep a constant cwnd packets in the 
2363                                 if (tp->t_timer[TCPT_REXMT] == 0 ||
2364                                     th->th_ack != tp->snd_una)
2366                                 else if (++tp->t_dupacks > tcprexmtthresh ||
2367                                          ((tcp_do_newreno || tp->sack_enable) &&
2368                                           IN_FASTRECOVERY(tp))) {
2369                                         if (tp->sack_enable && IN_FASTRECOVERY(tp)) {
2373                                                  * Compute the amount of data in flight first.
2374                                                  * We can inject new data into the pipe iff
2375                                                  * we have less than 1/2 the original window's
2376                                                  * worth of data in flight.
2378                                                 awnd = (tp->snd_nxt - tp->snd_fack) +
2379                                                         tp->sackhint.sack_bytes_rexmit;
2380                                                 if (awnd < tp->snd_ssthresh) {
2381                                                         tp->snd_cwnd += tp->t_maxseg;
2382                                                         if (tp->snd_cwnd > tp->snd_ssthresh)
2383                                                                 tp->snd_cwnd = tp->snd_ssthresh;
2386                                                 tp->snd_cwnd += tp->t_maxseg;
2387                                         tp->t_unacksegs = 0;
2388                                         (void) tcp_output(tp);
2390                                 } else if (tp->t_dupacks == tcprexmtthresh) {
2391                                         tcp_seq onxt = tp->snd_nxt;
2395                                          * If we're doing sack, check to 
2396                                          * see if we're already in sack 
2397                                          * recovery. If we're not doing sack, 
2398                                          * check to see if we're in newreno 
2401                                         if (tp->sack_enable) {
2402                                                 if (IN_FASTRECOVERY(tp)) {
2406                                         } else if (tcp_do_newreno) {
2407                                                 if (SEQ_LEQ(th->th_ack,
2413                                         win = min(tp->snd_wnd, tp->snd_cwnd) /
2417                                         tp->snd_ssthresh = win * tp->t_maxseg;
2418                                         ENTER_FASTRECOVERY(tp);
2419                                         tp->snd_recover = tp->snd_max;
2420                                         tp->t_timer[TCPT_REXMT] = 0;
2422                                         tp->ecn_flags |= TE_SENDCWR;
2423                                         if (tp->sack_enable) {
2424                                                 tcpstat.tcps_sack_recovery_episode++;
2425                                                 tp->sack_newdata = tp->snd_nxt;
2426                                                 tp->snd_cwnd = tp->t_maxseg;
2427                                                 tp->t_unacksegs = 0;
2428                                                 (void) tcp_output(tp);
2431                                         tp->snd_nxt = th->th_ack;
2432                                         tp->snd_cwnd = tp->t_maxseg;
2433                                         tp->t_unacksegs = 0;
2434                                         (void) tcp_output(tp);
2435                                         tp->snd_cwnd = tp->snd_ssthresh +
2436                                              tp->t_maxseg * tp->t_dupacks;
2437                                         if (SEQ_GT(onxt, tp->snd_nxt))
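                                        /*
                                         * Entering fast retransmit, roughly:
                                         * ssthresh is set to about half the
                                         * effective window (e.g. 64 KB and a
                                         * 1460-byte MSS give win of roughly 22
                                         * segments, ssthresh about 32 KB),
                                         * cwnd is dropped to one segment so
                                         * only the missing segment is resent,
                                         * then inflated to ssthresh plus
                                         * t_maxseg * t_dupacks to account for
                                         * the segments the dup ACKs say have
                                         * left the network.
                                         */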
2446                  * If the congestion window was inflated to account 
2447                  * for the other side's cached packets, retract it. 
2449                 if (!IN_FASTRECOVERY(tp)) {
2451                          * We were not in fast recovery.  Reset the duplicate ack 
2457                  * If the congestion window was inflated to account 
2458                  * for the other side's cached packets, retract it. 
2461                         if (tcp_do_newreno || tp->sack_enable) {
2462                                 if (SEQ_LT(th->th_ack, tp->snd_recover)) {
2463                                         if (tp->sack_enable)
2464                                                 tcp_sack_partialack(tp, th);
2466                                                 tcp_newreno_partial_ack(tp, th);
2469                                         if (tcp_do_newreno) {
2470                                                 int32_t ss = tp->snd_max - th->th_ack;
2473                                                  * Complete ack.  Inflate the congestion window to
2474                                                  * ssthresh and exit fast recovery.
2476                                                  * Window inflation should have left us with approx.
2477                                                  * snd_ssthresh outstanding data.  But in case we
2478                                                  * would be inclined to send a burst, better to do
2479                                                  * it via the slow start mechanism.
2481                                                 if (ss < tp->snd_ssthresh)
2482                                                         tp->snd_cwnd = ss + tp->t_maxseg;
2484                                                         tp->snd_cwnd = tp->snd_ssthresh;
2488                                                  * Clamp the congestion window to the crossover point
2489                                                  * and exit fast recovery.
2491                                                 if (tp->snd_cwnd > tp->snd_ssthresh)
2492                                                         tp->snd_cwnd = tp->snd_ssthresh;
2495                                         EXIT_FASTRECOVERY(tp);
2497                                         tp->t_bytes_acked = 0;
2502                                  * Clamp the congestion window to the crossover point
2503                                  * and exit fast recovery in non-newreno and non-SACK case.
2505                                 if (tp->snd_cwnd > tp->snd_ssthresh)
2506                                         tp->snd_cwnd = tp->snd_ssthresh;
2507                                 EXIT_FASTRECOVERY(tp);
2509                                 tp->t_bytes_acked = 0;
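                        /*
                         * Leaving fast recovery: a partial ACK (still below
                         * snd_recover) triggers SACK- or NewReno-style
                         * retransmission of the next hole, while a full ACK
                         * deflates cwnd back to roughly ssthresh so the
                         * sender does not burst, then clears the byte-count
                         * accumulator.
                         */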
2515                  * If we reach this point, ACK is not a duplicate, 
2516                  *     i.e., it ACKs something we sent. 
2518                 if (tp->t_flags & TF_NEEDSYN) {
2520                          * T/TCP: Connection was half-synchronized, and our 
2521                          * SYN has been ACK'd (so connection is now fully 
2522                          * synchronized).  Go to non-starred state, 
2523                          * increment snd_una for ACK of SYN, and check if 
2524                          * we can do window scaling. 
2526                         tp->t_flags &= ~TF_NEEDSYN;
2528                         /* Do window scaling? */
2529                         if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
2530                                 (TF_RCVD_SCALE|TF_REQ_SCALE)) {
2531                                 tp->snd_scale = tp->requested_s_scale;
2532                                 tp->rcv_scale = tp->request_r_scale;
2537                 acked = th->th_ack - tp->snd_una;
2538                 tcpstat.tcps_rcvackpack++;
2539                 tcpstat.tcps_rcvackbyte += acked;
2542                  * If we just performed our first retransmit, and the ACK 
2543                  * arrives within our recovery window, then it was a mistake 
2544                  * to do the retransmit in the first place.  Recover our 
2545                  * original cwnd and ssthresh, and proceed to transmit where 
2548                 if (tp->t_rxtshift == 1 && tcp_now < tp->t_badrxtwin) {
2549                         ++tcpstat.tcps_sndrexmitbad;
2550                         tp->snd_cwnd = tp->snd_cwnd_prev;
2551                         tp->snd_ssthresh = tp->snd_ssthresh_prev;
2552                         tp->snd_recover = tp->snd_recover_prev;
2553                         if (tp->t_flags & TF_WASFRECOVERY)
2554                                 ENTER_FASTRECOVERY(tp);
2555                         tp->snd_nxt = tp->snd_max;
2556                         tp->t_badrxtwin = 0;    /* XXX probably not required */
2560                  * If we have a timestamp reply, update smoothed 
2561                  * round trip time.  If no timestamp is present but 
2562                  * transmit timer is running and timed sequence 
2563                  * number was acked, update smoothed round trip time. 
2564                  * Since we now have an rtt measurement, cancel the 
2565                  * timer backoff (cf., Phil Karn's retransmit alg.). 
2566                  * Recompute the initial retransmit timer. 
2567                  * Also makes sure we have a valid time stamp in hand 
2569                  * Some boxes send broken timestamp replies 
2570                  * during the SYN+ACK phase, ignore 
2571                  * timestamps of 0 or we could calculate a 
2572                  * huge RTT and blow up the retransmit timer. 
2574                 if (((to.to_flags & TOF_TS) != 0) && (to.to_tsecr != 0)) {
2575                         if (!tp->t_rttlow || tp->t_rttlow > tcp_now - to.to_tsecr)
2576                                 tp->t_rttlow = tcp_now - to.to_tsecr;
2577                         tcp_xmit_timer(tp, tcp_now - to.to_tsecr);
2578                 } else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq)) {
2579                         if (!tp->t_rttlow || tp->t_rttlow > tcp_now - tp->t_rtttime)
2580                                 tp->t_rttlow = tcp_now - tp->t_rtttime;
2581                         tcp_xmit_timer(tp, tp->t_rtttime);
2585                  * If all outstanding data is acked, stop retransmit 
2586                  * timer and remember to restart (more output or persist). 
2587                  * If there is more data to be acked, restart retransmit 
2588                  * timer, using current (possibly backed-off) value. 
2590                 if (th->th_ack == tp->snd_max) {
2591                         tp->t_timer[TCPT_REXMT] = 0;
2593                 } else if (tp->t_timer[TCPT_PERSIST] == 0)
2594                         tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
2597                  * If no data (only SYN) was ACK'd, 
2598                  *    skip rest of ACK processing. 
2604                  * When new data is acked, open the congestion window. 
2606                 if ((thflags & TH_ECE) != 0 &&
2607                         (tp->ecn_flags & TE_SETUPSENT) != 0) {
2609                          * Reduce the congestion window if we haven't done so.
2611                         if (!(tp->sack_enable && IN_FASTRECOVERY(tp)) &&
2612                                 !(tcp_do_newreno && SEQ_LEQ(th->th_ack, tp->snd_recover))) {
2613                                 tcp_reduce_congestion_window(tp);
2615                 } else if ((!tcp_do_newreno && !tp->sack_enable) ||
2616                     !IN_FASTRECOVERY(tp)) {
2618                          * RFC 3465 - Appropriate Byte Counting.
2620                          * If the window is currently less than ssthresh,
2621                          * open the window by the number of bytes ACKed by
2622                          * the last ACK, however clamp the window increase
2623                          * to an upper limit "L".
2625                          * In congestion avoidance phase, open the window by
2626                          * one segment each time "bytes_acked" grows to be
2627                          * greater than or equal to the congestion window.
2630                         register u_int cw = tp->snd_cwnd;
2631                         register u_int incr = tp->t_maxseg;
2633                         if (tcp_do_rfc3465) {
2635                                 if (cw >= tp->snd_ssthresh) {
2636                                         tp->t_bytes_acked += acked;
2637                                         if (tp->t_bytes_acked >= cw) {
2638                                         /* Time to increase the window. */
2639                                                 tp->t_bytes_acked -= cw;
2641                                         /* No need to increase yet. */
2646                                          * If the user explicitly enables RFC3465
2647                                          * use 2*SMSS for the "L" param.  Otherwise
2648                                          * use the more conservative 1*SMSS.
2650                                          * (See RFC 3465 2.3 Choosing the Limit)
2654                                         abc_lim = (tcp_do_rfc3465_lim2 &&
2655                                                 tp->snd_nxt == tp->snd_max) ? incr * 2 : incr;
2657                                         incr = lmin(acked, abc_lim);
2662                                  * If the window gives us less than ssthresh packets
2663                                  * in flight, open exponentially (segsz per packet).
2664                                  * Otherwise open linearly: segsz per window
2665                                  * (segsz^2 / cwnd per packet).
2668                                         if (cw >= tp->snd_ssthresh) {
2669                                                 incr = max((incr * incr / cw), 1);
2674                         tp->snd_cwnd = min(cw+incr, TCP_MAXWIN<<tp->snd_scale);
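                        /*
                         * For illustration of the RFC 3465 path: below
                         * ssthresh each ACK grows cwnd by min(acked, abc_lim),
                         * where abc_lim is one SMSS (or two when
                         * tcp_do_rfc3465_lim2 is set and snd_nxt == snd_max);
                         * at or above ssthresh cwnd grows by one SMSS only
                         * after a full cwnd worth of bytes has been acked,
                         * i.e. roughly once per RTT.
                         */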
2676                 if (acked > so->so_snd.sb_cc) {
2677                         tp->snd_wnd -= so->so_snd.sb_cc;
2678                         sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
2681                         sbdrop(&so->so_snd, acked);
2682                         tp->snd_wnd -= acked;
2685                 /* detect una wraparound */ 
2686                 if ((tcp_do_newreno || tp->sack_enable) &&
2687                     !IN_FASTRECOVERY(tp) &&
2688                     SEQ_GT(tp->snd_una, tp->snd_recover) &&
2689                     SEQ_LEQ(th->th_ack, tp->snd_recover))
2690                         tp->snd_recover = th->th_ack - 1;
2691                 if ((tcp_do_newreno || tp->sack_enable) &&
2692                     IN_FASTRECOVERY(tp) &&
2693                     SEQ_GEQ(th->th_ack, tp->snd_recover))
2694                         EXIT_FASTRECOVERY(tp);
2695                 tp->snd_una = th->th_ack;
2696                 if (tp->sack_enable) {
2697                         if (SEQ_GT(tp->snd_una, tp->snd_recover))
2698                                 tp->snd_recover = tp->snd_una;
2700                 if (SEQ_LT(tp->snd_nxt, tp->snd_una))
2701                         tp->snd_nxt = tp->snd_una;
2704                  * sowwakeup must happen after snd_una, et al. are updated so that 
2705                  * the sequence numbers are in sync with so_snd 
2709                 switch (tp->t_state) {
2712                  * In FIN_WAIT_1 STATE in addition to the processing 
2713                  * for the ESTABLISHED state if our FIN is now acknowledged 
2714                  * then enter FIN_WAIT_2. 
2716                 case TCPS_FIN_WAIT_1:
2717                         if (ourfinisacked) {
2719                                  * If we can't receive any more 
2720                                  * data, then closing user can proceed. 
2721                                  * Starting the timer is contrary to the 
2722                                  * specification, but if we don't get a FIN 
2723                                  * we'll hang forever. 
2725                                 if (so->so_state & SS_CANTRCVMORE) {
2726                                         tp->t_timer[TCPT_2MSL] = tcp_maxidle;
2727                                         add_to_time_wait(tp);
2728                                         soisdisconnected(so);
2730                                 tp->t_state = TCPS_FIN_WAIT_2;
2731                                 /* fall through and make sure we also recognize data ACKed with the FIN */
2733                         tp->t_flags |= TF_ACKNOW;
2737                  * In CLOSING STATE in addition to the processing for 
2738                  * the ESTABLISHED state if the ACK acknowledges our FIN 
2739                  * then enter the TIME-WAIT state, otherwise ignore 
2743                         if (ourfinisacked) {
2744                                 tp->t_state = TCPS_TIME_WAIT;
2745                                 tcp_canceltimers(tp);
2746                                 /* Shorten TIME_WAIT [RFC-1644, p.28] */
2747                                 if (tp->cc_recv != 0 &&
2748                                     tp->t_starttime < (u_int32_t)tcp_msl)
2749                                         tp->t_timer[TCPT_2MSL] =
2750                                             tp->t_rxtcur * TCPTV_TWTRUNC;
2752                                         tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
2753                                 add_to_time_wait(tp);
2754                                 soisdisconnected(so);
2756                         tp->t_flags |= TF_ACKNOW;
2760                  * In LAST_ACK, we may still be waiting for data to drain 
2761                  * and/or to be acked, as well as for the ack of our FIN. 
2762                  * If our FIN is now acknowledged, delete the TCB, 
2763                  * enter the closed state and return. 
2766                         if (ourfinisacked) {
2773                  * In TIME_WAIT state the only thing that should arrive 
2774                  * is a retransmission of the remote FIN.  Acknowledge 
2775                  * it and restart the finack timer. 
2777                 case TCPS_TIME_WAIT:
2778                         tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
2779                         add_to_time_wait(tp);
2786          * Update window information. 
2787          * Don't look at window if no ACK: TAC's send garbage on first SYN. 
2789         if ((thflags & TH_ACK) &&
2790             (SEQ_LT(tp->snd_wl1, th->th_seq) ||
2791             (tp->snd_wl1 == th->th_seq && (SEQ_LT(tp->snd_wl2, th->th_ack) ||
2792              (tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd))))) {
2793                 /* keep track of pure window updates */
2795                     tp->snd_wl2 == th->th_ack && tiwin > tp->snd_wnd)
2796                         tcpstat.tcps_rcvwinupd++;
2797                 tp->snd_wnd = tiwin;
2798                 tp->snd_wl1 = th->th_seq;
2799                 tp->snd_wl2 = th->th_ack;
2800                 if (tp->snd_wnd > tp->max_sndwnd)
2801                         tp->max_sndwnd = tp->snd_wnd;
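        /*
         * The checks above implement the "newer segment" rule for window
         * updates: take tiwin only from a segment with a higher sequence
         * number, or the same sequence and a higher ack, or the same
         * sequence and ack but a larger window, so a stale reordered
         * segment cannot shrink snd_wnd.
         */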
2806          * Process segments with URG. 
2808         if ((thflags & TH_URG) && th->th_urp &&
2809             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2811                  * This is a kludge, but if we receive and accept
2812                  * random urgent pointers, we'll crash in
2813                  * soreceive.  It's hard to imagine someone
2814                  * actually wanting to send this much urgent data.
2816                 if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
2817                         th->th_urp = 0;                 /* XXX */
2818                         thflags &= ~TH_URG;             /* XXX */
2819                         goto dodata;                    /* XXX */
2822                  * If this segment advances the known urgent pointer, 
2823                  * then mark the data stream.  This should not happen 
2824                  * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 
2825                  * a FIN has been received from the remote side. 
2826                  * In these states we ignore the URG. 
2828                  * According to RFC961 (Assigned Protocols), 
2829                  * the urgent pointer points to the last octet 
2830                  * of urgent data.  We continue, however, 
2831                  * to consider it to indicate the first octet 
2832                  * of data past the urgent section as the original 
2833                  * spec states (in one of two places). 
2835                 if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
2836                         tp->rcv_up = th->th_seq + th->th_urp;
2837                         so->so_oobmark = so->so_rcv.sb_cc +
2838                             (tp->rcv_up - tp->rcv_nxt) - 1;
2839                         if (so->so_oobmark == 0) {
2840                                 so->so_state |= SS_RCVATMARK;
2841                                 postevent(so, 0, EV_OOB);
2844                         tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
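                /*
                 * so_oobmark, for illustration: with 100 bytes already queued
                 * in so_rcv and an urgent pointer 10 bytes into this segment,
                 * the mark is 100 + 10 - 1 = 109 bytes into the stream; a
                 * mark of zero means the socket is already at the OOB mark
                 * (SS_RCVATMARK).
                 */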
2847                  * Remove out of band data so doesn't get presented to user. 
2848                  * This can happen independent of advancing the URG pointer, 
2849                  * but if two URG's are pending at once, some out-of-band 
2850                  * data may creep in... ick. 
2852                 if (th->th_urp <= (u_int32_t)tlen
2854                      && (so->so_options & SO_OOBINLINE) == 0
2857                         tcp_pulloutofband(so, th, m,
2858                                 drop_hdrlen);   /* hdr drop is delayed */
2861                  * If no out of band data is expected, 
2862                  * pull receive urgent pointer along 
2863                  * with the receive window. 
2865                 if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
2866                         tp->rcv_up = tp->rcv_nxt;
2870          * Process the segment text, merging it into the TCP sequencing queue, 
2871          * and arranging for acknowledgment of receipt if necessary. 
2872          * This process logically involves adjusting tp->rcv_wnd as data 
2873          * is presented to the user (this happens in tcp_usrreq.c, 
2874          * case PRU_RCVD).  If a FIN has already been received on this 
2875          * connection then we just ignore the text. 
2877         if ((tlen || (thflags & TH_FIN)) &&
2878             TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2879                 tcp_seq save_start = th->th_seq;
2880                 tcp_seq save_end = th->th_seq + tlen;
2881                 m_adj(m, drop_hdrlen);  /* delayed header drop */
2883                  * Insert segment which includes th into TCP reassembly queue 
2884                  * with control block tp.  Set thflags to whether reassembly now 
2885                  * includes a segment with FIN.  This handles the common case 
2886                  * inline (segment is the next to be received on an established 
2887                  * connection, and the queue is empty), avoiding linkage into 
2888                  * and removal from the queue and repetition of various 
2890                  * Set DELACK for segments received in order, but ack 
2891                  * immediately when segments are out of order (so 
2892                  * fast retransmit can work). 
2894                 if (th->th_seq == tp->rcv_nxt &&
2895                     LIST_EMPTY(&tp->t_segq) &&
2896                     TCPS_HAVEESTABLISHED(tp->t_state)) {
2897                         if (DELAY_ACK(tp) && ((tp->t_flags & TF_ACKNOW) == 0)) {
2898                                 tp->t_flags |= TF_DELACK;
2902                                 tp->t_unacksegs = 0;
2903                                 tp->t_flags |= TF_ACKNOW;
2905                         tp->rcv_nxt += tlen;
2906                         thflags = th->th_flags & TH_FIN;
2907                         tcpstat.tcps_rcvpack++;
2908                         tcpstat.tcps_rcvbyte += tlen;
2910                         if (sbappendstream(&so->so_rcv, m))
2913                         thflags = tcp_reass(tp, th, &tlen, m);
2914                         tp->t_flags |= TF_ACKNOW;
2915                         tp->t_unacksegs = 0;
2918                 if (tlen > 0 && tp->sack_enable)
2919                         tcp_update_sack_list(tp, save_start, save_end);
2921                 if (tp->t_flags & TF_DELACK)
2925                                 KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport),
2926                                         (((ip6->ip6_src.s6_addr16[0]) << 16) | (ip6->ip6_dst.s6_addr16[0])),
2927                                         th->th_seq, th->th_ack, th->th_win);
2932                                 KERNEL_DEBUG(DBG_LAYER_END, ((th->th_dport << 16) | th->th_sport),
2933                                         (((ip->ip_src.s_addr & 0xffff) << 16) | (ip->ip_dst.s_addr & 0xffff)),
2934                                         th->th_seq, th->th_ack, th->th_win);
2939                  * Note the amount of data that peer has sent into 
2940                  * our window, in order to estimate the sender's 
2943                 len = (u_int)(so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt));
2944                 if (len > so->so_rcv.sb_maxused)
2945                         so->so_rcv.sb_maxused = len;
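                 /*
                  * Illustration (not from the original source): with a 64 KB
                  * receive buffer (sb_hiwat = 65536) and 16 KB of advertised
                  * window still open (rcv_adv - rcv_nxt = 16384), len is 49152,
                  * i.e. the peer has filled roughly 48 KB of the window we
                  * offered; sb_maxused records the high-water mark of that
                  * estimate.
                  */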
2952          * If FIN is received ACK the FIN and let the user know 
2953          * that the connection is closing. 
2955         if (thflags & TH_FIN) {
2956                 if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
2958                         postevent(so, 0, EV_FIN);
2962                          * If connection is half-synchronized
2963                          * (ie NEEDSYN flag on) then delay ACK,
2964                          * so it may be piggybacked when SYN is sent. 
2965                          * Otherwise, since we received a FIN then no 
2966                          * more input can be expected, send ACK now. 
2968                         if (DELAY_ACK(tp) && (tp->t_flags & TF_NEEDSYN)) {
2969                                 tp->t_flags |= TF_DELACK;
2973                                 tp->t_flags |= TF_ACKNOW;
2974                                 tp->t_unacksegs = 0;
2978                 switch (tp->t_state) {
2981                  * In SYN_RECEIVED and ESTABLISHED STATES 
2982                  * enter the CLOSE_WAIT state. 
2984                 case TCPS_SYN_RECEIVED:
2985                         tp->t_starttime = 0;
2986                 case TCPS_ESTABLISHED:
2987                         tp->t_state = TCPS_CLOSE_WAIT;
2991                  * If still in FIN_WAIT_1 STATE FIN has not been acked so 
2992                  * enter the CLOSING state. 
2994                 case TCPS_FIN_WAIT_1:
2995                         tp->t_state = TCPS_CLOSING;
2999                  * In FIN_WAIT_2 state enter the TIME_WAIT state, 
3000                  * starting the time-wait timer, turning off the other 
3003                 case TCPS_FIN_WAIT_2:
3004                         tp->t_state = TCPS_TIME_WAIT;
3005                         tcp_canceltimers(tp);
3006                         /* Shorten TIME_WAIT [RFC-1644, p.28] */
3007                         if (tp->cc_recv != 0 &&
3008                             tp->t_starttime < (u_int32_t)tcp_msl) {
3009                                 tp->t_timer[TCPT_2MSL] =
3010                                     tp->t_rxtcur * TCPTV_TWTRUNC;
3011                                 /* For transaction client, force ACK now. */
3012                                 tp->t_flags |= TF_ACKNOW;
3013                                 tp->t_unacksegs = 0;
3016                                 tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
3018                         add_to_time_wait(tp);
3019                         soisdisconnected(so);
3023                  * In TIME_WAIT state restart the 2 MSL time_wait timer. 
3025                 case TCPS_TIME_WAIT:
3026                         tp->t_timer[TCPT_2MSL] = 2 * tcp_msl;
3027                         add_to_time_wait(tp);
3032         if (so->so_options & SO_DEBUG)
3033                 tcp_trace(TA_INPUT, ostate, tp, (void *)tcp_saveipgen,
3038          * Return any desired output. 
3040         if (needoutput || (tp->t_flags & TF_ACKNOW)) {
3041                 tp->t_unacksegs = 0;
3042                 (void) tcp_output(tp);
3044         tcp_unlock(so, 1, 0);
3045         KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
3050          * Generate an ACK dropping incoming segment if it occupies 
3051          * sequence space, where the ACK reflects our state. 
3053          * We can now skip the test for the RST flag since all 
3054          * paths to this code happen after packets containing 
3055          * RST have been dropped. 
3057          * In the SYN-RECEIVED state, don't send an ACK unless the 
3058          * segment we received passes the SYN-RECEIVED ACK test. 
3059          * If it fails send a RST.  This breaks the loop in the 
3060          * "LAND" DoS attack, and also prevents an ACK storm 
3061          * between two listening ports that have been sent forged 
3062          * SYN segments, each with the source address of the other. 
3064         if (tp->t_state == TCPS_SYN_RECEIVED && (thflags & TH_ACK) &&
3065             (SEQ_GT(tp->snd_una, th->th_ack) ||
3066              SEQ_GT(th->th_ack, tp->snd_max)) ) {
3067                 rstreason = BANDLIM_RST_OPENPORT;
3071         if (so->so_options & SO_DEBUG)
3072                 tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
3076         tp->t_flags |= TF_ACKNOW;
3077         tp->t_unacksegs = 0;
3078         (void) tcp_output(tp);
3079         tcp_unlock(so, 1, 0);
3080         KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
3082 dropwithresetnosock:
3086          * Generate a RST, dropping incoming segment. 
3087          * Make ACK acceptable to originator of segment. 
3088          * Don't bother to respond if destination was broadcast/multicast. 
3090         if ((thflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST))
3094                 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
3095                     IN6_IS_ADDR_MULTICAST(&ip6->ip6_src))
3099         if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
3100             IN_MULTICAST(ntohl(ip->ip_src.s_addr)) ||
3101             ip->ip_src.s_addr == htonl(INADDR_BROADCAST) ||
3102             in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
3104         /* IPv6 anycast check is done at tcp6_input() */ 
3107          * Perform bandwidth limiting. 
3110         if (badport_bandlim(rstreason) < 0)
3115         if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
3116                 tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
3119         if (thflags & TH_ACK)
3120                 /* mtod() below is safe as long as hdr dropping is delayed */
3121                 tcp_respond(tp, mtod(m, void *), th, m, (tcp_seq)0, th->th_ack,
3124                 if (thflags & TH_SYN)
3126                 /* mtod() below is safe as long as hdr dropping is delayed */
3127                 tcp_respond(tp, mtod(m, void *), th, m, th->th_seq+tlen,
3128                     (tcp_seq)0, TH_RST|TH_ACK, ifscope);
3130         /* destroy temporarily created socket */
3133                 tcp_unlock(so, 1, 0);
3136                 if ((inp != NULL) && (nosock == 0))
3137                         tcp_unlock(so, 1, 0);
3138         KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
3144          * Drop space held by incoming segment and return. 
3147         if (tp == 0 || (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
3148                 tcp_trace(TA_DROP, ostate, tp, (void *)tcp_saveipgen,
3152         /* destroy temporarily created socket */
3155                 tcp_unlock(so, 1, 0);
3159                         tcp_unlock(so, 1, 0);
3160         KERNEL_DEBUG(DBG_FNC_TCP_INPUT | DBG_FUNC_END,0,0,0,0,0);
3165 tcp_dooptions(tp, cp, cnt, th, to, input_ifscope)
3167  * Parse TCP options and place in tcpopt.
3174         unsigned int input_ifscope;
3179         for (; cnt > 0; cnt -= optlen, cp += optlen) {
3181                 if (opt == TCPOPT_EOL)
3183                 if (opt == TCPOPT_NOP)
3189                         if (optlen < 2 || optlen > cnt)
3198                         if (optlen != TCPOLEN_MAXSEG)
3200                         if (!(th->th_flags & TH_SYN))
3202                         bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
3204 #if BYTE_ORDER != BIG_ENDIAN 
3211                         if (optlen != TCPOLEN_WINDOW)
3213                         if (!(th->th_flags & TH_SYN))
3215                         tp->t_flags |= TF_RCVD_SCALE;
3216                         tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
3219                 case TCPOPT_TIMESTAMP:
3220                         if (optlen != TCPOLEN_TIMESTAMP)
3222                         to->to_flags |= TOF_TS;
3223                         bcopy((char *)cp + 2,
3224                             (char *)&to->to_tsval, sizeof(to->to_tsval));
3226 #if BYTE_ORDER != BIG_ENDIAN 
3227                         NTOHL(to->to_tsval);
3230                         bcopy((char *)cp + 6,
3231                             (char *)&to->to_tsecr, sizeof(to->to_tsecr));
3233 #if BYTE_ORDER != BIG_ENDIAN 
3234                         NTOHL(to->to_tsecr);
3238                          * A timestamp received in a SYN makes 
3239                          * it ok to send timestamp requests and replies. 
3241                         if (th->th_flags & TH_SYN) {
3242                                 tp->t_flags |= TF_RCVD_TSTMP;
3243                                 tp->ts_recent = to->to_tsval;
3244                                 tp->ts_recent_age = tcp_now;
3247                 case TCPOPT_SACK_PERMITTED:
3249                             optlen != TCPOLEN_SACK_PERMITTED)
3251                         if (th->th_flags & TH_SYN)
3252                                 to->to_flags |= TOF_SACK;
3255                         if (optlen <= 2 || (optlen - 2) % TCPOLEN_SACK != 0)
3257                         to->to_nsacks = (optlen - 2) / TCPOLEN_SACK;
3258                         to->to_sacks = cp + 2;
3259                         tcpstat.tcps_sack_rcv_blocks++;
3264         if (th->th_flags & TH_SYN)
3265                 tcp_mss(tp, mss, input_ifscope);        /* sets t_maxseg */
3269  * Pull out of band byte out of a segment so 
3270  * it doesn't appear in the user's data queue. 
3271  * It is still reflected in the segment length for 
3272  * sequencing purposes. 
3275 tcp_pulloutofband(so, th, m, off)
3278         register struct mbuf *m;
3279         int off;                /* delayed to be dropped hdrlen */
3281         int cnt = off + th->th_urp - 1;
3284                 if (m->m_len > cnt) {
3285                         char *cp = mtod(m, caddr_t) + cnt;
3286                         struct tcpcb *tp = sototcpcb(so);
3289                         tp->t_oobflags |= TCPOOB_HAVEDATA;
3290                         bcopy(cp+1, cp, (unsigned)(m->m_len - cnt - 1));
3292                         if (m->m_flags & M_PKTHDR)
3301         panic("tcp_pulloutofband"); 
3305  * Collect new round-trip time estimate 
3306  * and update averages and current timeout. 
3309 tcp_xmit_timer(tp, rtt)
3310         register struct tcpcb *tp;
3315         tcpstat.tcps_rttupdated++;
3317         if (tp->t_srtt != 0) {
3319                  * srtt is stored as fixed point with 5 bits after the
3320                  * binary point (i.e., scaled by 32).  The following magic
3321                  * is equivalent to the smoothing algorithm in rfc793 with
3322                  * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
3323                  * point).  Adjust rtt to origin 0.
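                 /*
                  * Illustration (not from the original source, assuming
                  * TCP_RTT_SHIFT is 5 and TCP_DELTA_SHIFT is 2): with a
                  * smoothed RTT of 100 ticks, t_srtt is 3200.  A new sample of
                  * rtt = 133 ticks gives delta = (132 << 2) - (3200 >> 3) =
                  * 528 - 400 = 128, so t_srtt becomes 3328, i.e. 104 ticks --
                  * the estimate moved one eighth of the way toward the sample.
                  */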
3325                 delta = ((rtt - 1) << TCP_DELTA_SHIFT)
3326                         - (tp->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT));
3328                 if ((tp->t_srtt += delta) <= 0)
3332                  * We accumulate a smoothed rtt variance (actually, a 
3333                  * smoothed mean difference), then set the retransmit 
3334                  * timer to smoothed rtt + 4 times the smoothed variance. 
3335                  * rttvar is stored as fixed point with 4 bits after the 
3336                  * binary point (scaled by 16).  The following is 
3337                  * equivalent to rfc793 smoothing with an alpha of .75 
3338                  * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces 
3339                  * rfc793's wired-in beta. 
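                 /*
                  * Continuing the illustration above (not from the original
                  * source, assuming TCP_RTTVAR_SHIFT is 4, i.e. rttvar scaled
                  * by 16): the sample missed the old estimate by 32 ticks, so
                  * |delta| is 128; the code below subtracts t_rttvar >> 2 and
                  * folds the rest in, turning an old rttvar of 10 ticks
                  * (t_rttvar = 160) into 160 + (128 - 40) = 248, about 15.5
                  * ticks -- a quarter of the way toward the new deviation.
                  */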
3343                 delta -= tp->t_rttvar >> (TCP_RTTVAR_SHIFT - TCP_DELTA_SHIFT);
3344                 if ((tp->t_rttvar += delta) <= 0)
3346                 if (tp->t_rttbest > tp->t_srtt + tp->t_rttvar)
3347                     tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
3350                  * No rtt measurement yet - use the unsmoothed rtt. 
3351                  * Set the variance to half the rtt (so our first 
3352                  * retransmit happens at 3*rtt). 
3354                 tp->t_srtt = rtt << TCP_RTT_SHIFT;
3355                 tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
3356                 tp->t_rttbest = tp->t_srtt + tp->t_rttvar;
3362          * the retransmit should happen at rtt + 4 * rttvar. 
3363          * Because of the way we do the smoothing, srtt and rttvar 
3364          * will each average +1/2 tick of bias.  When we compute 
3365          * the retransmit timer, we want 1/2 tick of rounding and 
3366          * 1 extra tick because of +-1/2 tick uncertainty in the 
3367          * firing of the timer.  The bias will give us exactly the 
3368          * 1.5 tick we need.  But, because the bias is 
3369          * statistical, we have to test that we don't drop below 
3370          * the minimum feasible timer (which is 2 ticks). 
3372         TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
3373                       max(tp->t_rttmin, rtt + 2), TCPTV_REXMTMAX);
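         /*
          * Worked example (not from the original source): TCP_REXMTVAL(tp)
          * works out to the smoothed rtt plus four times the smoothed
          * variance, per the comment above.  With srtt of about 104 ticks and
          * rttvar of about 15.5 ticks that is roughly 166 ticks, which
          * TCPT_RANGESET then clamps between max(t_rttmin, rtt + 2) and
          * TCPTV_REXMTMAX before storing it in t_rxtcur.
          */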
3376          * We received an ack for a packet that wasn't retransmitted; 
3377          * it is probably safe to discard any error indications we've 
3378          * received recently.  This isn't quite right, but close enough 
3379          * for now (a route might have failed after we sent a segment, 
3380          * and the return path might not be symmetrical). 
3382         tp->t_softerror = 0;
3385 static inline unsigned int
3386 tcp_maxmtu(struct rtentry *rt)
3388         unsigned int maxmtu;
3390         RT_LOCK_ASSERT_HELD(rt);
3391         if (rt->rt_rmx.rmx_mtu == 0)
3392                 maxmtu = rt->rt_ifp->if_mtu;
3394                 maxmtu = MIN(rt->rt_rmx.rmx_mtu, rt->rt_ifp->if_mtu);
3400 static inline unsigned int
3401 tcp_maxmtu6(struct rtentry *rt)
3403         unsigned int maxmtu;
3405         RT_LOCK_ASSERT_HELD(rt);
3406         lck_rw_lock_shared(nd_if_rwlock);
3407         if (rt->rt_rmx.rmx_mtu == 0)
3408                 maxmtu = IN6_LINKMTU(rt->rt_ifp);
3410                 maxmtu = MIN(rt->rt_rmx.rmx_mtu, IN6_LINKMTU(rt->rt_ifp));
3411         lck_rw_done(nd_if_rwlock);
3418  * Determine a reasonable value for maxseg size. 
3419  * If the route is known, check route for mtu. 
3420  * If none, use an mss that can be handled on the outgoing 
3421  * interface without forcing IP to fragment; if bigger than 
3422  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES 
3423  * to utilize large mbufs.  If no route is found, route has no mtu, 
3424  * or the destination isn't local, use a default, hopefully conservative 
3425  * size (usually 512 or the default IP max size, but no more than the mtu 
3426  * of the interface), as we can't discover anything about intervening 
3427  * gateways or networks.  We also initialize the congestion/slow start 
3428  * window to be a single segment if the destination isn't local. 
3429  * While looking at the routing entry, we also initialize other path-dependent 
3430  * parameters from pre-set or cached values in the routing entry. 
3432  * Also take into account the space needed for options that we 
3433  * send regularly.  Make maxseg shorter by that amount to assure 
3434  * that we can send maxseg amount of data even when the options 
3435  * are present.  Store the upper limit of the length of options plus 
3438  * NOTE that this routine is only called when we process an incoming 
3439  * segment, for outgoing segments only tcp_mssopt is called. 
3443 tcp_mss(tp, offer, input_ifscope)
3446         unsigned int input_ifscope;
3448         register struct rtentry *rt;
3450         register int rtt, mss;
3454         struct rmxp_tao *taop;
3455         int origoffer = offer;
3456         u_int32_t sb_max_corrected;
3465         isipv6 = ((inp->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
3466         min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
3467                             : sizeof (struct tcpiphdr);
3469 #define min_protoh  (sizeof (struct tcpiphdr)) 
3474                 rt = tcp_rtlookup6(inp);
3476                     (IN6_IS_ADDR_LOOPBACK(&inp->in6p_faddr) ||
3477                     IN6_IS_ADDR_LINKLOCAL(&inp->in6p_faddr) ||
3478                     rt->rt_gateway->sa_family == AF_LINK))
3484                 rt = tcp_rtlookup(inp, input_ifscope);
3486                     (rt->rt_gateway->sa_family == AF_LINK ||
3487                     rt->rt_ifp->if_flags & IFF_LOOPBACK))
3491                 tp->t_maxopd = tp->t_maxseg =
3493                 isipv6 ? tcp_v6mssdflt :
3500          * Slower link window correction: 
3501          * If a value is specified for slowlink_wsize use it for PPP links
3502          * believed to be on a serial modem (speed <128Kbps). Excludes 9600bps as
3503          * it is the default value advertised by pseudo-devices over ppp.
3505         if (ifp->if_type == IFT_PPP && slowlink_wsize > 0 &&
3506             ifp->if_baudrate > 9600 && ifp->if_baudrate <= 128000) {
3507                 tp->t_flags |= TF_SLOWLINK;
3509         so = inp->inp_socket;
3511         taop = rmx_taop(rt->rt_rmx);
3513          * Offer == -1 means that we didn't receive SYN yet, 
3514          * use cached value in that case; 
3517                 offer = taop->tao_mssopt;
3519          * Offer == 0 means that there was no MSS on the SYN segment, 
3520          * in this case we use tcp_mssdflt. 
3525                         isipv6 ? tcp_v6mssdflt :
3530                  * Prevent DoS attack with too small MSS. Round up 
3531                  * to at least minmss. 
3533                 offer = max(offer, tcp_minmss);
3535                  * Sanity check: make sure that maxopd will be large 
3536                  * enough to allow some data on segments even if all
3537                  * the option space is used (40 bytes).  Otherwise
3538                  * funny things may happen in tcp_output. 
3540                 offer = max(offer, 64);
3542         taop->tao_mssopt = offer;
3545          * While we're here, check if there's an initial rtt 
3546          * or rttvar.  Convert from the route-table units 
3547          * to scaled multiples of the slow timeout timer. 
3549         if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
3551                  * XXX the lock bit for RTT indicates that the value 
3552                  * is also a minimum value; this is subject to time. 
3554                 if (rt->rt_rmx.rmx_locks & RTV_RTT)
3555                         tp->t_rttmin = rtt / (RTM_RTTUNIT / TCP_RETRANSHZ);
3557                         tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCP_RETRANSHZ;
3558                 tp->t_srtt = rtt / (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTT_SCALE));
3559                 tcpstat.tcps_usedrtt++;
3560                 if (rt->rt_rmx.rmx_rttvar) {
3561                         tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
3562                             (RTM_RTTUNIT / (TCP_RETRANSHZ * TCP_RTTVAR_SCALE));
3563                         tcpstat.tcps_usedrttvar++;
3565                         /* default variation is +- 1 rtt */
3567                             tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
3569                 TCPT_RANGESET(tp->t_rxtcur,
3570                               ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
3571                               tp->t_rttmin, TCPTV_REXMTMAX);
3574                 tp->t_rttmin = isnetlocal ? tcp_TCPTV_MIN : TCP_RETRANSHZ;
3577         mss = (isipv6 ? tcp_maxmtu6(rt) : tcp_maxmtu(rt));
3579         mss = tcp_maxmtu(rt);
3583         if (rt->rt_rmx.rmx_mtu == 0) {
3587                                 mss = min(mss, tcp_v6mssdflt);
3591                         mss = min(mss, tcp_mssdflt);
3594         mss = min(mss, offer);
3596          * maxopd stores the maximum length of data AND options 
3597          * in a segment; maxseg is the amount of data in a normal 
3598          * segment.  We need to store this value (maxopd) apart 
3599          * from maxseg, because now every segment carries options 
3600          * and thus we normally have somewhat less data in segments. 
3605          * origoffer==-1 indicates, that no segments were received yet. 
3606          * In this case we just guess. 
3608         if ((tp->t_flags & (TF_REQ_TSTMP|TF_NOOPT)) == TF_REQ_TSTMP &&
3610              (tp->t_flags & TF_RCVD_TSTMP) == TF_RCVD_TSTMP))
3611                 mss -= TCPOLEN_TSTAMP_APPA;
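         /*
          * Illustration (not from the original source): on a path with a
          * 1500-byte MTU, tcp_maxmtu() returns 1500; subtracting min_protoh
          * (40 bytes of IPv4 plus TCP headers) leaves an mss of 1460, and a
          * smaller MSS offered by the peer wins over that.  When both sides
          * have agreed to timestamps, TCPOLEN_TSTAMP_APPA (12 bytes) is
          * removed as above, leaving 1448 bytes of payload per segment.
          */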
3615          * Calculate corrected value for sb_max; ensure to upgrade the 
3616          * numerator for large sb_max values else it will overflow. 
3618         sb_max_corrected = (sb_max * (u_int64_t)MCLBYTES) / (MSIZE + MCLBYTES);
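         /*
          * Illustration (not from the original source, assuming the usual
          * MSIZE of 256 and MCLBYTES of 2048): the correction scales sb_max by
          * 2048 / 2304, about 0.89, so that the data bytes plus the per-cluster
          * mbuf bookkeeping still fit within sb_max.
          */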
3621          * If there's a pipesize (ie loopback), change the socket 
3622          * buffer to that size only if it's bigger than the current 
3623          * sockbuf size.  Make the socket buffers an integral 
3624          * number of mss units; if the mss is larger than 
3625          * the socket buffer, decrease the mss. 
3628         bufsize = rt->rt_rmx.rmx_sendpipe;
3629         if (bufsize < so->so_snd.sb_hiwat)
3631                 bufsize = so->so_snd.sb_hiwat;
3635                 bufsize = (((bufsize + (u_int64_t)mss - 1) / (u_int64_t)mss) * (u_int64_t)mss);
3636                 if (bufsize > sb_max_corrected)
3637                         bufsize = sb_max_corrected;
3638                 (void)sbreserve(&so->so_snd, bufsize);
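         /*
          * Worked example (not from the original source): a 65536-byte send
          * buffer with an mss of 1460 rounds up to the next mss multiple,
          * ceil(65536 / 1460) * 1460 = 65700 bytes, and is then capped at
          * sb_max_corrected; the receive buffer below gets the same treatment.
          */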
3643         bufsize = rt->rt_rmx.rmx_recvpipe;
3644         if (bufsize < so->so_rcv.sb_hiwat)
3646                 bufsize = so->so_rcv.sb_hiwat;
3647         if (bufsize > mss) {
3648                 bufsize = (((bufsize + (u_int64_t)mss - 1) / (u_int64_t)mss) * (u_int64_t)mss);
3649                 if (bufsize > sb_max_corrected)
3650                         bufsize = sb_max_corrected;
3651                 (void)sbreserve(&so->so_rcv, bufsize);
3655          * Set the slow-start flight size depending on whether this 
3656          * is a local network or not. 
3659                 tp->snd_cwnd = mss * ss_fltsz_local;
3661                 tp->snd_cwnd = mss * ss_fltsz;
3663         if (rt->rt_rmx.rmx_ssthresh) {
3665                  * There's some sort of gateway or interface 
3666                  * buffer limit on the path.  Use this to set 
3667                  * the slow start threshold, but set the
3668                  * threshold to no less than 2*mss. 
3670                 tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
3671                 tcpstat.tcps_usedssthresh++;
3673                 tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
3676         /* Route locked during lookup above */ 
3681  * Determine the MSS option to send on an outgoing SYN. 
3695         isipv6 = ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) ? 1 : 0;
3696         min_protoh = isipv6 ? sizeof (struct ip6_hdr) + sizeof (struct tcphdr)
3697                             : sizeof (struct tcpiphdr);
3699 #define min_protoh  (sizeof (struct tcpiphdr)) 
3704                 rt = tcp_rtlookup6(tp->t_inpcb);
3707         rt = tcp_rtlookup(tp->t_inpcb, IFSCOPE_NONE);
3711                         isipv6 ? tcp_v6mssdflt :
3716          * Slower link window correction: 
3717          * If a value is specified for slowlink_wsize use it for PPP links
3718          * believed to be on a serial modem (speed <128Kbps). Excludes 9600bps as
3719          * it is the default value advertised by pseudo-devices over ppp.
3721         if (rt->rt_ifp->if_type == IFT_PPP && slowlink_wsize > 0 &&
3722             rt->rt_ifp->if_baudrate > 9600 && rt->rt_ifp->if_baudrate <= 128000) {
3723                 tp->t_flags |= TF_SLOWLINK;
3727         mss = (isipv6 ? tcp_maxmtu6(rt) : tcp_maxmtu(rt));
3729         mss = tcp_maxmtu(rt);
3731         /* Route locked during lookup above */
3733         return (mss - min_protoh);
3737  * When a partial ack arrives, force the retransmission of the
3738  * next unacknowledged segment.  Do not clear tp->t_dupacks. 
3739  * By setting snd_nxt to ti_ack, this forces retransmission timer to 
3743 tcp_newreno_partial_ack(tp, th)
3747                 tcp_seq onxt = tp->snd_nxt;
3748                 u_int32_t  ocwnd = tp->snd_cwnd;
3749                 tp->t_timer[TCPT_REXMT] = 0;
3751                 tp->snd_nxt = th->th_ack;
3753                  * Set snd_cwnd to one segment beyond acknowledged offset 
3754                  * (tp->snd_una has not yet been updated when this function  
3757                 tp->snd_cwnd = tp->t_maxseg + (th->th_ack - tp->snd_una);
3758                 tp->t_flags |= TF_ACKNOW;
3759                 tp->t_unacksegs = 0;
3760                 (void) tcp_output(tp);
3761                 tp->snd_cwnd = ocwnd;
3762                 if (SEQ_GT(onxt, tp->snd_nxt))
3765                  * Partial window deflation.  Relies on fact that tp->snd_una 
3768                 if (tp->snd_cwnd > th->th_ack - tp->snd_una)
3769                         tp->snd_cwnd -= th->th_ack - tp->snd_una;
3772                 tp->snd_cwnd += tp->t_maxseg;
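                 /*
                  * Worked example (not from the original source): with
                  * snd_una = 1000, a partial ACK of 3000 and t_maxseg = 1000,
                  * the code above briefly sets snd_cwnd to 1000 + 2000 = 3000
                  * so tcp_output() can emit exactly one retransmission of the
                  * segment at th_ack, then restores the old cwnd, deflates it
                  * by the 2000 newly acked bytes, and adds back one t_maxseg.
                  */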
3777  * Drop a random TCP connection that hasn't been serviced yet and 
3778  * is eligible for discard.  There is a one in qlen chance that 
3779  * we will return a null, saying that there are no droppable
3780  * requests.  In this case, the protocol specific code should drop
3781  * the new request.  This ensures fairness.
3783  * The listening TCP socket "head" must be locked 
3786 tcp_dropdropablreq(struct socket *head)
3788         struct socket *so, *sonext;
3789         unsigned int i, j, qlen;
3791         static struct timeval old_runtime;
3792         static unsigned int cur_cnt, old_cnt;
3794         struct inpcb *inp = NULL;
3797         if ((head->so_options & SO_ACCEPTCONN) == 0)
3800         so = TAILQ_FIRST(&head->so_incomp);
3805         if ((i = (tv.tv_sec - old_runtime.tv_sec)) != 0) {
3807                 old_cnt = cur_cnt / i;
3812         qlen = head->so_incqlen;
3813         if (++cur_cnt > qlen || old_cnt > qlen) {
3814                 rnd = (314159 * rnd + 66329) & 0xffff;
3815                 j = ((qlen + 1) * rnd) >> 16;
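                /*
                 * Illustration (not from the original source): rnd is a simple
                 * 16-bit linear congruential sequence, so ((qlen + 1) * rnd) >> 16
                 * maps it onto an index j in the range 0..qlen; e.g. with
                 * qlen = 10 and rnd = 0x8000 the walk below skips to roughly
                 * the middle of the incomplete-connection queue.
                 */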
3818                         so = TAILQ_NEXT(so, so_list);
3820         /* Find a connection that is not already closing (or being served) */ 
3822                 inp = (struct inpcb *)so->so_pcb;
3824                 sonext = TAILQ_NEXT(so, so_list);
3826                 if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING) {
3827                         /* Avoid the issue of a socket being accepted by one input thread 
3828                          * and being dropped by another input thread. 
3829                          * If we can't get a hold on this mutex, then grab the next socket in line. 
3831                         if (lck_mtx_try_lock(inp->inpcb_mtx)) {
3833                                 if ((so->so_usecount == 2) && so->so_state & SS_INCOMP)
3835                                 else {/* don't use if being accepted or used in any other way */
3836                                         in_pcb_checkstate(inp, WNT_RELEASE, 1);
3837                                         tcp_unlock(so, 1, 0);
3841                                 /* do not try to lock the inp in in_pcb_checkstate 
3842                                  * because the lock is already held in some other thread. 
3843                                  * Only drop the inp_wntcnt reference. 
3845                                 in_pcb_checkstate(inp, WNT_RELEASE, 1);
3854         TAILQ_REMOVE(&head->so_incomp, so, so_list);
3855         tcp_unlock(head, 0, 0);
3857         /* Makes sure socket is still in the right state to be discarded */ 
3859         if (in_pcb_checkstate(inp, WNT_RELEASE, 1) == WNT_STOPUSING) {
3860                 tcp_unlock(so, 1, 0);
3861                 tcp_lock(head, 0, 0);
3865         if (so->so_usecount != 2 || !(so->so_state & SS_INCOMP)) {
3866                 /* do not discard: that socket is being accepted */
3867                 tcp_unlock(so, 1, 0);
3868                 tcp_lock(head, 0, 0);
3875          * We do not want to lose track of the PCB right away in case we receive  
3876          * more segments from the peer 
3879         so->so_flags |= SOF_OVERFLOW;
3880         tp->t_state = TCPS_TIME_WAIT;
3881         (void) tcp_close(tp);
3882         tp->t_unacksegs = 0;
3883         tcpstat.tcps_drops++;
3884         tcp_canceltimers(tp);
3885         add_to_time_wait(tp);
3887         tcp_unlock(so, 1, 0);
3888         tcp_lock(head, 0, 0);
3895 tcp_getstat SYSCTL_HANDLER_ARGS
3897 #pragma unused(oidp, arg1, arg2) 
3901         if (req->oldptr == 0) {
3902                 req->oldlen = (size_t)sizeof(struct tcpstat);
3905         error = SYSCTL_OUT(req, &tcpstat, MIN(sizeof (tcpstat), req->oldlen));
3911 SYSCTL_PROC(_net_inet_tcp, TCPCTL_STATS, stats, CTLFLAG_RD, 0, 0,
3912     tcp_getstat, "S,tcpstat", "TCP statistics (struct tcpstat, netinet/tcp_var.h)");
3915 sysctl_rexmtthresh SYSCTL_HANDLER_ARGS
3917 #pragma unused(arg1, arg2) 
3919         int error, val = tcprexmtthresh;
3921         error = sysctl_handle_int(oidp, &val, 0, req);
3922         if (error || !req->newptr)
3926          * Constrain the number of duplicate ACKs 
3927          * to consider for TCP fast retransmit  
3931         if (val < 2 || val > 3)
3934          tcprexmtthresh = val;
3939 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmt_thresh, CTLTYPE_INT|CTLFLAG_RW,
3940         &tcprexmtthresh, 0, &sysctl_rexmtthresh, "I", "Duplicate ACK Threshold for Fast Retransmit");