]>
git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/tcp_input.c
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. 
   4  * @APPLE_LICENSE_HEADER_START@ 
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved. 
   8  * This file contains Original Code and/or Modifications of Original Code 
   9  * as defined in and that are subject to the Apple Public Source License 
  10  * Version 2.0 (the 'License'). You may not use this file except in 
  11  * compliance with the License. Please obtain a copy of the License at 
  12  * http://www.opensource.apple.com/apsl/ and read it before using this 
  15  * The Original Code and all software distributed under the License are 
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  20  * Please see the License for the specific language governing rights and 
  21  * limitations under the License. 
  23  * @APPLE_LICENSE_HEADER_END@ 
  26  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 
  27  *      The Regents of the University of California.  All rights reserved. 
  29  * Redistribution and use in source and binary forms, with or without 
  30  * modification, are permitted provided that the following conditions 
  32  * 1. Redistributions of source code must retain the above copyright 
  33  *    notice, this list of conditions and the following disclaimer. 
  34  * 2. Redistributions in binary form must reproduce the above copyright 
  35  *    notice, this list of conditions and the following disclaimer in the 
  36  *    documentation and/or other materials provided with the distribution. 
  37  * 3. All advertising materials mentioning features or use of this software 
  38  *    must display the following acknowledgement: 
  39  *      This product includes software developed by the University of 
  40  *      California, Berkeley and its contributors. 
  41  * 4. Neither the name of the University nor the names of its contributors 
  42  *    may be used to endorse or promote products derived from this software 
  43  *    without specific prior written permission. 
  45  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  46  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  47  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  48  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  49  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  50  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  51  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  52  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  53  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  54  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  57  *      @(#)tcp_input.c 8.12 (Berkeley) 5/24/95 
  58  * $FreeBSD: src/sys/netinet/tcp_input.c,v 1.107.2.16 2001/08/22 00:59:12 silby Exp $ 
  62 #include <sys/param.h> 
  63 #include <sys/systm.h> 
  64 #include <sys/kernel.h> 
  65 #include <sys/sysctl.h> 
  66 #include <sys/malloc.h> 
  68 #include <sys/proc.h>           /* for proc0 declaration */ 
  69 #include <sys/protosw.h> 
  70 #include <sys/socket.h> 
  71 #include <sys/socketvar.h> 
  72 #include <sys/syslog.h> 
  74 #include <kern/cpu_number.h>    /* before tcp_seq.h, for tcp_random18() */ 
  77 #include <net/if_types.h> 
  78 #include <net/route.h> 
  80 #include <netinet/in.h> 
  81 #include <netinet/in_systm.h> 
  82 #include <netinet/ip.h> 
  83 #include <netinet/ip_icmp.h>    /* for ICMP_BANDLIM             */    
  84 #include <netinet/in_var.h> 
  85 #include <netinet/icmp_var.h>   /* for ICMP_BANDLIM     */ 
  86 #include <netinet/in_pcb.h> 
  87 #include <netinet/ip_var.h> 
  89 #include <netinet/ip6.h> 
  90 #include <netinet/icmp6.h> 
  91 #include <netinet6/nd6.h> 
  92 #include <netinet6/ip6_var.h> 
  93 #include <netinet6/in6_pcb.h> 
  95 #include <netinet/tcp.h> 
  96 #include <netinet/tcp_fsm.h> 
  97 #include <netinet/tcp_seq.h> 
  98 #include <netinet/tcp_timer.h> 
  99 #include <netinet/tcp_var.h> 
 101 #include <netinet6/tcp6_var.h> 
 103 #include <netinet/tcpip.h> 
 105 #include <netinet/tcp_debug.h> 
 106 u_char tcp_saveipgen
[40]; /* the size must be of max ip header, now IPv6 */ 
 107 struct tcphdr tcp_savetcp
; 
 108 #endif /* TCPDEBUG */ 
 111 #include <netinet6/ipsec.h> 
 113 #include <netinet6/ipsec6.h> 
 115 #include <netkey/key.h> 
 118 #include <sys/kdebug.h> 
 121 MALLOC_DEFINE(M_TSEGQ
, "tseg_qent", "TCP segment queue entry"); 
 /*
  * kdebug trace codes for this file, all built with NETDBG_CODE() in the
  * DBG_NETTCP class.  They are passed to KERNEL_DEBUG() below;
  * DBG_FNC_TCP_INPUT and DBG_FNC_TCP_NEWCONN are OR'ed with
  * DBG_FUNC_START / DBG_FUNC_END at their call sites.
  */
 124 #define DBG_LAYER_BEG           NETDBG_CODE(DBG_NETTCP, 0) 
 125 #define DBG_LAYER_END           NETDBG_CODE(DBG_NETTCP, 2) 
 126 #define DBG_FNC_TCP_INPUT       NETDBG_CODE(DBG_NETTCP, (3 << 8)) 
 127 #define DBG_FNC_TCP_NEWCONN     NETDBG_CODE(DBG_NETTCP, (7 << 8)) 
 129 static int      tcprexmtthresh 
= 3; 
 131 extern int apple_hwcksum_rx
; 
 134 extern int ipsec_bypass
; 
 137 struct  tcpstat tcpstat
; 
 138 SYSCTL_STRUCT(_net_inet_tcp
, TCPCTL_STATS
, stats
, CTLFLAG_RD
,  
 139     &tcpstat 
, tcpstat
, "TCP statistics (struct tcpstat, netinet/tcp_var.h)"); 
 141 static int log_in_vain 
= 0; 
 142 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, log_in_vain
, CTLFLAG_RW
,  
 143     &log_in_vain
, 0, "Log all incoming TCP connections"); 
 145 static int blackhole 
= 0; 
 146 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, blackhole
, CTLFLAG_RW
, 
 147         &blackhole
, 0, "Do not send RST when dropping refused connections"); 
 149 int tcp_delack_enabled 
= 1; 
 150 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, delayed_ack
, CTLFLAG_RW
,  
 151     &tcp_delack_enabled
, 0,  
 152     "Delay ACK to try and piggyback it onto a data packet"); 
 154 int tcp_lq_overflow 
= 1; 
 155 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, tcp_lq_overflow
, CTLFLAG_RW
, 
 157     "Listen Queue Overflow"); 
 160 static int drop_synfin 
= 1; 
 161 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, drop_synfin
, CTLFLAG_RW
, 
 162     &drop_synfin
, 0, "Drop TCP packets with SYN+FIN set"); 
 165 __private_extern__ 
int slowlink_wsize 
= 8192; 
 166 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, slowlink_wsize
, CTLFLAG_RW
, 
 167         &slowlink_wsize
, 0, "Maximum advertised window size for slowlink"); 
 171 struct inpcbhead tcb
; 
 172 #define tcb6    tcb  /* for KAME src sync over BSD*'s */ 
 173 struct inpcbinfo tcbinfo
; 
 175 static void      tcp_dooptions 
__P((struct tcpcb 
*, 
 176             u_char 
*, int, struct tcphdr 
*, struct tcpopt 
*)); 
 177 static void      tcp_pulloutofband 
__P((struct socket 
*, 
 178             struct tcphdr 
*, struct mbuf 
*, int)); 
 179 static int       tcp_reass 
__P((struct tcpcb 
*, struct tcphdr 
*, int *, 
 181 static void      tcp_xmit_timer 
__P((struct tcpcb 
*, int)); 
 182 static int       tcp_newreno 
__P((struct tcpcb 
*, struct tcphdr 
*)); 
 184 /* Neighbor Discovery, Neighbor Unreachability Detection Upper layer hint. */ 
 186 #define ND6_HINT(tp) \ 
 188         if ((tp) && (tp)->t_inpcb && \ 
 189             ((tp)->t_inpcb->inp_vflag & INP_IPV6) != 0 && \ 
 190             (tp)->t_inpcb->in6p_route.ro_rt) \ 
 191                 nd6_nud_hint((tp)->t_inpcb->in6p_route.ro_rt, NULL, 0); \ 
 197 extern u_long   
*delack_bitmask
; 
 /*
  * Indicate whether this ack should be delayed.  We can delay the ack if
  *      - delayed acks are enabled and
  *      - there is no delayed ack timer in progress and
  *      - our last ack wasn't a 0-sized window.  We never want to delay
  *        the ack that opens up a 0-sized window.
  *
  * The argument is parenthesized at every use so that any pointer-valued
  * expression (for example "base + i" or "&array[i]") expands correctly.
  * Evaluates to non-zero when the ack may be delayed, zero otherwise.
  */
 #define DELAY_ACK(tp) \
        (tcp_delack_enabled && !callout_pending((tp)->tt_delack) && \
        ((tp)->t_flags & TF_RXWIN0SENT) == 0)
 212 tcp_reass(tp
, th
, tlenp
, m
) 
 213         register struct tcpcb 
*tp
; 
 214         register struct tcphdr 
*th
; 
 219         struct tseg_qent 
*p 
= NULL
; 
 220         struct tseg_qent 
*nq
; 
 221         struct tseg_qent 
*te
; 
 222         struct socket 
*so 
= tp
->t_inpcb
->inp_socket
; 
 226          * Call with th==0 after becoming established to 
 227          * force pre-ESTABLISHED data up to user socket. 
 232         /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */ 
 233         MALLOC(te
, struct tseg_qent 
*, sizeof (struct tseg_qent
), M_TSEGQ
, 
 236                 tcpstat
.tcps_rcvmemdrop
++; 
 242          * Find a segment which begins after this one does. 
 244         LIST_FOREACH(q
, &tp
->t_segq
, tqe_q
) { 
 245                 if (SEQ_GT(q
->tqe_th
->th_seq
, th
->th_seq
)) 
 251          * If there is a preceding segment, it may provide some of 
 252          * our data already.  If so, drop the data from the incoming 
 253          * segment.  If it provides all of our data, drop us. 
 257                 /* conversion to int (in i) handles seq wraparound */ 
 258                 i 
= p
->tqe_th
->th_seq 
+ p
->tqe_len 
- th
->th_seq
; 
 261                                 tcpstat
.tcps_rcvduppack
++; 
 262                                 tcpstat
.tcps_rcvdupbyte 
+= *tlenp
; 
 266                                  * Try to present any queued data 
 267                                  * at the left window edge to the user. 
 268                                  * This is needed after the 3-WHS 
 271                                 goto present
;   /* ??? */ 
 278         tcpstat
.tcps_rcvoopack
++; 
 279         tcpstat
.tcps_rcvoobyte 
+= *tlenp
; 
 282          * While we overlap succeeding segments trim them or, 
 283          * if they are completely covered, dequeue them. 
 286                 register int i 
= (th
->th_seq 
+ *tlenp
) - q
->tqe_th
->th_seq
; 
 289                 if (i 
< q
->tqe_len
) { 
 290                         q
->tqe_th
->th_seq 
+= i
; 
 296                 nq 
= LIST_NEXT(q
, tqe_q
); 
 297                 LIST_REMOVE(q
, tqe_q
); 
 303         /* Insert the new segment queue entry into place. */ 
 306         te
->tqe_len 
= *tlenp
; 
 309                 LIST_INSERT_HEAD(&tp
->t_segq
, te
, tqe_q
); 
 311                 LIST_INSERT_AFTER(p
, te
, tqe_q
); 
 316          * Present data to user, advancing rcv_nxt through 
 317          * completed sequence space. 
 319         if (!TCPS_HAVEESTABLISHED(tp
->t_state
)) 
 321         q 
= LIST_FIRST(&tp
->t_segq
); 
 322         if (!q 
|| q
->tqe_th
->th_seq 
!= tp
->rcv_nxt
) 
 325                 tp
->rcv_nxt 
+= q
->tqe_len
; 
 326                 flags 
= q
->tqe_th
->th_flags 
& TH_FIN
; 
 327                 nq 
= LIST_NEXT(q
, tqe_q
); 
 328                 LIST_REMOVE(q
, tqe_q
); 
 329                 if (so
->so_state 
& SS_CANTRCVMORE
) 
 332                         sbappend(&so
->so_rcv
, q
->tqe_m
); 
 335         } while (q 
&& q
->tqe_th
->th_seq 
== tp
->rcv_nxt
); 
 339         if ((tp
->t_inpcb
->inp_vflag 
& INP_IPV6
) != 0) { 
 341                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
 342                      ((tp
->t_inpcb
->inp_fport 
<< 16) | tp
->t_inpcb
->inp_lport
), 
 343                      (((tp
->t_inpcb
->in6p_laddr
.s6_addr16
[0] & 0xffff) << 16) | 
 344                       (tp
->t_inpcb
->in6p_faddr
.s6_addr16
[0] & 0xffff)), 
 350                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
 351                      ((tp
->t_inpcb
->inp_fport 
<< 16) | tp
->t_inpcb
->inp_lport
), 
 352                      (((tp
->t_inpcb
->inp_laddr
.s_addr 
& 0xffff) << 16) | 
 353                       (tp
->t_inpcb
->inp_faddr
.s_addr 
& 0xffff)), 
 363  * TCP input routine, follows pages 65-76 of the 
 364  * protocol specification dated September, 1981 very closely. 
 372         register struct mbuf 
*m 
= *mp
; 
 373         struct in6_ifaddr 
*ia6
; 
 375         IP6_EXTHDR_CHECK(m
, *offp
, sizeof(struct tcphdr
), IPPROTO_DONE
); 
 378          * draft-itojun-ipv6-tcp-to-anycast 
 379          * better place to put this in? 
 381         ia6 
= ip6_getdstifaddr(m
); 
 382         if (ia6 
&& (ia6
->ia6_flags 
& IN6_IFF_ANYCAST
)) {                 
 385                 ip6 
= mtod(m
, struct ip6_hdr 
*); 
 386                 icmp6_error(m
, ICMP6_DST_UNREACH
, ICMP6_DST_UNREACH_ADDR
, 
 387                             (caddr_t
)&ip6
->ip6_dst 
- (caddr_t
)ip6
); 
 401         register struct tcphdr 
*th
; 
 402         register struct ip 
*ip 
= NULL
; 
 403         register struct ipovly 
*ipov
; 
 404         register struct inpcb 
*inp
; 
 409         register struct tcpcb 
*tp 
= 0; 
 410         register int thflags
; 
 411         struct socket 
*so 
= 0; 
 412         int todrop
, acked
, ourfinisacked
, needoutput 
= 0; 
 413         struct in_addr laddr
; 
 415         struct in6_addr laddr6
; 
 420         struct tcpopt to
;               /* options in this segment */ 
 421         struct rmxp_tao 
*taop
;          /* pointer to our TAO cache entry */ 
 422         struct rmxp_tao tao_noncached
;  /* in case there's no cached entry */ 
 427         struct ip6_hdr 
*ip6 
= NULL
; 
 430         int rstreason
; /* For badport_bandlim accounting purposes */ 
 431         struct proc 
*proc0
=current_proc(); 
 433         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_START
,0,0,0,0,0); 
 436         isipv6 
= (mtod(m
, struct ip 
*)->ip_v 
== 6) ? 1 : 0; 
 438         bzero((char *)&to
, sizeof(to
)); 
 440         tcpstat
.tcps_rcvtotal
++; 
 446                 /* IP6_EXTHDR_CHECK() is already done at tcp6_input() */ 
 447                 ip6 
= mtod(m
, struct ip6_hdr 
*); 
 448                 tlen 
= sizeof(*ip6
) + ntohs(ip6
->ip6_plen
) - off0
; 
 449                 if (in6_cksum(m
, IPPROTO_TCP
, off0
, tlen
)) { 
 450                         tcpstat
.tcps_rcvbadsum
++; 
 453                 th 
= (struct tcphdr 
*)((caddr_t
)ip6 
+ off0
); 
 455                 KERNEL_DEBUG(DBG_LAYER_BEG
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
 456                      (((ip6
->ip6_src
.s6_addr16
[0]) << 16) | (ip6
->ip6_dst
.s6_addr16
[0])), 
 457                      th
->th_seq
, th
->th_ack
, th
->th_win
); 
 459                  * Be proactive about unspecified IPv6 address in source. 
 460                  * As we use all-zero to indicate unbounded/unconnected pcb, 
 461                  * unspecified IPv6 address can be used to confuse us. 
 463                  * Note that packets with unspecified IPv6 destination are 
 464                  * already dropped in ip6_input. 
 466                 if (IN6_IS_ADDR_UNSPECIFIED(&ip6
->ip6_src
)) { 
 474          * Get IP and TCP header together in first mbuf. 
 475          * Note: IP leaves IP header in first mbuf. 
 477         if (off0 
> sizeof (struct ip
)) { 
 478                 ip_stripoptions(m
, (struct mbuf 
*)0); 
 479                 off0 
= sizeof(struct ip
); 
 480                 if (m
->m_pkthdr
.csum_flags 
& CSUM_TCP_SUM16
) 
 481                         m
->m_pkthdr
.csum_flags 
= 0; /* invalidate hwcksuming */ 
 484         if (m
->m_len 
< sizeof (struct tcpiphdr
)) { 
 485                 if ((m 
= m_pullup(m
, sizeof (struct tcpiphdr
))) == 0) { 
 486                         tcpstat
.tcps_rcvshort
++; 
 490         ip 
= mtod(m
, struct ip 
*); 
 491         ipov 
= (struct ipovly 
*)ip
; 
 492         th 
= (struct tcphdr 
*)((caddr_t
)ip 
+ off0
); 
 495         KERNEL_DEBUG(DBG_LAYER_BEG
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
 496                      (((ip
->ip_src
.s_addr 
& 0xffff) << 16) | (ip
->ip_dst
.s_addr 
& 0xffff)), 
 497                      th
->th_seq
, th
->th_ack
, th
->th_win
); 
 499         if (m
->m_pkthdr
.csum_flags 
& CSUM_DATA_VALID
) { 
 500                 if (apple_hwcksum_rx 
&& (m
->m_pkthdr
.csum_flags 
& CSUM_TCP_SUM16
)) { 
 503                         *(uint32_t*)&b
[0] = *(uint32_t*)&ipov
->ih_x1
[0]; 
 504                         *(uint32_t*)&b
[4] = *(uint32_t*)&ipov
->ih_x1
[4]; 
 505                         *(uint8_t*)&b
[8] = *(uint8_t*)&ipov
->ih_x1
[8]; 
 507                         bzero(ipov
->ih_x1
, sizeof(ipov
->ih_x1
)); 
 508                         ipov
->ih_len 
= (u_short
)tlen
; 
 510                         pseudo 
= in_cksum(m
, sizeof (struct ip
)); 
 512                         *(uint32_t*)&ipov
->ih_x1
[0] = *(uint32_t*)&b
[0]; 
 513                         *(uint32_t*)&ipov
->ih_x1
[4] = *(uint32_t*)&b
[4]; 
 514                         *(uint8_t*)&ipov
->ih_x1
[8] = *(uint8_t*)&b
[8]; 
 516                         th
->th_sum 
= in_addword(pseudo
, (m
->m_pkthdr
.csum_data 
& 0xFFFF)); 
 518                         if (m
->m_pkthdr
.csum_flags 
& CSUM_PSEUDO_HDR
) 
 519                                 th
->th_sum 
= m
->m_pkthdr
.csum_data
; 
 521                                 th
->th_sum 
= in_pseudo(ip
->ip_src
.s_addr
, 
 522                                         ip
->ip_dst
.s_addr
, htonl(m
->m_pkthdr
.csum_data 
+ 
 523                                         ip
->ip_len 
+ IPPROTO_TCP
)); 
 525                 th
->th_sum 
^= 0xffff; 
 529                  * Checksum extended TCP header and data. 
 531                 *(uint32_t*)&b
[0] = *(uint32_t*)&ipov
->ih_x1
[0]; 
 532                 *(uint32_t*)&b
[4] = *(uint32_t*)&ipov
->ih_x1
[4]; 
 533                 *(uint8_t*)&b
[8] = *(uint8_t*)&ipov
->ih_x1
[8]; 
 535                 len 
= sizeof (struct ip
) + tlen
; 
 536                 bzero(ipov
->ih_x1
, sizeof(ipov
->ih_x1
)); 
 537                 ipov
->ih_len 
= (u_short
)tlen
; 
 539                 th
->th_sum 
= in_cksum(m
, len
); 
 541                 *(uint32_t*)&ipov
->ih_x1
[0] = *(uint32_t*)&b
[0]; 
 542                 *(uint32_t*)&ipov
->ih_x1
[4] = *(uint32_t*)&b
[4]; 
 543                 *(uint8_t*)&ipov
->ih_x1
[8] = *(uint8_t*)&b
[8]; 
 546                 tcpstat
.tcps_rcvbadsum
++; 
 550         /* Re-initialization for later version check */ 
 551         ip
->ip_v 
= IPVERSION
; 
 556          * Check that TCP offset makes sense, 
 557          * pull out TCP options and adjust length.              XXX 
 559         off 
= th
->th_off 
<< 2; 
 560         if (off 
< sizeof (struct tcphdr
) || off 
> tlen
) { 
 561                 tcpstat
.tcps_rcvbadoff
++; 
 564         tlen 
-= off
;    /* tlen is used instead of ti->ti_len */ 
 565         if (off 
> sizeof (struct tcphdr
)) { 
 568                         IP6_EXTHDR_CHECK(m
, off0
, off
, ); 
 569                         ip6 
= mtod(m
, struct ip6_hdr 
*); 
 570                         th 
= (struct tcphdr 
*)((caddr_t
)ip6 
+ off0
); 
 574                 if (m
->m_len 
< sizeof(struct ip
) + off
) { 
 575                         if ((m 
= m_pullup(m
, sizeof (struct ip
) + off
)) == 0) { 
 576                                 tcpstat
.tcps_rcvshort
++; 
 579                         ip 
= mtod(m
, struct ip 
*); 
 580                         ipov 
= (struct ipovly 
*)ip
; 
 581                         th 
= (struct tcphdr 
*)((caddr_t
)ip 
+ off0
); 
 584                 optlen 
= off 
- sizeof (struct tcphdr
); 
 585                 optp 
= (u_char 
*)(th 
+ 1); 
 587                  * Do quick retrieval of timestamp options ("options 
 588                  * prediction?").  If timestamp is the only option and it's 
 589                  * formatted as recommended in RFC 1323 appendix A, we 
 590                  * quickly get the values now and not bother calling 
 591                  * tcp_dooptions(), etc. 
 593                 if ((optlen 
== TCPOLEN_TSTAMP_APPA 
|| 
 594                      (optlen 
> TCPOLEN_TSTAMP_APPA 
&& 
 595                         optp
[TCPOLEN_TSTAMP_APPA
] == TCPOPT_EOL
)) && 
 596                      *(u_int32_t 
*)optp 
== htonl(TCPOPT_TSTAMP_HDR
) && 
 597                      (th
->th_flags 
& TH_SYN
) == 0) { 
 598                         to
.to_flag 
|= TOF_TS
; 
 599                         to
.to_tsval 
= ntohl(*(u_int32_t 
*)(optp 
+ 4)); 
 600                         to
.to_tsecr 
= ntohl(*(u_int32_t 
*)(optp 
+ 8)); 
 601                         optp 
= NULL
;    /* we've parsed the options */ 
 604         thflags 
= th
->th_flags
; 
 608          * If the drop_synfin option is enabled, drop all packets with 
 609          * both the SYN and FIN bits set. This prevents e.g. nmap from 
 610          * identifying the TCP/IP stack. 
 612          * This is incompatible with RFC1644 extensions (T/TCP). 
 614         if (drop_synfin 
&& (thflags 
& (TH_SYN
|TH_FIN
)) == (TH_SYN
|TH_FIN
)) 
 619          * Convert TCP protocol specific fields to host format. 
 627          * Delay dropping TCP, IP headers, IPv6 ext headers, and TCP options, 
 628          * until after ip6_savecontrol() is called and before other functions 
 629          * which don't want those proto headers. 
 630          * Because ip6_savecontrol() is going to parse the mbuf to 
 631          * search for data to be passed up to user-land, it wants mbuf 
 632          * parameters to be unchanged. 
 634         drop_hdrlen 
= off0 
+ off
; 
 637          * Locate pcb for segment. 
 640 #if IPFIREWALL_FORWARD 
 641         if (ip_fw_fwd_addr 
!= NULL
 
 643             && isipv6 
== NULL 
/* IPv6 support is not yet */ 
 647                  * Diverted. Pretend to be the destination. 
 648                  * already got one like this?  
 650                 inp 
= in_pcblookup_hash(&tcbinfo
, ip
->ip_src
, th
->th_sport
, 
 651                         ip
->ip_dst
, th
->th_dport
, 0, m
->m_pkthdr
.rcvif
); 
 654                          * No, then it's new. Try to find the ambushing socket 
 656                         if (!ip_fw_fwd_addr
->sin_port
) { 
 657                                 inp 
= in_pcblookup_hash(&tcbinfo
, ip
->ip_src
, 
 658                                     th
->th_sport
, ip_fw_fwd_addr
->sin_addr
, 
 659                                     th
->th_dport
, 1, m
->m_pkthdr
.rcvif
); 
 661                                 inp 
= in_pcblookup_hash(&tcbinfo
, 
 662                                     ip
->ip_src
, th
->th_sport
, 
 663                                     ip_fw_fwd_addr
->sin_addr
, 
 664                                     ntohs(ip_fw_fwd_addr
->sin_port
), 1, 
 668                 ip_fw_fwd_addr 
= NULL
; 
 670 #endif  /* IPFIREWALL_FORWARD */ 
 674                 inp 
= in6_pcblookup_hash(&tcbinfo
, &ip6
->ip6_src
, th
->th_sport
, 
 675                                          &ip6
->ip6_dst
, th
->th_dport
, 1, 
 679         inp 
= in_pcblookup_hash(&tcbinfo
, ip
->ip_src
, th
->th_sport
, 
 680             ip
->ip_dst
, th
->th_dport
, 1, m
->m_pkthdr
.rcvif
); 
 686                 if (ipsec_bypass 
== 0 && inp 
!= NULL 
&& ipsec6_in_reject_so(m
, inp
->inp_socket
)) { 
 687                         ipsec6stat
.in_polvio
++; 
 692         if (ipsec_bypass 
== 0 && inp 
!= NULL 
&& ipsec4_in_reject_so(m
, inp
->inp_socket
)) { 
 693                 ipsecstat
.in_polvio
++; 
 699          * If the state is CLOSED (i.e., TCB does not exist) then 
 700          * all data in the incoming segment is discarded. 
 701          * If the TCB exists but is in CLOSED state, it is embryonic, 
 702          * but should either do a listen or a connect soon. 
 707                         char dbuf
[INET6_ADDRSTRLEN
], sbuf
[INET6_ADDRSTRLEN
]; 
 709                         char dbuf
[4*sizeof "123"], sbuf
[4*sizeof "123"]; 
 714                                 strcpy(dbuf
, ip6_sprintf(&ip6
->ip6_dst
)); 
 715                                 strcpy(sbuf
, ip6_sprintf(&ip6
->ip6_src
)); 
 719                         strcpy(dbuf
, inet_ntoa(ip
->ip_dst
)); 
 720                         strcpy(sbuf
, inet_ntoa(ip
->ip_src
)); 
 722                         switch (log_in_vain
) { 
 726                                         "Connection attempt to TCP %s:%d from %s:%d\n", 
 727                                         dbuf
, ntohs(th
->th_dport
), 
 729                                         ntohs(th
->th_sport
)); 
 733                                 "Connection attempt to TCP %s:%d from %s:%d flags:0x%x\n", 
 734                                 dbuf
, ntohs(th
->th_dport
), sbuf
, 
 735                                 ntohs(th
->th_sport
), thflags
); 
 744                                 if (thflags 
& TH_SYN
) 
 753                 rstreason 
= BANDLIM_RST_CLOSEDPORT
; 
 758                 rstreason 
= BANDLIM_RST_CLOSEDPORT
; 
 761         if (tp
->t_state 
== TCPS_CLOSED
) 
 766          * Bogus state when listening port owned by SharedIP with loopback as the  
 767          * only configured interface: BlueBox does not filter loopback 
 769         if (tp
->t_state 
== TCP_NSTATES
) 
 773         /* Unscale the window into a 32-bit value. */ 
 774         if ((thflags 
& TH_SYN
) == 0) 
 775                 tiwin 
= th
->th_win 
<< tp
->snd_scale
; 
 779         so 
= inp
->inp_socket
; 
 780         if (so
->so_options 
& (SO_DEBUG
|SO_ACCEPTCONN
)) { 
 782                 if (so
->so_options 
& SO_DEBUG
) { 
 783                         ostate 
= tp
->t_state
; 
 786                                 bcopy((char *)ip6
, (char *)tcp_saveipgen
, 
 790                         bcopy((char *)ip
, (char *)tcp_saveipgen
, sizeof(*ip
)); 
 794                 if (so
->so_options 
& SO_ACCEPTCONN
) { 
 795                         register struct tcpcb 
*tp0 
= tp
; 
 801                         struct inpcb 
*oinp 
= sotoinpcb(so
); 
 803                         int ogencnt 
= so
->so_gencnt
; 
 807                          * Current IPsec implementation makes incorrect IPsec 
 808                          * cache if this check is done here. 
 809                          * So delay this until duplicated socket is created. 
 811                         if ((thflags 
& (TH_RST
|TH_ACK
|TH_SYN
)) != TH_SYN
) { 
 813                                  * Note: dropwithreset makes sure we don't 
 814                                  * send a RST in response to a RST. 
 816                                 if (thflags 
& TH_ACK
) { 
 817                                         tcpstat
.tcps_badsyn
++; 
 818                                         rstreason 
= BANDLIM_RST_OPENPORT
; 
 824                         KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN 
| DBG_FUNC_START
,0,0,0,0,0); 
 828                          * If deprecated address is forbidden, 
 829                          * we do not accept SYN to deprecated interface 
 830                          * address to prevent any new inbound connection from 
 831                          * getting established. 
 832                          * When we do not accept SYN, we send a TCP RST, 
 833                          * with deprecated source address (instead of dropping 
 834                          * it).  We compromise it as it is much better for peer 
 835                          * to send a RST, and RST will be the final packet 
 838                          * If we do not forbid deprecated addresses, we accept 
 839                          * the SYN packet.  RFC2462 does not suggest dropping 
 841                          * If we decipher RFC2462 5.5.4, it says like this: 
 842                          * 1. use of deprecated addr with existing 
 843                          *    communication is okay - "SHOULD continue to be 
 845                          * 2. use of it with new communication: 
 846                          *   (2a) "SHOULD NOT be used if alternate address 
 847                          *        with sufficient scope is available" 
 848                          *   (2b) nothing mentioned otherwise. 
 849                          * Here we fall into (2b) case as we have no choice in 
 850                          * our source address selection - we must obey the peer. 
 852                          * The wording in RFC2462 is confusing, and there are 
 853                          * multiple description text for deprecated address 
 854                          * handling - worse, they are not exactly the same. 
 855                          * I believe 5.5.4 is the best one, so we follow 5.5.4. 
 857                         if (isipv6 
&& !ip6_use_deprecated
) { 
 858                                 struct in6_ifaddr 
*ia6
; 
 860                                 if ((ia6 
= ip6_getdstifaddr(m
)) && 
 861                                     (ia6
->ia6_flags 
& IN6_IFF_DEPRECATED
)) { 
 863                                         rstreason 
= BANDLIM_RST_OPENPORT
; 
 869                         so2 
= sonewconn(so
, 0); 
 871                                 tcpstat
.tcps_listendrop
++; 
 872                                 so2 
= sodropablereq(so
); 
 875                                                 sototcpcb(so2
)->t_flags 
|=  
 877                                         tcp_drop(sototcpcb(so2
), ETIMEDOUT
); 
 878                                         so2 
= sonewconn(so
, 0); 
 884                          * Make sure listening socket did not get closed during socket allocation, 
 885                          * not only is this incorrect, but it is known to cause a panic 
 887                         if (so
->so_gencnt 
!= ogencnt
) 
 894                          * This is ugly, but .... 
 896                          * Mark socket as temporary until we're 
 897                          * committed to keeping it.  The code at 
 898                          * ``drop'' and ``dropwithreset'' check the 
 899                          * flag dropsocket to see if the temporary 
 900                          * socket created here should be discarded. 
 901                          * We mark the socket as discardable until 
 902                          * we're committed to it below in TCPS_LISTEN. 
 905                         inp 
= (struct inpcb 
*)so
->so_pcb
; 
 908                                 inp
->in6p_laddr 
= ip6
->ip6_dst
; 
 910                                 inp
->inp_vflag 
&= ~INP_IPV6
; 
 911                                 inp
->inp_vflag 
|= INP_IPV4
; 
 913                         inp
->inp_laddr 
= ip
->ip_dst
; 
 917                         inp
->inp_lport 
= th
->th_dport
; 
 918                         if (in_pcbinshash(inp
) != 0) { 
 920                                  * Undo the assignments above if we failed to 
 921                                  * put the PCB on the hash lists. 
 925                                         inp
->in6p_laddr 
= in6addr_any
; 
 928                                 inp
->inp_laddr
.s_addr 
= INADDR_ANY
; 
 934                          * To avoid creating incorrectly cached IPsec 
 935                          * association, this needs to be done here. 
 937                          * Subject: (KAME-snap 748) 
 938                          * From: Wayne Knowles <w.knowles@niwa.cri.nz> 
 939                          * ftp://ftp.kame.net/pub/mail-list/snap-users/748 
 941                         if ((thflags 
& (TH_RST
|TH_ACK
|TH_SYN
)) != TH_SYN
) { 
 943                                  * Note: dropwithreset makes sure we don't 
 944                                  * send a RST in response to a RST. 
 946                                 if (thflags 
& TH_ACK
) { 
 947                                         tcpstat
.tcps_badsyn
++; 
 948                                         rstreason 
= BANDLIM_RST_OPENPORT
; 
 957                                  * Inherit socket options from the listening 
 959                                  * Note that in6p_inputopts are not (even 
 960                                  * should not be) copied, since it stores 
 961                                  * previously received options and is used to 
 962                                  * detect if each new option is different than 
 963                                  * the previous one and hence should be passed 
 965                                  * If we copied in6p_inputopts, a user would 
 966                                  * not be able to receive options just after 
 967                                  * calling the accept system call. 
 970                                         oinp
->inp_flags 
& INP_CONTROLOPTS
; 
 971                                 if (oinp
->in6p_outputopts
) 
 972                                         inp
->in6p_outputopts 
= 
 973                                                 ip6_copypktopts(oinp
->in6p_outputopts
, 
 977                         inp
->inp_options 
= ip_srcroute(); 
 979                         /* copy old policy into new socket's */ 
 980                         if (sotoinpcb(oso
)->inp_sp
) 
 983                                 /* Is it a security hole here to silently fail to copy the policy? */ 
 984                                 if (inp
->inp_sp 
!= NULL
) 
 985                                         error 
= ipsec_init_policy(so
, &inp
->inp_sp
); 
 986                                 if (error 
!= 0 || ipsec_copy_policy(sotoinpcb(oso
)->inp_sp
, inp
->inp_sp
)) 
 987                                         printf("tcp_input: could not copy policy\n"); 
 991                         tp
->t_state 
= TCPS_LISTEN
; 
 992                         tp
->t_flags 
|= tp0
->t_flags 
& (TF_NOPUSH
|TF_NOOPT
|TF_NODELAY
); 
 994                         /* Compute proper scaling value from buffer space */ 
 995                         while (tp
->request_r_scale 
< TCP_MAX_WINSHIFT 
&& 
 996                            TCP_MAXWIN 
<< tp
->request_r_scale 
< 
 998                                 tp
->request_r_scale
++; 
1000                         KERNEL_DEBUG(DBG_FNC_TCP_NEWCONN 
| DBG_FUNC_END
,0,0,0,0,0); 
1005          * Segment received on connection. 
1006          * Reset idle time and keep-alive timer. 
1009         if (TCPS_HAVEESTABLISHED(tp
->t_state
)) 
1010                 tp
->t_timer
[TCPT_KEEP
] = TCP_KEEPIDLE(tp
); 
1013          * Process options if not in LISTEN state, 
1014          * else do it below (after getting remote address). 
1016         if (tp
->t_state 
!= TCPS_LISTEN 
&& optp
) 
1017                 tcp_dooptions(tp
, optp
, optlen
, th
, &to
); 
1020          * Header prediction: check for the two common cases 
1021          * of a uni-directional data xfer.  If the packet has 
1022          * no control flags, is in-sequence, the window didn't 
1023          * change and we're not retransmitting, it's a 
1024          * candidate.  If the length is zero and the ack moved 
1025          * forward, we're the sender side of the xfer.  Just 
1026          * free the data acked & wake any higher level process 
1027          * that was blocked waiting for space.  If the length 
1028          * is non-zero and the ack didn't move, we're the 
1029          * receiver side.  If we're getting packets in-order 
1030          * (the reassembly queue is empty), add the data to 
1031          * the socket buffer and note that we need a delayed ack. 
1032          * Make sure that the hidden state-flags are also off. 
1033          * Since we check for TCPS_ESTABLISHED above, it can only 
1036         if (tp
->t_state 
== TCPS_ESTABLISHED 
&& 
1037             (thflags 
& (TH_SYN
|TH_FIN
|TH_RST
|TH_URG
|TH_ACK
)) == TH_ACK 
&& 
1038             ((tp
->t_flags 
& (TF_NEEDSYN
|TF_NEEDFIN
)) == 0) && 
1039             ((to
.to_flag 
& TOF_TS
) == 0 || 
1040              TSTMP_GEQ(to
.to_tsval
, tp
->ts_recent
)) && 
1042              * Using the CC option is compulsory if once started: 
1043              *   the segment is OK if no T/TCP was negotiated or 
1044              *   if the segment has a CC option equal to CCrecv 
1046             ((tp
->t_flags 
& (TF_REQ_CC
|TF_RCVD_CC
)) != (TF_REQ_CC
|TF_RCVD_CC
) || 
1047              ((to
.to_flag 
& TOF_CC
) != 0 && to
.to_cc 
== tp
->cc_recv
)) && 
1048             th
->th_seq 
== tp
->rcv_nxt 
&& 
1049             tiwin 
&& tiwin 
== tp
->snd_wnd 
&& 
1050             tp
->snd_nxt 
== tp
->snd_max
) { 
1053                  * If last ACK falls within this segment's sequence numbers, 
1054                  * record the timestamp. 
1055                  * NOTE that the test is modified according to the latest 
1056                  * proposal of the tcplw@cray.com list (Braden 1993/04/26). 
1058                 if ((to
.to_flag 
& TOF_TS
) != 0 && 
1059                    SEQ_LEQ(th
->th_seq
, tp
->last_ack_sent
)) { 
1060                         tp
->ts_recent_age 
= tcp_now
; 
1061                         tp
->ts_recent 
= to
.to_tsval
; 
1065                         if (SEQ_GT(th
->th_ack
, tp
->snd_una
) && 
1066                             SEQ_LEQ(th
->th_ack
, tp
->snd_max
) && 
1067                             tp
->snd_cwnd 
>= tp
->snd_wnd 
&& 
1068                             tp
->t_dupacks 
< tcprexmtthresh
) { 
1070                                  * this is a pure ack for outstanding data. 
1072                                 ++tcpstat
.tcps_predack
; 
1074                                  * "bad retransmit" recovery 
1076                                 if (tp
->t_rxtshift 
== 1 && 
1077                                     tcp_now 
< tp
->t_badrxtwin
) {  
1078                                         tp
->snd_cwnd 
= tp
->snd_cwnd_prev
; 
1080                                             tp
->snd_ssthresh_prev
; 
1081                                         tp
->snd_nxt 
= tp
->snd_max
; 
1082                                         tp
->t_badrxtwin 
= 0; 
1084                                 if (((to
.to_flag 
& TOF_TS
) != 0) && (to
.to_tsecr 
!= 0)) /* Makes sure we already have a TS */ 
1086                                             tcp_now 
- to
.to_tsecr 
+ 1); 
1087                                 else if (tp
->t_rtttime 
&& 
1088                                             SEQ_GT(th
->th_ack
, tp
->t_rtseq
)) 
1089                                         tcp_xmit_timer(tp
, tp
->t_rtttime
); 
1090                                 acked 
= th
->th_ack 
- tp
->snd_una
; 
1091                                 tcpstat
.tcps_rcvackpack
++; 
1092                                 tcpstat
.tcps_rcvackbyte 
+= acked
; 
1093                                 sbdrop(&so
->so_snd
, acked
); 
1094                                 tp
->snd_una 
= th
->th_ack
; 
1096                                 ND6_HINT(tp
); /* some progress has been done */ 
1099                                  * If all outstanding data are acked, stop 
1100                                  * retransmit timer, otherwise restart timer 
1101                                  * using current (possibly backed-off) value. 
1102                                  * If process is waiting for space, 
1103                                  * wakeup/selwakeup/signal.  If data 
1104                                  * are ready to send, let tcp_output 
1105                                  * decide between more output or persist. 
1107                                 if (tp
->snd_una 
== tp
->snd_max
) 
1108                                         tp
->t_timer
[TCPT_REXMT
] = 0; 
1109                                 else if (tp
->t_timer
[TCPT_PERSIST
] == 0) 
1110                                         tp
->t_timer
[TCPT_REXMT
] = tp
->t_rxtcur
; 
1112                                 if (so
->so_snd
.sb_cc
) 
1113                                         (void) tcp_output(tp
); 
1115                                 KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
1118                 } else if (th
->th_ack 
== tp
->snd_una 
&& 
1119                     LIST_EMPTY(&tp
->t_segq
) && 
1120                     tlen 
<= sbspace(&so
->so_rcv
)) { 
1122                          * this is a pure, in-sequence data packet 
1123                          * with nothing on the reassembly queue and 
1124                          * we have enough buffer space to take it. 
1126                         ++tcpstat
.tcps_preddat
; 
1127                         tp
->rcv_nxt 
+= tlen
; 
1128                         tcpstat
.tcps_rcvpack
++; 
1129                         tcpstat
.tcps_rcvbyte 
+= tlen
; 
1130                         ND6_HINT(tp
);   /* some progress has been done */ 
1132                          * Add data to socket buffer. 
1134                         m_adj(m
, drop_hdrlen
);  /* delayed header drop */ 
1135                         sbappend(&so
->so_rcv
, m
); 
1138                                 KERNEL_DEBUG(DBG_LAYER_END
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
1139                                         (((ip6
->ip6_src
.s6_addr16
[0]) << 16) | (ip6
->ip6_dst
.s6_addr16
[0])), 
1140                                         th
->th_seq
, th
->th_ack
, th
->th_win
);  
1145                                 KERNEL_DEBUG(DBG_LAYER_END
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
1146                                         (((ip
->ip_src
.s_addr 
& 0xffff) << 16) | (ip
->ip_dst
.s_addr 
& 0xffff)), 
1147                                         th
->th_seq
, th
->th_ack
, th
->th_win
);  
1149                         if (tcp_delack_enabled
) { 
1150                             TCP_DELACK_BITSET(tp
->t_inpcb
->hash_element
);  
1151                             tp
->t_flags 
|= TF_DELACK
; 
1153                                 tp
->t_flags 
|= TF_ACKNOW
; 
1157                         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
1163          * Calculate amount of space in receive window, 
1164          * and then do TCP input processing. 
1165          * Receive window is amount of space in rcv queue, 
1166          * but not less than advertised window. 
1170         win 
= sbspace(&so
->so_rcv
); 
1173         else {  /* clip rcv window to 4K for modems */ 
1174                 if (tp
->t_flags 
& TF_SLOWLINK 
&& slowlink_wsize 
> 0) 
1175                         win 
= min(win
, slowlink_wsize
); 
1177         tp
->rcv_wnd 
= imax(win
, (int)(tp
->rcv_adv 
- tp
->rcv_nxt
)); 
1180         switch (tp
->t_state
) { 
1183          * If the state is LISTEN then ignore segment if it contains an RST. 
1184          * If the segment contains an ACK then it is bad and send a RST. 
1185          * If it does not contain a SYN then it is not interesting; drop it. 
1186          * If it is from this socket, drop it, it must be forged. 
1187          * Don't bother responding if the destination was a broadcast. 
1188          * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial 
1189          * tp->iss, and send a segment: 
1190          *     <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK> 
1191          * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss. 
1192          * Fill in remote peer address fields if not previously specified. 
1193          * Enter SYN_RECEIVED state, and process any other fields of this 
1194          * segment in this state. 
1197                 register struct sockaddr_in 
*sin
; 
1199                 register struct sockaddr_in6 
*sin6
; 
1202                 if (thflags 
& TH_RST
) 
1204                 if (thflags 
& TH_ACK
) { 
1205                         rstreason 
= BANDLIM_RST_OPENPORT
; 
1208                 if ((thflags 
& TH_SYN
) == 0) 
1210                 if (th
->th_dport 
== th
->th_sport
) { 
1213                                 if (IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, 
1218                         if (ip
->ip_dst
.s_addr 
== ip
->ip_src
.s_addr
) 
1222                  * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN 
1223                  * in_broadcast() should never return true on a received 
1224                  * packet with M_BCAST not set. 
1226                  * Packets with a multicast source address should also 
1229                 if (m
->m_flags 
& (M_BCAST
|M_MCAST
)) 
1233                         if (IN6_IS_ADDR_MULTICAST(&ip6
->ip6_dst
) || 
1234                             IN6_IS_ADDR_MULTICAST(&ip6
->ip6_src
)) 
1238                 if (IN_MULTICAST(ntohl(ip
->ip_dst
.s_addr
)) || 
1239                     IN_MULTICAST(ntohl(ip
->ip_src
.s_addr
)) || 
1240                     ip
->ip_src
.s_addr 
== htonl(INADDR_BROADCAST
) || 
1241                     in_broadcast(ip
->ip_dst
, m
->m_pkthdr
.rcvif
)) 
1245                         MALLOC(sin6
, struct sockaddr_in6 
*, sizeof *sin6
, 
1246                                M_SONAME
, M_NOWAIT
); 
1249                         bzero(sin6
, sizeof(*sin6
)); 
1250                         sin6
->sin6_family 
= AF_INET6
; 
1251                         sin6
->sin6_len 
= sizeof(*sin6
); 
1252                         sin6
->sin6_addr 
= ip6
->ip6_src
; 
1253                         sin6
->sin6_port 
= th
->th_sport
; 
1254                         laddr6 
= inp
->in6p_laddr
; 
1255                         if (IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_laddr
)) 
1256                                 inp
->in6p_laddr 
= ip6
->ip6_dst
; 
1257                         if (in6_pcbconnect(inp
, (struct sockaddr 
*)sin6
, 
1259                                 inp
->in6p_laddr 
= laddr6
; 
1260                                 FREE(sin6
, M_SONAME
); 
1263                         FREE(sin6
, M_SONAME
); 
1267                         MALLOC(sin
, struct sockaddr_in 
*, sizeof *sin
, M_SONAME
, 
1271                         sin
->sin_family 
= AF_INET
; 
1272                         sin
->sin_len 
= sizeof(*sin
); 
1273                         sin
->sin_addr 
= ip
->ip_src
; 
1274                         sin
->sin_port 
= th
->th_sport
; 
1275                         bzero((caddr_t
)sin
->sin_zero
, sizeof(sin
->sin_zero
)); 
1276                         laddr 
= inp
->inp_laddr
; 
1277                         if (inp
->inp_laddr
.s_addr 
== INADDR_ANY
) 
1278                                 inp
->inp_laddr 
= ip
->ip_dst
; 
1279                         if (in_pcbconnect(inp
, (struct sockaddr 
*)sin
, proc0
)) { 
1280                                 inp
->inp_laddr 
= laddr
; 
1281                                 FREE(sin
, M_SONAME
); 
1284                         FREE(sin
, M_SONAME
); 
1286                 if ((taop 
= tcp_gettaocache(inp
)) == NULL
) { 
1287                         taop 
= &tao_noncached
; 
1288                         bzero(taop
, sizeof(*taop
)); 
1290                 tcp_dooptions(tp
, optp
, optlen
, th
, &to
); 
1294                         tp
->iss 
= tcp_new_isn(tp
); 
1296                 tp
->irs 
= th
->th_seq
; 
1297                 tcp_sendseqinit(tp
); 
1299                 tp
->snd_recover 
= tp
->snd_una
; 
1301                  * Initialization of the tcpcb for transaction; 
1302                  *   set SND.WND = SEG.WND, 
1303                  *   initialize CCsend and CCrecv. 
1305                 tp
->snd_wnd 
= tiwin
;    /* initial send-window */ 
1306                 tp
->cc_send 
= CC_INC(tcp_ccgen
); 
1307                 tp
->cc_recv 
= to
.to_cc
; 
1309                  * Perform TAO test on incoming CC (SEG.CC) option, if any. 
1310                  * - compare SEG.CC against cached CC from the same host, 
1312                  * - if SEG.CC > cached value, SYN must be new and is accepted 
1313                  *      immediately: save new CC in the cache, mark the socket 
1314                  *      connected, enter ESTABLISHED state, turn on flag to 
1315                  *      send a SYN in the next segment. 
1316                  *      A virtual advertised window is set in rcv_adv to 
1317                  *      initialize SWS prevention.  Then enter normal segment 
1318                  *      processing: drop SYN, process data and FIN. 
1319                  * - otherwise do a normal 3-way handshake. 
1321                 if ((to
.to_flag 
& TOF_CC
) != 0) { 
1322                     if (((tp
->t_flags 
& TF_NOPUSH
) != 0) && 
1323                         taop
->tao_cc 
!= 0 && CC_GT(to
.to_cc
, taop
->tao_cc
)) { 
1325                         taop
->tao_cc 
= to
.to_cc
; 
1327                         tp
->t_state 
= TCPS_ESTABLISHED
; 
1330                          * If there is a FIN, or if there is data and the 
1331                          * connection is local, then delay SYN,ACK(SYN) in 
1332                          * the hope of piggy-backing it on a response 
1333                          * segment.  Otherwise must send ACK now in case 
1334                          * the other side is slow starting. 
1336                         if (tcp_delack_enabled 
&& ((thflags 
& TH_FIN
) || 
1339                               (isipv6 
&& in6_localaddr(&inp
->in6p_faddr
)) 
1343                                in_localaddr(inp
->inp_faddr
) 
1348                                 TCP_DELACK_BITSET(tp
->t_inpcb
->hash_element
);  
1349                                 tp
->t_flags 
|= (TF_DELACK 
| TF_NEEDSYN
); 
1352                                 tp
->t_flags 
|= (TF_ACKNOW 
| TF_NEEDSYN
); 
1355                          * Limit the `virtual advertised window' to TCP_MAXWIN 
1356                          * here.  Even if we requested window scaling, it will 
1357                          * become effective only later when our SYN is acked. 
1359                         if (tp
->t_flags 
& TF_SLOWLINK 
&& slowlink_wsize 
> 0) /* clip window size for slow link */ 
1360                                 tp
->rcv_adv 
+= min(tp
->rcv_wnd
, slowlink_wsize
); 
1362                                 tp
->rcv_adv 
+= min(tp
->rcv_wnd
, TCP_MAXWIN
); 
1363                         tcpstat
.tcps_connects
++; 
1365                         tp
->t_timer
[TCPT_KEEP
] = tcp_keepinit
; 
1366                         dropsocket 
= 0;         /* committed to socket */ 
1367                         tcpstat
.tcps_accepts
++; 
1370                 /* else do standard 3-way handshake */ 
1373                      * No CC option, but maybe CC.NEW: 
1374                      *   invalidate cached value. 
1379                  * TAO test failed or there was no CC option, 
1380                  *    do a standard 3-way handshake. 
1382                 tp
->t_flags 
|= TF_ACKNOW
; 
1383                 tp
->t_state 
= TCPS_SYN_RECEIVED
; 
1384                 tp
->t_timer
[TCPT_KEEP
] = tcp_keepinit
; 
1385                 dropsocket 
= 0;         /* committed to socket */ 
1386                 tcpstat
.tcps_accepts
++; 
1391          * If the state is SYN_RECEIVED: 
1392          *      if seg contains an ACK, but not for our SYN/ACK, send a RST. 
1394         case TCPS_SYN_RECEIVED
: 
1395                 if ((thflags 
& TH_ACK
) && 
1396                     (SEQ_LEQ(th
->th_ack
, tp
->snd_una
) || 
1397                      SEQ_GT(th
->th_ack
, tp
->snd_max
))) { 
1398                                 rstreason 
= BANDLIM_RST_OPENPORT
; 
1404          * If the state is SYN_SENT: 
1405          *      if seg contains an ACK, but not for our SYN, drop the input. 
1406          *      if seg contains a RST, then drop the connection. 
1407          *      if seg does not contain SYN, then drop it. 
1408          * Otherwise this is an acceptable SYN segment 
1409          *      initialize tp->rcv_nxt and tp->irs 
1410          *      if seg contains ack then advance tp->snd_una 
1411          *      if SYN has been acked change to ESTABLISHED else SYN_RCVD state 
1412          *      arrange for segment to be acked (eventually) 
1413          *      continue processing rest of data/controls, beginning with URG 
1416                 if ((taop 
= tcp_gettaocache(inp
)) == NULL
) { 
1417                         taop 
= &tao_noncached
; 
1418                         bzero(taop
, sizeof(*taop
)); 
1421                 if ((thflags 
& TH_ACK
) && 
1422                     (SEQ_LEQ(th
->th_ack
, tp
->iss
) || 
1423                      SEQ_GT(th
->th_ack
, tp
->snd_max
))) { 
1425                          * If we have a cached CCsent for the remote host, 
1426                          * hence we haven't just crashed and restarted, 
1427                          * do not send a RST.  This may be a retransmission 
1428                          * from the other side after our earlier ACK was lost. 
1429                          * Our new SYN, when it arrives, will serve as the 
1432                         if (taop
->tao_ccsent 
!= 0) 
1435                                 rstreason 
= BANDLIM_UNLIMITED
; 
1439                 if (thflags 
& TH_RST
) { 
1440                         if (thflags 
& TH_ACK
) { 
1441                                 tp 
= tcp_drop(tp
, ECONNREFUSED
); 
1442                                 postevent(so
, 0, EV_RESET
); 
1446                 if ((thflags 
& TH_SYN
) == 0) 
1448                 tp
->snd_wnd 
= th
->th_win
;       /* initial send window */ 
1449                 tp
->cc_recv 
= to
.to_cc
;         /* foreign CC */ 
1451                 tp
->irs 
= th
->th_seq
; 
1453                 if (thflags 
& TH_ACK
) { 
1455                          * Our SYN was acked.  If segment contains CC.ECHO 
1456                          * option, check it to make sure this segment really 
1457                          * matches our SYN.  If not, just drop it as old 
1458                          * duplicate, but send an RST if we're still playing 
1459                          * by the old rules.  If no CC.ECHO option, make sure 
1460                          * we don't get fooled into using T/TCP. 
1462                         if (to
.to_flag 
& TOF_CCECHO
) { 
1463                                 if (tp
->cc_send 
!= to
.to_ccecho
) { 
1464                                         if (taop
->tao_ccsent 
!= 0) 
1467                                                 rstreason 
= BANDLIM_UNLIMITED
; 
1472                                 tp
->t_flags 
&= ~TF_RCVD_CC
; 
1473                         tcpstat
.tcps_connects
++; 
1475                         /* Do window scaling on this connection? */ 
1476                         if ((tp
->t_flags 
& (TF_RCVD_SCALE
|TF_REQ_SCALE
)) == 
1477                                 (TF_RCVD_SCALE
|TF_REQ_SCALE
)) { 
1478                                 tp
->snd_scale 
= tp
->requested_s_scale
; 
1479                                 tp
->rcv_scale 
= tp
->request_r_scale
; 
1481                         /* Segment is acceptable, update cache if undefined. */ 
1482                         if (taop
->tao_ccsent 
== 0) 
1483                                 taop
->tao_ccsent 
= to
.to_ccecho
; 
1485                         tp
->rcv_adv 
+= tp
->rcv_wnd
; 
1486                         tp
->snd_una
++;          /* SYN is acked */ 
1488                          * If there's data, delay ACK; if there's also a FIN 
1489                          * ACKNOW will be turned on later. 
1491                         if (tcp_delack_enabled 
&& tlen 
!= 0) { 
1492                                 TCP_DELACK_BITSET(tp
->t_inpcb
->hash_element
);  
1493                                 tp
->t_flags 
|= TF_DELACK
; 
1496                                 tp
->t_flags 
|= TF_ACKNOW
; 
1498                          * Received <SYN,ACK> in SYN_SENT[*] state. 
1500                          *      SYN_SENT  --> ESTABLISHED 
1501                          *      SYN_SENT* --> FIN_WAIT_1 
1503                         if (tp
->t_flags 
& TF_NEEDFIN
) { 
1504                                 tp
->t_state 
= TCPS_FIN_WAIT_1
; 
1505                                 tp
->t_flags 
&= ~TF_NEEDFIN
; 
1508                                 tp
->t_state 
= TCPS_ESTABLISHED
; 
1509                                 tp
->t_timer
[TCPT_KEEP
] = TCP_KEEPIDLE(tp
); 
1513                  *  Received initial SYN in SYN-SENT[*] state => simul- 
1514                  *  taneous open.  If segment contains CC option and there is 
1515                  *  a cached CC, apply TAO test; if it succeeds, connection is 
1516                  *  half-synchronized.  Otherwise, do 3-way handshake: 
1517                  *        SYN-SENT -> SYN-RECEIVED 
1518                  *        SYN-SENT* -> SYN-RECEIVED* 
1519                  *  If there was no CC option, clear cached CC value. 
1521                         tp
->t_flags 
|= TF_ACKNOW
; 
1522                         tp
->t_timer
[TCPT_REXMT
] = 0; 
1523                         if (to
.to_flag 
& TOF_CC
) { 
1524                                 if (taop
->tao_cc 
!= 0 && 
1525                                     CC_GT(to
.to_cc
, taop
->tao_cc
)) { 
1527                                          * update cache and make transition: 
1528                                          *        SYN-SENT -> ESTABLISHED* 
1529                                          *        SYN-SENT* -> FIN-WAIT-1* 
1531                                         taop
->tao_cc 
= to
.to_cc
; 
1532                                         if (tp
->t_flags 
& TF_NEEDFIN
) { 
1533                                                 tp
->t_state 
= TCPS_FIN_WAIT_1
; 
1534                                                 tp
->t_flags 
&= ~TF_NEEDFIN
; 
1536                                                 tp
->t_state 
= TCPS_ESTABLISHED
; 
1537                                                 tp
->t_timer
[TCPT_KEEP
] = TCP_KEEPIDLE(tp
); 
1539                                         tp
->t_flags 
|= TF_NEEDSYN
; 
1541                                         tp
->t_state 
= TCPS_SYN_RECEIVED
; 
1543                                 /* CC.NEW or no option => invalidate cache */ 
1545                                 tp
->t_state 
= TCPS_SYN_RECEIVED
; 
1551                  * Advance th->th_seq to correspond to first data byte. 
1552                  * If data, trim to stay within window, 
1553                  * dropping FIN if necessary. 
1556                 if (tlen 
> tp
->rcv_wnd
) { 
1557                         todrop 
= tlen 
- tp
->rcv_wnd
; 
1561                         tcpstat
.tcps_rcvpackafterwin
++; 
1562                         tcpstat
.tcps_rcvbyteafterwin 
+= todrop
; 
1564                 tp
->snd_wl1 
= th
->th_seq 
- 1; 
1565                 tp
->rcv_up 
= th
->th_seq
; 
1567                  *  Client side of transaction: already sent SYN and data. 
1568                  *  If the remote host used T/TCP to validate the SYN, 
1569                  *  our data will be ACK'd; if so, enter normal data segment 
1570                  *  processing in the middle of step 5, ack processing. 
1571                  *  Otherwise, goto step 6. 
1573                 if (thflags 
& TH_ACK
) 
1577          * If the state is LAST_ACK or CLOSING or TIME_WAIT: 
1578          *      if segment contains a SYN and CC [not CC.NEW] option: 
1579          *              if state == TIME_WAIT and connection duration > MSL, 
1580          *                  drop packet and send RST; 
1582          *              if SEG.CC > CCrecv then is new SYN, and can implicitly 
1583          *                  ack the FIN (and data) in retransmission queue. 
1584          *                  Complete close and delete TCPCB.  Then reprocess 
1585          *                  segment, hoping to find new TCPCB in LISTEN state; 
1587          *              else must be old SYN; drop it. 
1588          *      else do normal processing. 
1592         case TCPS_TIME_WAIT
: 
1593                 if ((thflags 
& TH_SYN
) && 
1594                     (to
.to_flag 
& TOF_CC
) && tp
->cc_recv 
!= 0) { 
1595                         if (tp
->t_state 
== TCPS_TIME_WAIT 
&& 
1596                                         tp
->t_starttime 
> tcp_msl
) { 
1597                                 rstreason 
= BANDLIM_UNLIMITED
; 
1600                         if (CC_GT(to
.to_cc
, tp
->cc_recv
)) { 
1607                 break;  /* continue normal processing */ 
1609         /* Received a SYN while connection is already established. 
1610          * This is a "half open connection and other anomalies" described 
1611          * in RFC793 page 34, send an ACK so the remote resets the connection 
1612          * or recovers by adjusting its sequence numbering  
1614         case TCPS_ESTABLISHED
: 
1615                 if (thflags 
& TH_SYN
)   
1621          * States other than LISTEN or SYN_SENT. 
1622          * First check the RST flag and sequence number since reset segments 
1623          * are exempt from the timestamp and connection count tests.  This 
1624          * fixes a bug introduced by the Stevens, vol. 2, p. 960 bugfix 
1625          * below which allowed reset segments in half the sequence space 
1626          * to fall through and be processed (which gives forged reset 
1627          * segments with a random sequence number a 50 percent chance of 
1628          * killing a connection). 
1629          * Then check timestamp, if present. 
1630          * Then check the connection count, if present. 
1631          * Then check that at least some bytes of segment are within 
1632          * receive window.  If segment begins before rcv_nxt, 
1633          * drop leading data (and SYN); if nothing left, just ack. 
1636          * If the RST bit is set, check the sequence number to see 
1637          * if this is a valid reset segment. 
1639          *   In all states except SYN-SENT, all reset (RST) segments 
1640          *   are validated by checking their SEQ-fields.  A reset is 
1641          *   valid if its sequence number is in the window. 
1642          * Note: this does not take into account delayed ACKs, so 
1643          *   we should test against last_ack_sent instead of rcv_nxt. 
1644          *   The sequence number in the reset segment is normally an 
1645          *   echo of our outgoing acknowledgement numbers, but some hosts 
1646          *   send a reset with the sequence number at the rightmost edge 
1647          *   of our receive window, and we have to handle this case. 
1648          * If we have multiple segments in flight, the initial reset 
1649          * segment sequence numbers will be to the left of last_ack_sent, 
1650          * but they will eventually catch up. 
1651          * In any case, it never made sense to trim reset segments to 
1652          * fit the receive window since RFC 1122 says: 
1653          *   4.2.2.12  RST Segment: RFC-793 Section 3.4 
1655          *    A TCP SHOULD allow a received RST segment to include data. 
1658          *         It has been suggested that a RST segment could contain 
1659          *         ASCII text that encoded and explained the cause of the 
1660          *         RST.  No standard has yet been established for such 
1663          * If the reset segment passes the sequence number test examine 
1665          *    SYN_RECEIVED STATE: 
1666          *      If passive open, return to LISTEN state. 
1667          *      If active open, inform user that connection was refused. 
1668          *    ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES: 
1669          *      Inform user that connection was reset, and close tcb. 
1670          *    CLOSING, LAST_ACK STATES: 
1673          *      Drop the segment - see Stevens, vol. 2, p. 964 and 
1676         if (thflags 
& TH_RST
) { 
1677                 if (SEQ_GEQ(th
->th_seq
, tp
->last_ack_sent
) && 
1678                     SEQ_LT(th
->th_seq
, tp
->last_ack_sent 
+ tp
->rcv_wnd
)) { 
1679                         switch (tp
->t_state
) { 
1681                         case TCPS_SYN_RECEIVED
: 
1682                                 so
->so_error 
= ECONNREFUSED
; 
1685                         case TCPS_ESTABLISHED
: 
1686                         case TCPS_FIN_WAIT_1
: 
1687                         case TCPS_CLOSE_WAIT
: 
1691                         case TCPS_FIN_WAIT_2
: 
1692                                 so
->so_error 
= ECONNRESET
; 
1694                                 postevent(so
, 0, EV_RESET
); 
1695                                 tp
->t_state 
= TCPS_CLOSED
; 
1696                                 tcpstat
.tcps_drops
++; 
1705                         case TCPS_TIME_WAIT
: 
1713          * RFC 1323 PAWS: If we have a timestamp reply on this segment 
1714          * and it's less than ts_recent, drop it. 
1716         if ((to
.to_flag 
& TOF_TS
) != 0 && tp
->ts_recent 
&& 
1717             TSTMP_LT(to
.to_tsval
, tp
->ts_recent
)) { 
1719                 /* Check to see if ts_recent is over 24 days old.  */ 
1720                 if ((int)(tcp_now 
- tp
->ts_recent_age
) > TCP_PAWS_IDLE
) { 
1722                          * Invalidate ts_recent.  If this segment updates 
1723                          * ts_recent, the age will be reset later and ts_recent 
1724                          * will get a valid value.  If it does not, setting 
1725                          * ts_recent to zero will at least satisfy the 
1726                          * requirement that zero be placed in the timestamp 
1727                          * echo reply when ts_recent isn't valid.  The 
1728                          * age isn't reset until we get a valid ts_recent 
1729                          * because we don't want out-of-order segments to be 
1730                          * dropped when ts_recent is old. 
1734                         tcpstat
.tcps_rcvduppack
++; 
1735                         tcpstat
.tcps_rcvdupbyte 
+= tlen
; 
1736                         tcpstat
.tcps_pawsdrop
++; 
1743          *   If T/TCP was negotiated and the segment doesn't have CC, 
1744          *   or if its CC is wrong then drop the segment. 
1745          *   RST segments do not have to comply with this. 
1747         if ((tp
->t_flags 
& (TF_REQ_CC
|TF_RCVD_CC
)) == (TF_REQ_CC
|TF_RCVD_CC
) && 
1748             ((to
.to_flag 
& TOF_CC
) == 0 || tp
->cc_recv 
!= to
.to_cc
)) 
1752          * In the SYN-RECEIVED state, validate that the packet belongs to 
1753          * this connection before trimming the data to fit the receive 
1754          * window.  Check the sequence number versus IRS since we know 
1755          * the sequence numbers haven't wrapped.  This is a partial fix 
1756          * for the "LAND" DoS attack. 
1758         if (tp
->t_state 
== TCPS_SYN_RECEIVED 
&& SEQ_LT(th
->th_seq
, tp
->irs
)) { 
1759                 rstreason 
= BANDLIM_RST_OPENPORT
; 
1763         todrop 
= tp
->rcv_nxt 
- th
->th_seq
; 
1765                 if (thflags 
& TH_SYN
) { 
1775                  * Following if statement from Stevens, vol. 2, p. 960. 
1778                     || (todrop 
== tlen 
&& (thflags 
& TH_FIN
) == 0)) { 
1780                          * Any valid FIN must be to the left of the window. 
1781                          * At this point the FIN must be a duplicate or out 
1782                          * of sequence; drop it. 
1787                          * Send an ACK to resynchronize and drop any data. 
1788                          * But keep on processing for RST or ACK. 
1790                         tp
->t_flags 
|= TF_ACKNOW
; 
1792                         tcpstat
.tcps_rcvduppack
++; 
1793                         tcpstat
.tcps_rcvdupbyte 
+= todrop
; 
1795                         tcpstat
.tcps_rcvpartduppack
++; 
1796                         tcpstat
.tcps_rcvpartdupbyte 
+= todrop
; 
1798                 drop_hdrlen 
+= todrop
;  /* drop from the top afterwards */ 
1799                 th
->th_seq 
+= todrop
; 
1801                 if (th
->th_urp 
> todrop
) 
1802                         th
->th_urp 
-= todrop
; 
1810          * If new data are received on a connection after the 
1811          * user processes are gone, then RST the other end. 
1813         if ((so
->so_state 
& SS_NOFDREF
) && 
1814             tp
->t_state 
> TCPS_CLOSE_WAIT 
&& tlen
) { 
1816                 tcpstat
.tcps_rcvafterclose
++; 
1817                 rstreason 
= BANDLIM_UNLIMITED
; 
1822          * If segment ends after window, drop trailing data 
1823          * (and PUSH and FIN); if nothing left, just ACK. 
1825         todrop 
= (th
->th_seq
+tlen
) - (tp
->rcv_nxt
+tp
->rcv_wnd
); 
1827                 tcpstat
.tcps_rcvpackafterwin
++; 
1828                 if (todrop 
>= tlen
) { 
1829                         tcpstat
.tcps_rcvbyteafterwin 
+= tlen
; 
1831                          * If a new connection request is received 
1832                          * while in TIME_WAIT, drop the old connection 
1833                          * and start over if the sequence numbers 
1834                          * are above the previous ones. 
1836                         if (thflags 
& TH_SYN 
&& 
1837                             tp
->t_state 
== TCPS_TIME_WAIT 
&& 
1838                             SEQ_GT(th
->th_seq
, tp
->rcv_nxt
)) { 
1839                                 iss 
= tcp_new_isn(tp
); 
1844                          * If window is closed can only take segments at 
1845                          * window edge, and have to drop data and PUSH from 
1846                          * incoming segments.  Continue processing, but 
1847                          * remember to ack.  Otherwise, drop segment 
1850                         if (tp
->rcv_wnd 
== 0 && th
->th_seq 
== tp
->rcv_nxt
) { 
1851                                 tp
->t_flags 
|= TF_ACKNOW
; 
1852                                 tcpstat
.tcps_rcvwinprobe
++; 
1856                         tcpstat
.tcps_rcvbyteafterwin 
+= todrop
; 
1859                 thflags 
&= ~(TH_PUSH
|TH_FIN
); 
1863          * If last ACK falls within this segment's sequence numbers, 
1864          * record its timestamp. 
1865          * NOTE that the test is modified according to the latest 
1866          * proposal of the tcplw@cray.com list (Braden 1993/04/26). 
1868         if ((to
.to_flag 
& TOF_TS
) != 0 && 
1869             SEQ_LEQ(th
->th_seq
, tp
->last_ack_sent
)) { 
1870                 tp
->ts_recent_age 
= tcp_now
; 
1871                 tp
->ts_recent 
= to
.to_tsval
; 
1875          * If a SYN is in the window, then this is an 
1876          * error and we send an RST and drop the connection. 
1878         if (thflags 
& TH_SYN
) { 
1879                 tp 
= tcp_drop(tp
, ECONNRESET
); 
1880                 rstreason 
= BANDLIM_UNLIMITED
; 
1881                 postevent(so
, 0, EV_RESET
); 
1886          * If the ACK bit is off:  if in SYN-RECEIVED state or SENDSYN 
1887          * flag is on (half-synchronized state), then queue data for 
1888          * later processing; else drop segment and return. 
1890         if ((thflags 
& TH_ACK
) == 0) { 
1891                 if (tp
->t_state 
== TCPS_SYN_RECEIVED 
|| 
1892                     (tp
->t_flags 
& TF_NEEDSYN
)) 
1901         switch (tp
->t_state
) { 
1904          * In SYN_RECEIVED state, the ack ACKs our SYN, so enter 
1905          * ESTABLISHED state and continue processing. 
1906          * The ACK was checked above. 
1908         case TCPS_SYN_RECEIVED
: 
1910                 tcpstat
.tcps_connects
++; 
1913                 /* Do window scaling? */ 
1914                 if ((tp
->t_flags 
& (TF_RCVD_SCALE
|TF_REQ_SCALE
)) == 
1915                         (TF_RCVD_SCALE
|TF_REQ_SCALE
)) { 
1916                         tp
->snd_scale 
= tp
->requested_s_scale
; 
1917                         tp
->rcv_scale 
= tp
->request_r_scale
; 
1920                  * Upon successful completion of 3-way handshake, 
1921                  * update cache.CC if it was undefined, pass any queued 
1922                  * data to the user, and advance state appropriately. 
1924                 if ((taop 
= tcp_gettaocache(inp
)) != NULL 
&& 
1926                         taop
->tao_cc 
= tp
->cc_recv
; 
1930                  *      SYN-RECEIVED  -> ESTABLISHED 
1931                  *      SYN-RECEIVED* -> FIN-WAIT-1 
1933                 if (tp
->t_flags 
& TF_NEEDFIN
) { 
1934                         tp
->t_state 
= TCPS_FIN_WAIT_1
; 
1935                         tp
->t_flags 
&= ~TF_NEEDFIN
; 
1937                         tp
->t_state 
= TCPS_ESTABLISHED
; 
1938                         tp
->t_timer
[TCPT_KEEP
] = TCP_KEEPIDLE(tp
); 
1941                  * If segment contains data or ACK, will call tcp_reass() 
1942                  * later; if not, do so now to pass queued data to user. 
1944                 if (tlen 
== 0 && (thflags 
& TH_FIN
) == 0) 
1945                         (void) tcp_reass(tp
, (struct tcphdr 
*)0, 0, 
1947                 tp
->snd_wl1 
= th
->th_seq 
- 1; 
1951          * In ESTABLISHED state: drop duplicate ACKs; ACK out of range 
1952          * ACKs.  If the ack is in the range 
1953          *      tp->snd_una < th->th_ack <= tp->snd_max 
1954          * then advance tp->snd_una to th->th_ack and drop 
1955          * data from the retransmission queue.  If this ACK reflects 
1956          * more up to date window information we update our window information. 
1958         case TCPS_ESTABLISHED
: 
1959         case TCPS_FIN_WAIT_1
: 
1960         case TCPS_FIN_WAIT_2
: 
1961         case TCPS_CLOSE_WAIT
: 
1964         case TCPS_TIME_WAIT
: 
1966                 if (SEQ_LEQ(th
->th_ack
, tp
->snd_una
)) { 
1967                         if (tlen 
== 0 && tiwin 
== tp
->snd_wnd
) { 
1968                                 tcpstat
.tcps_rcvdupack
++; 
1970                                  * If we have outstanding data (other than 
1971                                  * a window probe), this is a completely 
1972                                  * duplicate ack (ie, window info didn't 
1973                                  * change), the ack is the biggest we've 
1974                                  * seen and we've seen exactly our rexmt 
1975                                  * threshold of them, assume a packet 
1976                                  * has been dropped and retransmit it. 
1977                                  * Kludge snd_nxt & the congestion 
1978                                  * window so we send only this one 
1981                                  * We know we're losing at the current 
1982                                  * window size so do congestion avoidance 
1983                                  * (set ssthresh to half the current window 
1984                                  * and pull our congestion window back to 
1985                                  * the new ssthresh). 
1987                                  * Dup acks mean that packets have left the 
1988                                  * network (they're now cached at the receiver) 
1989                                  * so bump cwnd by the amount in the receiver 
1990                                  * to keep a constant cwnd packets in the 
1993                                 if (tp
->t_timer
[TCPT_REXMT
] == 0 || 
1994                                     th
->th_ack 
!= tp
->snd_una
) 
1996                                 else if (++tp
->t_dupacks 
== tcprexmtthresh
) { 
1997                                         tcp_seq onxt 
= tp
->snd_nxt
; 
1999                                             min(tp
->snd_wnd
, tp
->snd_cwnd
) / 2 / 
2001                                         if (tcp_do_newreno 
&& SEQ_LT(th
->th_ack
, 
2003                                                 /* False retransmit, should not 
2006                                                 tp
->snd_cwnd 
+= tp
->t_maxseg
; 
2008                                                 (void) tcp_output(tp
); 
2013                                         tp
->snd_ssthresh 
= win 
* tp
->t_maxseg
; 
2014                                         tp
->snd_recover 
= tp
->snd_max
; 
2015                                         tp
->t_timer
[TCPT_REXMT
] = 0; 
2017                                         tp
->snd_nxt 
= th
->th_ack
; 
2018                                         tp
->snd_cwnd 
= tp
->t_maxseg
; 
2019                                         (void) tcp_output(tp
); 
2020                                         tp
->snd_cwnd 
= tp
->snd_ssthresh 
+ 
2021                                                tp
->t_maxseg 
* tp
->t_dupacks
; 
2022                                         if (SEQ_GT(onxt
, tp
->snd_nxt
)) 
2025                                 } else if (tp
->t_dupacks 
> tcprexmtthresh
) { 
2026                                         tp
->snd_cwnd 
+= tp
->t_maxseg
; 
2027                                         (void) tcp_output(tp
); 
2035                  * If the congestion window was inflated to account 
2036                  * for the other side's cached packets, retract it. 
2038                 if (tcp_do_newreno 
== 0) { 
2039                         if (tp
->t_dupacks 
>= tcprexmtthresh 
&& 
2040                                 tp
->snd_cwnd 
> tp
->snd_ssthresh
) 
2041                                 tp
->snd_cwnd 
= tp
->snd_ssthresh
; 
2043                 } else if (tp
->t_dupacks 
>= tcprexmtthresh 
&& 
2044                     !tcp_newreno(tp
, th
)) { 
2046                          * Window inflation should have left us with approx. 
2047                          * snd_ssthresh outstanding data.  But in case we 
2048                          * would be inclined to send a burst, better to do 
2049                          * it via the slow start mechanism. 
2051                         if (SEQ_GT(th
->th_ack 
+ tp
->snd_ssthresh
, tp
->snd_max
)) 
2053                                     tp
->snd_max 
- th
->th_ack 
+ tp
->t_maxseg
; 
2055                                 tp
->snd_cwnd 
= tp
->snd_ssthresh
; 
2059                 if (tp
->t_dupacks 
< tcprexmtthresh
) 
2062                 if (SEQ_GT(th
->th_ack
, tp
->snd_max
)) { 
2063                         tcpstat
.tcps_rcvacktoomuch
++; 
2067                  *  If we reach this point, ACK is not a duplicate, 
2068                  *     i.e., it ACKs something we sent. 
2070                 if (tp
->t_flags 
& TF_NEEDSYN
) { 
2072                          * T/TCP: Connection was half-synchronized, and our 
2073                          * SYN has been ACK'd (so connection is now fully 
2074                          * synchronized).  Go to non-starred state, 
2075                          * increment snd_una for ACK of SYN, and check if 
2076                          * we can do window scaling. 
2078                         tp
->t_flags 
&= ~TF_NEEDSYN
; 
2080                         /* Do window scaling? */ 
2081                         if ((tp
->t_flags 
& (TF_RCVD_SCALE
|TF_REQ_SCALE
)) == 
2082                                 (TF_RCVD_SCALE
|TF_REQ_SCALE
)) { 
2083                                 tp
->snd_scale 
= tp
->requested_s_scale
; 
2084                                 tp
->rcv_scale 
= tp
->request_r_scale
; 
2089                 acked 
= th
->th_ack 
- tp
->snd_una
; 
2090                 tcpstat
.tcps_rcvackpack
++; 
2091                 tcpstat
.tcps_rcvackbyte 
+= acked
; 
2094                  * If we just performed our first retransmit, and the ACK 
2095                  * arrives within our recovery window, then it was a mistake 
2096                  * to do the retransmit in the first place.  Recover our 
2097                  * original cwnd and ssthresh, and proceed to transmit where 
2100                 if (tp
->t_rxtshift 
== 1 && tcp_now 
< tp
->t_badrxtwin
) { 
2101                         tp
->snd_cwnd 
= tp
->snd_cwnd_prev
; 
2102                         tp
->snd_ssthresh 
= tp
->snd_ssthresh_prev
; 
2103                         tp
->snd_nxt 
= tp
->snd_max
; 
2104                         tp
->t_badrxtwin 
= 0;    /* XXX probably not required */  
2108                  * If we have a timestamp reply, update smoothed 
2109                  * round trip time.  If no timestamp is present but 
2110                  * transmit timer is running and timed sequence 
2111                  * number was acked, update smoothed round trip time. 
2112                  * Since we now have an rtt measurement, cancel the 
2113                  * timer backoff (cf., Phil Karn's retransmit alg.). 
2114                  * Recompute the initial retransmit timer. 
2115                  * Also makes sure we have a valid time stamp in hand 
2117                 if (((to
.to_flag 
& TOF_TS
) != 0) && (to
.to_tsecr 
!= 0)) 
2118                         tcp_xmit_timer(tp
, tcp_now 
- to
.to_tsecr 
+ 1); 
2119                 else if (tp
->t_rtttime 
&& SEQ_GT(th
->th_ack
, tp
->t_rtseq
)) 
2120                         tcp_xmit_timer(tp
, tp
->t_rtttime
); 
2123                  * If all outstanding data is acked, stop retransmit 
2124                  * timer and remember to restart (more output or persist). 
2125                  * If there is more data to be acked, restart retransmit 
2126                  * timer, using current (possibly backed-off) value. 
2128                 if (th
->th_ack 
== tp
->snd_max
) { 
2129                         tp
->t_timer
[TCPT_REXMT
] = 0; 
2131                 } else if (tp
->t_timer
[TCPT_PERSIST
] == 0) 
2132                         tp
->t_timer
[TCPT_REXMT
] = tp
->t_rxtcur
; 
2135                  * If no data (only SYN) was ACK'd, 
2136                  *    skip rest of ACK processing. 
2142                  * When new data is acked, open the congestion window. 
2143                  * If the window gives us less than ssthresh packets 
2144                  * in flight, open exponentially (maxseg per packet). 
2145                  * Otherwise open linearly: maxseg per window 
2146                  * (maxseg^2 / cwnd per packet). 
2149                 register u_int cw 
= tp
->snd_cwnd
; 
2150                 register u_int incr 
= tp
->t_maxseg
; 
2152                 if (cw 
> tp
->snd_ssthresh
) 
2153                         incr 
= incr 
* incr 
/ cw
; 
2155                  * If t_dupacks != 0 here, it indicates that we are still 
2156                  * in NewReno fast recovery mode, so we leave the congestion 
2159                 if (tcp_do_newreno 
== 0 || tp
->t_dupacks 
== 0) 
2160                         tp
->snd_cwnd 
= min(cw 
+ incr
,TCP_MAXWIN
<<tp
->snd_scale
); 
2162                 if (acked 
> so
->so_snd
.sb_cc
) { 
2163                         tp
->snd_wnd 
-= so
->so_snd
.sb_cc
; 
2164                         sbdrop(&so
->so_snd
, (int)so
->so_snd
.sb_cc
); 
2167                         sbdrop(&so
->so_snd
, acked
); 
2168                         tp
->snd_wnd 
-= acked
; 
2171                 tp
->snd_una 
= th
->th_ack
; 
2172                 if (SEQ_LT(tp
->snd_nxt
, tp
->snd_una
)) 
2173                         tp
->snd_nxt 
= tp
->snd_una
; 
2176                 switch (tp
->t_state
) { 
2179                  * In FIN_WAIT_1 STATE in addition to the processing 
2180                  * for the ESTABLISHED state if our FIN is now acknowledged 
2181                  * then enter FIN_WAIT_2. 
2183                 case TCPS_FIN_WAIT_1
: 
2184                         if (ourfinisacked
) { 
2186                                  * If we can't receive any more 
2187                                  * data, then closing user can proceed. 
2188                                  * Starting the timer is contrary to the 
2189                                  * specification, but if we don't get a FIN 
2190                                  * we'll hang forever. 
2192                                 if (so
->so_state 
& SS_CANTRCVMORE
) { 
2193                                         soisdisconnected(so
); 
2194                                         tp
->t_timer
[TCPT_2MSL
] = tcp_maxidle
; 
2196                                 add_to_time_wait(tp
); 
2197                                 tp
->t_state 
= TCPS_FIN_WAIT_2
; 
2202                  * In CLOSING STATE in addition to the processing for 
2203                  * the ESTABLISHED state if the ACK acknowledges our FIN 
2204                  * then enter the TIME-WAIT state, otherwise ignore 
2208                         if (ourfinisacked
) { 
2209                                 tp
->t_state 
= TCPS_TIME_WAIT
; 
2210                                 tcp_canceltimers(tp
); 
2211                                 /* Shorten TIME_WAIT [RFC-1644, p.28] */ 
2212                                 if (tp
->cc_recv 
!= 0 && 
2213                                     tp
->t_starttime 
< tcp_msl
) 
2214                                         tp
->t_timer
[TCPT_2MSL
] = 
2215                                             tp
->t_rxtcur 
* TCPTV_TWTRUNC
; 
2217                                         tp
->t_timer
[TCPT_2MSL
] = 2 * tcp_msl
; 
2218                                 add_to_time_wait(tp
); 
2219                                 soisdisconnected(so
); 
2224                  * In LAST_ACK, we may still be waiting for data to drain 
2225                  * and/or to be acked, as well as for the ack of our FIN. 
2226                  * If our FIN is now acknowledged, delete the TCB, 
2227                  * enter the closed state and return. 
2230                         if (ourfinisacked
) { 
2237                  * In TIME_WAIT state the only thing that should arrive 
2238                  * is a retransmission of the remote FIN.  Acknowledge 
2239                  * it and restart the finack timer. 
2241                 case TCPS_TIME_WAIT
: 
2242                         tp
->t_timer
[TCPT_2MSL
] = 2 * tcp_msl
; 
2243                         add_to_time_wait(tp
); 
2250          * Update window information. 
2251          * Don't look at window if no ACK: TAC's send garbage on first SYN. 
2253         if ((thflags 
& TH_ACK
) && 
2254             (SEQ_LT(tp
->snd_wl1
, th
->th_seq
) || 
2255             (tp
->snd_wl1 
== th
->th_seq 
&& (SEQ_LT(tp
->snd_wl2
, th
->th_ack
) || 
2256              (tp
->snd_wl2 
== th
->th_ack 
&& tiwin 
> tp
->snd_wnd
))))) { 
2257                 /* keep track of pure window updates */ 
2259                     tp
->snd_wl2 
== th
->th_ack 
&& tiwin 
> tp
->snd_wnd
) 
2260                         tcpstat
.tcps_rcvwinupd
++; 
2261                 tp
->snd_wnd 
= tiwin
; 
2262                 tp
->snd_wl1 
= th
->th_seq
; 
2263                 tp
->snd_wl2 
= th
->th_ack
; 
2264                 if (tp
->snd_wnd 
> tp
->max_sndwnd
) 
2265                         tp
->max_sndwnd 
= tp
->snd_wnd
; 
2270          * Process segments with URG. 
2272         if ((thflags 
& TH_URG
) && th
->th_urp 
&& 
2273             TCPS_HAVERCVDFIN(tp
->t_state
) == 0) { 
2275                  * This is a kludge, but if we receive and accept 
2276                  * random urgent pointers, we'll crash in 
2277                  * soreceive.  It's hard to imagine someone 
2278                  * actually wanting to send this much urgent data. 
2280                 if (th
->th_urp 
+ so
->so_rcv
.sb_cc 
> sb_max
) { 
2281                         th
->th_urp 
= 0;                 /* XXX */ 
2282                         thflags 
&= ~TH_URG
;             /* XXX */ 
2283                         goto dodata
;                    /* XXX */ 
2286                  * If this segment advances the known urgent pointer, 
2287                  * then mark the data stream.  This should not happen 
2288                  * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since 
2289                  * a FIN has been received from the remote side. 
2290                  * In these states we ignore the URG. 
2292                  * According to RFC961 (Assigned Protocols), 
2293                  * the urgent pointer points to the last octet 
2294                  * of urgent data.  We continue, however, 
2295                  * to consider it to indicate the first octet 
2296                  * of data past the urgent section as the original 
2297                  * spec states (in one of two places). 
2299                 if (SEQ_GT(th
->th_seq
+th
->th_urp
, tp
->rcv_up
)) { 
2300                         tp
->rcv_up 
= th
->th_seq 
+ th
->th_urp
; 
2301                         so
->so_oobmark 
= so
->so_rcv
.sb_cc 
+ 
2302                             (tp
->rcv_up 
- tp
->rcv_nxt
) - 1; 
2303                         if (so
->so_oobmark 
== 0) { 
2304                                 so
->so_state 
|= SS_RCVATMARK
; 
2305                                 postevent(so
, 0, EV_OOB
); 
2308                         tp
->t_oobflags 
&= ~(TCPOOB_HAVEDATA 
| TCPOOB_HADDATA
); 
2311                  * Remove out of band data so doesn't get presented to user. 
2312                  * This can happen independent of advancing the URG pointer, 
2313                  * but if two URG's are pending at once, some out-of-band 
2314                  * data may creep in... ick. 
2316                 if (th
->th_urp 
<= (u_long
)tlen
 
2318                      && (so
->so_options 
& SO_OOBINLINE
) == 0 
2321                         tcp_pulloutofband(so
, th
, m
, 
2322                                 drop_hdrlen
);   /* hdr drop is delayed */ 
2325                  * If no out of band data is expected, 
2326                  * pull receive urgent pointer along 
2327                  * with the receive window. 
2329                 if (SEQ_GT(tp
->rcv_nxt
, tp
->rcv_up
)) 
2330                         tp
->rcv_up 
= tp
->rcv_nxt
; 
2334          * Process the segment text, merging it into the TCP sequencing queue, 
2335          * and arranging for acknowledgment of receipt if necessary. 
2336          * This process logically involves adjusting tp->rcv_wnd as data 
2337          * is presented to the user (this happens in tcp_usrreq.c, 
2338          * case PRU_RCVD).  If a FIN has already been received on this 
2339          * connection then we just ignore the text. 
2341         if ((tlen 
|| (thflags
&TH_FIN
)) && 
2342             TCPS_HAVERCVDFIN(tp
->t_state
) == 0) { 
2343                 m_adj(m
, drop_hdrlen
);  /* delayed header drop */ 
2345                  * Insert segment which includes th into reassembly queue of tcp with 
2346                  * control block tp.  Return TH_FIN if reassembly now includes 
2347                  * a segment with FIN.  This handles the common case inline (segment 
2348                  * is the next to be received on an established connection, and the 
2349                  * queue is empty), avoiding linkage into and removal from the queue 
2350                  * and repetition of various conversions. 
2351                  * Set DELACK for segments received in order, but ack immediately 
2352                  * when segments are out of order (so fast retransmit can work). 
2354                 if (th
->th_seq 
== tp
->rcv_nxt 
&& 
2355                     LIST_EMPTY(&tp
->t_segq
) && 
2356                     TCPS_HAVEESTABLISHED(tp
->t_state
)) { 
2358                         if (tcp_delack_enabled
) { 
2359                                 TCP_DELACK_BITSET(tp
->t_inpcb
->hash_element
); 
2360                                 tp
->t_flags 
|= TF_DELACK
; 
2364                                 callout_reset(tp
->tt_delack
, tcp_delacktime
, 
2365                                     tcp_timer_delack
, tp
); 
2368                                 tp
->t_flags 
|= TF_ACKNOW
; 
2369                         tp
->rcv_nxt 
+= tlen
; 
2370                         thflags 
= th
->th_flags 
& TH_FIN
; 
2371                         tcpstat
.tcps_rcvpack
++; 
2372                         tcpstat
.tcps_rcvbyte 
+= tlen
; 
2374                         sbappend(&so
->so_rcv
, m
); 
2377                         thflags 
= tcp_reass(tp
, th
, &tlen
, m
); 
2378                         tp
->t_flags 
|= TF_ACKNOW
; 
2381                 if (tp
->t_flags 
& TF_DELACK
)  
2385                                 KERNEL_DEBUG(DBG_LAYER_END
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
2386                                         (((ip6
->ip6_src
.s6_addr16
[0]) << 16) | (ip6
->ip6_dst
.s6_addr16
[0])), 
2387                                         th
->th_seq
, th
->th_ack
, th
->th_win
);  
2392                                 KERNEL_DEBUG(DBG_LAYER_END
, ((th
->th_dport 
<< 16) | th
->th_sport
), 
2393                                         (((ip
->ip_src
.s_addr 
& 0xffff) << 16) | (ip
->ip_dst
.s_addr 
& 0xffff)), 
2394                                         th
->th_seq
, th
->th_ack
, th
->th_win
);  
2399                  * Note the amount of data that peer has sent into 
2400                  * our window, in order to estimate the sender's 
2403                 len 
= so
->so_rcv
.sb_hiwat 
- (tp
->rcv_adv 
- tp
->rcv_nxt
); 
2410          * If FIN is received ACK the FIN and let the user know 
2411          * that the connection is closing. 
2413         if (thflags 
& TH_FIN
) { 
2414                 if (TCPS_HAVERCVDFIN(tp
->t_state
) == 0) { 
2416                         postevent(so
, 0, EV_FIN
); 
2418                          *  If connection is half-synchronized 
2419                          *  (ie NEEDSYN flag on) then delay ACK, 
2420                          *  so it may be piggybacked when SYN is sent. 
2421                          *  Otherwise, since we received a FIN then no 
2422                          *  more input can be expected, send ACK now. 
2424                         if (tcp_delack_enabled 
&& (tp
->t_flags 
& TF_NEEDSYN
)) { 
2425                                 TCP_DELACK_BITSET(tp
->t_inpcb
->hash_element
);  
2426                                 tp
->t_flags 
|= TF_DELACK
; 
2429                                 tp
->t_flags 
|= TF_ACKNOW
; 
2432                 switch (tp
->t_state
) { 
2435                  * In SYN_RECEIVED and ESTABLISHED STATES 
2436                  * enter the CLOSE_WAIT state. 
2438                 case TCPS_SYN_RECEIVED
: 
2440                 case TCPS_ESTABLISHED
: 
2441                         tp
->t_state 
= TCPS_CLOSE_WAIT
; 
2445                  * If still in FIN_WAIT_1 STATE FIN has not been acked so 
2446                  * enter the CLOSING state. 
2448                 case TCPS_FIN_WAIT_1
: 
2449                         tp
->t_state 
= TCPS_CLOSING
; 
2453                  * In FIN_WAIT_2 state enter the TIME_WAIT state, 
2454                  * starting the time-wait timer, turning off the other 
2457                 case TCPS_FIN_WAIT_2
: 
2458                         tp
->t_state 
= TCPS_TIME_WAIT
; 
2459                         tcp_canceltimers(tp
); 
2460                         /* Shorten TIME_WAIT [RFC-1644, p.28] */ 
2461                         if (tp
->cc_recv 
!= 0 && 
2462                             tp
->t_starttime 
< tcp_msl
) { 
2463                                 tp
->t_timer
[TCPT_2MSL
] = 
2464                                     tp
->t_rxtcur 
* TCPTV_TWTRUNC
; 
2465                                 /* For transaction client, force ACK now. */ 
2466                                 tp
->t_flags 
|= TF_ACKNOW
; 
2469                                 tp
->t_timer
[TCPT_2MSL
] = 2 * tcp_msl
; 
2471                         add_to_time_wait(tp
); 
2472                         soisdisconnected(so
); 
2476                  * In TIME_WAIT state restart the 2 MSL time_wait timer. 
2478                 case TCPS_TIME_WAIT
: 
2479                         tp
->t_timer
[TCPT_2MSL
] = 2 * tcp_msl
; 
2480                         add_to_time_wait(tp
); 
2485         if (so
->so_options 
& SO_DEBUG
) 
2486                 tcp_trace(TA_INPUT
, ostate
, tp
, (void *)tcp_saveipgen
, 
2491          * Return any desired output. 
2493         if (needoutput 
|| (tp
->t_flags 
& TF_ACKNOW
)) 
2494                 (void) tcp_output(tp
); 
2495         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
2500          * Generate an ACK dropping incoming segment if it occupies 
2501          * sequence space, where the ACK reflects our state. 
2503          * We can now skip the test for the RST flag since all 
2504          * paths to this code happen after packets containing 
2505          * RST have been dropped. 
2507          * In the SYN-RECEIVED state, don't send an ACK unless the 
2508          * segment we received passes the SYN-RECEIVED ACK test. 
2509          * If it fails send a RST.  This breaks the loop in the 
2510          * "LAND" DoS attack, and also prevents an ACK storm 
2511          * between two listening ports that have been sent forged 
2512          * SYN segments, each with the source address of the other. 
2514         if (tp
->t_state 
== TCPS_SYN_RECEIVED 
&& (thflags 
& TH_ACK
) && 
2515             (SEQ_GT(tp
->snd_una
, th
->th_ack
) || 
2516              SEQ_GT(th
->th_ack
, tp
->snd_max
)) ) { 
2517                 rstreason 
= BANDLIM_RST_OPENPORT
; 
2521         if (so
->so_options 
& SO_DEBUG
) 
2522                 tcp_trace(TA_DROP
, ostate
, tp
, (void *)tcp_saveipgen
, 
2526         tp
->t_flags 
|= TF_ACKNOW
; 
2527         (void) tcp_output(tp
); 
2528         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
2533          * Generate a RST, dropping incoming segment. 
2534          * Make ACK acceptable to originator of segment. 
2535          * Don't bother to respond if destination was broadcast/multicast. 
2537         if ((thflags 
& TH_RST
) || m
->m_flags 
& (M_BCAST
|M_MCAST
)) 
2541                 if (IN6_IS_ADDR_MULTICAST(&ip6
->ip6_dst
) || 
2542                     IN6_IS_ADDR_MULTICAST(&ip6
->ip6_src
)) 
2546         if (IN_MULTICAST(ntohl(ip
->ip_dst
.s_addr
)) || 
2547             IN_MULTICAST(ntohl(ip
->ip_src
.s_addr
)) || 
2548             ip
->ip_src
.s_addr 
== htonl(INADDR_BROADCAST
) || 
2549             in_broadcast(ip
->ip_dst
, m
->m_pkthdr
.rcvif
)) 
2551         /* IPv6 anycast check is done at tcp6_input() */ 
2554          * Perform bandwidth limiting. 
2557         if (badport_bandlim(rstreason
) < 0) 
2562         if (tp 
== 0 || (tp
->t_inpcb
->inp_socket
->so_options 
& SO_DEBUG
)) 
2563                 tcp_trace(TA_DROP
, ostate
, tp
, (void *)tcp_saveipgen
, 
2566         if (thflags 
& TH_ACK
) 
2567                 /* mtod() below is safe as long as hdr dropping is delayed */ 
2568                 tcp_respond(tp
, mtod(m
, void *), th
, m
, (tcp_seq
)0, th
->th_ack
, 
2571                 if (thflags 
& TH_SYN
) 
2573                 /* mtod() below is safe as long as hdr dropping is delayed */ 
2574                 tcp_respond(tp
, mtod(m
, void *), th
, m
, th
->th_seq
+tlen
, 
2575                             (tcp_seq
)0, TH_RST
|TH_ACK
); 
2577         /* destroy temporarily created socket */ 
2580         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
2585          * Drop space held by incoming segment and return. 
2588         if (tp 
== 0 || (tp
->t_inpcb
->inp_socket
->so_options 
& SO_DEBUG
)) 
2589                 tcp_trace(TA_DROP
, ostate
, tp
, (void *)tcp_saveipgen
, 
2593         /* destroy temporarily created socket */ 
2596         KERNEL_DEBUG(DBG_FNC_TCP_INPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
/*
 * tcp_dooptions: walk the cnt bytes of TCP options starting at cp.
 * Each pass of the loop consumes optlen bytes (cnt -= optlen, cp += optlen).
 *
 *   tp  - connection control block; receives t_flags bits, requested_s_scale,
 *         ts_recent and ts_recent_age.
 *   cp  - pointer to the option bytes.
 *   cnt - number of option bytes still to be examined.
 *   th  - TCP header of the segment; only th_flags & TH_SYN is consulted here.
 *   to  - option summary filled in for the caller (to_flag, to_tsval,
 *         to_tsecr, to_cc, to_ccecho).
 *
 * After the loop, a segment carrying TH_SYN hands the collected mss to
 * tcp_mss().
 *
 * NOTE(review): this listing is missing lines (return type, parameter
 * declarations, braces, most case labels and the statement that follows
 * many of the `if' tests), so the action taken when a test matches cannot
 * be confirmed from here.
 */
2601 tcp_dooptions(tp
, cp
, cnt
, th
, to
) 
2611         for (; cnt 
> 0; cnt 
-= optlen
, cp 
+= optlen
) { 
2613                 if (opt 
== TCPOPT_EOL
) 
2615                 if (opt 
== TCPOPT_NOP
) 
/* Length sanity test: optlen below 2 or larger than the bytes left in cnt. */
2621                         if (optlen 
< 2 || optlen 
> cnt
) 
/* Presumably the maximum-segment-size option: the value at cp + 2 is copied into mss. */
2630                         if (optlen 
!= TCPOLEN_MAXSEG
) 
2632                         if (!(th
->th_flags 
& TH_SYN
)) 
2634                         bcopy((char *) cp 
+ 2, (char *) &mss
, sizeof(mss
)); 
/* Presumably the window-scale option: note it in tp and cap the shift (cp[2]) at TCP_MAX_WINSHIFT. */
2639                         if (optlen 
!= TCPOLEN_WINDOW
) 
2641                         if (!(th
->th_flags 
& TH_SYN
)) 
2643                         tp
->t_flags 
|= TF_RCVD_SCALE
; 
2644                         tp
->requested_s_scale 
= min(cp
[2], TCP_MAX_WINSHIFT
); 
2647                 case TCPOPT_TIMESTAMP
: 
2648                         if (optlen 
!= TCPOLEN_TIMESTAMP
) 
2650                         to
->to_flag 
|= TOF_TS
; 
/* tsval is read from cp + 2 and tsecr from cp + 6; each is passed through NTOHL after the copy. */
2651                         bcopy((char *)cp 
+ 2, 
2652                             (char *)&to
->to_tsval
, sizeof(to
->to_tsval
)); 
2653                         NTOHL(to
->to_tsval
); 
2654                         bcopy((char *)cp 
+ 6, 
2655                             (char *)&to
->to_tsecr
, sizeof(to
->to_tsecr
)); 
2656                         NTOHL(to
->to_tsecr
); 
2659                          * A timestamp received in a SYN makes 
2660                          * it ok to send timestamp requests and replies. 
2662                         if (th
->th_flags 
& TH_SYN
) { 
2663                                 tp
->t_flags 
|= TF_RCVD_TSTMP
; 
2664                                 tp
->ts_recent 
= to
->to_tsval
; 
2665                                 tp
->ts_recent_age 
= tcp_now
; 
/* CC value: flagged with TOF_CC and copied into to->to_cc; TF_RCVD_CC is set only on a SYN. */
2669                         if (optlen 
!= TCPOLEN_CC
) 
2671                         to
->to_flag 
|= TOF_CC
; 
2672                         bcopy((char *)cp 
+ 2, 
2673                             (char *)&to
->to_cc
, sizeof(to
->to_cc
)); 
2676                          * A CC or CC.new option received in a SYN makes 
2677                          * it ok to send CC in subsequent segments. 
2679                         if (th
->th_flags 
& TH_SYN
) 
2680                                 tp
->t_flags 
|= TF_RCVD_CC
; 
/* CC.new value: tested against TH_SYN first, then flagged with TOF_CCNEW and stored in to->to_cc. */
2683                         if (optlen 
!= TCPOLEN_CC
) 
2685                         if (!(th
->th_flags 
& TH_SYN
)) 
2687                         to
->to_flag 
|= TOF_CCNEW
; 
2688                         bcopy((char *)cp 
+ 2, 
2689                             (char *)&to
->to_cc
, sizeof(to
->to_cc
)); 
2692                          * A CC or CC.new option received in a SYN makes 
2693                          * it ok to send CC in subsequent segments. 
2695                         tp
->t_flags 
|= TF_RCVD_CC
; 
/* CC.echo value: tested against TH_SYN first, then flagged with TOF_CCECHO and stored in to->to_ccecho. */
2698                         if (optlen 
!= TCPOLEN_CC
) 
2700                         if (!(th
->th_flags 
& TH_SYN
)) 
2702                         to
->to_flag 
|= TOF_CCECHO
; 
2703                         bcopy((char *)cp 
+ 2, 
2704                             (char *)&to
->to_ccecho
, sizeof(to
->to_ccecho
)); 
2705                         NTOHL(to
->to_ccecho
); 
/* Only a segment with TH_SYN set passes the collected mss on to tcp_mss(). */
2709         if (th
->th_flags 
& TH_SYN
) 
2710                 tcp_mss(tp
, mss
);       /* sets t_maxseg */ 
2714  * Pull out of band byte out of a segment so 
2715  * it doesn't appear in the user's data queue. 
2716  * It is still reflected in the segment length for 
2717  * sequencing purposes. 
/*
 * tcp_pulloutofband: locate the urgent byte inside an mbuf and close the gap
 * it leaves behind.
 *
 *   so  - socket; used to reach the tcpcb via sototcpcb().
 *   th  - TCP header; th_urp gives the urgent pointer.
 *   m   - mbuf holding the segment.
 *   off - header length still present in front of the data.
 *
 * The byte of interest sits at offset off + th_urp - 1.  When it falls in
 * the current mbuf, TCPOOB_HAVEDATA is set in tp->t_oobflags and the bytes
 * after it are moved down by one with bcopy().  Control reaching the end of
 * the function calls panic().
 *
 * NOTE(review): the loop header, the length adjustments and the return are
 * not present in this listing; confirm them against the full source.
 */
2720 tcp_pulloutofband(so
, th
, m
, off
) 
2723         register struct mbuf 
*m
; 
2724         int off
;                /* header length whose dropping has been delayed */ 
/* Offset of the out-of-band byte, counted from the start of the mbuf data. */
2726         int cnt 
= off 
+ th
->th_urp 
- 1; 
2729                 if (m
->m_len 
> cnt
) { 
2730                         char *cp 
= mtod(m
, caddr_t
) + cnt
; 
2731                         struct tcpcb 
*tp 
= sototcpcb(so
); 
2734                         tp
->t_oobflags 
|= TCPOOB_HAVEDATA
; 
/* Slide the m_len - cnt - 1 bytes that follow cp down by one position. */
2735                         bcopy(cp
+1, cp
, (unsigned)(m
->m_len 
- cnt 
- 1)); 
2737                         if (m
->m_flags 
& M_PKTHDR
) 
2746         panic("tcp_pulloutofband"); 
2750  * Collect new round-trip time estimate 
2751  * and update averages and current timeout. 
/*
 * tcp_xmit_timer: fold a new round-trip sample into the connection's
 * estimators and recompute the retransmit timeout.
 *
 *   tp  - connection control block; t_srtt, t_rttvar, t_rxtcur and
 *         t_softerror are written.
 *   rtt - the new round-trip measurement.
 *
 * tcpstat.tcps_rttupdated is incremented on every call.  With a previous
 * estimate (t_srtt != 0) both t_srtt and t_rttvar are adjusted by a scaled
 * delta; otherwise they are seeded directly from rtt.  t_rxtcur is then set
 * through TCPT_RANGESET from TCP_REXMTVAL(tp), max(t_rttmin, rtt + 2) and
 * TCPTV_REXMTMAX, and t_softerror is cleared.
 *
 * NOTE(review): several lines (return type, declarations, braces, the
 * statements guarded by the `<= 0' tests) are absent from this listing.
 */
2754 tcp_xmit_timer(tp
, rtt
) 
2755         register struct tcpcb 
*tp
; 
2760         tcpstat
.tcps_rttupdated
++; 
2762         if (tp
->t_srtt 
!= 0) { 
/*
 * NOTE(review): 5 fractional bits correspond to a scale factor of 32, not 8;
 * the "scaled by 8" wording in the text below looks stale.  Confirm against
 * the definitions of TCP_RTT_SHIFT / TCP_RTT_SCALE.
 */
2764                  * srtt is stored as fixed point with 5 bits after the 
2765                  * binary point (i.e., scaled by 8).  The following magic 
2766                  * is equivalent to the smoothing algorithm in rfc793 with 
2767                  * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed 
2768                  * point).  Adjust rtt to origin 0. 
/* delta = (rtt - 1) scaled by TCP_DELTA_SHIFT, minus srtt rescaled to the same units. */
2770                 delta 
= ((rtt 
- 1) << TCP_DELTA_SHIFT
) 
2771                         - (tp
->t_srtt 
>> (TCP_RTT_SHIFT 
- TCP_DELTA_SHIFT
)); 
2773                 if ((tp
->t_srtt 
+= delta
) <= 0) 
2777                  * We accumulate a smoothed rtt variance (actually, a 
2778                  * smoothed mean difference), then set the retransmit 
2779                  * timer to smoothed rtt + 4 times the smoothed variance. 
2780                  * rttvar is stored as fixed point with 4 bits after the 
2781                  * binary point (scaled by 16).  The following is 
2782                  * equivalent to rfc793 smoothing with an alpha of .75 
2783                  * (rttvar = rttvar*3/4 + |delta| / 4).  This replaces 
2784                  * rfc793's wired-in beta. 
2788                 delta 
-= tp
->t_rttvar 
>> (TCP_RTTVAR_SHIFT 
- TCP_DELTA_SHIFT
); 
2789                 if ((tp
->t_rttvar 
+= delta
) <= 0) 
2793                  * No rtt measurement yet - use the unsmoothed rtt. 
2794                  * Set the variance to half the rtt (so our first 
2795                  * retransmit happens at 3*rtt). 
/* First sample: srtt is rtt in srtt's scale; rttvar is rtt/2 in rttvar's scale (shift reduced by one). */
2797                 tp
->t_srtt 
= rtt 
<< TCP_RTT_SHIFT
; 
2798                 tp
->t_rttvar 
= rtt 
<< (TCP_RTTVAR_SHIFT 
- 1); 
2804          * the retransmit should happen at rtt + 4 * rttvar. 
2805          * Because of the way we do the smoothing, srtt and rttvar 
2806          * will each average +1/2 tick of bias.  When we compute 
2807          * the retransmit timer, we want 1/2 tick of rounding and 
2808          * 1 extra tick because of +-1/2 tick uncertainty in the 
2809          * firing of the timer.  The bias will give us exactly the 
2810          * 1.5 tick we need.  But, because the bias is 
2811          * statistical, we have to test that we don't drop below 
2812          * the minimum feasible timer (which is 2 ticks). 
/* Presumably TCPT_RANGESET clamps the value between its last two arguments; confirm in the timer header. */
2814         TCPT_RANGESET(tp
->t_rxtcur
, TCP_REXMTVAL(tp
), 
2815                       max(tp
->t_rttmin
, rtt 
+ 2), TCPTV_REXMTMAX
); 
2818          * We received an ack for a packet that wasn't retransmitted; 
2819          * it is probably safe to discard any error indications we've 
2820          * received recently.  This isn't quite right, but close enough 
2821          * for now (a route might have failed after we sent a segment, 
2822          * and the return path might not be symmetrical). 
2824         tp
->t_softerror 
= 0; 
2828  * Determine a reasonable value for maxseg size. 
2829  * If the route is known, check route for mtu. 
2830  * If none, use an mss that can be handled on the outgoing 
2831  * interface without forcing IP to fragment; if bigger than 
2832  * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES 
2833  * to utilize large mbufs.  If no route is found, route has no mtu, 
2834  * or the destination isn't local, use a default, hopefully conservative 
2835  * size (usually 512 or the default IP max size, but no more than the mtu 
2836  * of the interface), as we can't discover anything about intervening 
2837  * gateways or networks.  We also initialize the congestion/slow start 
2838  * window to be a single segment if the destination isn't local. 
2839  * While looking at the routing entry, we also initialize other path-dependent 
2840  * parameters from pre-set or cached values in the routing entry. 
2842  * Also take into account the space needed for options that we 
2843  * send regularly.  Make maxseg shorter by that amount to assure 
2844  * that we can send maxseg amount of data even when the options 
2845  * are present.  Store the upper limit of the length of options plus 
2848  * NOTE that this routine is only called when we process an incoming 
2849  * segment, for outgoing segments only tcp_mssopt is called. 
2851  * In case of T/TCP, we call this routine during implicit connection 
2852  * setup as well (offer = -1), to initialize maxseg from the cached 
2860         register struct rtentry 
*rt
; 
2862         register int rtt
, mss
; 
2866         struct rmxp_tao 
*taop
; 
2867         int origoffer 
= offer
; 
2875         isipv6 
= ((inp
->inp_vflag 
& INP_IPV6
) != 0) ? 1 : 0; 
2876         min_protoh 
= isipv6 
? sizeof (struct ip6_hdr
) + sizeof (struct tcphdr
) 
2877                             : sizeof (struct tcpiphdr
); 
2879 #define min_protoh  (sizeof (struct tcpiphdr)) 
2883                 rt 
= tcp_rtlookup6(inp
); 
2886         rt 
= tcp_rtlookup(inp
); 
2888                 tp
->t_maxopd 
= tp
->t_maxseg 
= 
2890                 isipv6 
? tcp_v6mssdflt 
: 
2897          * Slower link window correction: 
2898          * If a value is specified for slowlink_wsize use it for PPP links 
2899          * believed to be on a serial modem (speed <=128Kbps). Excludes 9600bps as 
2900          * it is the default value advertised by pseudo-devices over ppp. 
2902         if (ifp
->if_type 
== IFT_PPP 
&& slowlink_wsize 
> 0 &&  
2903             ifp
->if_baudrate 
> 9600 && ifp
->if_baudrate 
<= 128000) { 
2904                 tp
->t_flags 
|= TF_SLOWLINK
; 
2906         so 
= inp
->inp_socket
; 
2908         taop 
= rmx_taop(rt
->rt_rmx
); 
2910          * Offer == -1 means that we didn't receive SYN yet, 
2911          * use cached value in that case; 
2914                 offer 
= taop
->tao_mssopt
; 
2916          * Offer == 0 means that there was no MSS on the SYN segment, 
2917          * in this case we use tcp_mssdflt. 
2922                         isipv6 
? tcp_v6mssdflt 
: 
2927                  * Sanity check: make sure that maxopd will be large 
2928                  * enough to allow some data on segments even if 
2929                  * all the option space is used (40 bytes).  Otherwise 
2930                  * funny things may happen in tcp_output. 
2932                 offer 
= max(offer
, 64); 
2933         taop
->tao_mssopt 
= offer
; 
2936          * While we're here, check if there's an initial rtt 
2937          * or rttvar.  Convert from the route-table units 
2938          * to scaled multiples of the slow timeout timer. 
2940         if (tp
->t_srtt 
== 0 && (rtt 
= rt
->rt_rmx
.rmx_rtt
)) { 
2942                  * XXX the lock bit for RTT indicates that the value 
2943                  * is also a minimum value; this is subject to time. 
2945                 if (rt
->rt_rmx
.rmx_locks 
& RTV_RTT
) 
2946                         tp
->t_rttmin 
= rtt 
/ (RTM_RTTUNIT 
/ PR_SLOWHZ
); 
2947                 tp
->t_srtt 
= rtt 
/ (RTM_RTTUNIT 
/ (PR_SLOWHZ 
* TCP_RTT_SCALE
)); 
2948                 tcpstat
.tcps_usedrtt
++; 
2949                 if (rt
->rt_rmx
.rmx_rttvar
) { 
2950                         tp
->t_rttvar 
= rt
->rt_rmx
.rmx_rttvar 
/ 
2951                             (RTM_RTTUNIT 
/ (PR_SLOWHZ 
* TCP_RTTVAR_SCALE
)); 
2952                         tcpstat
.tcps_usedrttvar
++; 
2954                         /* default variation is +- 1 rtt */ 
2956                             tp
->t_srtt 
* TCP_RTTVAR_SCALE 
/ TCP_RTT_SCALE
; 
2958                 TCPT_RANGESET(tp
->t_rxtcur
, 
2959                               ((tp
->t_srtt 
>> 2) + tp
->t_rttvar
) >> 1, 
2960                               tp
->t_rttmin
, TCPTV_REXMTMAX
); 
2963          * if there's an mtu associated with the route, use it 
2964          * else, use the link mtu. 
2966         if (rt
->rt_rmx
.rmx_mtu
) 
2967                 mss 
= rt
->rt_rmx
.rmx_mtu 
- min_protoh
; 
2972                         (isipv6 
? nd_ifinfo
[rt
->rt_ifp
->if_index
].linkmtu 
: 
2981                         if (!in6_localaddr(&inp
->in6p_faddr
)) 
2982                                 mss 
= min(mss
, tcp_v6mssdflt
); 
2985                 if (!in_localaddr(inp
->inp_faddr
)) 
2986                         mss 
= min(mss
, tcp_mssdflt
); 
2988         mss 
= min(mss
, offer
); 
2990          * maxopd stores the maximum length of data AND options 
2991          * in a segment; maxseg is the amount of data in a normal 
2992          * segment.  We need to store this value (maxopd) apart 
2993          * from maxseg, because now every segment carries options 
2994          * and thus we normally have somewhat less data in segments. 
2999          * In case of T/TCP, origoffer==-1 indicates, that no segments 
3000          * were received yet.  In this case we just guess, otherwise 
3001          * we do the same as before T/TCP. 
3003         if ((tp
->t_flags 
& (TF_REQ_TSTMP
|TF_NOOPT
)) == TF_REQ_TSTMP 
&& 
3005              (tp
->t_flags 
& TF_RCVD_TSTMP
) == TF_RCVD_TSTMP
)) 
3006                 mss 
-= TCPOLEN_TSTAMP_APPA
; 
3007         if ((tp
->t_flags 
& (TF_REQ_CC
|TF_NOOPT
)) == TF_REQ_CC 
&& 
3009              (tp
->t_flags 
& TF_RCVD_CC
) == TF_RCVD_CC
)) 
3010                 mss 
-= TCPOLEN_CC_APPA
; 
3013          * If there's a pipesize (ie loopback), change the socket 
3014          * buffer to that size only if it's bigger than the current 
3015          * sockbuf size.  Make the socket buffers an integral 
3016          * number of mss units; if the mss is larger than 
3017          * the socket buffer, decrease the mss. 
3020         bufsize 
= rt
->rt_rmx
.rmx_sendpipe
; 
3021         if (bufsize 
< so
->so_snd
.sb_hiwat
) 
3023                 bufsize 
= so
->so_snd
.sb_hiwat
; 
3027                 bufsize 
= roundup(bufsize
, mss
); 
3028                 if (bufsize 
> sb_max
) 
3030                 (void)sbreserve(&so
->so_snd
, bufsize
); 
3035         bufsize 
= rt
->rt_rmx
.rmx_recvpipe
; 
3036         if (bufsize 
< so
->so_rcv
.sb_hiwat
) 
3038                 bufsize 
= so
->so_rcv
.sb_hiwat
; 
3039         if (bufsize 
> mss
) { 
3040                 bufsize 
= roundup(bufsize
, mss
); 
3041                 if (bufsize 
> sb_max
) 
3043                 (void)sbreserve(&so
->so_rcv
, bufsize
); 
3047          * Set the slow-start flight size depending on whether this 
3048          * is a local network or not. 
3052             (isipv6 
&& in6_localaddr(&inp
->in6p_faddr
)) || 
3055              in_localaddr(inp
->inp_faddr
) 
3060                 tp
->snd_cwnd 
= mss 
* ss_fltsz_local
; 
3062                 tp
->snd_cwnd 
= mss 
* ss_fltsz
; 
3064         if (rt
->rt_rmx
.rmx_ssthresh
) { 
3066                  * There's some sort of gateway or interface 
3067                  * buffer limit on the path.  Use this to set 
3068                  * the slow start threshold, but set the 
3069                  * threshold to no less than 2*mss. 
3071                 tp
->snd_ssthresh 
= max(2 * mss
, rt
->rt_rmx
.rmx_ssthresh
); 
3072                 tcpstat
.tcps_usedssthresh
++; 
3077  * Determine the MSS option to send on an outgoing SYN. 
3090         isipv6 
= ((tp
->t_inpcb
->inp_vflag 
& INP_IPV6
) != 0) ? 1 : 0; 
3091         min_protoh 
= isipv6 
? sizeof (struct ip6_hdr
) + sizeof (struct tcphdr
) 
3092                             : sizeof (struct tcpiphdr
); 
3094 #define min_protoh  (sizeof (struct tcpiphdr)) 
3098                 rt 
= tcp_rtlookup6(tp
->t_inpcb
); 
3101         rt 
= tcp_rtlookup(tp
->t_inpcb
); 
3105                         isipv6 
? tcp_v6mssdflt 
: 
3109          * Slower link window correction: 
3110          * If a value is specified for slowlink_wsize use it for PPP links 
3111          * believed to be on a serial modem (speed <=128Kbps). Excludes 9600bps as 
3112          * it is the default value advertised by pseudo-devices over ppp. 
3114         if (rt
->rt_ifp
->if_type 
== IFT_PPP 
&& slowlink_wsize 
> 0 &&  
3115             rt
->rt_ifp
->if_baudrate 
> 9600 && rt
->rt_ifp
->if_baudrate 
<= 128000) { 
3116                 tp
->t_flags 
|= TF_SLOWLINK
; 
3119         return rt
->rt_ifp
->if_mtu 
- min_protoh
; 
3124  * Checks for partial ack.  If partial ack arrives, force the retransmission 
3125  * of the next unacknowledged segment, do not clear tp->t_dupacks, and return 
3126  * 1.  By setting snd_nxt to ti_ack, this forces retransmission timer to 
3127  * be started again.  If the ack advances at least to tp->snd_recover, return 0. 
3134         if (SEQ_LT(th
->th_ack
, tp
->snd_recover
)) { 
3135                 tcp_seq onxt 
= tp
->snd_nxt
; 
3136                 u_long  ocwnd 
= tp
->snd_cwnd
; 
3138                 tp
->t_timer
[TCPT_REXMT
] = 0; 
3140                 callout_stop(tp
->tt_rexmt
); 
3143                 tp
->snd_nxt 
= th
->th_ack
; 
3145                  * Set snd_cwnd to one segment beyond acknowledged offset 
3146                  * (tp->snd_una has not yet been updated when this function  
3149                 tp
->snd_cwnd 
= tp
->t_maxseg 
+ (th
->th_ack 
- tp
->snd_una
); 
3150                 (void) tcp_output(tp
); 
3151                 tp
->snd_cwnd 
= ocwnd
; 
3152                 if (SEQ_GT(onxt
, tp
->snd_nxt
)) 
3155                  * Partial window deflation.  Relies on fact that tp->snd_una 
3158                 tp
->snd_cwnd 
-= (th
->th_ack 
- tp
->snd_una 
- tp
->t_maxseg
);