2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
24 * The Regents of the University of California. All rights reserved.
26 * Redistribution and use in source and binary forms, with or without
27 * modification, are permitted provided that the following conditions
29 * 1. Redistributions of source code must retain the above copyright
30 * notice, this list of conditions and the following disclaimer.
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 * 3. All advertising materials mentioning features or use of this software
35 * must display the following acknowledgement:
36 * This product includes software developed by the University of
37 * California, Berkeley and its contributors.
38 * 4. Neither the name of the University nor the names of its contributors
39 * may be used to endorse or promote products derived from this software
40 * without specific prior written permission.
42 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54 * @(#)tcp_subr.c 8.2 (Berkeley) 5/24/95
58 #include "opt_compat.h"
59 #include "opt_tcpdebug.h"
62 #include <sys/param.h>
63 #include <sys/systm.h>
64 #include <sys/kernel.h>
65 #include <sys/sysctl.h>
66 #include <sys/malloc.h>
68 #include <sys/domain.h>
69 #include <sys/socket.h>
70 #include <sys/socketvar.h>
71 #include <sys/protosw.h>
72 #include <sys/syslog.h>
76 #include <vm/vm_zone.h>
79 #include <net/route.h>
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/in_pcb.h>
87 #include <netinet/in_var.h>
88 #include <netinet/ip_var.h>
90 #include <netinet/ip6.h>
91 #include <netinet6/ip6_var.h>
92 #include <netinet6/in6_pcb.h>
94 #include <netinet/tcp.h>
95 #include <netinet/tcp_fsm.h>
96 #include <netinet/tcp_seq.h>
97 #include <netinet/tcp_timer.h>
98 #include <netinet/tcp_var.h>
99 #include <netinet/tcpip.h>
101 #include <netinet/tcp_debug.h>
103 #include <netinet6/ip6protosw.h>
106 #include <netinet6/ipsec.h>
109 #include <sys/kdebug.h>
/* kdebug trace code for tcp_close(): DBG_NETTCP class, code ((5 << 8) | 2). */
#define DBG_FNC_TCP_CLOSE	NETDBG_CODE(DBG_NETTCP, ((5 << 8) | 2))
#ifndef offsetof	/* XXX */
/* Fallback: byte offset of `member' within `type', used when the
 * <stddef.h> definition is not already in scope. */
#define offsetof(type, member) ((size_t)(&((type *)0)->member))
#endif
117 int tcp_mssdflt
= TCP_MSS
;
118 SYSCTL_INT(_net_inet_tcp
, TCPCTL_MSSDFLT
, mssdflt
,
119 CTLFLAG_RW
, &tcp_mssdflt
, 0, "");
121 int tcp_v6mssdflt
= TCP6_MSS
;
122 SYSCTL_INT(_net_inet_tcp
, TCPCTL_V6MSSDFLT
, v6mssdflt
,
123 CTLFLAG_RW
, &tcp_v6mssdflt
, 0, "");
125 static int tcp_rttdflt
= TCPTV_SRTTDFLT
/ PR_SLOWHZ
;
126 SYSCTL_INT(_net_inet_tcp
, TCPCTL_RTTDFLT
, rttdflt
,
127 CTLFLAG_RW
, &tcp_rttdflt
, 0, "");
129 static int tcp_do_rfc1323
= 1;
130 SYSCTL_INT(_net_inet_tcp
, TCPCTL_DO_RFC1323
, rfc1323
,
131 CTLFLAG_RW
, &tcp_do_rfc1323
, 0, "");
133 static int tcp_do_rfc1644
= 0;
134 SYSCTL_INT(_net_inet_tcp
, TCPCTL_DO_RFC1644
, rfc1644
,
135 CTLFLAG_RW
, &tcp_do_rfc1644
, 0, "");
137 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, pcbcount
, CTLFLAG_RD
, &tcbinfo
.ipi_count
,
138 0, "Number of active PCBs");
140 static void tcp_cleartaocache
__P((void));
141 static void tcp_notify
__P((struct inpcb
*, int));
142 extern u_long current_active_connections
;
/*
 * Target size of TCP PCB hash tables. Must be a power of two.
 *
 * Note that this can be overridden by the kernel environment
 * variable net.inet.tcp.tcbhashsize
 */
#define TCBHASHSIZE	4096
/*
 * This is the actual shape of what we allocate using the zone
 * allocator. Doing it this way allows us to protect both structures
 * using the same generation count, and also eliminates the overhead
 * of allocating tcpcbs separately. By hiding the structure here,
 * we avoid changing most of the rest of the code (although it needs
 * to be changed, eventually, for greater efficiency).
 */
/* Rounding mask; ALIGNMENT must be a power of two for (x + ALIGNM1) & ~ALIGNM1. */
#define ALIGNM1	(ALIGNMENT - 1)
170 char align
[(sizeof(struct inpcb
) + ALIGNM1
) & ~ALIGNM1
];
177 static struct tcpcb dummy_tcb
;
180 extern struct inpcbhead time_wait_slots
[];
181 extern int cur_tw_slot
;
182 extern u_long
*delack_bitmask
;
185 int get_inpcb_str_size()
187 return sizeof(struct inpcb
);
191 int get_tcp_str_size()
193 return sizeof(struct tcpcb
);
196 int tcp_freeq
__P((struct tcpcb
*tp
));
211 #endif /* TCP_COMPAT_42 */
215 tcbinfo
.listhead
= &tcb
;
216 if (!(getenv_int("net.inet.tcp.tcbhashsize", &hashsize
)))
217 hashsize
= TCBHASHSIZE
;
218 if (!powerof2(hashsize
)) {
219 printf("WARNING: TCB hash size not a power of 2\n");
220 hashsize
= 512; /* safe default */
222 tcbinfo
.hashsize
= hashsize
;
223 tcbinfo
.hashbase
= hashinit(hashsize
, M_PCB
, &tcbinfo
.hashmask
);
224 tcbinfo
.porthashbase
= hashinit(hashsize
, M_PCB
,
225 &tcbinfo
.porthashmask
);
227 tcbinfo
.ipi_zone
= (void *) zinit("tcpcb", sizeof(struct inp_tp
), maxsockets
,
230 str_size
= (vm_size_t
) sizeof(struct inp_tp
);
231 tcbinfo
.ipi_zone
= (void *) zinit(str_size
, 120000*str_size
, 8192, "inpcb_zone");
234 #define TCP_LGHDR (sizeof(struct tcpip6hdr))
236 #define TCP_LGHDR (sizeof(struct tcpiphdr))
238 if (max_protohdr
< TCP_LGHDR
)
239 max_protohdr
= TCP_LGHDR
;
240 if ((max_linkhdr
+ TCP_LGHDR
) > MHLEN
)
243 tcbinfo
.last_pcb
= 0;
244 dummy_tcb
.t_state
= TCP_NSTATES
;
245 dummy_tcb
.t_flags
= 0;
246 tcbinfo
.dummy_cb
= (caddr_t
) &dummy_tcb
;
247 in_pcb_nat_init(&tcbinfo
, AF_INET
, IPPROTO_TCP
, SOCK_STREAM
);
249 delack_bitmask
= _MALLOC((4 * hashsize
)/32, M_PCB
, M_WAITOK
);
250 if (delack_bitmask
== 0)
251 panic("Delack Memory");
253 for (i
=0; i
< (tcbinfo
.hashsize
/ 32); i
++)
254 delack_bitmask
[i
] = 0;
256 for (i
=0; i
< N_TIME_WAIT_SLOTS
; i
++) {
257 LIST_INIT(&time_wait_slots
[i
]);
/*
 * Create template to be used to send tcp packets on a connection.
 * Call after host entry created, allocates an mbuf and fills
 * in a skeletal tcp/ip header, minimizing the amount of work
 * necessary when the connection is used.
 */
272 register struct inpcb
*inp
= tp
->t_inpcb
;
273 register struct mbuf
*m
;
274 register struct tcptemp
*n
;
276 if ((n
= tp
->t_template
) == 0) {
277 m
= m_get(M_DONTWAIT
, MT_HEADER
);
280 m
->m_len
= sizeof (struct tcptemp
);
281 n
= mtod(m
, struct tcptemp
*);
283 bzero(n
->tt_x1
, sizeof(n
->tt_x1
));
284 n
->tt_pr
= IPPROTO_TCP
;
285 n
->tt_len
= htons(sizeof (struct tcpiphdr
) - sizeof (struct ip
));
286 n
->tt_src
= inp
->inp_laddr
;
287 n
->tt_dst
= inp
->inp_faddr
;
288 n
->tt_sport
= inp
->inp_lport
;
289 n
->tt_dport
= inp
->inp_fport
;
299 n
->tt_t
.th_sum
= in_pseudo(n
->tt_src
.s_addr
, n
->tt_dst
.s_addr
,
300 htons(sizeof(struct tcphdr
) + IPPROTO_TCP
));
303 n
->tt_flow
= inp
->inp_flow
& IPV6_FLOWINFO_MASK
;
304 if (ip6_auto_flowlabel
) {
305 n
->tt_flow
&= ~IPV6_FLOWLABEL_MASK
;
306 n
->tt_flow
|= (htonl(ip6_flow_seq
++) & IPV6_FLOWLABEL_MASK
);
308 n
->tt_vfc
|= IPV6_VERSION
;
309 n
->tt_pr6
= IPPROTO_TCP
;
310 n
->tt_len6
= n
->tt_len
;
311 n
->tt_src6
= inp
->in6p_laddr
;
312 n
->tt_dst6
= inp
->in6p_faddr
;
/*
 * Send a single message to the TCP at address specified by
 * the given TCP/IP header. If m == 0, then we make a copy
 * of the tcpiphdr at ti and send directly to the addressed host.
 * This is used to force keep alive messages out using the TCP
 * template for a connection tp->t_template. If flags are given
 * then we send a message back to the TCP which originated the
 * segment ti, and discard the mbuf containing it and any other
 * attached mbufs.
 *
 * In any case the ack and sequence number of the transmitted
 * segment are as specified by the parameters.
 *
 * NOTE: If m != NULL, then ti must point to *inside* the mbuf.
 */
333 tcp_respond(tp
, iph
, th
, m
, ack
, seq
, flags
, isipv6
)
336 register struct tcphdr
*th
;
337 register struct mbuf
*m
;
346 struct route
*ro
= 0;
349 struct tcpiphdr
*ti
= iph
;
352 struct route_in6
*ro6
= 0;
353 struct route_in6 sro6
;
354 struct ip6_hdr
*ip6
= iph
;
355 struct tcpip6hdr
*ti6
= iph
;
359 if (!(flags
& TH_RST
))
360 win
= sbspace(&tp
->t_inpcb
->inp_socket
->so_rcv
);
363 ro6
= &tp
->t_inpcb
->in6p_route
;
366 ro
= &tp
->t_inpcb
->inp_route
;
371 bzero(ro6
, sizeof *ro6
);
375 bzero(ro
, sizeof *ro
);
381 m
= m_gethdr(M_DONTWAIT
, MT_HEADER
);
389 m
->m_data
+= max_linkhdr
;
392 ti6
= mtod(m
, struct tcpip6hdr
*);
393 bcopy((caddr_t
)ip6
, (caddr_t
)&ti6
->ti6_i
,
394 sizeof(struct ip6_hdr
));
399 ti
= mtod(m
, struct tcpiphdr
*);
400 bcopy((caddr_t
)ip
, (caddr_t
)&ti
->ti_i
, sizeof(struct ip
));
401 ip
= (struct ip
*)&ti
->ti_i
;
406 bcopy((caddr_t
)th
, (caddr_t
)nth
, sizeof(struct tcphdr
));
411 m
->m_data
= (caddr_t
)ti
;
412 /* m_len is set later */
/*
 * Swap two lvalues of the given type.  Wrapped in do { } while (0) so the
 * expansion is a single statement and stays correct in unbraced if/else
 * bodies.  NOTE: each argument is evaluated more than once.
 */
#define xchg(a,b,type) do { type t; t=a; a=b; b=t; } while (0)
420 ip6
->ip6_dst
= ip6
->ip6_src
;
422 nth
= (struct tcphdr
*)(ip6
+ 1);
425 * this is the case if an extension header
426 * exists between the IPv6 header and the
429 nth
->th_sport
= th
->th_sport
;
430 nth
->th_dport
= th
->th_dport
;
434 xchg(ti
->ti_dst
.s_addr
, ti
->ti_src
.s_addr
, n_long
);
439 xchg(nth
->th_dport
, nth
->th_sport
, n_short
);
442 nth
->th_seq
= htonl(seq
);
443 nth
->th_ack
= htonl(ack
);
445 nth
->th_off
= sizeof (struct tcphdr
) >> 2;
446 nth
->th_flags
= flags
;
448 nth
->th_win
= htons((u_short
) (win
>> tp
->rcv_scale
));
450 nth
->th_win
= htons((u_short
)win
);
452 tlen
+= sizeof (struct tcphdr
);
455 m
->m_len
= tlen
+ sizeof(struct ip6_hdr
);
456 m
->m_pkthdr
.len
= tlen
+ sizeof(struct ip6_hdr
);
457 m
->m_pkthdr
.rcvif
= (struct ifnet
*) 0;
458 ip6
->ip6_plen
= htons((u_short
)tlen
);
459 ip6
->ip6_nxt
= IPPROTO_TCP
;
460 ip6
->ip6_hlim
= in6_selecthlim(tp
? tp
->t_inpcb
: NULL
,
464 nth
->th_sum
= in6_cksum(m
, IPPROTO_TCP
,
465 sizeof(struct ip6_hdr
), tlen
);
466 ip6
->ip6_flow
&= ~IPV6_FLOWLABEL_MASK
;
467 if (ip6_auto_flowlabel
) {
469 (htonl(ip6_flow_seq
++) & IPV6_FLOWLABEL_MASK
);
473 ti
->ti_len
= htons((u_short
)(tlen
));
474 m
->m_len
= tlen
+ sizeof(struct ip
);
475 m
->m_pkthdr
.len
= tlen
+ sizeof(struct ip
);
476 m
->m_pkthdr
.rcvif
= (struct ifnet
*) 0;
477 nth
->th_sum
= in_pseudo(ip
->ip_src
.s_addr
, ip
->ip_dst
.s_addr
,
478 htons((u_short
)(tlen
+ IPPROTO_TCP
)));
479 m
->m_pkthdr
.csum_flags
= CSUM_TCP
;
480 m
->m_pkthdr
.csum_data
= offsetof(struct tcphdr
, th_sum
);
482 ip
->ip_len
= tlen
+ sizeof (struct ip
);
483 ip
->ip_ttl
= ip_defttl
;
488 if (tp
== NULL
|| (tp
->t_inpcb
->inp_socket
->so_options
& SO_DEBUG
))
489 tcp_trace(TA_OUTPUT
, 0, tp
,
491 isipv6
? (void *)ip6
:
497 ipsec_setsocket(m
, tp
? tp
->t_inpcb
->inp_socket
: NULL
);
501 (void)ip6_output(m
, NULL
, ro6
, 0, NULL
, NULL
);
502 if (ro6
== &sro6
&& ro6
->ro_rt
)
506 (void)ip_output(m
, NULL
, ro
, 0, NULL
);
507 if (ro
== &sro
&& ro
->ro_rt
) {
/*
 * Create a new TCP control block, making an
 * empty reassembly queue and hooking it to the argument
 * protocol control block. The `inp' parameter must have
 * come from the zone allocator set up in tcp_init().
 */
526 register struct tcpcb
*tp
;
527 register struct socket
*so
= inp
->inp_socket
;
529 int isipv6
= (inp
->inp_vflag
& INP_IPV6
) != 0;
533 if (so
->cached_in_sock_layer
== 0) {
534 it
= (struct inp_tp
*)inp
;
538 tp
= (struct tcpcb
*) inp
->inp_saved_ppcb
;
540 bzero((char *) tp
, sizeof(struct tcpcb
));
541 tp
->segq
.lh_first
= NULL
;
542 tp
->t_maxseg
= tp
->t_maxopd
=
544 isipv6
? tcp_v6mssdflt
:
550 tp
->t_flags
= (TF_REQ_SCALE
|TF_REQ_TSTMP
);
552 tp
->t_flags
|= TF_REQ_CC
;
553 tp
->t_inpcb
= inp
; /* XXX */
555 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
556 * rtt estimate. Set rttvar so that srtt + 4 * rttvar gives
557 * reasonable initial retransmit time.
559 tp
->t_srtt
= TCPTV_SRTTBASE
;
560 tp
->t_rttvar
= ((TCPTV_RTOBASE
- TCPTV_SRTTBASE
) << TCP_RTTVAR_SHIFT
) / 4;
561 tp
->t_rttmin
= TCPTV_MIN
;
562 tp
->t_rxtcur
= TCPTV_RTOBASE
;
563 tp
->snd_cwnd
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
;
564 tp
->snd_ssthresh
= TCP_MAXWIN
<< TCP_MAX_WINSHIFT
;
566 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
567 * because the socket may be bound to an IPv6 wildcard address,
568 * which may match an IPv4-mapped IPv6 address.
569 * XXX: is there a better approach?
571 inp
->inp_ip_ttl
= ip_defttl
;
572 inp
->inp_ppcb
= (caddr_t
)tp
;
573 return (tp
); /* XXX */
/*
 * Drop a TCP connection, reporting
 * the specified error. If connection is synchronized,
 * then send a RST to peer.
 */
583 register struct tcpcb
*tp
;
586 struct socket
*so
= tp
->t_inpcb
->inp_socket
;
590 case TCPS_ESTABLISHED
:
591 case TCPS_FIN_WAIT_1
:
593 case TCPS_CLOSE_WAIT
:
595 current_active_connections
--;
599 if (TCPS_HAVERCVDSYN(tp
->t_state
)) {
600 tp
->t_state
= TCPS_CLOSED
;
601 (void) tcp_output(tp
);
602 tcpstat
.tcps_drops
++;
604 tcpstat
.tcps_conndrops
++;
605 if (errno
== ETIMEDOUT
&& tp
->t_softerror
)
606 errno
= tp
->t_softerror
;
607 so
->so_error
= errno
;
608 return (tcp_close(tp
));
/*
 * Close a TCP control block:
 *	discard all space held by the tcp
 *	discard internet protocol block
 *	wake up any sleepers
 */
619 register struct tcpcb
*tp
;
621 register struct mbuf
*q
;
622 register struct mbuf
*nq
;
623 struct inpcb
*inp
= tp
->t_inpcb
;
624 struct socket
*so
= inp
->inp_socket
;
626 int isipv6
= INP_CHECK_SOCKAF(so
, AF_INET6
);
628 register struct rtentry
*rt
;
632 KERNEL_DEBUG(DBG_FNC_TCP_CLOSE
| DBG_FUNC_START
, tp
,0,0,0,0);
635 case TCPS_ESTABLISHED
:
636 case TCPS_FIN_WAIT_1
:
638 case TCPS_CLOSE_WAIT
:
640 current_active_connections
--;
646 * If we got enough samples through the srtt filter,
647 * save the rtt and rttvar in the routing entry.
648 * 'Enough' is arbitrarily defined as the 16 samples.
649 * 16 samples is enough for the srtt filter to converge
650 * to within 5% of the correct value; fewer samples and
651 * we could save a very bogus rtt.
653 * Don't update the default route's characteristics and don't
654 * update anything that the user "locked".
656 if (tp
->t_rttupdated
>= 16) {
657 register u_long i
= 0;
660 struct sockaddr_in6
*sin6
;
662 if ((rt
= inp
->in6p_route
.ro_rt
) == NULL
)
664 sin6
= (struct sockaddr_in6
*)rt_key(rt
);
665 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
))
670 if ((rt
= inp
->inp_route
.ro_rt
) == NULL
||
671 ((struct sockaddr_in
*)rt_key(rt
))->sin_addr
.s_addr
675 if ((rt
->rt_rmx
.rmx_locks
& RTV_RTT
) == 0) {
677 (RTM_RTTUNIT
/ (PR_SLOWHZ
* TCP_RTT_SCALE
));
678 if (rt
->rt_rmx
.rmx_rtt
&& i
)
680 * filter this update to half the old & half
681 * the new values, converting scale.
682 * See route.h and tcp_var.h for a
683 * description of the scaling constants.
686 (rt
->rt_rmx
.rmx_rtt
+ i
) / 2;
688 rt
->rt_rmx
.rmx_rtt
= i
;
689 tcpstat
.tcps_cachedrtt
++;
691 if ((rt
->rt_rmx
.rmx_locks
& RTV_RTTVAR
) == 0) {
693 (RTM_RTTUNIT
/ (PR_SLOWHZ
* TCP_RTTVAR_SCALE
));
694 if (rt
->rt_rmx
.rmx_rttvar
&& i
)
695 rt
->rt_rmx
.rmx_rttvar
=
696 (rt
->rt_rmx
.rmx_rttvar
+ i
) / 2;
698 rt
->rt_rmx
.rmx_rttvar
= i
;
699 tcpstat
.tcps_cachedrttvar
++;
702 * The old comment here said:
703 * update the pipelimit (ssthresh) if it has been updated
704 * already or if a pipesize was specified & the threshhold
705 * got below half the pipesize. I.e., wait for bad news
706 * before we start updating, then update on both good
709 * But we want to save the ssthresh even if no pipesize is
710 * specified explicitly in the route, because such
711 * connections still have an implicit pipesize specified
712 * by the global tcp_sendspace. In the absence of a reliable
713 * way to calculate the pipesize, it will have to do.
715 i
= tp
->snd_ssthresh
;
716 if (rt
->rt_rmx
.rmx_sendpipe
!= 0)
717 dosavessthresh
= (i
< rt
->rt_rmx
.rmx_sendpipe
/ 2);
719 dosavessthresh
= (i
< so
->so_snd
.sb_hiwat
/ 2);
720 if (((rt
->rt_rmx
.rmx_locks
& RTV_SSTHRESH
) == 0 &&
721 i
!= 0 && rt
->rt_rmx
.rmx_ssthresh
!= 0)
724 * convert the limit from user data bytes to
725 * packets then to packet data bytes.
727 i
= (i
+ tp
->t_maxseg
/ 2) / tp
->t_maxseg
;
730 i
*= (u_long
)(tp
->t_maxseg
+
732 isipv6
? sizeof (struct tcpip6hdr
) :
734 sizeof (struct tcpiphdr
));
735 if (rt
->rt_rmx
.rmx_ssthresh
)
736 rt
->rt_rmx
.rmx_ssthresh
=
737 (rt
->rt_rmx
.rmx_ssthresh
+ i
) / 2;
739 rt
->rt_rmx
.rmx_ssthresh
= i
;
740 tcpstat
.tcps_cachedssthresh
++;
744 /* free the reassembly queue, if any */
745 (void) tcp_freeq(tp
);
748 (void) m_free(dtom(tp
->t_template
));
750 if (so
->cached_in_sock_layer
)
751 inp
->inp_saved_ppcb
= (caddr_t
) tp
;
753 inp
->inp_ppcb
= NULL
;
754 soisdisconnected(so
);
761 tcpstat
.tcps_closed
++;
762 KERNEL_DEBUG(DBG_FNC_TCP_CLOSE
| DBG_FUNC_END
, tcpstat
.tcps_closed
,0,0,0,0);
763 return ((struct tcpcb
*)0);
770 register struct ipqent
*qe
;
773 while ((qe
= tp
->segq
.lh_first
) != NULL
) {
774 LIST_REMOVE(qe
, ipqe_q
);
/*
 * Notify a tcp user of an asynchronous error;
 * store error as soft error, but wake up user
 * (for now, won't do anything until can select for soft error).
 */
794 tcp_notify(inp
, error
)
798 register struct tcpcb
*tp
= (struct tcpcb
*)inp
->inp_ppcb
;
799 register struct socket
*so
= inp
->inp_socket
;
802 * Ignore some errors if we are hooked up.
803 * If connection hasn't completed, has retransmitted several times,
804 * and receives a second error, give up now. This is better
805 * than waiting a long time to establish a connection that
806 * can never complete.
808 if (tp
->t_state
== TCPS_ESTABLISHED
&&
809 (error
== EHOSTUNREACH
|| error
== ENETUNREACH
||
810 error
== EHOSTDOWN
)) {
812 } else if (tp
->t_state
< TCPS_ESTABLISHED
&& tp
->t_rxtshift
> 3 &&
814 so
->so_error
= error
;
816 tp
->t_softerror
= error
;
817 wakeup((caddr_t
) &so
->so_timeo
);
824 tcp_pcblist SYSCTL_HANDLER_ARGS
827 struct inpcb
*inp
, **inp_list
;
832 * The process of preparing the TCB list is too time-consuming and
833 * resource-intensive to repeat twice on every request.
835 if (req
->oldptr
== 0) {
836 n
= tcbinfo
.ipi_count
;
837 req
->oldidx
= 2 * (sizeof xig
)
838 + (n
+ n
/8) * sizeof(struct xtcpcb
);
842 if (req
->newptr
!= 0)
846 * OK, now we're committed to doing something.
849 gencnt
= tcbinfo
.ipi_gencnt
;
850 n
= tcbinfo
.ipi_count
;
853 xig
.xig_len
= sizeof xig
;
855 xig
.xig_gen
= gencnt
;
856 xig
.xig_sogen
= so_gencnt
;
857 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
861 * We are done if there is no pcb
866 inp_list
= _MALLOC(n
* sizeof *inp_list
, M_TEMP
, M_WAITOK
);
871 for (inp
= tcbinfo
.listhead
->lh_first
, i
= 0; inp
&& i
< n
;
872 inp
= inp
->inp_list
.le_next
) {
873 if (inp
->inp_gencnt
<= gencnt
)
880 for (i
= 0; i
< n
; i
++) {
882 if (inp
->inp_gencnt
<= gencnt
) {
884 xt
.xt_len
= sizeof xt
;
885 /* XXX should avoid extra copy */
886 bcopy(inp
, &xt
.xt_inp
, sizeof *inp
);
887 bcopy(inp
->inp_ppcb
, &xt
.xt_tp
, sizeof xt
.xt_tp
);
889 sotoxsocket(inp
->inp_socket
, &xt
.xt_socket
);
890 error
= SYSCTL_OUT(req
, &xt
, sizeof xt
);
895 * Give the user an updated idea of our state.
896 * If the generation differs from what we told
897 * her before, she knows that something happened
898 * while we were processing this request, and it
899 * might be necessary to retry.
902 xig
.xig_gen
= tcbinfo
.ipi_gencnt
;
903 xig
.xig_sogen
= so_gencnt
;
904 xig
.xig_count
= tcbinfo
.ipi_count
;
906 error
= SYSCTL_OUT(req
, &xig
, sizeof xig
);
908 FREE(inp_list
, M_TEMP
);
913 SYSCTL_PROC(_net_inet_tcp
, TCPCTL_PCBLIST
, pcblist
, CTLFLAG_RD
, 0, 0,
914 tcp_pcblist
, "S,xtcpcb", "List of active TCP connections");
917 tcp_ctlinput(cmd
, sa
, vip
)
922 register struct ip
*ip
= vip
;
923 register struct tcphdr
*th
;
924 void (*notify
) __P((struct inpcb
*, int)) = tcp_notify
;
926 if (cmd
== PRC_QUENCH
)
928 else if (cmd
== PRC_MSGSIZE
)
929 notify
= tcp_mtudisc
;
930 else if (!PRC_IS_REDIRECT(cmd
) &&
931 ((unsigned)cmd
> PRC_NCMDS
|| inetctlerrmap
[cmd
] == 0))
934 th
= (struct tcphdr
*)((caddr_t
)ip
935 + (IP_VHL_HL(ip
->ip_vhl
) << 2));
936 in_pcbnotify(&tcb
, sa
, th
->th_dport
, ip
->ip_src
, th
->th_sport
,
939 in_pcbnotify(&tcb
, sa
, 0, zeroin_addr
, 0, cmd
, notify
);
944 tcp6_ctlinput(cmd
, sa
, d
)
949 register struct tcphdr
*thp
;
951 void (*notify
) __P((struct inpcb
*, int)) = tcp_notify
;
952 struct sockaddr_in6 sa6
;
957 if (sa
->sa_family
!= AF_INET6
||
958 sa
->sa_len
!= sizeof(struct sockaddr_in6
))
961 if (cmd
== PRC_QUENCH
)
963 else if (cmd
== PRC_MSGSIZE
)
964 notify
= tcp_mtudisc
;
965 else if (!PRC_IS_REDIRECT(cmd
) &&
966 ((unsigned)cmd
> PRC_NCMDS
|| inet6ctlerrmap
[cmd
] == 0))
969 /* if the parameter is from icmp6, decode it. */
971 struct ip6ctlparam
*ip6cp
= (struct ip6ctlparam
*)d
;
973 ip6
= ip6cp
->ip6c_ip6
;
974 off
= ip6cp
->ip6c_off
;
980 /* translate addresses into internal form */
981 sa6
= *(struct sockaddr_in6
*)sa
;
982 if (IN6_IS_ADDR_LINKLOCAL(&sa6
.sin6_addr
) && m
&& m
->m_pkthdr
.rcvif
)
983 sa6
.sin6_addr
.s6_addr16
[1] = htons(m
->m_pkthdr
.rcvif
->if_index
);
987 * XXX: We assume that when IPV6 is non NULL,
988 * M and OFF are valid.
992 /* translate addresses into internal form */
993 memcpy(&s
, &ip6
->ip6_src
, sizeof(s
));
994 if (IN6_IS_ADDR_LINKLOCAL(&s
))
995 s
.s6_addr16
[1] = htons(m
->m_pkthdr
.rcvif
->if_index
);
998 if (m
->m_len
< off
+ sizeof(*thp
)) {
1000 * this should be rare case,
1001 * so we compromise on this copy...
1003 m_copydata(m
, off
, sizeof(th
), (caddr_t
)&th
);
1006 thp
= (struct tcphdr
*)(mtod(m
, caddr_t
) + off
);
1007 in6_pcbnotify(&tcb
, (struct sockaddr
*)&sa6
, thp
->th_dport
,
1008 &s
, thp
->th_sport
, cmd
, notify
);
1010 in6_pcbnotify(&tcb
, (struct sockaddr
*)&sa6
, 0, &zeroin6_addr
,
1015 #define TCP_RNDISS_ROUNDS 16
1016 #define TCP_RNDISS_OUT 7200
1017 #define TCP_RNDISS_MAX 30000
1019 u_int8_t tcp_rndiss_sbox
[128];
1020 u_int16_t tcp_rndiss_msb
;
1021 u_int16_t tcp_rndiss_cnt
;
1022 long tcp_rndiss_reseed
;
1025 tcp_rndiss_encrypt(val
)
1028 u_int16_t sum
= 0, i
;
1030 for (i
= 0; i
< TCP_RNDISS_ROUNDS
; i
++) {
1032 val
^= ((u_int16_t
)tcp_rndiss_sbox
[(val
^sum
) & 0x7f]) << 7;
1033 val
= ((val
& 0xff) << 7) | (val
>> 8);
1042 struct timeval time
;
1044 getmicrotime(&time
);
1045 read_random(tcp_rndiss_sbox
, sizeof(tcp_rndiss_sbox
));
1047 tcp_rndiss_reseed
= time
.tv_sec
+ TCP_RNDISS_OUT
;
1048 tcp_rndiss_msb
= tcp_rndiss_msb
== 0x8000 ? 0 : 0x8000;
1056 struct timeval time
;
1058 getmicrotime(&time
);
1060 if (tcp_rndiss_cnt
>= TCP_RNDISS_MAX
||
1061 time
.tv_sec
> tcp_rndiss_reseed
)
1066 /* (tmp & 0x7fff) ensures a 32768 byte gap between ISS */
1067 return ((tcp_rndiss_encrypt(tcp_rndiss_cnt
++) | tcp_rndiss_msb
) <<16) |
1073 * When a source quench is received, close congestion window
1074 * to one segment. We will gradually open it again as we proceed.
1077 tcp_quench(inp
, errno
)
1081 struct tcpcb
*tp
= intotcpcb(inp
);
1084 tp
->snd_cwnd
= tp
->t_maxseg
;
1088 * When `need fragmentation' ICMP is received, update our idea of the MSS
1089 * based on the new value in the route. Also nudge TCP to send something,
1090 * since we know the packet we just sent was dropped.
1091 * This duplicates some code in the tcp_mss() function in tcp_input.c.
1094 tcp_mtudisc(inp
, errno
)
1098 struct tcpcb
*tp
= intotcpcb(inp
);
1100 struct rmxp_tao
*taop
;
1101 struct socket
*so
= inp
->inp_socket
;
1105 int isipv6
= (tp
->t_inpcb
->inp_vflag
& INP_IPV4
) == 0;
1111 rt
= tcp_rtlookup6(inp
);
1114 rt
= tcp_rtlookup(inp
);
1115 if (!rt
|| !rt
->rt_rmx
.rmx_mtu
) {
1116 tp
->t_maxopd
= tp
->t_maxseg
=
1118 isipv6
? tcp_v6mssdflt
:
1123 taop
= rmx_taop(rt
->rt_rmx
);
1124 offered
= taop
->tao_mssopt
;
1125 mss
= rt
->rt_rmx
.rmx_mtu
-
1128 sizeof(struct tcpip6hdr
) :
1130 sizeof(struct tcpiphdr
)
1137 mss
= min(mss
, offered
);
1139 * XXX - The above conditional probably violates the TCP
1140 * spec. The problem is that, since we don't know the
1141 * other end's MSS, we are supposed to use a conservative
1142 * default. But, if we do that, then MTU discovery will
1143 * never actually take place, because the conservative
1144 * default is much less than the MTUs typically seen
1145 * on the Internet today. For the moment, we'll sweep
1146 * this under the carpet.
1148 * The conservative default might not actually be a problem
1149 * if the only case this occurs is when sending an initial
1150 * SYN with options and data to a host we've never talked
1151 * to before. Then, they will reply with an MSS value which
1152 * will get recorded and the new parameters should get
1153 * recomputed. For Further Study.
1155 if (tp
->t_maxopd
<= mss
)
1159 if ((tp
->t_flags
& (TF_REQ_TSTMP
|TF_NOOPT
)) == TF_REQ_TSTMP
&&
1160 (tp
->t_flags
& TF_RCVD_TSTMP
) == TF_RCVD_TSTMP
)
1161 mss
-= TCPOLEN_TSTAMP_APPA
;
1162 if ((tp
->t_flags
& (TF_REQ_CC
|TF_NOOPT
)) == TF_REQ_CC
&&
1163 (tp
->t_flags
& TF_RCVD_CC
) == TF_RCVD_CC
)
1164 mss
-= TCPOLEN_CC_APPA
;
1165 #if (MCLBYTES & (MCLBYTES - 1)) == 0
1167 mss
&= ~(MCLBYTES
-1);
1170 mss
= mss
/ MCLBYTES
* MCLBYTES
;
1172 if (so
->so_snd
.sb_hiwat
< mss
)
1173 mss
= so
->so_snd
.sb_hiwat
;
1177 tcpstat
.tcps_mturesent
++;
1179 tp
->snd_nxt
= tp
->snd_una
;
1185 * Look-up the routing entry to the peer of this inpcb. If no route
1186 * is found and it cannot be allocated the return NULL. This routine
1187 * is called by TCP routines that access the rmx structure and by tcp_mss
1188 * to get the interface MTU.
1197 ro
= &inp
->inp_route
;
1201 if (rt
== NULL
|| !(rt
->rt_flags
& RTF_UP
)) {
1202 /* No route yet, so try to acquire one */
1203 if (inp
->inp_faddr
.s_addr
!= INADDR_ANY
) {
1204 ro
->ro_dst
.sa_family
= AF_INET
;
1205 ro
->ro_dst
.sa_len
= sizeof(ro
->ro_dst
);
1206 ((struct sockaddr_in
*) &ro
->ro_dst
)->sin_addr
=
1220 struct route_in6
*ro6
;
1223 ro6
= &inp
->in6p_route
;
1225 if (rt
== NULL
|| !(rt
->rt_flags
& RTF_UP
)) {
1226 /* No route yet, so try to acquire one */
1227 if (!IN6_IS_ADDR_UNSPECIFIED(&inp
->in6p_faddr
)) {
1228 ro6
->ro_dst
.sin6_family
= AF_INET6
;
1229 ro6
->ro_dst
.sin6_len
= sizeof(ro6
->ro_dst
);
1230 ro6
->ro_dst
.sin6_addr
= inp
->in6p_faddr
;
1231 rtalloc((struct route
*)ro6
);
1240 /* compute ESP/AH header size for TCP, including outer IP header. */
1242 ipsec_hdrsiz_tcp(tp
, isipv6
)
1253 struct ip6_hdr
*ip6
= NULL
;
1257 if (!tp
|| !tp
->t_template
|| !(inp
= tp
->t_inpcb
))
1259 MGETHDR(m
, M_DONTWAIT
, MT_DATA
);
1265 ip6
= mtod(m
, struct ip6_hdr
*);
1266 th
= (struct tcphdr
*)(ip6
+ 1);
1267 m
->m_pkthdr
.len
= m
->m_len
= sizeof(struct tcpip6hdr
);
1268 bcopy((caddr_t
)&tp
->t_template
->tt_i6
, (caddr_t
)ip6
,
1269 sizeof(struct ip6_hdr
));
1270 bcopy((caddr_t
)&tp
->t_template
->tt_t
, (caddr_t
)th
,
1271 sizeof(struct tcphdr
));
1274 ip
= mtod(m
, struct ip
*);
1275 th
= (struct tcphdr
*)(ip
+ 1);
1276 m
->m_pkthdr
.len
= m
->m_len
= sizeof(struct tcpiphdr
);
1277 bcopy((caddr_t
)&tp
->t_template
->tt_i
, (caddr_t
)ip
, sizeof(struct ip
));
1278 bcopy((caddr_t
)&tp
->t_template
->tt_t
, (caddr_t
)th
,
1279 sizeof(struct tcphdr
));
1286 hdrsiz
= ipsec6_hdrsiz(m
, IPSEC_DIR_OUTBOUND
, inp
);
1289 hdrsiz
= ipsec4_hdrsiz(m
, IPSEC_DIR_OUTBOUND
, inp
);
1297 * Return a pointer to the cached information about the remote host.
1298 * The cached information is stored in the protocol specific part of
1299 * the route metrics.
1302 tcp_gettaocache(inp
)
1306 int isipv6
= (inp
->inp_vflag
& INP_IPV4
) == 0;
1312 rt
= tcp_rtlookup6(inp
);
1315 rt
= tcp_rtlookup(inp
);
1317 /* Make sure this is a host route and is up. */
1319 (rt
->rt_flags
& (RTF_UP
|RTF_HOST
)) != (RTF_UP
|RTF_HOST
))
1322 return rmx_taop(rt
->rt_rmx
);
1326 * Clear all the TAO cache entries, called from tcp_init.
1329 * This routine is just an empty one, because we assume that the routing
1330 * routing tables are initialized at the same time when TCP, so there is
1331 * nothing in the cache left over.