2  * Copyright (c) 2000-2015 Apple Inc. All rights reserved. 
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 
   6  * This file contains Original Code and/or Modifications of Original Code 
   7  * as defined in and that are subject to the Apple Public Source License 
   8  * Version 2.0 (the 'License'). You may not use this file except in 
   9  * compliance with the License. The rights granted to you under the License 
  10  * may not be used to create, or enable the creation or redistribution of, 
  11  * unlawful or unlicensed copies of an Apple operating system, or to 
  12  * circumvent, violate, or enable the circumvention or violation of, any 
  13  * terms of an Apple operating system software license agreement. 
  15  * Please obtain a copy of the License at 
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file. 
  18  * The Original Code and all software distributed under the License are 
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
  23  * Please see the License for the specific language governing rights and 
  24  * limitations under the License. 
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 
  29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995 
  30  *      The Regents of the University of California.  All rights reserved. 
  32  * Redistribution and use in source and binary forms, with or without 
  33  * modification, are permitted provided that the following conditions 
  35  * 1. Redistributions of source code must retain the above copyright 
  36  *    notice, this list of conditions and the following disclaimer. 
  37  * 2. Redistributions in binary form must reproduce the above copyright 
  38  *    notice, this list of conditions and the following disclaimer in the 
  39  *    documentation and/or other materials provided with the distribution. 
  40  * 3. All advertising materials mentioning features or use of this software 
  41  *    must display the following acknowledgement: 
  42  *      This product includes software developed by the University of 
  43  *      California, Berkeley and its contributors. 
  44  * 4. Neither the name of the University nor the names of its contributors 
  45  *    may be used to endorse or promote products derived from this software 
  46  *    without specific prior written permission. 
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
  60  *      @(#)tcp_output.c        8.4 (Berkeley) 5/24/95 
  61  * $FreeBSD: src/sys/netinet/tcp_output.c,v 1.39.2.10 2001/07/07 04:30:38 silby Exp $ 
  64  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 
  65  * support for mandatory and extensible security protections.  This notice 
  66  * is included in support of clause 2.2 (b) of the Apple Public License, 
  73 #include <sys/param.h> 
  74 #include <sys/systm.h> 
  75 #include <sys/kernel.h> 
  76 #include <sys/sysctl.h> 
  78 #include <sys/domain.h> 
  79 #include <sys/protosw.h> 
  80 #include <sys/socket.h> 
  81 #include <sys/socketvar.h> 
  83 #include <net/route.h> 
  84 #include <net/ntstat.h> 
  85 #include <net/if_var.h> 
  87 #include <net/if_types.h> 
  90 #include <netinet/in.h> 
  91 #include <netinet/in_systm.h> 
  92 #include <netinet/in_var.h> 
  93 #include <netinet/ip.h> 
  94 #include <netinet/in_pcb.h> 
  95 #include <netinet/ip_var.h> 
  98 #include <netinet6/in6_pcb.h> 
  99 #include <netinet/ip6.h> 
 100 #include <netinet6/ip6_var.h> 
 102 #include <netinet/tcp.h> 
 104 #include <netinet/tcp_cache.h> 
 105 #include <netinet/tcp_fsm.h> 
 106 #include <netinet/tcp_seq.h> 
 107 #include <netinet/tcp_timer.h> 
 108 #include <netinet/tcp_var.h> 
 109 #include <netinet/tcpip.h> 
 110 #include <netinet/tcp_cc.h> 
 112 #include <netinet/tcp_debug.h> 
 114 #include <sys/kdebug.h> 
 115 #include <mach/sdt.h> 
 118 #include <netinet6/ipsec.h> 
 122 #include <security/mac_framework.h> 
 123 #endif /* MAC_SOCKET */ 
 125 #include <netinet/lro_ext.h> 
 127 #include <netinet/mptcp_var.h> 
 128 #include <netinet/mptcp.h> 
 129 #include <netinet/mptcp_opt.h> 
 132 #include <corecrypto/ccaes.h> 
 134 #define DBG_LAYER_BEG           NETDBG_CODE(DBG_NETTCP, 1) 
 135 #define DBG_LAYER_END           NETDBG_CODE(DBG_NETTCP, 3) 
 136 #define DBG_FNC_TCP_OUTPUT      NETDBG_CODE(DBG_NETTCP, (4 << 8) | 1) 
 138 int path_mtu_discovery 
= 1; 
 139 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, path_mtu_discovery
, 
 140         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &path_mtu_discovery
, 1, 
 141         "Enable Path MTU Discovery"); 
 144 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, slowstart_flightsize
, 
 145         CTLFLAG_RW 
| CTLFLAG_LOCKED
,&ss_fltsz
, 1, 
 146         "Slow start flight size"); 
 148 int ss_fltsz_local 
= 8; /* starts with eight segments max */ 
 149 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, local_slowstart_flightsize
, 
 150         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &ss_fltsz_local
, 1, 
 151         "Slow start flight size for local networks"); 
 154 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, tso
, CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 155         &tcp_do_tso
, 0, "Enable TCP Segmentation Offload"); 
 /*
  * sysctl_change_ecn_setting: sysctl handler for the system-wide ECN
  * settings.  It is installed for both the ecn_initiate_out and the
  * ecn_negotiate_in OIDs under _net_inet_tcp.
  *
  * Visible behaviour:
  *  - The new value is read into `i' with sysctl_io_number(); on error, or
  *    when req->newptr is USER_ADDR_NULL, the early-exit branch is taken.
  *  - If both the current tcp_ecn_outbound and the new value are 0 or 1,
  *    tcp_ecn_outbound is simply replaced by the new value.
  *  - If tcp_ecn_outbound is currently 2 and the new value is 0 or 1, the
  *    IFEF_ECN_ENABLE and IFEF_ECN_DISABLE bits are cleared on every
  *    non-cellular interface.
  *  - A second walk over the interface list sets IFEF_ECN_ENABLE and clears
  *    IFEF_ECN_DISABLE on every non-cellular interface.
  *  - tcp_ecn_outbound is then set to the new value, and tcp_ecn_inbound is
  *    considered when the new value is 2 or tcp_ecn_inbound is already 2.
  *
  * Both walks hold the interface list lock shared (ifnet_head_lock_shared)
  * and take each interface's lock exclusively around the flag update.
  *
  * NOTE(review): oidp/arg1/arg2 are marked unused and the handler always
  * passes tcp_ecn_outbound to sysctl_io_number(), so a write through the
  * ecn_negotiate_in OID is compared against, and stored into,
  * tcp_ecn_outbound as well.  Confirm this coupling is intended.
  *
  * NOTE(review): several statements of this function (return type, braces,
  * the remaining sysctl_io_number() arguments, the returns and the branch
  * joining the two interface walks) are not visible in this copy; only the
  * visible statements are described above.
  */
 158 sysctl_change_ecn_setting SYSCTL_HANDLER_ARGS
 
 160 #pragma unused(oidp, arg1, arg2) 
 161         int i
, err 
= 0, changed 
= 0; 
 /* Read the requested value; the current tcp_ecn_outbound is passed in. */
 164         err 
= sysctl_io_number(req
, tcp_ecn_outbound
, sizeof(int32_t), 
 /* Nothing more to do on error or when no new value was supplied. */
 166         if (err 
!= 0 || req
->newptr 
== USER_ADDR_NULL
) 
 /* Switching between 0 and 1 needs no per-interface work. */
 170                 if ((tcp_ecn_outbound 
== 0 || tcp_ecn_outbound 
== 1) && 
 171                     (i 
== 0 || i 
== 1)) { 
 172                         tcp_ecn_outbound 
= i
; 
 /* Leaving mode 2: drop the per-interface ECN overrides. */
 175                 if (tcp_ecn_outbound 
== 2 && (i 
== 0 || i 
== 1)) { 
 177                          * Reset ECN enable flags on non-cellular 
 178                          * interfaces so that the system default will take 
 181                         ifnet_head_lock_shared(); 
 182                         TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) { 
 183                                 if (!IFNET_IS_CELLULAR(ifp
)) { 
 184                                         ifnet_lock_exclusive(ifp
); 
 185                                         ifp
->if_eflags 
&= ~IFEF_ECN_DISABLE
; 
 186                                         ifp
->if_eflags 
&= ~IFEF_ECN_ENABLE
; 
 187                                         ifnet_lock_done(ifp
); 
 /* Force ECN on for every non-cellular interface. */
 193                          * Set ECN enable flags on non-cellular 
 196                         ifnet_head_lock_shared(); 
 197                         TAILQ_FOREACH(ifp
, &ifnet_head
, if_link
) { 
 198                                 if (!IFNET_IS_CELLULAR(ifp
)) { 
 199                                         ifnet_lock_exclusive(ifp
); 
 200                                         ifp
->if_eflags 
|= IFEF_ECN_ENABLE
; 
 201                                         ifp
->if_eflags 
&= ~IFEF_ECN_DISABLE
; 
 202                                         ifnet_lock_done(ifp
); 
 /* Record the new system-wide outbound setting. */
 207                 tcp_ecn_outbound 
= i
; 
 209         /* Change the other one too as the work is done */ 
 210         if (i 
== 2 || tcp_ecn_inbound 
== 2) 
 215 int     tcp_ecn_outbound 
= 0; 
 216 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, ecn_initiate_out
, 
 217     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_ecn_outbound
, 0, 
 218     sysctl_change_ecn_setting
, "IU", 
 219     "Initiate ECN for outbound connections"); 
 221 int     tcp_ecn_inbound 
= 0; 
 222 SYSCTL_PROC(_net_inet_tcp
, OID_AUTO
, ecn_negotiate_in
, 
 223     CTLTYPE_INT 
| CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_ecn_inbound
, 0, 
 224     sysctl_change_ecn_setting
, "IU", 
 225     "Initiate ECN for inbound connections"); 
 227 int     tcp_packet_chaining 
= 50; 
 228 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, packetchain
, 
 229         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_packet_chaining
, 0, 
 230         "Enable TCP output packet chaining"); 
 232 int     tcp_output_unlocked 
= 1; 
 233 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, socket_unlocked_on_output
, 
 234         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_output_unlocked
, 0, 
 235         "Unlock TCP when sending packets down to IP"); 
 237 int tcp_do_rfc3390 
= 1; 
 238 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, rfc3390
, 
 239         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_do_rfc3390
, 1, 
 240         "Calculate intial slowstart cwnd depending on MSS"); 
 242 int tcp_min_iaj_win 
= MIN_IAJ_WIN
; 
 243 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, min_iaj_win
, 
 244         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_min_iaj_win
, 1, 
 245         "Minimum recv win based on inter-packet arrival jitter"); 
 247 int tcp_acc_iaj_react_limit 
= ACC_IAJ_REACT_LIMIT
; 
 248 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, acc_iaj_react_limit
, 
 249         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_acc_iaj_react_limit
, 1, 
 250         "Accumulated IAJ when receiver starts to react"); 
 252 uint32_t tcp_do_autosendbuf 
= 1; 
 253 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, doautosndbuf
, 
 254         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_do_autosendbuf
, 1, 
 255         "Enable send socket buffer auto-tuning"); 
 257 uint32_t tcp_autosndbuf_inc 
= 8 * 1024; 
 258 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, autosndbufinc
, 
 259         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_autosndbuf_inc
, 1, 
 260         "Increment in send socket bufffer size"); 
 262 uint32_t tcp_autosndbuf_max 
= 512 * 1024; 
 263 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, autosndbufmax
, 
 264         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_autosndbuf_max
, 1, 
 265         "Maximum send socket buffer size"); 
 267 uint32_t tcp_prioritize_acks 
= 1; 
 268 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, ack_prioritize
, 
 269         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_prioritize_acks
, 1, 
 270         "Prioritize pure acks"); 
 272 uint32_t tcp_use_rtt_recvbg 
= 1; 
 273 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, rtt_recvbg
, 
 274         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_use_rtt_recvbg
, 1, 
 275         "Use RTT for bg recv algorithm"); 
 277 uint32_t tcp_recv_throttle_minwin 
= 16 * 1024; 
 278 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, recv_throttle_minwin
,  
 279         CTLFLAG_RW 
| CTLFLAG_LOCKED
, &tcp_recv_throttle_minwin
, 1, 
 280         "Minimum recv win for throttling"); 
 282 int32_t tcp_enable_tlp 
= 1; 
 283 SYSCTL_INT(_net_inet_tcp
, OID_AUTO
, enable_tlp
, 
 284         CTLFLAG_RW 
| CTLFLAG_LOCKED
, 
 285         &tcp_enable_tlp
, 1, "Enable Tail loss probe"); 
 /*
  * File-scope state and forward declarations for the output path.
  *
  * packchain_* are file-local counters, all starting at 0; judging by their
  * names they track packet-chaining activity (their users are not visible
  * in this part of the file).
  */
 287 static int32_t packchain_newlist 
= 0; 
 288 static int32_t packchain_looped 
= 0; 
 289 static int32_t packchain_sent 
= 0; 
 /* Variables defined in other files. */
 291 /* temporary: for testing */ 
 293 extern int ipsec_bypass
; 
 296 extern int slowlink_wsize
;      /* window correction for slow links */ 
 298 extern int fw_enable
;           /* firewall check for packet chaining */ 
 299 extern int fw_bypass
;           /* firewall check: disable packet chaining if there are rules */ 
 /* NOTE(review): the #if matching this #endif is not visible in this copy. */
 300 #endif /* IPFIREWALL */ 
 302 extern u_int32_t dlil_filter_disable_tso_count
; 
 303 extern u_int32_t kipf_count
; 
 304 extern int tcp_recv_bg
; 
 /* Forward declarations of helpers private to this file. */
 306 static int tcp_ip_output(struct socket 
*, struct tcpcb 
*, struct mbuf 
*, int, 
 307     struct mbuf 
*, int, int, int32_t, boolean_t
); 
 308 static struct mbuf
* tcp_send_lroacks(struct tcpcb 
*tp
, struct mbuf 
*m
, struct tcphdr 
*th
); 
 309 static int tcp_recv_throttle(struct tcpcb 
*tp
); 
 /*
  * tcp_tfo_check: decide whether TCP Fast Open (TFO) can be used on the SYN
  * of connection `tp'.
  *
  * The function first adds up, in `optlen', the option space the other SYN
  * options will need: the MSS option, the MP_CAPABLE option for an MPTCP
  * subflow that is still within mptcp_mpcap_retries, the timestamp option
  * and the SACK-permitted option.  It then checks, in order:
  *  - that at least TCPOLEN_FASTOPEN_REQ bytes of option space are left;
  *  - the length of the cached cookie (tcp_cache_get_cookie_len());
  *  - that the send buffer does not hold more than
  *    t_maxopd - MAX_TCPOPTLEN bytes.
  * The last visible statement clears TF_FASTOPEN in tp->t_flagsext.
  *
  * tp:  connection being evaluated; tp->t_inpcb->inp_socket is dereferenced.
  * len: int32_t supplied by the caller -- presumably the amount of data to
  *      send; its uses are not visible here, confirm against the caller.
  * Returns an int32_t.
  *
  * NOTE(review): the bodies of several `if' statements, the labels and the
  * return statements are not visible in this copy, so which value is
  * returned on each path cannot be stated from here.
  */
 311 static int32_t tcp_tfo_check(struct tcpcb 
*tp
, int32_t len
) 
 313         struct socket 
*so 
= tp
->t_inpcb
->inp_socket
; 
 314         unsigned int optlen 
= 0; 
 315         unsigned int cookie_len
; 
 /* Early checks: options disabled for this connection, or the TFO heuristic says no. */
 317         if (tp
->t_flags 
& TF_NOOPT
) 
 320         if (!tcp_heuristic_do_tfo(tp
)) 
 /* Account for the option space taken by the other SYN options. */
 323         optlen 
+= TCPOLEN_MAXSEG
; 
 325         if (tp
->t_flags 
& TF_REQ_SCALE
) 
 329         if ((so
->so_flags 
& SOF_MP_SUBFLOW
) && mptcp_enable 
&& 
 330             tp
->t_rxtshift 
<= mptcp_mpcap_retries
) 
 331                 optlen 
+= sizeof(struct mptcp_mpcapable_opt_common
) + sizeof(mptcp_key_t
); 
 334         if (tp
->t_flags 
& TF_REQ_TSTMP
) 
 335                 optlen 
+= TCPOLEN_TSTAMP_APPA
; 
 337         if (SACK_ENABLED(tp
)) 
 338                 optlen 
+= TCPOLEN_SACK_PERMITTED
; 
 340         /* Now, decide whether to use TFO or not */ 
 342         /* Don't even bother trying if there is no space at all... */ 
 343         if (MAX_TCPOPTLEN 
- optlen 
< TCPOLEN_FASTOPEN_REQ
) 
 /* Length of the cookie cached for this peer, as reported by the TCP cache. */
 346         cookie_len 
= tcp_cache_get_cookie_len(tp
); 
 348                 /* No cookie, so we request one */ 
 351         /* Do not send SYN+data if there is more in the queue than MSS */ 
 352         if (so
->so_snd
.sb_cc 
> (tp
->t_maxopd 
- MAX_TCPOPTLEN
)) 
 355         /* Ok, everything looks good. We can go on and do TFO */ 
 /* TFO is turned off for this connection here. */
 359         tp
->t_flagsext 
&= ~TF_FASTOPEN
; 
 /*
  * tcp_tfo_write_cookie_rep: write a TCP Fast Open option carrying a freshly
  * generated cookie into the option space.
  *
  * tp:     connection; tp->t_inpcb is passed to tcp_tfo_gen_cookie().
  * optlen: number of option bytes already in use.
  * opt:    option buffer (the write pointer `bp' is presumably derived from
  *         opt and optlen; its initialisation is not visible here).
  *
  * The cookie is generated into a CCAES_BLOCK_SIZE scratch buffer and its
  * first TFO_COOKIE_LEN_DEFAULT bytes are copied after the two-byte option
  * header (kind TCPOPT_FASTOPEN, length 2 + TFO_COOKIE_LEN_DEFAULT).  The
  * cookie-sent statistics are updated afterwards.
  *
  * NOTE(review): the return type, the declarations of `bp' and `ret' and
  * the return statements are not visible in this copy.
  */
 363 /* Returns the number of bytes written to the TCP option-space */ 
 365 tcp_tfo_write_cookie_rep(struct tcpcb 
*tp
, unsigned optlen
, u_char 
*opt
) 
 367         u_char out
[CCAES_BLOCK_SIZE
]; 
 /* Make sure the option header plus a default-length cookie still fits. */
 371         if ((MAX_TCPOPTLEN 
- optlen
) < 
 372             (TCPOLEN_FASTOPEN_REQ 
+ TFO_COOKIE_LEN_DEFAULT
)) 
 375         tcp_tfo_gen_cookie(tp
->t_inpcb
, out
, sizeof(out
)); 
 /* Emit kind, length, then the cookie bytes. */
 379         *bp
++ = TCPOPT_FASTOPEN
; 
 380         *bp
++ = 2 + TFO_COOKIE_LEN_DEFAULT
; 
 381         memcpy(bp
, out
, TFO_COOKIE_LEN_DEFAULT
); 
 382         ret 
+= 2 + TFO_COOKIE_LEN_DEFAULT
; 
 /* Per-connection and global accounting. */
 384         tp
->t_tfo_stats 
|= TFO_S_COOKIE_SENT
; 
 385         tcpstat
.tcps_tfo_cookie_sent
++; 
 /*
  * tcp_tfo_write_cookie: write the client-side TCP Fast Open option into
  * the option space.
  *
  * tp:     connection whose cached cookie is looked up.
  * optlen: number of option bytes already in use.
  * len:    pointer to an int32_t -- presumably the data length of the
  *         segment; its uses are not visible here, confirm against caller.
  *
  * tfo_len starts as the space left for a cookie (MAX_TCPOPTLEN - optlen -
  * TCPOLEN_FASTOPEN_REQ) and is passed by address to
  * tcp_cache_get_cookie(), which is also given the destination
  * bp + TCPOLEN_FASTOPEN_REQ so that the cookie lands directly behind the
  * option header.
  *
  * Two outcomes are visible:
  *  - cookie request: an option of length TCPOLEN_FASTOPEN_REQ is written,
  *    TFO_F_COOKIE_REQ / TFO_S_COOKIE_REQ are set and tcps_tfo_cookie_req is
  *    incremented;
  *  - cookie sent: an option of length TCPOLEN_FASTOPEN_REQ + tfo_len is
  *    written, TFO_F_COOKIE_SENT is set and, when data is sent with it,
  *    TFO_S_SYN_DATA_SENT / tcps_tfo_syn_data_sent are updated.
  * `ret' accumulates the number of option bytes written.
  *
  * NOTE(review): the return type, the rest of the parameter list, the
  * declarations of `bp', `res' and `ret' and the conditions selecting the
  * two outcomes are not visible in this copy.
  */
 391 tcp_tfo_write_cookie(struct tcpcb 
*tp
, unsigned optlen
, int32_t *len
, 
 /* Room left for the cookie itself once the option header is accounted for. */
 394         u_int8_t tfo_len 
= MAX_TCPOPTLEN 
- optlen 
- TCPOLEN_FASTOPEN_REQ
; 
 402          * The cookie will be copied in the appropriate place within the 
 403          * TCP-option space. That way we avoid the need for an intermediate 
 406         res 
= tcp_cache_get_cookie(tp
, bp 
+ TCPOLEN_FASTOPEN_REQ
, &tfo_len
); 
 /* Header-only option: ask the peer for a cookie. */
 408                 *bp
++ = TCPOPT_FASTOPEN
; 
 409                 *bp
++ = TCPOLEN_FASTOPEN_REQ
; 
 410                 ret 
+= TCPOLEN_FASTOPEN_REQ
; 
 412                 tp
->t_tfo_flags 
|= TFO_F_COOKIE_REQ
; 
 414                 tp
->t_tfo_stats 
|= TFO_S_COOKIE_REQ
; 
 415                 tcpstat
.tcps_tfo_cookie_req
++; 
 /* Option carrying the cached cookie (already copied behind the header). */
 417                 *bp
++ = TCPOPT_FASTOPEN
; 
 418                 *bp
++ = TCPOLEN_FASTOPEN_REQ 
+ tfo_len
; 
 420                 ret 
+= TCPOLEN_FASTOPEN_REQ 
+ tfo_len
; 
 422                 tp
->t_tfo_flags 
|= TFO_F_COOKIE_SENT
; 
 424                 /* If there is some data, let's track it */ 
 426                         tp
->t_tfo_stats 
|= TFO_S_SYN_DATA_SENT
; 
 427                         tcpstat
.tcps_tfo_syn_data_sent
++; 
 /*
  * tcp_send_ecn_flags_on_syn: tell whether the ECN setup flags may be put on
  * a SYN for connection `tp' on socket `so'.
  *
  * Returns non-zero only when none of the following holds:
  *  - TE_SETUPSENT is set in tp->ecn_flags,
  *  - the socket is an MPTCP subflow (SOF_MP_SUBFLOW),
  *  - TF_FASTOPEN is set in tp->t_flagsext.
  *
  * NOTE(review): the return type and the braces of this function are not
  * visible in this copy.
  */
 435 tcp_send_ecn_flags_on_syn(struct tcpcb 
*tp
, struct socket 
*so
) 
 437         return(!((tp
->ecn_flags 
& TE_SETUPSENT
) || 
 438             (so
->so_flags 
& SOF_MP_SUBFLOW
) || 
 439             (tp
->t_flagsext 
& TF_FASTOPEN
))); 
 /*
  * tcp_set_ecn: decide whether ECN is enabled for connection `tp' by setting
  * or clearing TE_ENABLE_ECN in tp->ecn_flags.
  *
  * tp:  connection to configure.
  * ifp: interface whose if_eflags supply the per-interface setting.
  *
  * The sources are consulted in decreasing order of precedence:
  *  1. socket option: TE_ECN_MODE_ENABLE / TE_ECN_MODE_DISABLE;
  *  2. interface:     IFEF_ECN_ENABLE / IFEF_ECN_DISABLE in ifp->if_eflags;
  *  3. system wide:   tcp_ecn_inbound == 1 for inbound connections,
  *                    tcp_ecn_outbound == 1 otherwise.
  * A connection counts as inbound when its socket has a non-NULL so_head.
  *
  * Whenever a step enables ECN, control jumps to check_heuristic; the final
  * visible check clears TE_ENABLE_ECN again if tcp_heuristic_do_ecn(tp)
  * returns zero.
  *
  * NOTE(review): the return type, the declaration of `inbound', the
  * check_heuristic label, the guard around the interface checks and what
  * follows each "disable" branch are not visible in this copy.
  */
 443 tcp_set_ecn(struct tcpcb 
*tp
, struct ifnet 
*ifp
) 
 448          * Socket option has precedence 
 450         if (tp
->ecn_flags 
& TE_ECN_MODE_ENABLE
) { 
 451                 tp
->ecn_flags 
|= TE_ENABLE_ECN
; 
 452                 goto check_heuristic
; 
 455         if (tp
->ecn_flags 
& TE_ECN_MODE_DISABLE
) { 
 456                 tp
->ecn_flags 
&= ~TE_ENABLE_ECN
; 
 460          * Per interface setting comes next 
 463                 if (ifp
->if_eflags 
& IFEF_ECN_ENABLE
) { 
 464                         tp
->ecn_flags 
|= TE_ENABLE_ECN
; 
 465                         goto check_heuristic
; 
 468                 if (ifp
->if_eflags 
& IFEF_ECN_DISABLE
) { 
 469                         tp
->ecn_flags 
&= ~TE_ENABLE_ECN
; 
 474          * System wide settings come last 
 /* Pick the sysctl that matches the direction of the connection. */
 476         inbound 
= (tp
->t_inpcb
->inp_socket
->so_head 
!= NULL
); 
 477         if ((inbound 
&& tcp_ecn_inbound 
== 1) || 
 478             (!inbound 
&& tcp_ecn_outbound 
== 1)) { 
 479                 tp
->ecn_flags 
|= TE_ENABLE_ECN
; 
 480                 goto check_heuristic
; 
 482                 tp
->ecn_flags 
&= ~TE_ENABLE_ECN
; 
 /* The ECN heuristic has the last word and can still switch ECN off. */
 488         if (!tcp_heuristic_do_ecn(tp
)) 
 489                 tp
->ecn_flags 
&= ~TE_ENABLE_ECN
; 
 493  * Tcp output routine: figure out what should be sent and send it. 
 501  *      ip_output_list:ENOMEM 
 502  *      ip_output_list:EADDRNOTAVAIL 
 503  *      ip_output_list:ENETUNREACH 
 504  *      ip_output_list:EHOSTUNREACH 
 505  *      ip_output_list:EACCES 
 506  *      ip_output_list:EMSGSIZE 
 507  *      ip_output_list:ENOBUFS 
 508  *      ip_output_list:???              [ignorable: mostly IPSEC/firewall/DLIL] 
 509  *      ip6_output_list:EINVAL 
 510  *      ip6_output_list:EOPNOTSUPP 
 511  *      ip6_output_list:EHOSTUNREACH 
 512  *      ip6_output_list:EADDRNOTAVAIL 
 513  *      ip6_output_list:ENETUNREACH 
 514  *      ip6_output_list:EMSGSIZE 
 515  *      ip6_output_list:ENOBUFS 
 516  *      ip6_output_list:???             [ignorable: mostly IPSEC/firewall/DLIL] 
 519 tcp_output(struct tcpcb 
*tp
) 
 521         struct inpcb 
*inp 
= tp
->t_inpcb
; 
 522         struct socket 
*so 
= inp
->inp_socket
; 
 523         int32_t len
, recwin
, sendwin
, off
; 
 526         struct ip 
*ip 
= NULL
; 
 527         struct ipovly 
*ipov 
= NULL
; 
 529         struct ip6_hdr 
*ip6 
= NULL
; 
 532         u_char opt
[TCP_MAXOLEN
]; 
 533         unsigned ipoptlen
, optlen
, hdrlen
; 
 534         int idle
, sendalot
, lost 
= 0; 
 538         tcp_seq old_snd_nxt 
= 0; 
 541         unsigned ipsec_optlen 
= 0; 
 544         struct mbuf 
*packetlist 
= NULL
; 
 545         struct mbuf 
*tp_inp_options 
= inp
->inp_depend4
.inp4_options
; 
 547         int isipv6 
= inp
->inp_vflag 
& INP_IPV6 
; 
 549         short packchain_listadd 
= 0; 
 550         int so_options 
= so
->so_options
; 
 552         u_int32_t basertt
, svc_flags 
= 0, allocated_len
; 
 553         u_int32_t lro_ackmore 
= (tp
->t_lropktlen 
!= 0) ? 1 : 0; 
 554         struct mbuf 
*mnext 
= NULL
; 
 557         unsigned int *dlenp 
= NULL
; 
 558         u_int8_t 
*finp 
= NULL
; 
 559         u_int32_t 
*sseqp 
= NULL
; 
 560         u_int64_t dss_val 
= 0; 
 561         boolean_t mptcp_acknow 
= FALSE
; 
 562         boolean_t early_data_sent 
= FALSE
; 
 564         boolean_t cell 
= FALSE
; 
 565         boolean_t wifi 
= FALSE
; 
 566         boolean_t wired 
= FALSE
; 
 567         boolean_t sack_rescue_rxt 
= FALSE
; 
 570          * Determine length of data that should be transmitted, 
 571          * and flags that will be used. 
 572          * If there is some data or critical controls (SYN, RST) 
 573          * to send, then transmit; otherwise, investigate further. 
 575         idle 
= (tp
->t_flags 
& TF_LASTIDLE
) || (tp
->snd_max 
== tp
->snd_una
); 
 577         /* Since idle_time is signed integer, the following integer subtraction 
 578          * will take care of wrap around of tcp_now 
 580         idle_time 
= tcp_now 
- tp
->t_rcvtime
; 
 581         if (idle 
&& idle_time 
>= TCP_IDLETIMEOUT(tp
)) { 
 582                 if (CC_ALGO(tp
)->after_idle 
!= NULL 
&& 
 583                     (tp
->tcp_cc_index 
!= TCP_CC_ALGO_CUBIC_INDEX 
|| 
 584                     idle_time 
>= TCP_CC_CWND_NONVALIDATED_PERIOD
)) { 
 585                         CC_ALGO(tp
)->after_idle(tp
); 
 586                         tcp_ccdbg_trace(tp
, NULL
, TCP_CC_IDLE_TIMEOUT
); 
 590                  * Do some other tasks that need to be done after 
 593                 if (!SLIST_EMPTY(&tp
->t_rxt_segments
)) 
 594                         tcp_rxtseg_clean(tp
); 
 596                 /* If stretch ack was auto-disabled, re-evaluate it */ 
 597                 tcp_cc_after_idle_stretchack(tp
); 
 599         tp
->t_flags 
&= ~TF_LASTIDLE
; 
 601                 if (tp
->t_flags 
& TF_MORETOCOME
) { 
 602                         tp
->t_flags 
|= TF_LASTIDLE
; 
 607         if (tp
->t_mpflags 
& TMPF_RESET
) { 
 608                 tcp_check_timer_state(tp
); 
 610                  * Once a RST has been sent for an MPTCP subflow,  
 611                  * the subflow socket stays around until deleted. 
 612                  * No packets such as FINs must be sent after RST. 
 619         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_START
, 0,0,0,0,0); 
 623                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
 624                      ((inp
->inp_fport 
<< 16) | inp
->inp_lport
), 
 625                      (((inp
->in6p_laddr
.s6_addr16
[0] & 0xffff) << 16) | 
 626                       (inp
->in6p_faddr
.s6_addr16
[0] & 0xffff)), 
 632                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
 633                      ((inp
->inp_fport 
<< 16) | inp
->inp_lport
), 
 634                      (((inp
->inp_laddr
.s_addr 
& 0xffff) << 16) | 
 635                       (inp
->inp_faddr
.s_addr 
& 0xffff)), 
 639          * If the route generation id changed, we need to check that our 
 640          * local (source) IP address is still valid. If it isn't either 
 641          * return error or silently do nothing (assuming the address will 
 642          * come back before the TCP connection times out). 
 644         rt 
= inp
->inp_route
.ro_rt
; 
 645         if (rt 
!= NULL 
&& ROUTE_UNUSABLE(&tp
->t_inpcb
->inp_route
)) { 
 647                 struct in_ifaddr 
*ia 
= NULL
; 
 648                 struct in6_ifaddr 
*ia6 
= NULL
; 
 649                 int found_srcaddr 
= 0; 
 651                 /* disable multipages at the socket */ 
 652                 somultipages(so
, FALSE
); 
 654                 /* Disable TSO for the socket until we know more */ 
 655                 tp
->t_flags 
&= ~TF_TSO
; 
 660                         ia6 
= ifa_foraddr6(&inp
->in6p_laddr
); 
 664                         ia 
= ifa_foraddr(inp
->inp_laddr
.s_addr
); 
 669                 /* check that the source address is still valid */ 
 670                 if (found_srcaddr 
== 0) { 
 672                             (SO_FILT_HINT_LOCKED 
| SO_FILT_HINT_NOSRCADDR
)); 
 674                         if (tp
->t_state 
>= TCPS_CLOSE_WAIT
) { 
 675                                 tcp_drop(tp
, EADDRNOTAVAIL
); 
 676                                 return(EADDRNOTAVAIL
); 
 679                         /* Set retransmit  timer if it wasn't set, 
 680                          * reset Persist timer and shift register as the 
 681                          * advertised peer window may not be valid anymore 
 684                         if (!tp
->t_timer
[TCPT_REXMT
]) { 
 685                                 tp
->t_timer
[TCPT_REXMT
] = 
 686                                     OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
 687                                 if (tp
->t_timer
[TCPT_PERSIST
]) { 
 688                                         tp
->t_timer
[TCPT_PERSIST
] = 0; 
 690                                         tp
->t_persist_stop 
= 0; 
 695                         if (tp
->t_pktlist_head 
!= NULL
) 
 696                                 m_freem_list(tp
->t_pktlist_head
); 
 697                         TCP_PKTLIST_CLEAR(tp
); 
 699                         /* drop connection if source address isn't available */ 
 700                         if (so
->so_flags 
& SOF_NOADDRAVAIL
) {  
 701                                 tcp_drop(tp
, EADDRNOTAVAIL
); 
 702                                 return(EADDRNOTAVAIL
); 
 704                                 tcp_check_timer_state(tp
); 
 705                                 return(0); /* silently ignore, keep data in socket: address may be back */ 
 709                         IFA_REMREF(&ia
->ia_ifa
); 
 712                         IFA_REMREF(&ia6
->ia_ifa
); 
 715                  * Address is still valid; check for multipages capability 
 716                  * again in case the outgoing interface has changed. 
 719                 if ((ifp 
= rt
->rt_ifp
) != NULL
) { 
 720                         somultipages(so
, (ifp
->if_hwassist 
& IFNET_MULTIPAGES
)); 
 721                         tcp_set_tso(tp
, ifp
); 
 722                         soif2kcl(so
, (ifp
->if_eflags 
& IFEF_2KCL
)); 
 723                         tcp_set_ecn(tp
, ifp
); 
 725                 if (rt
->rt_flags 
& RTF_UP
) 
 728                  * See if we should do MTU discovery. Don't do it if: 
 729                  *      1) it is disabled via the sysctl 
 730                  *      2) the route isn't up 
 731                  *      3) the MTU is locked (if it is, then discovery 
 735                 if (!path_mtu_discovery 
|| ((rt 
!= NULL
) &&  
 736                     (!(rt
->rt_flags 
& RTF_UP
) || 
 737                     (rt
->rt_rmx
.rmx_locks 
& RTV_MTU
))))  
 738                         tp
->t_flags 
&= ~TF_PMTUD
; 
 740                         tp
->t_flags 
|= TF_PMTUD
; 
 746                 cell 
= IFNET_IS_CELLULAR(rt
->rt_ifp
); 
 747                 wifi 
= (!cell 
&& IFNET_IS_WIFI(rt
->rt_ifp
)); 
 748                 wired 
= (!wifi 
&& IFNET_IS_WIRED(rt
->rt_ifp
)); 
 752          * If we've recently taken a timeout, snd_max will be greater than 
 753          * snd_nxt.  There may be SACK information that allows us to avoid 
 754          * resending already delivered data.  Adjust snd_nxt accordingly. 
 756         if (SACK_ENABLED(tp
) && SEQ_LT(tp
->snd_nxt
, tp
->snd_max
)) 
 759         off 
= tp
->snd_nxt 
- tp
->snd_una
; 
 760         sendwin 
= min(tp
->snd_wnd
, tp
->snd_cwnd
); 
 762         if (tp
->t_flags 
& TF_SLOWLINK 
&& slowlink_wsize 
> 0) 
 763                 sendwin 
= min(sendwin
, slowlink_wsize
); 
 765         flags 
= tcp_outflags
[tp
->t_state
]; 
 767          * Send any SACK-generated retransmissions.  If we're explicitly 
 768          * trying to send out new data (when sendalot is 1), bypass this 
 769          * function. If we retransmit in fast recovery mode, decrement 
 770          * snd_cwnd, since we're replacing a (future) new transmission 
 771          * with a retransmission now, and we previously incremented 
 772          * snd_cwnd in tcp_input(). 
 775          * Still in sack recovery , reset rxmit flag to zero. 
 781         if (SACK_ENABLED(tp
) && IN_FASTRECOVERY(tp
) && 
 782             (p 
= tcp_sack_output(tp
, &sack_bytes_rxmt
))) { 
 785                 cwin 
= min(tp
->snd_wnd
, tp
->snd_cwnd
) - sack_bytes_rxmt
; 
 788                 /* Do not retransmit SACK segments beyond snd_recover */ 
 789                 if (SEQ_GT(p
->end
, tp
->snd_recover
)) { 
 791                          * (At least) part of sack hole extends beyond 
 792                          * snd_recover. Check to see if we can rexmit data 
 795                         if (SEQ_GEQ(p
->rxmit
, tp
->snd_recover
)) { 
 797                                  * Can't rexmit any more data for this hole. 
 798                                  * That data will be rexmitted in the next 
 799                                  * sack recovery episode, when snd_recover 
 800                                  * moves past p->rxmit. 
 803                                 goto after_sack_rexmit
; 
 805                                 /* Can rexmit part of the current hole */ 
 806                                 len 
= ((int32_t)min(cwin
, 
 807                                                    tp
->snd_recover 
- p
->rxmit
)); 
 809                         len 
= ((int32_t)min(cwin
, p
->end 
- p
->rxmit
)); 
 812                         off 
= p
->rxmit 
- tp
->snd_una
;  
 815                         tcpstat
.tcps_sack_rexmits
++; 
 816                         tcpstat
.tcps_sack_rexmit_bytes 
+= 
 817                             min(len
, tp
->t_maxseg
); 
 819                                 nstat_route_tx(inp
->inp_route
.ro_rt
, 1, 
 820                                         min(len
, tp
->t_maxseg
), 
 821                                         NSTAT_TX_FLAG_RETRANSMIT
); 
 822                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
 824                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
 825                                     txbytes
, min(len
, tp
->t_maxseg
)); 
 826                                 tp
->t_stat
.txretransmitbytes 
+= min(len
, tp
->t_maxseg
); 
 834          * Get standard flags, and add SYN or FIN if requested by 'hidden' 
 837         if (tp
->t_flags 
& TF_NEEDFIN
) 
 839         if (tp
->t_flags 
& TF_NEEDSYN
) 
 843          * If in persist timeout with window of 0, send 1 byte. 
 844          * Otherwise, if window is small but nonzero 
 845          * and timer expired, we will send what we can 
 846          * and go to transmit state. 
 848         if (tp
->t_flagsext 
& TF_FORCE
) { 
 851                          * If we still have some data to send, then 
 852                          * clear the FIN bit.  Usually this would 
 853                          * happen below when it realizes that we 
 854                          * aren't sending all the data.  However, 
 855                          * if we have exactly 1 byte of unsent data, 
 856                          * then it won't clear the FIN bit below, 
 857                          * and if we are in persist state, we wind 
 858                          * up sending the packet without recording 
 859                          * that we sent the FIN bit. 
 861                          * We can't just blindly clear the FIN bit, 
 862                          * because if we don't have any more data 
 863                          * to send then the probe will be the FIN 
 866                         if (off 
< so
->so_snd
.sb_cc
) 
 870                         tp
->t_timer
[TCPT_PERSIST
] = 0; 
 873                         tp
->t_persist_stop 
= 0; 
 878          * If snd_nxt == snd_max and we have transmitted a FIN, the 
 879          * offset will be > 0 even if so_snd.sb_cc is 0, resulting in 
 880          * a negative length.  This can also occur when TCP opens up 
 881          * its congestion window while receiving additional duplicate 
 882          * acks after fast-retransmit because TCP will reset snd_nxt 
 883          * to snd_max after the fast-retransmit. 
 885          * In the normal retransmit-FIN-only case, however, snd_nxt will 
 886          * be set to snd_una, the offset will be 0, and the length may 
 889          * If sack_rxmit is true we are retransmitting from the scoreboard 
 890          * in which case len is already set. 
 892         if (sack_rxmit 
== 0) { 
 893                 if (sack_bytes_rxmt 
== 0) { 
 894                         len 
= min(so
->so_snd
.sb_cc
, sendwin
) - off
; 
 898                         cwin 
= tp
->snd_cwnd 
- 
 899                             (tp
->snd_nxt 
- tp
->sack_newdata
) - 
 904                          * We are inside of a SACK recovery episode and are 
 905                          * sending new data, having retransmitted all the 
 906                          * data possible in the scoreboard. 
 908                         len 
= min(so
->so_snd
.sb_cc
, tp
->snd_wnd
)  
 911                          * Don't remove this (len > 0) check ! 
 912                          * We explicitly check for len > 0 here (although it  
 913                          * isn't really necessary), to work around a gcc  
 914                          * optimization issue - to force gcc to compute 
 915                          * len above. Without this check, the computation 
 916                          * of len is bungled by the optimizer. 
 919                                 len 
= imin(len
, cwin
); 
 924                          * At this point SACK recovery can not send any 
 925                          * data from scoreboard or any new data. Check 
 926                          * if we can do a rescue retransmit towards the 
 927                          * tail end of recovery window. 
 929                         if (len 
== 0 && cwin 
> 0 && 
 930                             SEQ_LT(tp
->snd_fack
, tp
->snd_recover
) && 
 931                             !(tp
->t_flagsext 
& TF_RESCUE_RXT
)) { 
 932                                 len 
= min((tp
->snd_recover 
- tp
->snd_fack
), 
 934                                 len 
= imin(len
, cwin
); 
 935                                 old_snd_nxt 
= tp
->snd_nxt
; 
 936                                 sack_rescue_rxt 
= TRUE
; 
 937                                 tp
->snd_nxt 
= tp
->snd_recover 
- len
; 
 939                                  * If FIN has been sent, snd_max 
 940                                  * must have been advanced to cover it. 
 942                                 if ((tp
->t_flags 
& TF_SENTFIN
) && 
 943                                     tp
->snd_max 
== tp
->snd_recover
) 
 946                                 off 
= tp
->snd_nxt 
- tp
->snd_una
; 
 948                                 tp
->t_flagsext 
|= TF_RESCUE_RXT
; 
 954         if ((tp
->t_mpflags 
& TMPF_FASTJOIN_SEND
) && 
 955             (tp
->t_state 
== TCPS_SYN_SENT
) && 
 956             (!(tp
->t_flags 
& TF_CLOSING
)) && 
 957             (so
->so_snd
.sb_cc 
!= 0) && 
 958             (tp
->t_rxtshift 
== 0)) { 
 962                 len 
= min(so
->so_snd
.sb_cc
, tp
->t_maxseg
); 
 963                 early_data_sent 
= TRUE
; 
 964         } else if (early_data_sent
) { 
 965                 /* for now, we allow only one data segment to be sent */ 
 970          * Lop off SYN bit if it has already been sent.  However, if this 
 971          * is SYN-SENT state and if segment contains data and if we don't 
 972          * know that foreign host supports TAO, suppress sending segment. 
 974         if ((flags 
& TH_SYN
) && SEQ_GT(tp
->snd_nxt
, tp
->snd_una
)) { 
 975                 if (tp
->t_state 
!= TCPS_SYN_RECEIVED 
|| tfo_enabled(tp
)) 
 978                 if (len 
> 0 && tp
->t_state 
== TCPS_SYN_SENT
) { 
 979                         while (inp
->inp_sndinprog_cnt 
== 0 && 
 980                                 tp
->t_pktlist_head 
!= NULL
) { 
 981                                 packetlist 
= tp
->t_pktlist_head
; 
 982                                 packchain_listadd 
= tp
->t_lastchain
; 
 984                                 TCP_PKTLIST_CLEAR(tp
); 
 986                                 error 
= tcp_ip_output(so
, tp
, packetlist
, 
 987                                     packchain_listadd
, tp_inp_options
, 
 988                                     (so_options 
& SO_DONTROUTE
), 
 989                                     (sack_rxmit 
| (sack_bytes_rxmt 
!= 0)), 0, 
1000                          * tcp was closed while we were in ip, 
1003                         if (inp
->inp_sndinprog_cnt 
== 0 && 
1004                                 (tp
->t_flags 
& TF_CLOSING
)) { 
1005                                 tp
->t_flags 
&= ~TF_CLOSING
; 
1006                                 (void) tcp_close(tp
); 
1008                                 tcp_check_timer_state(tp
); 
1010                         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
, 
1017          * Be careful not to send data and/or FIN on SYN segments. 
1018          * This measure is needed to prevent interoperability problems 
1019          * with not fully conformant TCP implementations. 
1021          * In case of TFO, we handle the setting of the len in 
1022          * tcp_tfo_check. In case TFO is not enabled, never ever send 
1025         if ((flags 
& TH_SYN
) && !tfo_enabled(tp
)) { 
1030         if ((flags 
& TH_SYN
) && tp
->t_state 
<= TCPS_SYN_SENT 
&& tfo_enabled(tp
)) 
1031                 len 
= tcp_tfo_check(tp
, len
); 
1034          * The check here used to be (len < 0). Sometimes len is zero 
1035          * when the congestion window is closed and we need to check 
1036          * if persist timer has to be set in that case. But don't set  
1037          * persist until connection is established. 
1039         if (len 
<= 0 && !(flags 
& TH_SYN
)) { 
1041                  * If FIN has been sent but not acked, 
1042                  * but we haven't been called to retransmit, 
1043                  * len will be < 0.  Otherwise, window shrank 
1044                  * after we sent into it.  If window shrank to 0, 
1045                  * cancel pending retransmit, pull snd_nxt back 
1046                  * to (closed) window, and set the persist timer 
1047                  * if it isn't already going.  If the window didn't 
1048                  * close completely, just wait for an ACK. 
1052                         tp
->t_timer
[TCPT_REXMT
] = 0; 
1053                         tp
->t_timer
[TCPT_PTO
] = 0; 
1056                         tp
->snd_nxt 
= tp
->snd_una
; 
1058                         if (tp
->t_timer
[TCPT_PERSIST
] == 0) 
1064          * Automatic sizing of send socket buffer. Increase the send 
1065          * socket buffer size if all of the following criteria are met 
1066          *      1. the receiver has enough buffer space for this data 
1067          *      2. send buffer is filled to 7/8th with data (so we actually 
1068          *         have data to make use of it); 
1069          *      3. our send window (slow start and congestion controlled) is 
1070          *         larger than sent but unacknowledged data in send buffer. 
1072         basertt 
= get_base_rtt(tp
); 
1073         if (tcp_do_autosendbuf 
== 1 && 
1074             !INP_WAIT_FOR_IF_FEEDBACK(inp
) && !IN_FASTRECOVERY(tp
) && 
1075             (so
->so_snd
.sb_flags 
& (SB_AUTOSIZE 
| SB_TRIM
)) == SB_AUTOSIZE 
&& 
1076             tcp_cansbgrow(&so
->so_snd
)) { 
1077                 if ((tp
->snd_wnd 
/ 4 * 5) >= so
->so_snd
.sb_hiwat 
&& 
1078                     so
->so_snd
.sb_cc 
>= (so
->so_snd
.sb_hiwat 
/ 8 * 7) && 
1079                     sendwin 
>= (so
->so_snd
.sb_cc 
-  
1080                         (tp
->snd_nxt 
- tp
->snd_una
))) { 
1081                         /* Also increase the send buffer only if the  
1082                          * round-trip time is not increasing because we do 
1083                          * not want to contribute to latency by filling 
1085                          * We also do not want to hold onto application's 
1086                          * old data for too long. Interactive applications 
1087                          * would rather discard old data. 
1089                         if (tp
->t_rttcur 
<= (basertt 
+ 25)) { 
1090                                 if (sbreserve(&so
->so_snd
, 
1091                                     min(so
->so_snd
.sb_hiwat 
+ tcp_autosndbuf_inc
, 
1092                                         tcp_autosndbuf_max
)) == 1) { 
1093                                         so
->so_snd
.sb_idealsize 
= so
->so_snd
.sb_hiwat
; 
1096                                 so
->so_snd
.sb_idealsize 
= 
1097                                     max(tcp_sendspace
, so
->so_snd
.sb_hiwat 
- 
1098                                         (2 * tcp_autosndbuf_inc
)); 
1099                                 so
->so_snd
.sb_flags 
|= SB_TRIM
; 
1105          * Truncate to the maximum segment length or enable TCP Segmentation 
1106          * Offloading (if supported by hardware) and ensure that FIN is removed 
1107          * if the length no longer contains the last data byte. 
1109          * TSO may only be used if we are in a pure bulk sending state. 
1110          * The presence of TCP-MD5, SACK retransmits, SACK advertisements, 
1111          * ipfw rules and IP options, as well as disabling hardware checksum 
1112          * offload prevent using TSO.  With TSO the TCP header is the same 
1113          * (except for the sequence number) for all generated packets.  This 
1114          * makes it impossible to transmit any options which vary per generated 
1115          * segment or packet. 
1117          * The length of TSO bursts is limited to TCP_MAXWIN.  That limit and 
1118          * removal of FIN (if not already caught here) are handled later after 
1119          * the exact length of the TCP options are known. 
1123          * Pre-calculate here as we save another lookup into the darknesses 
1124          * of IPsec that way and can actually decide if TSO is ok. 
1126         if (ipsec_bypass 
== 0) 
1127                 ipsec_optlen 
= ipsec_hdrsiz_tcp(tp
); 
1129         if (len 
> tp
->t_maxseg
) { 
1130                 if ((tp
->t_flags 
& TF_TSO
) && tcp_do_tso 
&& hwcksum_tx 
&& 
1131                     ip_use_randomid 
&& kipf_count 
== 0 && 
1132                     dlil_filter_disable_tso_count 
== 0 && 
1133                     tp
->rcv_numsacks 
== 0 && sack_rxmit 
== 0  && 
1134                     sack_bytes_rxmt 
== 0 && 
1135                     inp
->inp_options 
== NULL 
&& 
1136                     inp
->in6p_options 
== NULL
 
1138                     && ipsec_optlen 
== 0 
1141                     && (fw_enable 
== 0 || fw_bypass
) 
1153         /* Send one segment or less as a tail loss probe */ 
1154         if (tp
->t_flagsext 
& TF_SENT_TLPROBE
) { 
1155                 len 
= min(len
, tp
->t_maxseg
); 
1161         if ((so
->so_flags 
& SOF_MP_SUBFLOW
) &&  
1162             !(tp
->t_mpflags 
& TMPF_TCP_FALLBACK
)) { 
1164                 if (!(tp
->t_mpflags 
& TMPF_PREESTABLISHED
) && 
1165                     (tp
->t_state 
> TCPS_CLOSED
) && 
1166                     ((tp
->t_mpflags 
& TMPF_SND_MPPRIO
) || 
1167                     (tp
->t_mpflags 
& TMPF_SND_REM_ADDR
) || 
1168                     (tp
->t_mpflags 
& TMPF_SND_MPFAIL
) || 
1169                     (tp
->t_mpflags 
& TMPF_MPCAP_RETRANSMIT
))) { 
1174                         mptcp_acknow 
= TRUE
; 
1176                         mptcp_acknow 
= FALSE
; 
1179                  * The contiguous bytes in the subflow socket buffer can be 
1180                  * discontiguous at the MPTCP level. Since only one DSS  
1181                  * option can be sent in one packet, reduce length to match 
1182                  * the contiguous MPTCP level. Set sendalot to send remainder. 
1185                         newlen 
= mptcp_adj_sendlen(so
, off
, len
); 
1194          * If the socket is capable of doing unordered send, 
1195          * pull the amount of data that can be sent from the 
1196          * unordered priority queues to the serial queue in 
1197          * the socket buffer. If bytes are not yet available 
1198          * in the highest priority message, we may not be able  
1199          * to send any new data.  
1201         if (so
->so_flags 
& SOF_ENABLE_MSGS
) { 
1203                     so
->so_msg_state
->msg_serial_bytes
) { 
1204                         sbpull_unordered_data(so
, off
, len
); 
1206                         /* check if len needs to be modified */ 
1208                             so
->so_msg_state
->msg_serial_bytes
) { 
1209                                 len 
= so
->so_msg_state
->msg_serial_bytes 
- off
; 
1212                                         tcpstat
.tcps_msg_sndwaithipri
++; 
1219                 if (SEQ_LT(p
->rxmit 
+ len
, tp
->snd_una 
+ so
->so_snd
.sb_cc
)) 
1222                 if (SEQ_LT(tp
->snd_nxt 
+ len
, tp
->snd_una 
+ so
->so_snd
.sb_cc
)) 
1226         recwin 
= tcp_sbspace(tp
); 
1229          * Sender silly window avoidance.   We transmit under the following 
1230          * conditions when len is non-zero: 
1232          *      - we've timed out (e.g. persist timer) 
1233          *      - we need to retransmit 
1234          *      - We have a full segment (or more with TSO) 
1235          *      - This is the last buffer in a write()/send() and we are 
1236          *        either idle or running NODELAY 
1237          *      - we have more than 1/2 the maximum send window's worth of 
1238          *        data (receiver may have limited the window size) 
1241                 if (tp
->t_flagsext 
& TF_FORCE
) 
1243                 if (SEQ_LT(tp
->snd_nxt
, tp
->snd_max
)) 
1249                  * Send new data on the connection only if it is 
1250                  * not flow controlled 
1252                 if (!INP_WAIT_FOR_IF_FEEDBACK(inp
) || 
1253                     tp
->t_state 
!= TCPS_ESTABLISHED
) { 
1254                         if (len 
>= tp
->t_maxseg
) 
1256                         if (!(tp
->t_flags 
& TF_MORETOCOME
) && 
1257                             (idle 
|| tp
->t_flags 
& TF_NODELAY 
||  
1258                             tp
->t_flags 
& TF_MAXSEGSNT 
|| 
1259                             ALLOW_LIMITED_TRANSMIT(tp
)) && 
1260                             (tp
->t_flags 
& TF_NOPUSH
) == 0 && 
1261                             len 
+ off 
>= so
->so_snd
.sb_cc
) 
1263                         if (len 
>= tp
->max_sndwnd 
/ 2 && tp
->max_sndwnd 
> 0) 
1266                         tcpstat
.tcps_fcholdpacket
++; 
1271          * Compare available window to amount of window 
1272          * known to peer (as advertised window less 
1273          * next expected input).  If the difference is at least two 
1274          * max size segments, or at least 25% of the maximum possible 
1275          * window, then want to send a window update to peer. 
1276          * Skip this if the connection is in T/TCP half-open state. 
1278         if (recwin 
> 0 && !(tp
->t_flags 
& TF_NEEDSYN
)) { 
1280                  * "adv" is the amount we can increase the window, 
1281                  * taking into account that we are limited by 
1282                  * TCP_MAXWIN << tp->rcv_scale. 
1284                 int32_t adv
, oldwin 
= 0; 
1285                 adv 
= imin(recwin
, (int)TCP_MAXWIN 
<< tp
->rcv_scale
) - 
1286                         (tp
->rcv_adv 
- tp
->rcv_nxt
); 
1288                 if (SEQ_GT(tp
->rcv_adv
, tp
->rcv_nxt
)) 
1289                         oldwin 
= tp
->rcv_adv 
- tp
->rcv_nxt
; 
1291                 if (adv 
>= (int32_t) (2 * tp
->t_maxseg
)) { 
1293                          * Update only if the resulting scaled value of 
1294                          * the window changed, or if there is a change in 
1295                          * the sequence since the last ack. This avoids  
1296                          * what appears as dupe ACKS (see rdar://5640997) 
1298                          * If streaming is detected avoid sending too many 
1299                          * window updates. We will depend on the delack  
1300                          * timer to send a window update when needed. 
1302                         if (!(tp
->t_flags 
& TF_STRETCHACK
) && 
1303                                 (tp
->last_ack_sent 
!= tp
->rcv_nxt 
||  
1304                                 ((oldwin 
+ adv
) >> tp
->rcv_scale
) > 
1305                                 (oldwin 
>> tp
->rcv_scale
))) { 
1310                          * Make sure that the delayed ack timer is set if 
1311                          * we delayed sending a window update because of  
1312                          * streaming detection. 
1314                         if ((tp
->t_flags 
& TF_STRETCHACK
) && 
1315                                 !(tp
->t_flags 
& TF_DELACK
)) {  
1316                                 tp
->t_flags 
|= TF_DELACK
; 
1317                                 tp
->t_timer
[TCPT_DELACK
] =  
1318                                         OFFSET_FROM_START(tp
, tcp_delack
); 
1321                 if (4 * adv 
>= (int32_t) so
->so_rcv
.sb_hiwat
)  
1326          * Send if we owe the peer an ACK, RST, SYN, or urgent data. ACKNOW 
1327          * is also a catch-all for the retransmit timer timeout case. 
1329         if (tp
->t_flags 
& TF_ACKNOW
) 
1331         if ((flags 
& TH_RST
) || 
1332             ((flags 
& TH_SYN
) && (tp
->t_flags 
& TF_NEEDSYN
) == 0)) 
1334         if (SEQ_GT(tp
->snd_up
, tp
->snd_una
)) 
1341          * If our state indicates that FIN should be sent 
1342          * and we have not yet done so, then we need to send. 
1344         if ((flags 
& TH_FIN
) && 
1345             (!(tp
->t_flags 
& TF_SENTFIN
) || tp
->snd_nxt 
== tp
->snd_una
)) 
1348          * In SACK, it is possible for tcp_output to fail to send a segment 
1349          * after the retransmission timer has been turned off.  Make sure 
1350          * that the retransmission timer is set. 
1352         if (SACK_ENABLED(tp
) && (tp
->t_state 
>= TCPS_ESTABLISHED
) &&  
1353             SEQ_GT(tp
->snd_max
, tp
->snd_una
) && 
1354             tp
->t_timer
[TCPT_REXMT
] == 0 && 
1355             tp
->t_timer
[TCPT_PERSIST
] == 0) { 
1356                 tp
->t_timer
[TCPT_REXMT
] = OFFSET_FROM_START(tp
, 
1361          * TCP window updates are not reliable, rather a polling protocol 
1362          * using ``persist'' packets is used to ensure receipt of window 
1363          * updates.  The three ``states'' for the output side are: 
1364          *      idle                    not doing retransmits or persists 
1365          *      persisting              to move a small or zero window 
1366          *      (re)transmitting        and thereby not persisting 
1368          * tp->t_timer[TCPT_PERSIST] 
1369          *      is set when we are in persist state. 
1371          *      is set when we are called to send a persist packet. 
1372          * tp->t_timer[TCPT_REXMT] 
1373          *      is set when we are retransmitting 
1374          * The output side is idle when both timers are zero. 
1376          * If send window is too small, there is data to transmit, and no 
1377          * retransmit or persist is pending, then go to persist state. 
1378          * If nothing happens soon, send when timer expires: 
1379          * if window is nonzero, transmit what we can, 
1380          * otherwise force out a byte. 
1382         if (so
->so_snd
.sb_cc 
&& tp
->t_timer
[TCPT_REXMT
] == 0 && 
1383             tp
->t_timer
[TCPT_PERSIST
] == 0) { 
1390          * If there is no reason to send a segment, just return. 
1391          * but if there is some packets left in the packet list, send them now. 
1393         while (inp
->inp_sndinprog_cnt 
== 0 && 
1394                 tp
->t_pktlist_head 
!= NULL
) { 
1395                 packetlist 
= tp
->t_pktlist_head
; 
1396                 packchain_listadd 
= tp
->t_lastchain
; 
1398                 TCP_PKTLIST_CLEAR(tp
); 
1400                 error 
= tcp_ip_output(so
, tp
, packetlist
, 
1402                     tp_inp_options
, (so_options 
& SO_DONTROUTE
), 
1403                     (sack_rxmit 
| (sack_bytes_rxmt 
!= 0)), recwin
, 
1410         /* tcp was closed while we were in ip; resume close */ 
1411         if (inp
->inp_sndinprog_cnt 
== 0 && 
1412                 (tp
->t_flags 
& TF_CLOSING
)) { 
1413                 tp
->t_flags 
&= ~TF_CLOSING
; 
1414                 (void) tcp_close(tp
); 
1416                 tcp_check_timer_state(tp
); 
1418         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
, 0,0,0,0,0); 
1423          * Set TF_MAXSEGSNT flag if the segment size is greater than or 
1424          * equal to the max segment size. 
1427                 if (len 
>= tp
->t_maxseg
) 
1428                         tp
->t_flags 
|= TF_MAXSEGSNT
; 
1430                         tp
->t_flags 
&= ~TF_MAXSEGSNT
; 
1433          * Before ESTABLISHED, force sending of initial options 
1434          * unless TCP set not to do any options. 
1435          * NOTE: we assume that the IP/TCP header plus TCP options 
1436          * always fit in a single mbuf, leaving room for a maximum 
1438          *      max_linkhdr + sizeof (struct tcpiphdr) + optlen <= MCLBYTES 
1443                 hdrlen 
= sizeof (struct ip6_hdr
) + sizeof (struct tcphdr
); 
1446                 hdrlen 
= sizeof (struct tcpiphdr
); 
1447         if (flags 
& TH_SYN
) { 
1448                 tp
->snd_nxt 
= tp
->iss
; 
1449                 if ((tp
->t_flags 
& TF_NOOPT
) == 0) { 
1452                         opt
[0] = TCPOPT_MAXSEG
; 
1453                         opt
[1] = TCPOLEN_MAXSEG
; 
1454                         mss 
= htons((u_short
) tcp_mssopt(tp
)); 
1455                         (void)memcpy(opt 
+ 2, &mss
, sizeof(mss
)); 
1456                         optlen 
= TCPOLEN_MAXSEG
; 
1458                         if ((tp
->t_flags 
& TF_REQ_SCALE
) && 
1459                             ((flags 
& TH_ACK
) == 0 || 
1460                             (tp
->t_flags 
& TF_RCVD_SCALE
))) { 
1461                                 *((u_int32_t 
*)(void *)(opt 
+ optlen
)) = htonl( 
1463                                         TCPOPT_WINDOW 
<< 16 | 
1464                                         TCPOLEN_WINDOW 
<< 8 | 
1465                                         tp
->request_r_scale
); 
1470                                 optlen 
= mptcp_setup_syn_opts(so
, flags
, opt
, 
1478          * Send a timestamp and echo-reply if this is a SYN and our side 
1479          * wants to use timestamps (TF_REQ_TSTMP is set) or both our side 
1480          * and our peer have sent timestamps in our SYN's. 
1482         if ((tp
->t_flags 
& (TF_REQ_TSTMP
|TF_NOOPT
)) == TF_REQ_TSTMP 
&& 
1483             (flags 
& TH_RST
) == 0 && 
1484             ((flags 
& TH_ACK
) == 0 || 
1485              (tp
->t_flags 
& TF_RCVD_TSTMP
))) { 
1486                 u_int32_t 
*lp 
= (u_int32_t 
*)(void *)(opt 
+ optlen
); 
1488                 /* Form timestamp option as shown in appendix A of RFC 1323. */ 
1489                 *lp
++ = htonl(TCPOPT_TSTAMP_HDR
); 
1490                 *lp
++ = htonl(tcp_now
); 
1491                 *lp   
= htonl(tp
->ts_recent
); 
1492                 optlen 
+= TCPOLEN_TSTAMP_APPA
; 
1495         /* Note the timestamp for receive buffer autosizing */ 
1496         if (tp
->rfbuf_ts 
== 0 && (so
->so_rcv
.sb_flags 
& SB_AUTOSIZE
)) 
1497                 tp
->rfbuf_ts 
= tcp_now
; 
1499         if (SACK_ENABLED(tp
) && ((tp
->t_flags 
& TF_NOOPT
) == 0)) { 
1501                  * Tack on the SACK permitted option *last*. 
1502                  * And do padding of options after tacking this on. 
1503                  * This is because if MSS, TS, WinScale and Signatures are 
1504                  * all present, we have just 2 bytes left for the SACK 
1505                  * permitted option, which is just enough. 
1508                  * If this is the first SYN of connection (not a SYN 
1509                  * ACK), include SACK permitted option.  If this is a 
1510                  * SYN ACK, include SACK permitted option if peer has 
1511                  * already done so. This is only for active connect, 
1512                  * since the syncache takes care of the passive connect. 
1514                 if ((flags 
& TH_SYN
) && 
1515                     (!(flags 
& TH_ACK
) || (tp
->t_flags 
& TF_SACK_PERMIT
))) { 
1517                         bp 
= (u_char 
*)opt 
+ optlen
; 
1519                         *bp
++ = TCPOPT_SACK_PERMITTED
; 
1520                         *bp
++ = TCPOLEN_SACK_PERMITTED
; 
1521                         optlen 
+= TCPOLEN_SACK_PERMITTED
; 
1525         if (so
->so_flags 
& SOF_MP_SUBFLOW
) { 
1527                  * It's important to piggyback acks with data as ack only packets 
1528                  * may get lost and data packets that don't send Data ACKs 
1529                  * still advance the subflow level ACK and therefore make it 
1530                  * hard for the remote end to recover in low cwnd situations. 
1533                         tp
->t_mpflags 
|= (TMPF_SEND_DSN 
| 
1536                         tp
->t_mpflags 
|= TMPF_MPTCP_ACKNOW
; 
1538                 optlen 
= mptcp_setup_opts(tp
, off
, &opt
[0], optlen
, flags
, 
1539                     len
, &dlenp
, &finp
, &dss_val
, &sseqp
, &mptcp_acknow
); 
1540                 tp
->t_mpflags 
&= ~TMPF_SEND_DSN
; 
1544         if (tfo_enabled(tp
) && !(tp
->t_flags 
& TF_NOOPT
) && 
1545             (flags 
& (TH_SYN 
| TH_ACK
)) == TH_SYN
) 
1546                 optlen 
+= tcp_tfo_write_cookie(tp
, optlen
, &len
, opt
); 
1548         if (tfo_enabled(tp
) && 
1549             (flags 
& (TH_SYN 
| TH_ACK
)) == (TH_SYN 
| TH_ACK
) && 
1550             (tp
->t_tfo_flags 
& TFO_F_OFFER_COOKIE
)) 
1551                 optlen 
+= tcp_tfo_write_cookie_rep(tp
, optlen
, opt
); 
1553         if (SACK_ENABLED(tp
) && ((tp
->t_flags 
& TF_NOOPT
) == 0)) { 
1555                  * Send SACKs if necessary.  This should be the last 
1556                  * option processed.  Only as many SACKs are sent as 
1557                  * are permitted by the maximum options size. 
1559                  * In general, SACK blocks consume 8*n+2 bytes. 
1560                  * So a full size SACK blocks option is 34 bytes 
1561                  * (to generate 4 SACK blocks).  At a minimum, 
1562                  * we need 10 bytes (to generate 1 SACK block). 
1563                  * If TCP Timestamps (12 bytes) and TCP Signatures 
1564                  * (18 bytes) are both present, we'll just have 
1565                  * 10 bytes for SACK options 40 - (12 + 18). 
1567                 if (TCPS_HAVEESTABLISHED(tp
->t_state
) && 
1568                     (tp
->t_flags 
& TF_SACK_PERMIT
) && 
1569                     (tp
->rcv_numsacks 
> 0 || TCP_SEND_DSACK_OPT(tp
)) && 
1570                     MAX_TCPOPTLEN 
- optlen 
- 2 >= TCPOLEN_SACK
) { 
1572                         u_char 
*bp 
= (u_char 
*)opt 
+ optlen
; 
1575                         nsack 
= (MAX_TCPOPTLEN 
- optlen 
- 2) / TCPOLEN_SACK
; 
1576                         nsack 
= min(nsack
, (tp
->rcv_numsacks 
+ 
1577                             (TCP_SEND_DSACK_OPT(tp
) ? 1 : 0))); 
1578                         sackoptlen 
= (2 + nsack 
* TCPOLEN_SACK
); 
1581                          * First we need to pad options so that the 
1582                          * SACK blocks can start at a 4-byte boundary 
1583                          * (sack option and length are at a 2 byte offset). 
1585                         padlen 
= (MAX_TCPOPTLEN 
- optlen 
- sackoptlen
) % 4; 
1587                         while (padlen
-- > 0) 
1590                         tcpstat
.tcps_sack_send_blocks
++; 
1591                         *bp
++ = TCPOPT_SACK
; 
1593                         lp 
= (u_int32_t 
*)(void *)bp
; 
1596                          * First block of SACK option should represent 
1597                          * DSACK. Prefer to send SACK information if there 
1598                          * is space for only one SACK block. This will 
1599                          * allow for faster recovery. 
1601                         if (TCP_SEND_DSACK_OPT(tp
) && nsack 
> 0 && 
1602                             (tp
->rcv_numsacks 
== 0 || nsack 
> 1)) { 
1603                                 *lp
++ = htonl(tp
->t_dsack_lseq
); 
1604                                 *lp
++ = htonl(tp
->t_dsack_rseq
); 
1605                                 tcpstat
.tcps_dsack_sent
++; 
1609                         VERIFY(nsack 
== 0 || tp
->rcv_numsacks 
>= nsack
); 
1610                         for (i 
= 0; i 
< nsack
; i
++) { 
1611                                 struct sackblk sack 
= tp
->sackblks
[i
]; 
1612                                 *lp
++ = htonl(sack
.start
); 
1613                                 *lp
++ = htonl(sack
.end
); 
1615                         optlen 
+= sackoptlen
; 
1619         /* Pad TCP options to a 4 byte boundary */ 
1620         if (optlen 
< MAX_TCPOPTLEN 
&& (optlen 
% sizeof(u_int32_t
))) { 
1621                 int pad 
= sizeof(u_int32_t
) - (optlen 
% sizeof(u_int32_t
)); 
1622                 u_char 
*bp 
= (u_char 
*)opt 
+ optlen
; 
1632          * RFC 3168 states that: 
1633          * - If you ever sent an ECN-setup SYN/SYN-ACK you must be prepared 
1634          * to handle the TCP ECE flag, even if you also later send a 
1635          * non-ECN-setup SYN/SYN-ACK. 
1636          * - If you ever send a non-ECN-setup SYN/SYN-ACK, you must not set 
1639          * It is not clear how the ECE flag would ever be set if you never 
1640          * set the IP ECT flag on outbound packets. All the same, we use 
1641          * the TE_SETUPSENT to indicate that we have committed to handling 
1642          * the TCP ECE flag correctly. We use the TE_SENDIPECT to indicate 
1643          * whether or not we should set the IP ECT flag on outbound packet 
1645          * For a SYN-ACK, send an ECN setup SYN-ACK 
1647         if ((flags 
& (TH_SYN 
| TH_ACK
)) == (TH_SYN 
| TH_ACK
) && 
1648             (tp
->ecn_flags 
& TE_ENABLE_ECN
)) { 
1649                 if (tp
->ecn_flags 
& TE_SETUPRECEIVED
) { 
1650                         if (tcp_send_ecn_flags_on_syn(tp
, so
)) { 
1652                                  * Setting TH_ECE makes this an ECN-setup 
1658                                  * Record that we sent the ECN-setup and 
1659                                  * default to setting IP ECT. 
1661                                 tp
->ecn_flags 
|= (TE_SETUPSENT
|TE_SENDIPECT
); 
1662                                 tcpstat
.tcps_ecn_server_setup
++; 
1663                                 tcpstat
.tcps_ecn_server_success
++; 
1666                                  * We sent an ECN-setup SYN-ACK but it was 
1667                                  * dropped. Fallback to non-ECN-setup 
1668                                  * SYN-ACK and clear flag to indicate that 
1669                                  * we should not send data with IP ECT set 
1671                                  * Pretend we didn't receive an 
1674                                  * We already incremented the counter 
1675                                  * assuming that the ECN setup will 
1676                                  * succeed. Decrementing here 
1677                                  * tcps_ecn_server_success to correct it. 
1679                                 if (tp
->ecn_flags 
& TE_SETUPSENT
) { 
1680                                         tcpstat
.tcps_ecn_lost_synack
++; 
1681                                         tcpstat
.tcps_ecn_server_success
--; 
1682                                         tp
->ecn_flags 
|= TE_LOST_SYNACK
; 
1686                                     ~(TE_SETUPRECEIVED 
| TE_SENDIPECT 
| 
1690         } else if ((flags 
& (TH_SYN 
| TH_ACK
)) == TH_SYN 
&& 
1691             (tp
->ecn_flags 
& TE_ENABLE_ECN
)) { 
1692                 if (tcp_send_ecn_flags_on_syn(tp
, so
)) { 
1694                          * Setting TH_ECE and TH_CWR makes this an 
1697                         flags 
|= (TH_ECE 
| TH_CWR
); 
1698                         tcpstat
.tcps_ecn_client_setup
++; 
1699                         tp
->ecn_flags 
|= TE_CLIENT_SETUP
; 
1702                          * Record that we sent the ECN-setup and default to 
1705                         tp
->ecn_flags 
|= (TE_SETUPSENT 
| TE_SENDIPECT
); 
1708                          * We sent an ECN-setup SYN but it was dropped. 
1709                          * Fall back to non-ECN and clear flag indicating 
1710                          * we should send data with IP ECT set. 
1712                         if (tp
->ecn_flags 
& TE_SETUPSENT
) { 
1713                                 tcpstat
.tcps_ecn_lost_syn
++; 
1714                                 tp
->ecn_flags 
|= TE_LOST_SYN
; 
1716                         tp
->ecn_flags 
&= ~TE_SENDIPECT
; 
1721          * Check if we should set the TCP CWR flag. 
1722          * CWR flag is sent when we reduced the congestion window because 
1723          * we received a TCP ECE or we performed a fast retransmit. We 
1724          * never set the CWR flag on retransmitted packets. We only set 
1725          * the CWR flag on data packets. Pure acks don't have this set. 
1727         if ((tp
->ecn_flags 
& TE_SENDCWR
) != 0 && len 
!= 0 && 
1728             !SEQ_LT(tp
->snd_nxt
, tp
->snd_max
) && !sack_rxmit
) { 
1730                 tp
->ecn_flags 
&= ~TE_SENDCWR
; 
1734          * Check if we should set the TCP ECE flag. 
1736         if ((tp
->ecn_flags 
& TE_SENDECE
) != 0 && len 
== 0) { 
1738                 tcpstat
.tcps_ecn_sent_ece
++; 
1744         /* Reset DSACK sequence numbers */ 
1745         tp
->t_dsack_lseq 
= 0; 
1746         tp
->t_dsack_rseq 
= 0; 
1750                 ipoptlen 
= ip6_optlen(inp
); 
1754                 if (tp_inp_options
) { 
1755                         ipoptlen 
= tp_inp_options
->m_len 
- 
1756                                 offsetof(struct ipoption
, ipopt_list
); 
1762                 ipoptlen 
+= ipsec_optlen
; 
1766          * Adjust data length if insertion of options will 
1767          * bump the packet length beyond the t_maxopd length. 
1768          * Clear the FIN bit because we cut off the tail of 
1771          * When doing TSO limit a burst to TCP_MAXWIN minus the 
1772          * IP, TCP and Options length to keep ip->ip_len from 
1773          * overflowing.  Prevent the last segment from being 
1774          * fractional thus making them all equal sized and set 
1775          * the flag to continue sending.  TSO is disabled when 
1776          * IP options or IPSEC are present. 
1778         if (len 
+ optlen 
+ ipoptlen 
> tp
->t_maxopd
) { 
1780                  * If there is still more to send, 
1781                  * don't close the connection. 
1787                         tso_maxlen 
= tp
->tso_max_segment_size 
? 
1788                                 tp
->tso_max_segment_size 
: TCP_MAXWIN
; 
1790                         if (len 
> tso_maxlen 
- hdrlen 
- optlen
) { 
1791                                 len 
= tso_maxlen 
- hdrlen 
- optlen
; 
1792                                 len 
= len 
- (len 
% (tp
->t_maxopd 
- optlen
)); 
1794                         } else if (tp
->t_flags 
& TF_NEEDFIN
) { 
1798                         len 
= tp
->t_maxopd 
- optlen 
- ipoptlen
; 
1803         /* Adjust the length in the DSS option, if it is greater than len */ 
1806                  * To test this path without SACK, artificially 
1807                  * decrement len with something like 
1811                 if (ntohs(*dlenp
) > len
) { 
1812                         *dlenp 
= htons(len
); 
1813                         /* Unset the FIN flag, if len was adjusted */ 
1822         if (max_linkhdr 
+ hdrlen 
> MCLBYTES
) 
1823                 panic("tcphdr too big"); 
1825         /* Check if there is enough data in the send socket 
1826          * buffer to start measuring bw  
1828         if ((tp
->t_flagsext 
& TF_MEASURESNDBW
) != 0 && 
1829                 (tp
->t_bwmeas 
!= NULL
) && 
1830                 (tp
->t_flagsext 
& TF_BWMEAS_INPROGRESS
) == 0 && 
1831                 (so
->so_snd
.sb_cc 
- (tp
->snd_max 
- tp
->snd_una
)) >=  
1832                         tp
->t_bwmeas
->bw_minsize
) { 
1833                 tp
->t_bwmeas
->bw_size 
= min( 
1834                         (so
->so_snd
.sb_cc 
- (tp
->snd_max 
- tp
->snd_una
)), 
1835                         tp
->t_bwmeas
->bw_maxsize
); 
1836                 tp
->t_flagsext 
|= TF_BWMEAS_INPROGRESS
; 
1837                 tp
->t_bwmeas
->bw_start 
= tp
->snd_max
; 
1838                 tp
->t_bwmeas
->bw_ts 
= tcp_now
; 
1841         VERIFY(inp
->inp_flowhash 
!= 0); 
1843          * Grab a header mbuf, attaching a copy of data to 
1844          * be transmitted, and initialize the header from 
1845          * the template for sends on this connection. 
1848                 tp
->t_pmtud_lastseg_size 
= len 
+ optlen 
+ ipoptlen
; 
1849                 if ((tp
->t_flagsext 
& TF_FORCE
) && len 
== 1) 
1850                         tcpstat
.tcps_sndprobe
++; 
1851                 else if (SEQ_LT(tp
->snd_nxt
, tp
->snd_max
) || sack_rxmit
) { 
1852                         tcpstat
.tcps_sndrexmitpack
++; 
1853                         tcpstat
.tcps_sndrexmitbyte 
+= len
; 
1854                         if (nstat_collect
) { 
1855                                 nstat_route_tx(inp
->inp_route
.ro_rt
, 1, 
1856                                         len
, NSTAT_TX_FLAG_RETRANSMIT
); 
1857                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
1859                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
1861                                 tp
->t_stat
.txretransmitbytes 
+= len
; 
1864                         tcpstat
.tcps_sndpack
++; 
1865                         tcpstat
.tcps_sndbyte 
+= len
; 
1867                         if (nstat_collect
) { 
1868                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
1870                                 INP_ADD_STAT(inp
, cell
, wifi
, wired
, 
1875                 if (tp
->t_mpflags 
& TMPF_MPTCP_TRUE
) { 
1876                         tcpstat
.tcps_mp_sndpacks
++; 
1877                         tcpstat
.tcps_mp_sndbytes 
+= len
; 
1881                  * try to use the new interface that allocates all  
1882                  * the necessary mbuf hdrs under 1 mbuf lock and  
1883                  * avoids rescanning the socket mbuf list if  
1884                  * certain conditions are met.  This routine can't 
1885                  * be used in the following cases... 
1886                  * 1) the protocol headers exceed the capacity of 
1887                  * a single mbuf header's data area (no cluster attached) 
1888                  * 2) the length of the data being transmitted plus 
1889                  * the protocol headers fits into a single mbuf header's 
1890                  * data area (no cluster attached) 
1894                 /* minimum length we are going to allocate */ 
1895                 allocated_len 
= MHLEN
; 
1896                 if (MHLEN 
< hdrlen 
+ max_linkhdr
) { 
1897                         MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); 
1902                         MCLGET(m
, M_DONTWAIT
); 
1903                         if ((m
->m_flags 
& M_EXT
) == 0) { 
1908                         m
->m_data 
+= max_linkhdr
; 
1910                         allocated_len 
= MCLBYTES
; 
1912                 if (len 
<= allocated_len 
- hdrlen 
- max_linkhdr
) { 
1914                                 VERIFY(allocated_len 
<= MHLEN
); 
1915                                 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
); 
1920                                 m
->m_data 
+= max_linkhdr
; 
1923                         /* makes sure we still have data left to be sent at this point */ 
1924                         if (so
->so_snd
.sb_mb 
== NULL 
|| off 
< 0) { 
1925                                 if (m 
!= NULL
)  m_freem(m
); 
1926                                 error 
= 0; /* should we return an error? */ 
1929                         m_copydata(so
->so_snd
.sb_mb
, off
, (int) len
, 
1930                             mtod(m
, caddr_t
) + hdrlen
); 
1935                          * Retain packet header metadata at the socket 
1936                          * buffer if this is an MPTCP subflow, 
1937                          * otherwise move it. 
1939                         copymode 
= M_COPYM_MOVE_HDR
; 
1941                         if (so
->so_flags 
& SOF_MP_SUBFLOW
) { 
1942                                 copymode 
= M_COPYM_NOOP_HDR
; 
1946                                 m
->m_next 
= m_copym_mode(so
->so_snd
.sb_mb
, 
1947                                     off
, (int)len
, M_DONTWAIT
, copymode
); 
1948                                 if (m
->m_next 
== NULL
) { 
1955                                  * make sure we still have data left 
1956                                  * to be sent at this point 
1958                                 if (so
->so_snd
.sb_mb 
== NULL
) { 
1959                                         error 
= 0; /* should we return an error? */ 
1964                                  * m_copym_with_hdrs will always return the 
1965                                  * last mbuf pointer and the offset into it that 
1966                                  * it acted on to fulfill the current request, 
1967                                  * whether a valid 'hint' was passed in or not. 
1969                                 if ((m 
= m_copym_with_hdrs(so
->so_snd
.sb_mb
, 
1970                                     off
, len
, M_DONTWAIT
, NULL
, NULL
, 
1971                                     copymode
)) == NULL
) { 
1975                                 m
->m_data 
+= max_linkhdr
; 
1980                  * If we're sending everything we've got, set PUSH. 
1981                  * (This will keep happy those implementations which only 
1982                  * give data to the user when a buffer fills or 
1985                  * On SYN-segments we should not add the PUSH-flag. 
1987                 if (off 
+ len 
== so
->so_snd
.sb_cc 
&& !(flags 
& TH_SYN
)) 
1990                 if (tp
->t_flags 
& TF_ACKNOW
) 
1991                         tcpstat
.tcps_sndacks
++; 
1992                 else if (flags 
& (TH_SYN
|TH_FIN
|TH_RST
)) 
1993                         tcpstat
.tcps_sndctrl
++; 
1994                 else if (SEQ_GT(tp
->snd_up
, tp
->snd_una
)) 
1995                         tcpstat
.tcps_sndurg
++; 
1997                         tcpstat
.tcps_sndwinup
++; 
1999                 MGETHDR(m
, M_DONTWAIT
, MT_HEADER
);      /* MAC-OK */ 
2004                 if (MHLEN 
< (hdrlen 
+ max_linkhdr
)) { 
2005                         MCLGET(m
, M_DONTWAIT
); 
2006                         if ((m
->m_flags 
& M_EXT
) == 0) { 
2012                 m
->m_data 
+= max_linkhdr
; 
2015         m
->m_pkthdr
.rcvif 
= 0; 
2017         /* Before opt is copied to the mbuf, set the csum field */ 
2018         mptcp_output_csum(tp
, m
, len
, hdrlen
, dss_val
, sseqp
); 
2021         mac_mbuf_label_associate_inpcb(inp
, m
); 
2025                 ip6 
= mtod(m
, struct ip6_hdr 
*); 
2026                 th 
= (struct tcphdr 
*)(void *)(ip6 
+ 1); 
2027                 tcp_fillheaders(tp
, ip6
, th
); 
2028                 if ((tp
->ecn_flags 
& TE_SENDIPECT
) != 0 && len 
&& 
2029                         !SEQ_LT(tp
->snd_nxt
, tp
->snd_max
) && !sack_rxmit
) { 
2030                         ip6
->ip6_flow 
|= htonl(IPTOS_ECN_ECT0 
<< 20); 
2032                 svc_flags 
|= PKT_SCF_IPV6
; 
2034                 m
->m_pkthdr
.pf_mtag
.pftag_hdr 
= (void *)ip6
; 
2035                 m
->m_pkthdr
.pf_mtag
.pftag_flags 
|= PF_TAG_HDR_INET6
; 
2040                 ip 
= mtod(m
, struct ip 
*); 
2041                 ipov 
= (struct ipovly 
*)ip
; 
2042                 th 
= (struct tcphdr 
*)(void *)(ip 
+ 1); 
2043                 /* this picks up the pseudo header (w/o the length) */ 
2044                 tcp_fillheaders(tp
, ip
, th
); 
2045                 if ((tp
->ecn_flags 
& TE_SENDIPECT
) != 0 && len 
&& 
2046                     !SEQ_LT(tp
->snd_nxt
, tp
->snd_max
) && 
2047                     !sack_rxmit 
&& !(flags 
& TH_SYN
)) { 
2048                         ip
->ip_tos 
|= IPTOS_ECN_ECT0
; 
2051                 m
->m_pkthdr
.pf_mtag
.pftag_hdr 
= (void *)ip
; 
2052                 m
->m_pkthdr
.pf_mtag
.pftag_flags 
|= PF_TAG_HDR_INET
; 
2057          * Fill in fields, remembering maximum advertised 
2058          * window for use in delaying messages about window sizes. 
2059          * If resending a FIN, be sure not to use a new sequence number. 
2061         if ((flags 
& TH_FIN
) && (tp
->t_flags 
& TF_SENTFIN
) && 
2062             tp
->snd_nxt 
== tp
->snd_max
) 
2065          * If we are doing retransmissions, then snd_nxt will 
2066          * not reflect the first unsent octet.  For ACK only 
2067          * packets, we do not want the sequence number of the 
2068          * retransmitted packet, we want the sequence number 
2069          * of the next unsent octet.  So, if there is no data 
2070          * (and no SYN or FIN), use snd_max instead of snd_nxt 
2071          * when filling in ti_seq.  But if we are in persist 
2072          * state, snd_max might reflect one byte beyond the 
2073          * right edge of the window, so use snd_nxt in that 
2074          * case, since we know we aren't doing a retransmission. 
2075          * (retransmit and persist are mutually exclusive...) 
2077          * Note the state of this retransmit segment to detect spurious 
2080         if (sack_rxmit 
== 0) { 
2081                 if (len 
|| (flags 
& (TH_SYN
|TH_FIN
)) || 
2082                     tp
->t_timer
[TCPT_PERSIST
]) { 
2083                         th
->th_seq 
= htonl(tp
->snd_nxt
); 
2084                         if (SEQ_LT(tp
->snd_nxt
, tp
->snd_max
)) { 
2085                                 if (SACK_ENABLED(tp
) && len 
> 1) { 
2086                                         tcp_rxtseg_insert(tp
, tp
->snd_nxt
, 
2087                                             (tp
->snd_nxt 
+ len 
- 1)); 
2090                                         m
->m_pkthdr
.pkt_flags 
|= 
2094                         th
->th_seq 
= htonl(tp
->snd_max
); 
2097                 th
->th_seq 
= htonl(p
->rxmit
); 
2098                 tcp_rxtseg_insert(tp
, p
->rxmit
, (p
->rxmit 
+ len 
- 1)); 
2100                 tp
->sackhint
.sack_bytes_rexmit 
+= len
; 
2102                         m
->m_pkthdr
.pkt_flags 
|= PKTF_TCP_REXMT
; 
2104         th
->th_ack 
= htonl(tp
->rcv_nxt
); 
2105         tp
->last_ack_sent 
= tp
->rcv_nxt
; 
2107         /* Initialize the ACK field to a value as 0 ack fields are dropped */ 
2108         if (early_data_sent
) { 
2109                 th
->th_ack 
= th
->th_seq 
+ 1; 
2113                 bcopy(opt
, th 
+ 1, optlen
); 
2114                 th
->th_off 
= (sizeof (struct tcphdr
) + optlen
) >> 2; 
2116         th
->th_flags 
= flags
; 
2118          * Calculate receive window.  Don't shrink window, 
2119          * but avoid silly window syndrome. 
2121         if (recwin 
< (int32_t)(so
->so_rcv
.sb_hiwat 
/ 4) && recwin 
< (int)tp
->t_maxseg
) 
2123         if (recwin 
< (int32_t)(tp
->rcv_adv 
- tp
->rcv_nxt
)) 
2124                 recwin 
= (int32_t)(tp
->rcv_adv 
- tp
->rcv_nxt
); 
2125         if (tp
->t_flags 
& TF_SLOWLINK 
&& slowlink_wsize 
> 0) { 
2126                 if (recwin 
> (int32_t)slowlink_wsize
)  
2127                         recwin 
= slowlink_wsize
; 
2131         if (tcp_recv_bg 
== 1  || IS_TCP_RECV_BG(so
)) { 
2132                 if (tcp_recv_throttle(tp
)) { 
2133                         uint32_t min_iaj_win 
=  
2134                                 tcp_min_iaj_win 
* tp
->t_maxseg
; 
2135                         if (tp
->iaj_rwintop 
== 0 || 
2136                                 SEQ_LT(tp
->iaj_rwintop
, tp
->rcv_adv
)) 
2137                                 tp
->iaj_rwintop 
= tp
->rcv_adv
;  
2138                         if (SEQ_LT(tp
->iaj_rwintop
,  
2139                                 tp
->rcv_nxt 
+ min_iaj_win
)) 
2140                                 tp
->iaj_rwintop 
=  tp
->rcv_nxt 
+ min_iaj_win
; 
2141                         recwin 
= min(tp
->iaj_rwintop 
- tp
->rcv_nxt
, recwin
); 
2144 #endif /* TRAFFIC_MGT */ 
2146         if (recwin 
> (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
)) 
2147                 recwin 
= (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
); 
2148         th
->th_win 
= htons((u_short
) (recwin
>>tp
->rcv_scale
)); 
2151          * Adjust the RXWIN0SENT flag - indicate that we have advertised 
2152          * a 0 window.  This may cause the remote transmitter to stall.  This 
2153          * flag tells soreceive() to disable delayed acknowledgements when 
2154          * draining the buffer.  This can occur if the receiver is attempting 
2155          * to read more data than can be buffered prior to transmitting on 
2158         if (th
->th_win 
== 0) 
2159                 tp
->t_flags 
|= TF_RXWIN0SENT
; 
2161                 tp
->t_flags 
&= ~TF_RXWIN0SENT
; 
2162         if (SEQ_GT(tp
->snd_up
, tp
->snd_nxt
)) { 
2163                 th
->th_urp 
= htons((u_short
)(tp
->snd_up 
- tp
->snd_nxt
)); 
2164                 th
->th_flags 
|= TH_URG
; 
2167                  * If no urgent pointer to send, then we pull 
2168                  * the urgent pointer to the left edge of the send window 
2169                  * so that it doesn't drift into the send window on sequence 
2170                  * number wraparound. 
2172                 tp
->snd_up 
= tp
->snd_una
;               /* drag it along */ 
2176          * Put TCP length in extended header, and then 
2177          * checksum extended header and data. 
2179         m
->m_pkthdr
.len 
= hdrlen 
+ len
; /* in6_cksum() need this */ 
2183                  * ip6_plen does not need to be filled now, and will be filled 
2186                 m
->m_pkthdr
.csum_flags 
= CSUM_TCPIPV6
; 
2187                 m
->m_pkthdr
.csum_data 
= offsetof(struct tcphdr
, th_sum
); 
2189                         th
->th_sum 
= in_addword(th
->th_sum
,  
2190                                 htons((u_short
)(optlen 
+ len
))); 
2195                 m
->m_pkthdr
.csum_flags 
= CSUM_TCP
; 
2196                 m
->m_pkthdr
.csum_data 
= offsetof(struct tcphdr
, th_sum
); 
2198                         th
->th_sum 
= in_addword(th
->th_sum
,  
2199                                 htons((u_short
)(optlen 
+ len
))); 
2203          * Enable TSO and specify the size of the segments. 
2204          * The TCP pseudo header checksum is always provided. 
2209                         m
->m_pkthdr
.csum_flags 
|= CSUM_TSO_IPV6
; 
2212                         m
->m_pkthdr
.csum_flags 
|= CSUM_TSO_IPV4
; 
2214                 m
->m_pkthdr
.tso_segsz 
= tp
->t_maxopd 
- optlen
; 
2216                 m
->m_pkthdr
.tso_segsz 
= 0; 
2220          * In transmit state, time the transmission and arrange for 
2221          * the retransmit.  In persist state, just set snd_max. 
2223         if (!(tp
->t_flagsext 
& TF_FORCE
) 
2224             || tp
->t_timer
[TCPT_PERSIST
] == 0) { 
2225                 tcp_seq startseq 
= tp
->snd_nxt
; 
2228                  * Advance snd_nxt over sequence space of this segment. 
2230                 if (flags 
& (TH_SYN
|TH_FIN
)) { 
2233                         if ((flags 
& TH_FIN
) &&  
2234                                 !(tp
->t_flags 
& TF_SENTFIN
)) { 
2236                                 tp
->t_flags 
|= TF_SENTFIN
; 
2241                 if (sack_rescue_rxt 
== TRUE
) { 
2242                         tp
->snd_nxt 
= old_snd_nxt
; 
2243                         sack_rescue_rxt 
= FALSE
; 
2244                         tcpstat
.tcps_pto_in_recovery
++; 
2248                 if (SEQ_GT(tp
->snd_nxt
, tp
->snd_max
)) { 
2249                         tp
->snd_max 
= tp
->snd_nxt
; 
2251                          * Time this transmission if not a retransmission and 
2252                          * not currently timing anything. 
2254                         if (tp
->t_rtttime 
== 0) { 
2255                                 tp
->t_rtttime 
= tcp_now
; 
2256                                 tp
->t_rtseq 
= startseq
; 
2257                                 tcpstat
.tcps_segstimed
++; 
2259                                 /* update variables related to pipe ack */ 
2260                                 tp
->t_pipeack_lastuna 
= tp
->snd_una
; 
2265                  * Set retransmit timer if not currently set, 
2266                  * and not doing an ack or a keep-alive probe. 
2269                 if (tp
->t_timer
[TCPT_REXMT
] == 0 && 
2270                     ((sack_rxmit 
&& tp
->snd_nxt 
!= tp
->snd_max
) || 
2271                         tp
->snd_nxt 
!= tp
->snd_una 
|| (flags 
& TH_FIN
))) { 
2272                         if (tp
->t_timer
[TCPT_PERSIST
]) { 
2273                                 tp
->t_timer
[TCPT_PERSIST
] = 0; 
2276                                 tp
->t_persist_stop 
= 0; 
2278                         tp
->t_timer
[TCPT_REXMT
] = 
2279                                 OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
2283                  * Set tail loss probe timeout if new data is being 
2284                  * transmitted. This will be supported only when 
2285                  * SACK option is enabled on a connection. 
2287                  * Every time new data is sent PTO will get reset. 
2289                 if (tcp_enable_tlp 
&& tp
->t_state 
== TCPS_ESTABLISHED 
&& 
2290                     SACK_ENABLED(tp
) && !IN_FASTRECOVERY(tp
) 
2291                     && tp
->snd_nxt 
== tp
->snd_max
 
2292                     && SEQ_GT(tp
->snd_nxt
, tp
->snd_una
) 
2293                     && tp
->t_rxtshift 
== 0 
2294                     && (tp
->t_flagsext 
& (TF_SENT_TLPROBE
|TF_PKTS_REORDERED
)) == 0) { 
2295                         u_int32_t pto
, srtt
, new_rto 
= 0; 
2298                          * Using SRTT alone to set PTO can cause spurious 
2299                          * retransmissions on wireless networks where there 
2300                          * is a lot of variance in RTT. Taking variance  
2301                          * into account will avoid this. 
2303                         srtt 
= tp
->t_srtt 
>> TCP_RTT_SHIFT
; 
2304                         pto 
= ((TCP_REXMTVAL(tp
)) * 3) >> 1; 
2305                         pto 
= max (2 * srtt
, pto
); 
2306                         if ((tp
->snd_max 
- tp
->snd_una
) == tp
->t_maxseg
) 
2308                                     (((3 * pto
) >> 2) + tcp_delack 
* 2)); 
2312                         /* if RTO is less than PTO, choose RTO instead */ 
2313                         if (tp
->t_rxtcur 
< pto
) { 
2315                                  * Schedule PTO instead of RTO in favor of 
2320                                 /* Reset the next RTO to be after PTO. */ 
2321                                 TCPT_RANGESET(new_rto
, 
2322                                     (pto 
+ TCP_REXMTVAL(tp
)), 
2323                                     max(tp
->t_rttmin
, tp
->t_rttcur 
+ 2), 
2325                                 tp
->t_timer
[TCPT_REXMT
] = 
2326                                     OFFSET_FROM_START(tp
, new_rto
); 
2328                         tp
->t_timer
[TCPT_PTO
] = OFFSET_FROM_START(tp
, pto
); 
2332                  * Persist case, update snd_max but since we are in 
2333                  * persist mode (no window) we do not update snd_nxt. 
2338                 if ((flags 
& TH_FIN
) &&  
2339                         !(tp
->t_flags 
& TF_SENTFIN
)) { 
2341                         tp
->t_flags 
|= TF_SENTFIN
; 
2343                 if (SEQ_GT(tp
->snd_nxt 
+ xlen
, tp
->snd_max
)) 
2344                         tp
->snd_max 
= tp
->snd_nxt 
+ len
; 
2351         if (so_options 
& SO_DEBUG
) 
2352                 tcp_trace(TA_OUTPUT
, tp
->t_state
, tp
, mtod(m
, void *), th
, 0); 
2356          * Fill in IP length and desired time to live and 
2357          * send to IP level.  There should be a better way 
2358          * to handle ttl and tos; we could keep them in 
2359          * the template, but need a way to checksum without them. 
2363          * m->m_pkthdr.len should have been set before cksum calculation, 
2364          * because in6_cksum() needs it. 
2368                  * we separately set hoplimit for every segment, since the 
2369                  * user might want to change the value via setsockopt. 
2370                  * Also, desired default hop limit might be changed via 
2371                  * Neighbor Discovery. 
2373                 ip6
->ip6_hlim 
= in6_selecthlim(inp
, inp
->in6p_route
.ro_rt 
? 
2374                     inp
->in6p_route
.ro_rt
->rt_ifp 
: NULL
); 
2376                 /* TODO: IPv6 IP6TOS_ECT bit on */ 
2377                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
2378                     ((inp
->inp_fport 
<< 16) | inp
->inp_lport
), 
2379                     (((inp
->in6p_laddr
.s6_addr16
[0] & 0xffff) << 16) | 
2380                     (inp
->in6p_faddr
.s6_addr16
[0] & 0xffff)), 
2385                 ip
->ip_len 
= m
->m_pkthdr
.len
; 
2386                 ip
->ip_ttl 
= inp
->inp_ip_ttl
;   /* XXX */ 
2387                 ip
->ip_tos 
|= (inp
->inp_ip_tos 
& ~IPTOS_ECN_MASK
);/* XXX */ 
2388                 KERNEL_DEBUG(DBG_LAYER_BEG
, 
2389                     ((inp
->inp_fport 
<< 16) | inp
->inp_lport
), 
2390                     (((inp
->inp_laddr
.s_addr 
& 0xffff) << 16) | 
2391                     (inp
->inp_faddr
.s_addr 
& 0xffff)), 0,0,0); 
2395          * See if we should do MTU discovery. 
2396          * Look at the flag updated on the following criteria: 
2397          *      1) Path MTU discovery is authorized by the sysctl 
2398          *      2) The route isn't set yet (unlikely but could happen) 
2399          *      3) The route is up 
2400          *      4) the MTU is not locked (if it is, then discovery has been 
2401          *         disabled for that route) 
2406                 if (path_mtu_discovery 
&& (tp
->t_flags 
& TF_PMTUD
)) 
2407                         ip
->ip_off 
|= IP_DF
; 
2411                 necp_kernel_policy_id policy_id
; 
2412                 u_int32_t route_rule_id
; 
2413                 if (!necp_socket_is_allowed_to_send_recv(inp
, &policy_id
, &route_rule_id
)) { 
2415                         error 
= EHOSTUNREACH
; 
2419                 necp_mark_packet_from_socket(m
, inp
, policy_id
, route_rule_id
); 
2424         if (inp
->inp_sp 
!= NULL
) 
2425                 ipsec_setsocket(m
, so
); 
2429          * The socket is kept locked while sending out packets in ip_output, even if packet chaining is not active. 
2434          * Embed the flow hash in pkt hdr and mark the packet as 
2435          * capable of flow controlling 
2437         m
->m_pkthdr
.pkt_flowsrc 
= FLOWSRC_INPCB
; 
2438         m
->m_pkthdr
.pkt_flowid 
= inp
->inp_flowhash
; 
2439         m
->m_pkthdr
.pkt_flags 
|= PKTF_FLOW_ID 
| PKTF_FLOW_LOCALSRC
; 
2441         /* Disable flow advisory when using MPTCP. */ 
2442         if (!(tp
->t_mpflags 
& TMPF_MPTCP_TRUE
)) 
2444                 m
->m_pkthdr
.pkt_flags 
|= PKTF_FLOW_ADV
; 
2445         m
->m_pkthdr
.pkt_proto 
= IPPROTO_TCP
; 
2447         m
->m_nextpkt 
= NULL
; 
2449         if (inp
->inp_last_outifp 
!= NULL 
&& 
2450             !(inp
->inp_last_outifp
->if_flags 
& IFF_LOOPBACK
)) { 
2451                 /* Hint to prioritize this packet if 
2452                  * 1. if the packet has no data 
2453                  * 2. the interface supports transmit-start model and did  
2454                  *    not disable ACK prioritization. 
2455                  * 3. Only ACK flag is set. 
2456                  * 4. there is no outstanding data on this connection. 
2458                 if (tcp_prioritize_acks 
!= 0 && len 
== 0 && 
2459                     (inp
->inp_last_outifp
->if_eflags 
&  
2460                         (IFEF_TXSTART 
| IFEF_NOACKPRI
)) == IFEF_TXSTART 
&& 
2461                     th
->th_flags 
== TH_ACK 
&& tp
->snd_una 
== tp
->snd_max 
&& 
2462                     tp
->t_timer
[TCPT_REXMT
] == 0) { 
2463                         svc_flags 
|= PKT_SCF_TCP_ACK
; 
2465                 set_packet_service_class(m
, so
, MBUF_SC_UNSPEC
, svc_flags
); 
2468         tp
->t_pktlist_sentlen 
+= len
; 
2473                 DTRACE_TCP5(send
, struct mbuf 
*, m
, struct inpcb 
*, inp
, 
2474                         struct ip6 
*, ip6
, struct tcpcb 
*, tp
, struct tcphdr 
*, 
2479                 DTRACE_TCP5(send
, struct mbuf 
*, m
, struct inpcb 
*, inp
, 
2480                         struct ip 
*, ip
, struct tcpcb 
*, tp
, struct tcphdr 
*, th
); 
2483         if (tp
->t_pktlist_head 
!= NULL
) { 
2484                 tp
->t_pktlist_tail
->m_nextpkt 
= m
; 
2485                 tp
->t_pktlist_tail 
= m
; 
2487                 packchain_newlist
++; 
2488                 tp
->t_pktlist_head 
= tp
->t_pktlist_tail 
= m
; 
2491         if ((lro_ackmore
) && (!sackoptlen
) && (!tp
->t_timer
[TCPT_PERSIST
]) && 
2492                         ((th
->th_flags 
& TH_ACK
) == TH_ACK
) && (!len
) && 
2493                         (tp
->t_state 
== TCPS_ESTABLISHED
)) { 
2494                 /* For a pure ACK, see if you need to send more of them */       
2495                 mnext 
= tcp_send_lroacks(tp
, m
, th
); 
2497                         tp
->t_pktlist_tail
->m_nextpkt 
= mnext
; 
2498                         if (mnext
->m_nextpkt 
== NULL
) { 
2499                                 tp
->t_pktlist_tail 
= mnext
; 
2502                                 struct mbuf 
*tail
, *next
; 
2503                                 next 
= mnext
->m_nextpkt
; 
2504                                 tail 
= next
->m_nextpkt
; 
2507                                         tail 
= tail
->m_nextpkt
; 
2510                                 tp
->t_pktlist_tail 
= next
; 
2515         if (sendalot 
== 0 || (tp
->t_state 
!= TCPS_ESTABLISHED
) || 
2516             (tp
->snd_cwnd 
<= (tp
->snd_wnd 
/ 8)) || 
2517             (tp
->t_flags 
& (TH_PUSH 
| TF_ACKNOW
)) || 
2518             (tp
->t_flagsext 
& TF_FORCE
) || 
2519             tp
->t_lastchain 
>= tcp_packet_chaining
) { 
2521                 while (inp
->inp_sndinprog_cnt 
== 0 && 
2522                         tp
->t_pktlist_head 
!= NULL
) { 
2523                         packetlist 
= tp
->t_pktlist_head
; 
2524                         packchain_listadd 
= tp
->t_lastchain
; 
2526                         lost 
= tp
->t_pktlist_sentlen
; 
2527                         TCP_PKTLIST_CLEAR(tp
); 
2529                         error 
= tcp_ip_output(so
, tp
, packetlist
, 
2530                             packchain_listadd
, tp_inp_options
, 
2531                             (so_options 
& SO_DONTROUTE
), 
2532                             (sack_rxmit 
| (sack_bytes_rxmt 
!= 0)), recwin
, 
2540                                  * Take into account the rest of unsent 
2541                                  * packets in the packet list for this tcp 
2542                                  * into "lost", since we're about to free 
2543                                  * the whole list below. 
2545                                 lost 
+= tp
->t_pktlist_sentlen
; 
2551                 /* tcp was closed while we were in ip; resume close */ 
2552                 if (inp
->inp_sndinprog_cnt 
== 0 && 
2553                         (tp
->t_flags 
& TF_CLOSING
)) { 
2554                         tp
->t_flags 
&= ~TF_CLOSING
; 
2555                         (void) tcp_close(tp
); 
2561                 tcpstat
.tcps_sndtotal
++; 
2567                  * Assume that the packets were lost, so back out the 
2568                  * sequence number advance, if any.  Note that the "lost" 
2569                  * variable represents the amount of user data sent during 
2570                  * the recent call to ip_output_list() plus the amount of 
2571                  * user data in the packet list for this tcp at the moment. 
2573                 if (!(tp
->t_flagsext 
& TF_FORCE
) 
2574                     || tp
->t_timer
[TCPT_PERSIST
] == 0) { 
2576                          * No need to check for TH_FIN here because 
2577                          * the TF_SENTFIN flag handles that case. 
2579                         if ((flags 
& TH_SYN
) == 0) { 
2581                                         if (SEQ_GT((p
->rxmit 
- lost
), 
2585                                                 lost 
= p
->rxmit 
- tp
->snd_una
; 
2586                                                 p
->rxmit 
= tp
->snd_una
; 
2588                                         tp
->sackhint
.sack_bytes_rexmit 
-= lost
; 
2590                                         if (SEQ_GT((tp
->snd_nxt 
- lost
), 
2592                                                 tp
->snd_nxt 
-= lost
; 
2594                                                 tp
->snd_nxt 
= tp
->snd_una
; 
2599                 if (tp
->t_pktlist_head 
!= NULL
) 
2600                         m_freem_list(tp
->t_pktlist_head
); 
2601                 TCP_PKTLIST_CLEAR(tp
); 
2603                 if (error 
== ENOBUFS
) { 
2604                         if (!tp
->t_timer
[TCPT_REXMT
] && 
2605                                 !tp
->t_timer
[TCPT_PERSIST
]) 
2606                                 tp
->t_timer
[TCPT_REXMT
] =  
2607                                         OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
2608                         tp
->snd_cwnd 
= tp
->t_maxseg
; 
2609                         tp
->t_bytes_acked 
= 0; 
2610                         tcp_check_timer_state(tp
); 
2611                         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
, 0,0,0,0,0); 
2613                         tcp_ccdbg_trace(tp
, NULL
, TCP_CC_OUTPUT_ERROR
); 
2616                 if (error 
== EMSGSIZE
) { 
2618                          * ip_output() will have already fixed the route 
2619                          * for us.  tcp_mtudisc() will, as its last action, 
2620                          * initiate retransmission, so it is important to 
2623                          * If TSO was active we either got an interface 
2624                          * without TSO capabilities or TSO was turned off. 
2625                          * Disable it for this connection too and 
2626                          * immediately retry with MSS sized segments generated 
2630                                 tp
->t_flags 
&= ~TF_TSO
; 
2632                         tcp_mtudisc(inp
, 0); 
2633                         tcp_check_timer_state(tp
); 
2635                         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
, 0,0,0,0,0); 
2639                  * Unless this is due to interface restriction policy, 
2640                  * treat EHOSTUNREACH/ENETDOWN as a soft error. 
2642                 if ((error 
== EHOSTUNREACH 
|| error 
== ENETDOWN
) && 
2643                     TCPS_HAVERCVDSYN(tp
->t_state
) &&  
2644                     !inp_restricted_send(inp
, inp
->inp_last_outifp
)) { 
2645                                 tp
->t_softerror 
= error
; 
2648                 tcp_check_timer_state(tp
); 
2649                 KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
, 0,0,0,0,0); 
2653         tcpstat
.tcps_sndtotal
++; 
2655         KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT 
| DBG_FUNC_END
,0,0,0,0,0); 
2659         tcp_check_timer_state(tp
); 
2664 tcp_ip_output(struct socket 
*so
, struct tcpcb 
*tp
, struct mbuf 
*pkt
, 
2665     int cnt
, struct mbuf 
*opt
, int flags
, int sack_in_progress
, int recwin
, 
2670         boolean_t unlocked 
= FALSE
; 
2671         boolean_t ifdenied 
= FALSE
; 
2672         struct inpcb 
*inp 
= tp
->t_inpcb
; 
2673         struct ip_out_args ipoa 
= 
2674             { IFSCOPE_NONE
, { 0 }, IPOAF_SELECT_SRCIF
|IPOAF_BOUND_SRCADDR
, 0 }; 
2676         struct ifnet 
*outif 
= NULL
; 
2678         struct ip6_out_args ip6oa 
= 
2679             { IFSCOPE_NONE
, { 0 }, IP6OAF_SELECT_SRCIF
|IP6OAF_BOUND_SRCADDR
, 0 }; 
2680         struct route_in6 ro6
; 
2681         struct flowadv 
*adv 
= 
2682             (isipv6 
? &ip6oa
.ip6oa_flowadv 
: &ipoa
.ipoa_flowadv
); 
2684         struct flowadv 
*adv 
= &ipoa
.ipoa_flowadv
; 
2687         /* If socket was bound to an ifindex, tell ip_output about it */ 
2688         if (inp
->inp_flags 
& INP_BOUND_IF
) { 
2691                         ip6oa
.ip6oa_boundif 
= inp
->inp_boundifp
->if_index
; 
2692                         ip6oa
.ip6oa_flags 
|= IP6OAF_BOUND_IF
; 
2696                         ipoa
.ipoa_boundif 
= inp
->inp_boundifp
->if_index
; 
2697                         ipoa
.ipoa_flags 
|= IPOAF_BOUND_IF
; 
2701         if (INP_NO_CELLULAR(inp
)) { 
2704                         ip6oa
.ip6oa_flags 
|=  IP6OAF_NO_CELLULAR
; 
2707                         ipoa
.ipoa_flags 
|=  IPOAF_NO_CELLULAR
; 
2709         if (INP_NO_EXPENSIVE(inp
)) { 
2712                         ip6oa
.ip6oa_flags 
|=  IP6OAF_NO_EXPENSIVE
; 
2715                         ipoa
.ipoa_flags 
|=  IPOAF_NO_EXPENSIVE
; 
2718         if (INP_AWDL_UNRESTRICTED(inp
)) { 
2721                         ip6oa
.ip6oa_flags 
|=  IP6OAF_AWDL_UNRESTRICTED
; 
2724                         ipoa
.ipoa_flags 
|=  IPOAF_AWDL_UNRESTRICTED
; 
2729                 flags 
|= IPV6_OUTARGS
; 
2732                 flags 
|= IP_OUTARGS
; 
2734         /* Copy the cached route and take an extra reference */ 
2737                 in6p_route_copyout(inp
, &ro6
); 
2740                 inp_route_copyout(inp
, &ro
); 
2743          * Data sent (as far as we can tell). 
2744          * If this advertises a larger window than any other segment, 
2745          * then remember the size of the advertised window. 
2746          * Make sure ACK/DELACK conditions are cleared before 
2747          * we unlock the socket. 
2749         if (recwin 
> 0 && SEQ_GT(tp
->rcv_nxt 
+ recwin
, tp
->rcv_adv
)) 
2750                 tp
->rcv_adv 
= tp
->rcv_nxt 
+ recwin
; 
2751         tp
->last_ack_sent 
= tp
->rcv_nxt
; 
2752         tp
->t_flags 
&= ~(TF_ACKNOW 
| TF_DELACK
); 
2753         tp
->t_timer
[TCPT_DELACK
] = 0; 
2754         tp
->t_unacksegs 
= 0; 
2756         /* Increment the count of outstanding send operations */ 
2757         inp
->inp_sndinprog_cnt
++; 
2760          * If allowed, unlock TCP socket while in IP 
2761          * but only if the connection is established and 
2762          * in a normal mode where reentrancy on the tcpcb won't be 
2764          * - there is no SACK episode 
2765          * - we're not in Fast Recovery mode 
2766          * - if we're not sending from an upcall. 
2768         if (tcp_output_unlocked 
&& !so
->so_upcallusecount 
&& 
2769             (tp
->t_state 
== TCPS_ESTABLISHED
) && (sack_in_progress 
== 0) && 
2770             !IN_FASTRECOVERY(tp
)) { 
2773                 socket_unlock(so
, 0); 
2777          * Don't send down a chain of packets when: 
2778          * - TCP chaining is disabled 
2779          * - there is an IPsec rule set 
2780          * - there is a non default rule set for the firewall 
2783         chain 
= tcp_packet_chaining 
> 1 
2788                 && (fw_enable 
== 0 || fw_bypass
) 
2790                 ; // I'm important, not extraneous 
2793         while (pkt 
!= NULL
) { 
2794                 struct mbuf 
*npkt 
= pkt
->m_nextpkt
; 
2797                         pkt
->m_nextpkt 
= NULL
; 
2799                          * If we are not chaining, make sure to set the packet 
2800                          * list count to 0 so that IP takes the right path; 
2801                          * this is important for cases such as IPSec where a 
2802                          * single mbuf might result in multiple mbufs as part 
2803                          * of the encapsulation.  If a non-zero count is passed 
2804                          * down to IP, the head of the chain might change and 
2805                          * we could end up skipping it (thus generating bogus 
2806                          * packets).  Fixing it in IP would be desirable, but 
2807                          * for now this would do it. 
2813                         error 
= ip6_output_list(pkt
, cnt
, 
2814                             inp
->in6p_outputopts
, &ro6
, flags
, NULL
, NULL
, 
2816                         ifdenied 
= (ip6oa
.ip6oa_retflags 
& IP6OARF_IFDENIED
); 
2819                         error 
= ip_output_list(pkt
, cnt
, opt
, &ro
, flags
, NULL
, 
2821                         ifdenied 
= (ipoa
.ipoa_retflags 
& IPOARF_IFDENIED
); 
2824                 if (chain 
|| error
) { 
2826                          * If we sent down a chain then we are done since 
2827                          * the callee had taken care of everything; else 
2828                          * we need to free the rest of the chain ourselves. 
2841          * Enter flow controlled state if the connection is established 
2842          * and is not in recovery. 
2844          * A connection will enter suspended state even if it is in  
2847         if (((adv
->code 
== FADV_FLOW_CONTROLLED 
&& !IN_FASTRECOVERY(tp
)) || 
2848             adv
->code 
== FADV_SUSPENDED
) &&  
2849             !(tp
->t_flags 
& TF_CLOSING
) && 
2850             tp
->t_state 
== TCPS_ESTABLISHED
) { 
2852                 rc 
= inp_set_fc_state(inp
, adv
->code
); 
2855                         tcp_ccdbg_trace(tp
, NULL
,  
2856                             ((adv
->code 
== FADV_FLOW_CONTROLLED
) ? 
2857                             TCP_CC_FLOW_CONTROL 
: TCP_CC_SUSPEND
)); 
2861          * When an interface queue gets suspended, some of the 
2862          * packets are dropped. Return ENOBUFS, to update the 
2865         if (adv
->code 
== FADV_SUSPENDED
) 
2868         VERIFY(inp
->inp_sndinprog_cnt 
> 0); 
2869         if ( --inp
->inp_sndinprog_cnt 
== 0) 
2870                 inp
->inp_flags 
&= ~(INP_FC_FEEDBACK
); 
2874                 if (ro6
.ro_rt 
!= NULL 
&& (outif 
= ro6
.ro_rt
->rt_ifp
) != 
2875                     inp
->in6p_last_outifp
) 
2876                         inp
->in6p_last_outifp 
= outif
; 
2879                 if (ro
.ro_rt 
!= NULL 
&& (outif 
= ro
.ro_rt
->rt_ifp
) != 
2880                     inp
->inp_last_outifp
) 
2881                         inp
->inp_last_outifp 
= outif
; 
2883         if (error 
!= 0 && ifdenied 
&&  
2884             (INP_NO_CELLULAR(inp
) || INP_NO_EXPENSIVE(inp
))) 
2885                 soevent(inp
->inp_socket
, 
2886                     (SO_FILT_HINT_LOCKED
|SO_FILT_HINT_IFDENIED
)); 
2888         /* Synchronize cached PCB route & options */ 
2891                 in6p_route_copyin(inp
, &ro6
); 
2894                 inp_route_copyin(inp
, &ro
); 
2896         if (tp
->t_state 
< TCPS_ESTABLISHED 
&& tp
->t_rxtshift 
== 0 &&  
2897                 tp
->t_inpcb
->inp_route
.ro_rt 
!= NULL
) { 
2898                 /* If we found the route and there is an rtt on it 
2899                  * reset the retransmit timer 
2901                 tcp_getrt_rtt(tp
, tp
->t_inpcb
->in6p_route
.ro_rt
); 
2902                 tp
->t_timer
[TCPT_REXMT
] = OFFSET_FROM_START(tp
, tp
->t_rxtcur
); 
2909         register struct tcpcb 
*tp
; 
2911         int t 
= ((tp
->t_srtt 
>> 2) + tp
->t_rttvar
) >> 1; 
2913         /* If a PERSIST_TIMER option was set we will limit the 
2914          * time the persist timer will be active for that connection 
2915          * in order to avoid DOS by using zero window probes. 
2916          * see rdar://5805356 
2919         if ((tp
->t_persist_timeout 
!= 0) && 
2920             (tp
->t_timer
[TCPT_PERSIST
] == 0) && 
2921             (tp
->t_persist_stop 
== 0)) { 
2922                 tp
->t_persist_stop 
= tcp_now 
+ tp
->t_persist_timeout
; 
2926          * Start/restart persistance timer. 
2928         TCPT_RANGESET(tp
->t_timer
[TCPT_PERSIST
], 
2929             t 
* tcp_backoff
[tp
->t_rxtshift
], 
2930             TCPTV_PERSMIN
, TCPTV_PERSMAX
, 0); 
2931         tp
->t_timer
[TCPT_PERSIST
] = OFFSET_FROM_START(tp
, tp
->t_timer
[TCPT_PERSIST
]); 
2933         if (tp
->t_rxtshift 
< TCP_MAXRXTSHIFT
) 
2938  * Send as many acks as data coalesced. Every other packet when stretch 
2939  * ACK is not enabled. Every 8 packets, if stretch ACK is enabled. 
2942 tcp_send_lroacks(struct tcpcb 
*tp
, struct mbuf 
*m
, struct tcphdr 
*th
) 
2944         struct mbuf 
*mnext 
= NULL
, *ack_chain 
= NULL
, *tail 
= NULL
; 
2946         tcp_seq org_ack 
= ntohl(th
->th_ack
); 
2947         tcp_seq prev_ack 
= 0; 
2948         int tack_offset 
= 28; /* XXX IPv6 and IP options not supported */ 
2949         int twin_offset 
= 34; /* XXX IPv6 and IP options not supported */ 
2950         int ack_size 
= (tp
->t_flags 
& TF_STRETCHACK
) ? 
2951                         (maxseg_unacked 
* tp
->t_maxseg
) : (tp
->t_maxseg 
<< 1); 
2952         int segs_acked 
= (tp
->t_flags 
& TF_STRETCHACK
) ? maxseg_unacked 
: 2; 
2953         struct mbuf 
*prev_ack_pkt 
= NULL
; 
2954         struct socket 
*so 
= tp
->t_inpcb
->inp_socket
; 
2955         unsigned short winsz 
= ntohs(th
->th_win
); 
2956         unsigned int scaled_win 
= winsz
<<tp
->rcv_scale
; 
2957         tcp_seq win_rtedge 
= org_ack 
+ scaled_win
; 
2959         count 
= tp
->t_lropktlen
/tp
->t_maxseg
; 
2961         prev_ack 
= (org_ack 
- tp
->t_lropktlen
) + ack_size
; 
2962         if (prev_ack 
< org_ack
) { 
2963                 ack_chain 
= m_dup(m
, M_DONTWAIT
); 
2965                         th
->th_ack 
= htonl(prev_ack
); 
2966                         /* Keep adv window constant for duplicated ACK packets */ 
2967                         scaled_win 
= win_rtedge 
- prev_ack
; 
2968                         if (scaled_win 
> (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
)) 
2969                                 scaled_win 
= (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
); 
2970                         th
->th_win 
= htons(scaled_win
>>tp
->rcv_scale
); 
2971                         if (lrodebug 
== 5) { 
2972                                 printf("%s: win = %d winsz = %d sc = %d" 
2974                                     __func__
, scaled_win
>>tp
->rcv_scale
, winsz
, 
2975                                     tp
->rcv_scale
, tp
->t_lropktlen
, count
); 
2978                         count 
-= segs_acked
; /* accounts for prev_ack packet */ 
2979                         count 
= (count 
<= segs_acked
) ? 0 : count 
- segs_acked
; 
2980                         tcpstat
.tcps_sndacks
++; 
2981                         so_tc_update_stats(m
, so
, m_get_service_class(m
)); 
2987                 tp
->t_lropktlen 
= 0; 
2991         prev_ack_pkt 
= ack_chain
; 
2994                 if ((prev_ack 
+ ack_size
) < org_ack
) { 
2995                         prev_ack 
+= ack_size
; 
2998                          * The last ACK sent must have the ACK number that TCP 
2999                          * thinks is the last sent ACK number. 
3003                 mnext 
= m_dup(prev_ack_pkt
, M_DONTWAIT
); 
3005                         /* Keep adv window constant for duplicated ACK packets */ 
3006                         scaled_win 
= win_rtedge 
- prev_ack
; 
3007                         if (scaled_win 
> (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
)) 
3008                                 scaled_win 
= (int32_t)(TCP_MAXWIN 
<< tp
->rcv_scale
); 
3009                         winsz 
= htons(scaled_win
>>tp
->rcv_scale
); 
3010                         if (lrodebug 
== 5) { 
3011                                 printf("%s: winsz = %d ack %x count %d\n", 
3012                                     __func__
, scaled_win
>>tp
->rcv_scale
, 
3015                         bcopy(&winsz
, mtod(prev_ack_pkt
, caddr_t
) + twin_offset
, 2); 
3017                         bcopy(&prev_ack
, mtod(prev_ack_pkt
, caddr_t
) + tack_offset
, 4); 
3019                         tail
->m_nextpkt 
= mnext
; 
3021                         count 
-= segs_acked
; 
3022                         tcpstat
.tcps_sndacks
++; 
3023                         so_tc_update_stats(m
, so
, m_get_service_class(m
)); 
3025                         if (lrodebug 
== 5) { 
3026                                 printf("%s: failed to alloc mbuf.\n", __func__
); 
3030                 prev_ack_pkt 
= mnext
; 
3032         tp
->t_lropktlen 
= 0; 
3037 tcp_recv_throttle (struct tcpcb 
*tp
) 
3039         uint32_t base_rtt
, newsize
; 
3041         struct sockbuf 
*sbrcv 
= &tp
->t_inpcb
->inp_socket
->so_rcv
; 
3043         if (tcp_use_rtt_recvbg 
== 1 && 
3044             TSTMP_SUPPORTED(tp
)) { 
3046                  * Timestamps are supported on this connection. Use 
3047                  * RTT to look for an increase in latency. 
3051                  * If the connection is already being throttled, leave it 
3052                  * in that state until rtt comes closer to base rtt 
3054                 if (tp
->t_flagsext 
& TF_RECV_THROTTLE
) 
3057                 base_rtt 
= get_base_rtt(tp
); 
3059                 if (base_rtt 
!= 0 && tp
->t_rttcur 
!= 0) { 
3060                         qdelay 
= tp
->t_rttcur 
- base_rtt
; 
3062                          * if latency increased on a background flow, 
3063                          * return 1 to start throttling. 
3065                         if (qdelay 
> target_qdelay
) { 
3066                                 tp
->t_flagsext 
|= TF_RECV_THROTTLE
; 
3069                                  * Reduce the recv socket buffer size to 
3072                                 if (sbrcv
->sb_idealsize 
>  
3073                                     tcp_recv_throttle_minwin
) { 
3074                                         newsize 
= sbrcv
->sb_idealsize 
>> 1; 
3075                                         /* Set a minimum of 16 K */ 
3078                                             tcp_recv_throttle_minwin
); 
3079                                         sbrcv
->sb_idealsize 
= newsize
; 
3089          * Timestamps are not supported or there is no good RTT 
3090          * measurement. Use IPDV in this case. 
3092         if (tp
->acc_iaj 
> tcp_acc_iaj_react_limit
)