]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/tcp_output.c
xnu-792.6.70.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_output.c
index d35b91673127b1e06b101cc1554e27c0d45e44e9..36e310fd153bc07a731a4ca4f7708e1c88675db8 100644 (file)
@@ -3,22 +3,19 @@
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License").  You may not use this file except in compliance with the
+ * License.  Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
  * 
  * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
  * 
  * @APPLE_LICENSE_HEADER_END@
  */
@@ -124,7 +121,14 @@ int     tcp_do_newreno = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
         0, "Enable NewReno Algorithms");
 
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newreno, CTLFLAG_RW, &tcp_do_newreno,
         0, "Enable NewReno Algorithms");
 
-struct mbuf *m_copym_with_hdrs __P((struct mbuf*, int, int, int, struct mbuf**, int*));
+int    tcp_packet_chaining = 50;       /* tcp_output flushes its packet list once packchain_listadd reaches this value */
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, packetchain, CTLFLAG_RW, &tcp_packet_chaining,
+        0, "Enable TCP output packet chaining");
+
+struct mbuf *m_copym_with_hdrs(struct mbuf*, int, int, int, struct mbuf**, int*);
+static long packchain_newlist = 0;     /* incremented when tcp_output starts a new packet list */
+static long packchain_looped = 0;      /* incremented when a packet is queued and tcp_output loops for more */
+static long packchain_sent = 0;        /* incremented after a packet list is handed to ip_output_list */
 
 
 /* temporary: for testing */
 
 
 /* temporary: for testing */
@@ -134,7 +138,25 @@ extern int ipsec_bypass;
 
 extern int slowlink_wsize;     /* window correction for slow links */
 extern u_long  route_generation;
 
 extern int slowlink_wsize;     /* window correction for slow links */
 extern u_long  route_generation;
+extern int fw_enable;          /* firewall is on: disable packet chaining */
+extern int ipsec_bypass;       /* tcp_output sends each packet immediately (no chaining) when this is 0 */
+
+extern vm_size_t       so_cache_zone_element_size;     /* divisor used by get_socket_id(); 0 makes it return 0 */
 
 
+/*
+ * Derive a 16-bit identifier for a socket from its address.
+ *
+ * The socket's address is divided by so_cache_zone_element_size and the
+ * quotient is truncated to 16 bits.
+ *
+ * Returns 0 only when so_cache_zone_element_size is 0, in which case no
+ * division is attempted.  A computed value of 0 is remapped to 0xffff,
+ * so a 0 result always means "element size unknown".
+ */
+static __inline__ u_int16_t
+get_socket_id(struct socket * s)
+{
+       u_int16_t               val;
+
+       if (so_cache_zone_element_size == 0) {
+               return (0);
+       }
+       /*
+        * Convert the pointer through unsigned long rather than a fixed
+        * 32-bit type: on LP64 targets a u_int32_t cast would discard the
+        * upper half of the address before the division.  On ILP32 targets
+        * the result is unchanged.
+        */
+       val = (u_int16_t)(((unsigned long)s) / so_cache_zone_element_size);
+       if (val == 0) {
+               val = 0xffff;
+       }
+       return (val);
+}
 
 /*
  * Tcp output routine: figure out what should be sent and send it.
 
 /*
  * Tcp output routine: figure out what should be sent and send it.
@@ -155,7 +177,7 @@ tcp_output(tp)
        register struct tcphdr *th;
        u_char opt[TCP_MAXOLEN];
        unsigned ipoptlen, optlen, hdrlen;
        register struct tcphdr *th;
        u_char opt[TCP_MAXOLEN];
        unsigned ipoptlen, optlen, hdrlen;
-       int idle, sendalot;
+       int idle, sendalot, howmuchsent = 0;
        int maxburst = TCP_MAXBURST;
        struct rmxp_tao *taop;
        struct rmxp_tao tao_noncached;
        int maxburst = TCP_MAXBURST;
        struct rmxp_tao *taop;
        struct rmxp_tao tao_noncached;
@@ -163,9 +185,13 @@ tcp_output(tp)
        int    m_off;
        struct mbuf *m_last = 0;
        struct mbuf *m_head = 0;
        int    m_off;
        struct mbuf *m_last = 0;
        struct mbuf *m_head = 0;
+       struct mbuf *packetlist = 0;
+       struct mbuf *lastpacket = 0;
 #if INET6
        int isipv6 = tp->t_inpcb->inp_vflag & INP_IPV6 ;
 #endif
 #if INET6
        int isipv6 = tp->t_inpcb->inp_vflag & INP_IPV6 ;
 #endif
+       short packchain_listadd = 0;
+       u_int16_t       socket_id = get_socket_id(so);
 
 
        /*
 
 
        /*
@@ -175,11 +201,7 @@ tcp_output(tp)
         * to send, then transmit; otherwise, investigate further.
         */
        idle = (tp->snd_max == tp->snd_una);
         * to send, then transmit; otherwise, investigate further.
         */
        idle = (tp->snd_max == tp->snd_una);
-#ifdef __APPLE__
        if (idle && tp->t_rcvtime >= tp->t_rxtcur) {
        if (idle && tp->t_rcvtime >= tp->t_rxtcur) {
-#else
-       if (idle && (ticks - tp->t_rcvtime) >= tp->t_rxtcur) {
-#endif
                /*
                 * We have been idle for "a while" and no acks are
                 * expected to clock out any data we send --
                /*
                 * We have been idle for "a while" and no acks are
                 * expected to clock out any data we send --
@@ -234,7 +256,7 @@ again:
       if ((tp->t_inpcb->inp_route.ro_rt != NULL &&
            (tp->t_inpcb->inp_route.ro_rt->generation_id != route_generation)) || (tp->t_inpcb->inp_route.ro_rt == NULL)) {
                /* check that the source address is still valid */
       if ((tp->t_inpcb->inp_route.ro_rt != NULL &&
            (tp->t_inpcb->inp_route.ro_rt->generation_id != route_generation)) || (tp->t_inpcb->inp_route.ro_rt == NULL)) {
                /* check that the source address is still valid */
-               if (ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr) == NULL) {
+               if (ifa_foraddr(tp->t_inpcb->inp_laddr.s_addr) == 0) {
                        if (tp->t_state >= TCPS_CLOSE_WAIT) {
                                tcp_close(tp);
                                return(EADDRNOTAVAIL);
                        if (tp->t_state >= TCPS_CLOSE_WAIT) {
                                tcp_close(tp);
                                return(EADDRNOTAVAIL);
@@ -253,6 +275,11 @@ again:
                                }
                        }
 
                                }
                        }
 
+                       if (packetlist) {
+                               error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+                                       (so->so_options & SO_DONTROUTE), 0);
+                               tp->t_lastchain = 0;
+                       }
                        if (so->so_flags & SOF_NOADDRAVAIL)
                                return(EADDRNOTAVAIL);
                        else
                        if (so->so_flags & SOF_NOADDRAVAIL)
                                return(EADDRNOTAVAIL);
                        else
@@ -326,6 +353,11 @@ again:
                off--, len++;
                if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
                    taop->tao_ccsent == 0) {
                off--, len++;
                if (len > 0 && tp->t_state == TCPS_SYN_SENT &&
                    taop->tao_ccsent == 0) {
+                       if (packetlist) {
+                               error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+                               (so->so_options & SO_DONTROUTE), 0);
+                               tp->t_lastchain = 0;
+                       }
                  KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                  return 0;
                }
                  KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                  return 0;
                }
@@ -366,6 +398,7 @@ again:
        }
        if (len > tp->t_maxseg) {
                len = tp->t_maxseg;
        }
        if (len > tp->t_maxseg) {
                len = tp->t_maxseg;
+               howmuchsent += len;
                sendalot = 1;
        }
        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
                sendalot = 1;
        }
        if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
@@ -472,8 +505,13 @@ again:
        }
 
        /*
        }
 
        /*
-        * No reason to send a segment, just return.
+        * If there is no reason to send a segment, just return,
+        * but if there are packets left in the packet list, send them now.
         */
         */
+       if (packetlist) {
+               error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+                       (so->so_options & SO_DONTROUTE), 0);
+       }
        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
        return (0);
 
        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
        return (0);
 
@@ -637,6 +675,7 @@ send:
                 */
                flags &= ~TH_FIN;
                len = tp->t_maxopd - optlen - ipoptlen;
                 */
                flags &= ~TH_FIN;
                len = tp->t_maxopd - optlen - ipoptlen;
+               howmuchsent += len;
                sendalot = 1;
        }
 
                sendalot = 1;
        }
 
@@ -801,7 +840,7 @@ send:
                m->m_data += max_linkhdr;
                m->m_len = hdrlen;
        }
                m->m_data += max_linkhdr;
                m->m_len = hdrlen;
        }
-       m->m_pkthdr.rcvif = (struct ifnet *)0;
+       m->m_pkthdr.rcvif = 0;
 #if INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
 #if INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
@@ -867,6 +906,20 @@ send:
                win = (long)TCP_MAXWIN << tp->rcv_scale;
                th->th_win = htons((u_short) (win>>tp->rcv_scale));
        }
                win = (long)TCP_MAXWIN << tp->rcv_scale;
                th->th_win = htons((u_short) (win>>tp->rcv_scale));
        }
+
+        /*
+         * Adjust the RXWIN0SENT flag - indicate that we have advertised   
+         * a 0 window.  This may cause the remote transmitter to stall.  This
+         * flag tells soreceive() to disable delayed acknowledgements when
+         * draining the buffer.  This can occur if the receiver is attempting
+         * to read more data than can be buffered prior to transmitting on
+         * the connection.
+         */
+        if (win == 0)
+                tp->t_flags |= TF_RXWIN0SENT;
+        else
+                tp->t_flags &= ~TF_RXWIN0SENT;
+
        if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
                th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
                th->th_flags |= TH_URG;
        if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
                th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
                th->th_flags |= TH_URG;
@@ -997,10 +1050,11 @@ send:
                        goto out;
                }
 #endif /*IPSEC*/
                        goto out;
                }
 #endif /*IPSEC*/
+               m->m_pkthdr.socket_id = socket_id;
                error = ip6_output(m,
                            tp->t_inpcb->in6p_outputopts,
                            &tp->t_inpcb->in6p_route,
                error = ip6_output(m,
                            tp->t_inpcb->in6p_outputopts,
                            &tp->t_inpcb->in6p_route,
-                           (so->so_options & SO_DONTROUTE), NULL, NULL);
+                           (so->so_options & SO_DONTROUTE), NULL, NULL, 0);
        } else
 #endif /* INET6 */
     {
        } else
 #endif /* INET6 */
     {
@@ -1053,9 +1107,49 @@ send:
        if (ipsec_bypass == 0)
                ipsec_setsocket(m, so);
 #endif /*IPSEC*/
        if (ipsec_bypass == 0)
                ipsec_setsocket(m, so);
 #endif /*IPSEC*/
-       error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
-           (so->so_options & SO_DONTROUTE), 0);
-    }
+
+       /*
+        * The socket is kept locked while sending out packets in ip_output, even if packet chaining is not active.
+        */
+
+       m->m_pkthdr.socket_id = socket_id;
+       if (packetlist) {
+               m->m_nextpkt = NULL;
+               lastpacket->m_nextpkt = m;
+               lastpacket = m;
+               packchain_listadd++;
+       }
+       else {
+               m->m_nextpkt = NULL;
+               packchain_newlist++;
+               packetlist = lastpacket = m;
+               packchain_listadd=0;
+       }
+
+       if ((ipsec_bypass == 0) || fw_enable || sendalot == 0 || (tp->t_state != TCPS_ESTABLISHED) || 
+                     (tp->snd_cwnd <= (tp->snd_wnd / 4)) || 
+                     (tp->t_flags & (TH_PUSH | TF_ACKNOW)) || tp->t_force != 0 ||
+                     packchain_listadd >= tcp_packet_chaining) {
+               lastpacket->m_nextpkt = 0;
+               error = ip_output_list(packetlist, packchain_listadd, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
+                       (so->so_options & SO_DONTROUTE), 0);
+               tp->t_lastchain = packchain_listadd;
+               packchain_sent++;
+               packetlist = NULL;
+               if (error == 0)
+                       howmuchsent = 0;
+       }
+       else {
+               error = 0;
+               packchain_looped++;
+               tcpstat.tcps_sndtotal++;
+               if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
+                       tp->rcv_adv = tp->rcv_nxt + win;
+               tp->last_ack_sent = tp->rcv_nxt;
+               tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
+               goto again;
+       }
+   }
        if (error) {
 
                /*
        if (error) {
 
                /*
@@ -1067,15 +1161,19 @@ send:
                         * No need to check for TH_FIN here because
                         * the TF_SENTFIN flag handles that case.
                         */
                         * No need to check for TH_FIN here because
                         * the TF_SENTFIN flag handles that case.
                         */
-                       if ((flags & TH_SYN) == 0)
-                               tp->snd_nxt -= len;
+                       if ((flags & TH_SYN) == 0) 
+                               tp->snd_nxt -= howmuchsent;
                }
                }
+               howmuchsent = 0;
 out:
                if (error == ENOBUFS) {
                         if (!tp->t_timer[TCPT_REXMT] &&
                              !tp->t_timer[TCPT_PERSIST])
                                 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                        tcp_quench(tp->t_inpcb, 0);
 out:
                if (error == ENOBUFS) {
                         if (!tp->t_timer[TCPT_REXMT] &&
                              !tp->t_timer[TCPT_PERSIST])
                                 tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
                        tcp_quench(tp->t_inpcb, 0);
+                       if (packetlist)
+                               m_freem_list(packetlist);
+                       tp->t_lastchain = 0;
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return (0);
                }
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return (0);
                }
@@ -1087,18 +1185,28 @@ out:
                         * not do so here.
                         */
                        tcp_mtudisc(tp->t_inpcb, 0);
                         * not do so here.
                         */
                        tcp_mtudisc(tp->t_inpcb, 0);
+                       if (packetlist)
+                               m_freem_list(packetlist);
+                       tp->t_lastchain = 0;
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return 0;
                }
                if ((error == EHOSTUNREACH || error == ENETDOWN)
                    && TCPS_HAVERCVDSYN(tp->t_state)) {
                        tp->t_softerror = error;
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return 0;
                }
                if ((error == EHOSTUNREACH || error == ENETDOWN)
                    && TCPS_HAVERCVDSYN(tp->t_state)) {
                        tp->t_softerror = error;
+                       if (packetlist)
+                               m_freem_list(packetlist);
+                       tp->t_lastchain = 0;
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return (0);
                }
                        KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                        return (0);
                }
+               if (packetlist)
+                       m_freem_list(packetlist);
+               tp->t_lastchain = 0;
                KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                return (error);
        }
                KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
                return (error);
        }
+sentit:
        tcpstat.tcps_sndtotal++;
 
        /*
        tcpstat.tcps_sndtotal++;
 
        /*
@@ -1112,8 +1220,8 @@ out:
        tp->last_ack_sent = tp->rcv_nxt;
        tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
 
        tp->last_ack_sent = tp->rcv_nxt;
        tp->t_flags &= ~(TF_ACKNOW|TF_DELACK);
 
-       KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END, 0,0,0,0,0);
-       if (sendalot)
+       KERNEL_DEBUG(DBG_FNC_TCP_OUTPUT | DBG_FUNC_END,0,0,0,0,0);
+       if (sendalot && (!tcp_do_newreno || --maxburst))
                goto again;
        return (0);
 }
                goto again;
        return (0);
 }
@@ -1123,7 +1231,6 @@ tcp_setpersist(tp)
        register struct tcpcb *tp;
 {
        int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
        register struct tcpcb *tp;
 {
        int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
-       int tt;
 
        if (tp->t_timer[TCPT_REXMT])
                panic("tcp_setpersist: retransmit pending");
 
        if (tp->t_timer[TCPT_REXMT])
                panic("tcp_setpersist: retransmit pending");