]> git.saurik.com Git - apple/xnu.git/blobdiff - bsd/netinet/tcp_input.c
xnu-517.9.5.tar.gz
[apple/xnu.git] / bsd / netinet / tcp_input.c
index 2e14bc57795d4604665c5d8ce22643f25a4f9314..46fe20ed1b63578a78ac653abd696e54d9daabd5 100644 (file)
@@ -71,6 +71,7 @@
 #include <kern/cpu_number.h>   /* before tcp_seq.h, for tcp_random18() */
 
 #include <net/if.h>
 #include <kern/cpu_number.h>   /* before tcp_seq.h, for tcp_random18() */
 
 #include <net/if.h>
+#include <net/if_types.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
@@ -153,11 +154,35 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcp_lq_overflow, CTLFLAG_RW,
     "Listen Queue Overflow");
 
 #if TCP_DROP_SYNFIN
     "Listen Queue Overflow");
 
 #if TCP_DROP_SYNFIN
-static int drop_synfin = 0;
+static int drop_synfin = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
     &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
 #endif
 
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_RW,
     &drop_synfin, 0, "Drop TCP packets with SYN+FIN set");
 #endif
 
+SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
+    "TCP Segment Reassembly Queue");
+
+__private_extern__ int tcp_reass_maxseg = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RW,
+    &tcp_reass_maxseg, 0,
+    "Global maximum number of TCP Segments in Reassembly Queue");
+
+__private_extern__ int tcp_reass_qsize = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
+    &tcp_reass_qsize, 0,
+    "Global number of TCP Segments currently in Reassembly Queue");
+
+static int tcp_reass_overflows = 0;
+SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
+    &tcp_reass_overflows, 0,
+    "Global number of TCP Segment Reassembly Queue Overflows");
+
+
+__private_extern__ int slowlink_wsize = 8192;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, slowlink_wsize, CTLFLAG_RW,
+       &slowlink_wsize, 0, "Maximum advertised window size for slowlink");
+
+
 u_long tcp_now;
 struct inpcbhead tcb;
 #define        tcb6    tcb  /* for KAME src sync over BSD*'s */
 u_long tcp_now;
 struct inpcbhead tcb;
 #define        tcb6    tcb  /* for KAME src sync over BSD*'s */
@@ -220,6 +245,21 @@ tcp_reass(tp, th, tlenp, m)
        if (th == 0)
                goto present;
 
        if (th == 0)
                goto present;
 
+       /*
+        * Limit the number of segments in the reassembly queue to prevent
+        * holding on to too many segments (and thus running out of mbufs).
+        * Make sure to let the missing segment through which caused this
+        * queue.  Always keep one global queue entry spare to be able to
+        * process the missing segment.
+        */
+       if (th->th_seq != tp->rcv_nxt &&
+           tcp_reass_qsize + 1 >= tcp_reass_maxseg) {
+               tcp_reass_overflows++;
+               tcpstat.tcps_rcvmemdrop++;
+               m_freem(m);
+               return (0);
+       }
+
        /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
        MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
               M_NOWAIT);
        /* Allocate a new queue entry. If we can't, just drop the pkt. XXX */
        MALLOC(te, struct tseg_qent *, sizeof (struct tseg_qent), M_TSEGQ,
               M_NOWAIT);
@@ -228,6 +268,7 @@ tcp_reass(tp, th, tlenp, m)
                m_freem(m);
                return (0);
        }
                m_freem(m);
                return (0);
        }
+       tcp_reass_qsize++;
 
        /*
         * Find a segment which begins after this one does.
 
        /*
         * Find a segment which begins after this one does.
@@ -253,6 +294,7 @@ tcp_reass(tp, th, tlenp, m)
                                tcpstat.tcps_rcvdupbyte += *tlenp;
                                m_freem(m);
                                FREE(te, M_TSEGQ);
                                tcpstat.tcps_rcvdupbyte += *tlenp;
                                m_freem(m);
                                FREE(te, M_TSEGQ);
+                               tcp_reass_qsize--;
                                /*
                                 * Try to present any queued data
                                 * at the left window edge to the user.
                                /*
                                 * Try to present any queued data
                                 * at the left window edge to the user.
@@ -288,6 +330,7 @@ tcp_reass(tp, th, tlenp, m)
                LIST_REMOVE(q, tqe_q);
                m_freem(q->tqe_m);
                FREE(q, M_TSEGQ);
                LIST_REMOVE(q, tqe_q);
                m_freem(q->tqe_m);
                FREE(q, M_TSEGQ);
+               tcp_reass_qsize--;
                q = nq;
        }
 
                q = nq;
        }
 
@@ -322,6 +365,7 @@ present:
                else
                        sbappend(&so->so_rcv, q->tqe_m);
                FREE(q, M_TSEGQ);
                else
                        sbappend(&so->so_rcv, q->tqe_m);
                FREE(q, M_TSEGQ);
+               tcp_reass_qsize--;
                q = nq;
        } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
        ND6_HINT(tp);
                q = nq;
        } while (q && q->tqe_th->th_seq == tp->rcv_nxt);
        ND6_HINT(tp);
@@ -356,9 +400,9 @@ present:
  */
 #if INET6
 int
  */
 #if INET6
 int
-tcp6_input(mp, offp, proto)
+tcp6_input(mp, offp)
        struct mbuf **mp;
        struct mbuf **mp;
-       int *offp, proto;
+       int *offp;
 {
        register struct mbuf *m = *mp;
        struct in6_ifaddr *ia6;
 {
        register struct mbuf *m = *mp;
        struct in6_ifaddr *ia6;
@@ -460,7 +504,7 @@ tcp_input(m, off0)
                }
        } else
 #endif /* INET6 */
                }
        } else
 #endif /* INET6 */
-      {
+       {
        /*
         * Get IP and TCP header together in first mbuf.
         * Note: IP leaves IP header in first mbuf.
        /*
         * Get IP and TCP header together in first mbuf.
         * Note: IP leaves IP header in first mbuf.
@@ -490,10 +534,20 @@ tcp_input(m, off0)
        if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                if (apple_hwcksum_rx && (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16)) {
                        u_short pseudo;
        if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
                if (apple_hwcksum_rx && (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16)) {
                        u_short pseudo;
+                       char b[9];
+                       *(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
+                       *(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
+                       *(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];
+                       
                        bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
                        ipov->ih_len = (u_short)tlen;
                        HTONS(ipov->ih_len);
                        pseudo = in_cksum(m, sizeof (struct ip));
                        bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
                        ipov->ih_len = (u_short)tlen;
                        HTONS(ipov->ih_len);
                        pseudo = in_cksum(m, sizeof (struct ip));
+                       
+                       *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
+                       *(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
+                       *(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];
+                       
                        th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
                } else {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
                        th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
                } else {
                        if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
@@ -505,14 +559,23 @@ tcp_input(m, off0)
                }
                th->th_sum ^= 0xffff;
        } else {
                }
                th->th_sum ^= 0xffff;
        } else {
+               char b[9];
                /*
                 * Checksum extended TCP header and data.
                 */
                /*
                 * Checksum extended TCP header and data.
                 */
+               *(uint32_t*)&b[0] = *(uint32_t*)&ipov->ih_x1[0];
+               *(uint32_t*)&b[4] = *(uint32_t*)&ipov->ih_x1[4];
+               *(uint8_t*)&b[8] = *(uint8_t*)&ipov->ih_x1[8];
+               
                len = sizeof (struct ip) + tlen;
                bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
                ipov->ih_len = (u_short)tlen;
                HTONS(ipov->ih_len);
                th->th_sum = in_cksum(m, len);
                len = sizeof (struct ip) + tlen;
                bzero(ipov->ih_x1, sizeof(ipov->ih_x1));
                ipov->ih_len = (u_short)tlen;
                HTONS(ipov->ih_len);
                th->th_sum = in_cksum(m, len);
+               
+               *(uint32_t*)&ipov->ih_x1[0] = *(uint32_t*)&b[0];
+               *(uint32_t*)&ipov->ih_x1[4] = *(uint32_t*)&b[4];
+               *(uint8_t*)&ipov->ih_x1[8] = *(uint8_t*)&b[8];
        }
        if (th->th_sum) {
                tcpstat.tcps_rcvbadsum++;
        }
        if (th->th_sum) {
                tcpstat.tcps_rcvbadsum++;
@@ -522,7 +585,7 @@ tcp_input(m, off0)
        /* Re-initialization for later version check */
        ip->ip_v = IPVERSION;
 #endif
        /* Re-initialization for later version check */
        ip->ip_v = IPVERSION;
 #endif
-      }
+       }
 
        /*
         * Check that TCP offset makes sense,
 
        /*
         * Check that TCP offset makes sense,
@@ -772,6 +835,7 @@ findpcb:
 #if INET6
                        struct inpcb *oinp = sotoinpcb(so);
 #endif /* INET6 */
 #if INET6
                        struct inpcb *oinp = sotoinpcb(so);
 #endif /* INET6 */
+                       int ogencnt = so->so_gencnt;
 
 #if !IPSEC
                        /*
 
 #if !IPSEC
                        /*
@@ -851,6 +915,12 @@ findpcb:
                                if (!so2)
                                        goto drop;
                        }
                                if (!so2)
                                        goto drop;
                        }
+                       /*
+                        * Make sure the listening socket did not get closed during socket
+                         * allocation; not only is that incorrect, it is known to cause a panic.
+                         */
+                       if (so->so_gencnt != ogencnt)
+                               goto drop;
 #if IPSEC
                        oso = so;
 #endif
 #if IPSEC
                        oso = so;
 #endif
@@ -972,7 +1042,7 @@ findpcb:
         */
        tp->t_rcvtime = 0;
        if (TCPS_HAVEESTABLISHED(tp->t_state))
         */
        tp->t_rcvtime = 0;
        if (TCPS_HAVEESTABLISHED(tp->t_state))
-               tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+               tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
 
        /*
         * Process options if not in LISTEN state,
 
        /*
         * Process options if not in LISTEN state,
@@ -1046,7 +1116,7 @@ findpcb:
                                        tp->snd_nxt = tp->snd_max;
                                        tp->t_badrxtwin = 0;
                                }
                                        tp->snd_nxt = tp->snd_max;
                                        tp->t_badrxtwin = 0;
                                }
-                               if ((to.to_flag & TOF_TS) != 0)
+                               if (((to.to_flag & TOF_TS) != 0) && (to.to_tsecr != 0)) /* Makes sure we already have a TS */
                                        tcp_xmit_timer(tp,
                                            tcp_now - to.to_tsecr + 1);
                                else if (tp->t_rtttime &&
                                        tcp_xmit_timer(tp,
                                            tcp_now - to.to_tsecr + 1);
                                else if (tp->t_rtttime &&
@@ -1135,6 +1205,10 @@ findpcb:
        win = sbspace(&so->so_rcv);
        if (win < 0)
                win = 0;
        win = sbspace(&so->so_rcv);
        if (win < 0)
                win = 0;
+       else {  /* clip rcv window to slowlink_wsize on slow links */
+               if (tp->t_flags & TF_SLOWLINK && slowlink_wsize > 0)
+                       win = min(win, slowlink_wsize);
+       }
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
        }
 
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
        }
 
@@ -1317,7 +1391,10 @@ findpcb:
                         * here.  Even if we requested window scaling, it will
                         * become effective only later when our SYN is acked.
                         */
                         * here.  Even if we requested window scaling, it will
                         * become effective only later when our SYN is acked.
                         */
-                       tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
+                       if (tp->t_flags & TF_SLOWLINK && slowlink_wsize > 0) /* clip window size for slow link */
+                               tp->rcv_adv += min(tp->rcv_wnd, slowlink_wsize);
+                       else 
+                               tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN);
                        tcpstat.tcps_connects++;
                        soisconnected(so);
                        tp->t_timer[TCPT_KEEP] = tcp_keepinit;
                        tcpstat.tcps_connects++;
                        soisconnected(so);
                        tp->t_timer[TCPT_KEEP] = tcp_keepinit;
@@ -1464,7 +1541,7 @@ findpcb:
                                thflags &= ~TH_SYN;
                        } else {
                                tp->t_state = TCPS_ESTABLISHED;
                                thflags &= ~TH_SYN;
                        } else {
                                tp->t_state = TCPS_ESTABLISHED;
-                               tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+                               tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
                        }
                } else {
                /*
                        }
                } else {
                /*
@@ -1492,7 +1569,7 @@ findpcb:
                                                tp->t_flags &= ~TF_NEEDFIN;
                                        } else {
                                                tp->t_state = TCPS_ESTABLISHED;
                                                tp->t_flags &= ~TF_NEEDFIN;
                                        } else {
                                                tp->t_state = TCPS_ESTABLISHED;
-                                               tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+                                               tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
                                        }
                                        tp->t_flags |= TF_NEEDSYN;
                                } else
                                        }
                                        tp->t_flags |= TF_NEEDSYN;
                                } else
@@ -1563,6 +1640,16 @@ trimthenstep6:
                                goto drop;
                }
                break;  /* continue normal processing */
                                goto drop;
                }
                break;  /* continue normal processing */
+
+       /* Received a SYN while the connection is already established.
+        * This is one of the "half-open connections and other anomalies"
+        * described in RFC 793 (page 34): send an ACK so the remote end either
+        * resets the connection or recovers by adjusting its sequence numbering.
+        */
+       case TCPS_ESTABLISHED:
+               if (thflags & TH_SYN)  
+                       goto dropafterack; 
+               break;
        }
 
        /*
        }
 
        /*
@@ -1883,7 +1970,7 @@ trimthenstep6:
                        tp->t_flags &= ~TF_NEEDFIN;
                } else {
                        tp->t_state = TCPS_ESTABLISHED;
                        tp->t_flags &= ~TF_NEEDFIN;
                } else {
                        tp->t_state = TCPS_ESTABLISHED;
-                       tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+                       tp->t_timer[TCPT_KEEP] = TCP_KEEPIDLE(tp);
                }
                /*
                 * If segment contains data or ACK, will call tcp_reass()
                }
                /*
                 * If segment contains data or ACK, will call tcp_reass()
@@ -2060,8 +2147,9 @@ process_ACK:
                 * Since we now have an rtt measurement, cancel the
                 * timer backoff (cf., Phil Karn's retransmit alg.).
                 * Recompute the initial retransmit timer.
                 * Since we now have an rtt measurement, cancel the
                 * timer backoff (cf., Phil Karn's retransmit alg.).
                 * Recompute the initial retransmit timer.
+                * Also makes sure we have a valid time stamp in hand
                 */
                 */
-               if (to.to_flag & TOF_TS)
+               if (((to.to_flag & TOF_TS) != 0) && (to.to_tsecr != 0))
                        tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
                else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
                        tcp_xmit_timer(tp, tp->t_rtttime);
                        tcp_xmit_timer(tp, tcp_now - to.to_tsecr + 1);
                else if (tp->t_rtttime && SEQ_GT(th->th_ack, tp->t_rtseq))
                        tcp_xmit_timer(tp, tp->t_rtttime);
@@ -2115,10 +2203,10 @@ process_ACK:
                        tp->snd_wnd -= acked;
                        ourfinisacked = 0;
                }
                        tp->snd_wnd -= acked;
                        ourfinisacked = 0;
                }
-               sowwakeup(so);
                tp->snd_una = th->th_ack;
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
                tp->snd_una = th->th_ack;
                if (SEQ_LT(tp->snd_nxt, tp->snd_una))
                        tp->snd_nxt = tp->snd_una;
+               sowwakeup(so);
 
                switch (tp->t_state) {
 
 
                switch (tp->t_state) {
 
@@ -2840,6 +2928,16 @@ tcp_mss(tp, offer)
                return;
        }
        ifp = rt->rt_ifp;
                return;
        }
        ifp = rt->rt_ifp;
+       /*
+        * Slower link window correction:
+        * If a value is specified for slowlink_wsize, use it for PPP links
+        * believed to be on a serial modem (speed <= 128Kbps). Excludes 9600bps,
+        * as it is the default value advertised by pseudo-devices over PPP.
+        */
+       if (ifp->if_type == IFT_PPP && slowlink_wsize > 0 && 
+           ifp->if_baudrate > 9600 && ifp->if_baudrate <= 128000) {
+               tp->t_flags |= TF_SLOWLINK;
+       }
        so = inp->inp_socket;
 
        taop = rmx_taop(rt->rt_rmx);
        so = inp->inp_socket;
 
        taop = rmx_taop(rt->rt_rmx);
@@ -2859,7 +2957,12 @@ tcp_mss(tp, offer)
                        isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
                        tcp_mssdflt;
                        isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
                        tcp_mssdflt;
-       else
+       else {
+               /*
+                * Prevent DoS attack with too small MSS. Round up
+                * to at least minmss.
+                */
+               offer = max(offer, tcp_minmss);
                /*
                 * Sanity check: make sure that maxopd will be large
                 * enough to allow some data on segments even is the
                /*
                 * Sanity check: make sure that maxopd will be large
                 * enough to allow some data on segments even is the
@@ -2867,6 +2970,7 @@ tcp_mss(tp, offer)
                 * funny things may happen in tcp_output.
                 */
                offer = max(offer, 64);
                 * funny things may happen in tcp_output.
                 */
                offer = max(offer, 64);
+       }
        taop->tao_mssopt = offer;
 
        /*
        taop->tao_mssopt = offer;
 
        /*
@@ -2946,21 +3050,16 @@ tcp_mss(tp, offer)
             (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
                mss -= TCPOLEN_CC_APPA;
 
             (tp->t_flags & TF_RCVD_CC) == TF_RCVD_CC))
                mss -= TCPOLEN_CC_APPA;
 
-#if    (MCLBYTES & (MCLBYTES - 1)) == 0
-               if (mss > MCLBYTES)
-                       mss &= ~(MCLBYTES-1);
-#else
-               if (mss > MCLBYTES)
-                       mss = mss / MCLBYTES * MCLBYTES;
-#endif
        /*
        /*
-        * If there's a pipesize, change the socket buffer
-        * to that size.  Make the socket buffers an integral
+        * If there's a pipesize (ie loopback), change the socket
+        * buffer to that size only if it's bigger than the current
+        * sockbuf size.  Make the socket buffers an integral
         * number of mss units; if the mss is larger than
         * the socket buffer, decrease the mss.
         */
 #if RTV_SPIPE
         * number of mss units; if the mss is larger than
         * the socket buffer, decrease the mss.
         */
 #if RTV_SPIPE
-       if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+       bufsize = rt->rt_rmx.rmx_sendpipe;
+       if (bufsize < so->so_snd.sb_hiwat)
 #endif
                bufsize = so->so_snd.sb_hiwat;
        if (bufsize < mss)
 #endif
                bufsize = so->so_snd.sb_hiwat;
        if (bufsize < mss)
@@ -2974,7 +3073,8 @@ tcp_mss(tp, offer)
        tp->t_maxseg = mss;
 
 #if RTV_RPIPE
        tp->t_maxseg = mss;
 
 #if RTV_RPIPE
-       if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+       bufsize = rt->rt_rmx.rmx_recvpipe;
+       if (bufsize < so->so_rcv.sb_hiwat)
 #endif
                bufsize = so->so_rcv.sb_hiwat;
        if (bufsize > mss) {
 #endif
                bufsize = so->so_rcv.sb_hiwat;
        if (bufsize > mss) {
@@ -3046,6 +3146,16 @@ tcp_mssopt(tp)
                        isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
                        tcp_mssdflt;
                        isipv6 ? tcp_v6mssdflt :
 #endif /* INET6 */
                        tcp_mssdflt;
+       /*
+        * Slower link window correction:
+        * If a value is specified for slowlink_wsize, use it for PPP links
+        * believed to be on a serial modem (speed <= 128Kbps). Excludes 9600bps,
+        * as it is the default value advertised by pseudo-devices over PPP.
+        */
+       if (rt->rt_ifp->if_type == IFT_PPP && slowlink_wsize > 0 && 
+           rt->rt_ifp->if_baudrate > 9600 && rt->rt_ifp->if_baudrate <= 128000) {
+               tp->t_flags |= TF_SLOWLINK;
+       }
 
        return rt->rt_ifp->if_mtu - min_protoh;
 }
 
        return rt->rt_ifp->if_mtu - min_protoh;
 }