X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/8f6c56a50524aa785f7e596d52dddfb331e18961..fe8ab488e9161c46dd9885d58fc52996dc0249ff:/bsd/netinet/tcp_sack.c

diff --git a/bsd/netinet/tcp_sack.c b/bsd/netinet/tcp_sack.c
index 0ea418550..e3b339360 100644
--- a/bsd/netinet/tcp_sack.c
+++ b/bsd/netinet/tcp_sack.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2004-2014 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
@@ -72,6 +72,8 @@
 #include
 #include
+#include <sys/mcache.h>
+
 #include
 #include
@@ -100,21 +102,23 @@
 #include <netinet6/ipsec.h>
 #endif /*IPSEC*/
 
+#include <libkern/OSAtomic.h>
+
 int tcp_do_sack = 1;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW, &tcp_do_sack, 0,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_sack, 0,
     "Enable/Disable TCP SACK support");
 static int tcp_sack_maxholes = 128;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_sack_maxholes, 0,
     "Maximum number of TCP SACK holes allowed per connection");
 
 static int tcp_sack_globalmaxholes = 65536;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW,
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
     &tcp_sack_globalmaxholes, 0,
     "Global maximum number of TCP SACK holes");
 
-static int tcp_sack_globalholes = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD,
+static SInt32 tcp_sack_globalholes = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKED,
     &tcp_sack_globalholes, 0,
     "Global number of TCP SACK holes currently allocated");
@@ -201,6 +205,18 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 
     /* Save the number of SACK blocks. */
     tp->rcv_numsacks = num_head + num_saved;
+
+    /* If we are requesting SACK recovery, reset the stretch-ack state
+     * so that the connection will generate more ACKs after recovery and
+     * the sender's cwnd will open.
+     */
+    if ((tp->t_flags & TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
+        tcp_reset_stretch_ack(tp);
+
+#if TRAFFIC_MGT
+    if (tp->acc_iaj > 0 && tp->rcv_numsacks > 0)
+        reset_acc_iaj(tp);
+#endif /* TRAFFIC_MGT */
 }
 
 /*
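The stretch-ACK reset added above exists because a receiver that has been coalescing ACKs would keep ACKing sparsely during loss recovery, which slows the growth of the sender's congestion window. A minimal user-space sketch of the idea follows; the toy_* names and the flag value are illustrative stand-ins, not xnu's actual definitions (the real tcp_reset_stretch_ack() is defined elsewhere in xnu's TCP input path):

/* Sketch: fall back from stretched ACKs to per-segment ACKs once SACK
 * blocks are being reported; all names are simplified stand-ins for
 * the real xnu types and flags. */
#include <stdint.h>

#define TOY_TF_STRETCHACK 0x1000        /* hypothetical flag bit */

struct toy_tcpcb {
        uint32_t t_flags;       /* connection flags */
        int rcv_numsacks;       /* SACK blocks currently being reported */
        uint32_t rcv_unackwin;  /* bytes left un-ACKed for stretching */
};

/* Simplified stand-in for xnu's tcp_reset_stretch_ack() */
void
toy_reset_stretch_ack(struct toy_tcpcb *tp)
{
        tp->t_flags &= ~TOY_TF_STRETCHACK;
        tp->rcv_unackwin = 0;   /* stop deferring ACKs right away */
}

/* Mirror of the check added above, run after the SACK list is rebuilt */
void
toy_after_sack_update(struct toy_tcpcb *tp)
{
        if ((tp->t_flags & TOY_TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
                toy_reset_stretch_ack(tp);
}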
@@ -209,13 +225,8 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 void
 tcp_clean_sackreport( struct tcpcb *tp)
 {
-/*
-    int i;
 
     tp->rcv_numsacks = 0;
-    for (i = 0; i < MAX_SACK_BLKS; i++)
-        tp->sackblks[i].start = tp->sackblks[i].end=0;
-*/
     bzero(&tp->sackblks[0], sizeof (struct sackblk) * MAX_SACK_BLKS);
 }
 
@@ -233,7 +244,7 @@ tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
         return NULL;
     }
 
-    hole = (struct sackhole *)zalloc_noblock(sack_hole_zone);
+    hole = (struct sackhole *)zalloc(sack_hole_zone);
     if (hole == NULL)
         return NULL;
@@ -242,7 +253,7 @@ tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
     hole->rxmit = start;
 
     tp->snd_numholes++;
-    tcp_sack_globalholes++;
+    OSIncrementAtomic(&tcp_sack_globalholes);
 
     return hole;
 }
@@ -256,7 +267,7 @@ tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
     zfree(sack_hole_zone, hole);
 
     tp->snd_numholes--;
-    tcp_sack_globalholes--;
+    OSDecrementAtomic(&tcp_sack_globalholes);
 }
 
@@ -272,7 +283,7 @@ tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
     hole = tcp_sackhole_alloc(tp, start, end);
     if (hole == NULL)
         return NULL;
-
+    hole->rxmit_start = tcp_now;
     /* Insert the new SACK hole into scoreboard */
     if (after != NULL)
         TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
@@ -302,6 +313,75 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
     /* Free this SACK hole. */
     tcp_sackhole_free(tp, hole);
 }
+/*
+ * When a new ack with SACK is received, check if it indicates packet
+ * reordering. If there is packet reordering, the socket is marked and
+ * the time offset by which the packet arrived late with respect to
+ * its closest neighboring packets is computed.
+ */
+static void
+tcp_sack_detect_reordering(struct tcpcb *tp, struct sackhole *s,
+    tcp_seq sacked_seq, tcp_seq snd_fack)
+{
+    int32_t rext = 0, reordered = 0;
+
+    /*
+     * If the SACK hole is past snd_fack, this is from new SACK
+     * information, so we can ignore it.
+     */
+    if (SEQ_GT(s->end, snd_fack))
+        return;
+    /*
+     * If there has been a retransmit timeout, then the timestamp on
+     * the SACK segment will be newer. This might lead to a
+     * false positive. Avoid re-ordering detection in this case.
+     */
+    if (tp->t_rxtshift > 0)
+        return;
+
+    /*
+     * Detect reordering from SACK information by checking
+     * if recently sacked data was never retransmitted from this hole.
+     */
+    if (SEQ_LT(s->rxmit, sacked_seq)) {
+        reordered = 1;
+        tcpstat.tcps_avoid_rxmt++;
+    }
+
+    if (reordered) {
+        if (!(tp->t_flagsext & TF_PKTS_REORDERED)) {
+            tp->t_flagsext |= TF_PKTS_REORDERED;
+            tcpstat.tcps_detect_reordering++;
+        }
+
+        tcpstat.tcps_reordered_pkts++;
+
+        VERIFY(SEQ_GEQ(snd_fack, s->rxmit));
+
+        if (s->rxmit_start > 0) {
+            rext = timer_diff(tcp_now, 0, s->rxmit_start, 0);
+            if (rext < 0)
+                return;
+
+            /*
+             * We take the maximum reorder window to schedule
+             * the DELAYFR timer, as that will take care of
+             * jitter on the network path.
+             *
+             * Computing the average and standard deviation
+             * seems to cause unnecessary retransmissions when
+             * there is high jitter.
+             *
+             * We set a maximum of SRTT/2 and a minimum of
+             * 10 ms on the reorder window.
+             */
+            tp->t_reorderwin = max(tp->t_reorderwin, rext);
+            tp->t_reorderwin = min(tp->t_reorderwin,
+                (tp->t_srtt >> (TCP_RTT_SHIFT - 1)));
+            tp->t_reorderwin = max(tp->t_reorderwin, 10);
+        }
+    }
+}
 
 /*
  * Process cumulative ACK and the TCP SACK option to update the scoreboard.
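The detection logic above rests on a single observation: each hole records rxmit, the next sequence number it would retransmit, so a SACK arriving for data that the hole never retransmitted can only mean the original transmission showed up late, i.e. the path reordered packets rather than dropped one. A self-contained sketch of that check, using hypothetical toy_* types in place of xnu's struct sackhole and tcpcb:

/* Sketch of reordering detection from SACK state; the types and the
 * SEQ_LT macro are local stand-ins mirroring the kernel's wrap-safe
 * sequence arithmetic. */
#include <stdbool.h>
#include <stdint.h>

#define SEQ_LT(a, b)    ((int32_t)((a) - (b)) < 0)

struct toy_hole {
        uint32_t start; /* first missing sequence number */
        uint32_t end;   /* one past the last missing sequence */
        uint32_t rxmit; /* next sequence to retransmit from this hole */
};

bool
toy_sack_indicates_reordering(const struct toy_hole *hole,
    uint32_t sacked_seq, int rxtshift)
{
        /* After a retransmit timeout the evidence is unreliable. */
        if (rxtshift > 0)
                return false;
        /* SACKed data that this hole never retransmitted implies the
         * original copy arrived late: reordering, not loss. */
        return SEQ_LT(hole->rxmit, sacked_seq);
}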
@@ -309,11 +389,13 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
  * the sequence space).
  */
 void
-tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
+tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, struct tcphdr *th,
+    u_int32_t *newbytes_acked)
 {
     struct sackhole *cur, *temp;
     struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
     int i, j, num_sack_blks;
+    tcp_seq old_snd_fack = 0, th_ack = th->th_ack;
 
     num_sack_blks = 0;
     /*
@@ -326,14 +408,18 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
     }
     /*
      * Append received valid SACK blocks to sack_blocks[].
+     * Check that the SACK block range is valid.
      */
     for (i = 0; i < to->to_nsacks; i++) {
-        bcopy((to->to_sacks + i * TCPOLEN_SACK), &sack, sizeof(sack));
+        bcopy((to->to_sacks + i * TCPOLEN_SACK),
+            &sack, sizeof(sack));
         sack.start = ntohl(sack.start);
         sack.end = ntohl(sack.end);
         if (SEQ_GT(sack.end, sack.start) &&
             SEQ_GT(sack.start, tp->snd_una) &&
             SEQ_GT(sack.start, th_ack) &&
+            SEQ_LT(sack.start, tp->snd_max) &&
+            SEQ_GT(sack.end, tp->snd_una) &&
             SEQ_LEQ(sack.end, tp->snd_max))
             sack_blocks[num_sack_blks++] = sack;
     }
@@ -345,6 +431,7 @@
     if (num_sack_blks == 0)
         return;
 
+    VERIFY(num_sack_blks <= (TCP_MAX_SACK + 1));
     /*
      * Sort the SACK blocks so we can update the scoreboard
      * with just one pass. The overhead of sorting up to 4+1 elements
@@ -359,7 +446,7 @@
             }
         }
     }
-    if (TAILQ_EMPTY(&tp->snd_holes))
+    if (TAILQ_EMPTY(&tp->snd_holes)) {
         /*
          * Empty scoreboard. Need to initialize snd_fack (it may be
          * uninitialized or have a bogus value). Scoreboard holes
@@ -367,6 +454,10 @@
          * the logic that adds holes to the tail of the scoreboard).
          */
         tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
+        *newbytes_acked += (tp->snd_fack - tp->snd_una);
+    }
+
+    old_snd_fack = tp->snd_fack;
     /*
      * In the while-loop below, incoming SACK blocks (sack_blocks[])
      * and SACK holes (snd_holes) are traversed from their tails with
@@ -390,6 +481,8 @@
         temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
         if (temp != NULL) {
             tp->snd_fack = sblkp->end;
+            *newbytes_acked += (sblkp->end - sblkp->start);
+
             /* Go to the previous sack block. */
             sblkp--;
         } else {
@@ -405,12 +498,16 @@
                    SEQ_LT(tp->snd_fack, sblkp->start))
                 sblkp--;
             if (sblkp >= sack_blocks &&
-                SEQ_LT(tp->snd_fack, sblkp->end))
+                SEQ_LT(tp->snd_fack, sblkp->end)) {
+                *newbytes_acked += (sblkp->end - tp->snd_fack);
                 tp->snd_fack = sblkp->end;
+            }
         }
-    } else if (SEQ_LT(tp->snd_fack, sblkp->end))
+    } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
         /* fack is advanced. */
+        *newbytes_acked += (sblkp->end - tp->snd_fack);
         tp->snd_fack = sblkp->end;
+    }
     /* We must have at least one SACK hole in scoreboard */
     cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */
     /*
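The two bounds added to the validation in tcp_sack_doack() above (sack.start below snd_max, sack.end above snd_una) reject blocks that fall outside the outstanding window, which a buggy or malicious peer could otherwise use to corrupt the scoreboard. The same test, lifted into a standalone helper with illustrative names; the SEQ_* macros mirror the kernel's wrap-safe sequence compares:

/* Sketch: a received SACK block is usable only if it is non-empty and
 * lies strictly inside the currently outstanding send window. */
#include <stdbool.h>
#include <stdint.h>

#define SEQ_GT(a, b)    ((int32_t)((a) - (b)) > 0)
#define SEQ_LT(a, b)    ((int32_t)((a) - (b)) < 0)
#define SEQ_LEQ(a, b)   ((int32_t)((a) - (b)) <= 0)

bool
toy_sack_block_valid(uint32_t start, uint32_t end,
    uint32_t snd_una, uint32_t th_ack, uint32_t snd_max)
{
        return SEQ_GT(end, start) &&    /* non-empty block */
            SEQ_GT(start, snd_una) &&   /* beyond the cumulative ACK point */
            SEQ_GT(start, th_ack) &&
            SEQ_LT(start, snd_max) &&   /* starts inside sent data */
            SEQ_GT(end, snd_una) &&
            SEQ_LEQ(end, snd_max);      /* ends within sent data */
}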
@@ -439,6 +536,10 @@
             /* Data acks at least the beginning of hole */
             if (SEQ_GEQ(sblkp->end, cur->end)) {
                 /* Acks entire hole, so delete hole */
+                *newbytes_acked += (cur->end - cur->start);
+
+                tcp_sack_detect_reordering(tp, cur,
+                    cur->end, old_snd_fack);
                 temp = cur;
                 cur = TAILQ_PREV(cur, sackhole_head, scblink);
                 tcp_sackhole_remove(tp, temp);
@@ -449,6 +550,9 @@
                 continue;
             } else {
                 /* Move start of hole forward */
+                *newbytes_acked += (sblkp->end - cur->start);
+                tcp_sack_detect_reordering(tp, cur,
+                    sblkp->end, old_snd_fack);
                 cur->start = sblkp->end;
                 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
             }
@@ -456,15 +560,21 @@
             /* Data acks at least the end of hole */
             if (SEQ_GEQ(sblkp->end, cur->end)) {
                 /* Move end of hole backward */
+                *newbytes_acked += (cur->end - sblkp->start);
+                tcp_sack_detect_reordering(tp, cur,
+                    cur->end, old_snd_fack);
                 cur->end = sblkp->start;
                 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
             } else {
                 /*
-                 * ACKs some data in middle of a hole; need to
-                 * split current hole
+                 * ACKs some data in the middle of a hole;
+                 * need to split current hole
                  */
+                *newbytes_acked += (sblkp->end - sblkp->start);
+                tcp_sack_detect_reordering(tp, cur,
+                    sblkp->end, old_snd_fack);
                 temp = tcp_sackhole_insert(tp, sblkp->end,
-                               cur->end, cur);
+                    cur->end, cur);
                 if (temp != NULL) {
                     if (SEQ_GT(cur->rxmit, temp->rxmit)) {
                         temp->rxmit = cur->rxmit;
@@ -475,6 +585,13 @@
                     cur->end = sblkp->start;
                     cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+                    /*
+                     * Reset the rxmit_start to that of
+                     * the current hole, as that will
+                     * help to compute the reorder
+                     * window correctly.
+                     */
+                    temp->rxmit_start = cur->rxmit_start;
                 }
             }
         }
@@ -502,6 +619,8 @@ tcp_free_sackholes(struct tcpcb *tp)
     while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL)
         tcp_sackhole_remove(tp, q);
     tp->sackhint.sack_bytes_rexmit = 0;
+    tp->sackhint.nexthole = NULL;
+    tp->sack_newdata = 0;
 
 }
@@ -525,7 +644,7 @@ tcp_sack_partialack(tp, th)
     tp->t_timer[TCPT_REXMT] = 0;
     tp->t_rtttime = 0;
     /* send one or two segments based on how much new data was acked */
-    if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
+    if (((BYTES_ACKED(th, tp)) / tp->t_maxseg) > 2)
         num_segs = 2;
     tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
         (tp->snd_nxt - tp->sack_newdata) +
@@ -641,3 +760,28 @@ tcp_sack_adjust(struct tcpcb *tp)
         tp->snd_nxt = tp->snd_fack;
     return;
 }
+
+/*
+ * This function returns true if more than (tcprexmtthresh - 1) * SMSS
+ * bytes with sequence numbers greater than snd_una have been SACKed.
+ */
+boolean_t
+tcp_sack_byte_islost(struct tcpcb *tp)
+{
+    u_int32_t unacked_bytes, sndhole_bytes = 0;
+    struct sackhole *sndhole;
+    if (!SACK_ENABLED(tp) || IN_FASTRECOVERY(tp) ||
+        TAILQ_EMPTY(&tp->snd_holes) ||
+        (tp->t_flagsext & TF_PKTS_REORDERED))
+        return (FALSE);
+
+    unacked_bytes = tp->snd_max - tp->snd_una;
+
+    TAILQ_FOREACH(sndhole, &tp->snd_holes, scblink) {
+        sndhole_bytes += (sndhole->end - sndhole->start);
+    }
+
+    VERIFY(unacked_bytes >= sndhole_bytes);
+    return ((unacked_bytes - sndhole_bytes) >
+        ((tcprexmtthresh - 1) * tp->t_maxseg));
+}
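tcp_sack_byte_islost() works by subtraction: the scoreboard holes cover exactly the bytes above snd_una that have not been SACKed, so outstanding bytes minus hole bytes gives the SACKed byte count, and loss is declared once that count exceeds (tcprexmtthresh - 1) * SMSS, a byte-counting analogue of the classic three-duplicate-ACK test. A simplified user-space model with hypothetical types (the kernel's early-out guards for disabled SACK, fast recovery, and detected reordering are omitted):

/* Sketch of the byte-counting loss test over a singly linked list of
 * scoreboard holes; toy_hole2 stands in for xnu's struct sackhole. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct toy_hole2 {
        uint32_t start, end;    /* un-SACKed byte range */
        struct toy_hole2 *next;
};

bool
toy_sack_byte_islost(uint32_t snd_una, uint32_t snd_max,
    const struct toy_hole2 *holes, uint32_t smss, int dupthresh)
{
        uint32_t unacked = snd_max - snd_una;   /* bytes outstanding */
        uint32_t hole_bytes = 0;
        const struct toy_hole2 *h;

        for (h = holes; h != NULL; h = h->next)
                hole_bytes += h->end - h->start;

        /* SACKed bytes = outstanding bytes minus un-SACKed hole bytes. */
        return (unacked - hole_bytes) > (uint32_t)(dupthresh - 1) * smss;
}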