bsd/netinet/tcp_sack.c

   1 /*
   2  * Copyright (c) 2004,2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  */
  61
  62 #define _IP_VHL
  63
  64
  65 #include <sys/param.h>
  66 #include <sys/systm.h>
  67 #include <sys/kernel.h>
  68 #include <sys/sysctl.h>
  69 #include <sys/mbuf.h>
  70 #include <sys/domain.h>
  71 #include <sys/protosw.h>
  72 #include <sys/socket.h>
  73 #include <sys/socketvar.h>
  74
  75 #include <kern/zalloc.h>
  76
  77 #include <net/route.h>
  78
  79 #include <netinet/in.h>
  80 #include <netinet/in_systm.h>
  81 #include <netinet/ip.h>
  82 #include <netinet/in_pcb.h>
  83 #include <netinet/ip_var.h>
  84 #if INET6
  85 #include <netinet6/in6_pcb.h>
  86 #include <netinet/ip6.h>
  87 #include <netinet6/ip6_var.h>
  88 #endif
  89 #include <netinet/tcp.h>
  90 //#define       TCPOUTFLAGS
  91 #include <netinet/tcp_fsm.h>
  92 #include <netinet/tcp_seq.h>
  93 #include <netinet/tcp_timer.h>
  94 #include <netinet/tcp_var.h>
  95 #include <netinet/tcpip.h>
  96 #if TCPDEBUG
  97 #include <netinet/tcp_debug.h>
  98 #endif
  99 #include <sys/kdebug.h>
 100
 101 #if IPSEC
 102 #include <netinet6/ipsec.h>
 103 #endif /*IPSEC*/
 104
 105 int     tcp_do_sack = 1;
 106 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_sack, 0,
 107         "Enable/Disable TCP SACK support");
 108 static int tcp_sack_maxholes = 128;
 109 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 110         &tcp_sack_maxholes, 0,
 111     "Maximum number of TCP SACK holes allowed per connection");
 112
 113 static int tcp_sack_globalmaxholes = 65536;
 114 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 115         &tcp_sack_globalmaxholes, 0,
 116     "Global maximum number of TCP SACK holes");
 117
 118 static int tcp_sack_globalholes = 0;
 119 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKED,
 120     &tcp_sack_globalholes, 0,
 121     "Global number of TCP SACK holes currently allocated");
 122
 123 extern struct zone *sack_hole_zone;
 124
 125 /*
 126  * This function is called upon receipt of new valid data (while not in header
 127  * prediction mode), and it updates the ordered list of sacks.
 128  */
 129 void
 130 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 131 {
 132         /*
 133          * First reported block MUST be the most recent one.  Subsequent
 134          * blocks SHOULD be in the order in which they arrived at the
 135          * receiver.  These two conditions make the implementation fully
 136          * compliant with RFC 2018.
 137          */
 138         struct sackblk head_blk, saved_blks[MAX_SACK_BLKS];
 139         int num_head, num_saved, i;
 140
 141         /* SACK block for the received segment. */
 142         head_blk.start = rcv_start;
 143         head_blk.end = rcv_end;
 144
 145         /*
 146          * Merge updated SACK blocks into head_blk, and
 147          * save unchanged SACK blocks into saved_blks[].
 148          * num_saved will have the number of the saved SACK blocks.
 149          */
 150         num_saved = 0;
 151         for (i = 0; i < tp->rcv_numsacks; i++) {
 152                 tcp_seq start = tp->sackblks[i].start;
 153                 tcp_seq end = tp->sackblks[i].end;
 154                 if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
 155                         /*
 156                          * Discard this SACK block.
 157                          */
 158                 } else if (SEQ_LEQ(head_blk.start, end) &&
 159                            SEQ_GEQ(head_blk.end, start)) {
 160                         /*
 161                          * Merge this SACK block into head_blk.
 162                          * This SACK block itself will be discarded.
 163                          */
 164                         if (SEQ_GT(head_blk.start, start))
 165                                 head_blk.start = start;
 166                         if (SEQ_LT(head_blk.end, end))
 167                                 head_blk.end = end;
 168                 } else {
 169                         /*
 170                          * Save this SACK block.
 171                          */
 172                         saved_blks[num_saved].start = start;
 173                         saved_blks[num_saved].end = end;
 174                         num_saved++;
 175                 }
 176         }
 177
 178         /*
 179          * Update SACK list in tp->sackblks[].
 180          */
 181         num_head = 0;
 182         if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
 183                 /*
 184                  * The received data segment is an out-of-order segment.
 185                  * Put head_blk at the top of SACK list.
 186                  */
 187                 tp->sackblks[0] = head_blk;
 188                 num_head = 1;
 189                 /*
 190                  * If the number of saved SACK blocks exceeds its limit,
 191                  * discard the last SACK block.
 192                  */
 193                 if (num_saved >= MAX_SACK_BLKS)
 194                         num_saved--;
 195         }
 196         if (num_saved > 0) {
 197                 /*
 198                  * Copy the saved SACK blocks back.
 199                  */
 200                 bcopy(saved_blks, &tp->sackblks[num_head],
 201                       sizeof(struct sackblk) * num_saved);
 202         }
 203
 204         /* Save the number of SACK blocks. */
 205         tp->rcv_numsacks = num_head + num_saved;
 206
 207         /* If we are requesting SACK recovery, reset the stretch-ack state
 208          * so that connection will generate more acks after recovery and
 209          * sender's cwnd will open.
 210          */
 211         if ((tp->t_flags & TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
 212                 tcp_reset_stretch_ack(tp);
 213
 214 #if TRAFFIC_MGT
 215         if (tp->acc_iaj > 0 && tp->rcv_numsacks > 0)
 216                 reset_acc_iaj(tp);
 217 #endif /* TRAFFIC_MGT */
 218 }
 219
 220 /*
 221  * Delete all receiver-side SACK information.
 222  */
 223 void
 224 tcp_clean_sackreport( struct tcpcb *tp)
 225 {
 226
 227         tp->rcv_numsacks = 0;
 228         bzero(&tp->sackblks[0], sizeof (struct sackblk) * MAX_SACK_BLKS);
 229 }
 230
 231 /*
 232  * Allocate struct sackhole.
 233  */
 234 static struct sackhole *
 235 tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
 236 {
 237         struct sackhole *hole;
 238
 239         if (tp->snd_numholes >= tcp_sack_maxholes ||
 240             tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
 241                 tcpstat.tcps_sack_sboverflow++;
 242                 return NULL;
 243         }
 244
 245         hole = (struct sackhole *)zalloc_noblock(sack_hole_zone);
 246         if (hole == NULL)
 247                 return NULL;
 248
 249         hole->start = start;
 250         hole->end = end;
 251         hole->rxmit = start;
 252
 253         tp->snd_numholes++;
 254         tcp_sack_globalholes++;
 255
 256         return hole;
 257 }
 258
 259 /*
 260  * Free struct sackhole.
 261  */
 262 static void
 263 tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
 264 {
 265         zfree(sack_hole_zone, hole);
 266
 267         tp->snd_numholes--;
 268         tcp_sack_globalholes--;
 269 }
 270
 271 /*
 272  * Insert new SACK hole into scoreboard.
 273  */
 274 static struct sackhole *
 275 tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
 276                     struct sackhole *after)
 277 {
 278         struct sackhole *hole;
 279
 280         /* Allocate a new SACK hole. */
 281         hole = tcp_sackhole_alloc(tp, start, end);
 282         if (hole == NULL)
 283                 return NULL;
 284
 285         /* Insert the new SACK hole into scoreboard */
 286         if (after != NULL)
 287                 TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
 288         else
 289                 TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink);
 290
 291         /* Update SACK hint. */
 292         if (tp->sackhint.nexthole == NULL)
 293                 tp->sackhint.nexthole = hole;
 294
 295         return hole;
 296 }
 297
 298 /*
 299  * Remove SACK hole from scoreboard.
 300  */
 301 static void
 302 tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
 303 {
 304         /* Update SACK hint. */
 305         if (tp->sackhint.nexthole == hole)
 306                 tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink);
 307
 308         /* Remove this SACK hole. */
 309         TAILQ_REMOVE(&tp->snd_holes, hole, scblink);
 310
 311         /* Free this SACK hole. */
 312         tcp_sackhole_free(tp, hole);
 313 }
 314
 315 /*
 316  * Process cumulative ACK and the TCP SACK option to update the scoreboard.
 317  * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
 318  * the sequence space).
 319  */
 320 void
 321 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
 322 {
 323         struct sackhole *cur, *temp;
 324         struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
 325         int i, j, num_sack_blks;
 326
 327         num_sack_blks = 0;
 328         /*
 329          * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
 330          * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
 331          */
 332         if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
 333                 sack_blocks[num_sack_blks].start = tp->snd_una;
 334                 sack_blocks[num_sack_blks++].end = th_ack;
 335         }
 336         /*
 337          * Append received valid SACK blocks to sack_blocks[].
 338          * Check that the SACK block range is valid.
 339          */
 340                 for (i = 0; i < to->to_nsacks; i++) {
 341                         bcopy((to->to_sacks + i * TCPOLEN_SACK),
 342                             &sack, sizeof(sack));
 343                         sack.start = ntohl(sack.start);
 344                         sack.end = ntohl(sack.end);
 345                         if (SEQ_GT(sack.end, sack.start) &&
 346                             SEQ_GT(sack.start, tp->snd_una) &&
 347                             SEQ_GT(sack.start, th_ack) &&
 348                             SEQ_LT(sack.start, tp->snd_max) &&
 349                             SEQ_GT(sack.end, tp->snd_una) &&
 350                             SEQ_LEQ(sack.end, tp->snd_max))
 351                                 sack_blocks[num_sack_blks++] = sack;
 352         }
 353
 354         /*
 355          * Return if SND.UNA is not advanced and no valid SACK block
 356          * is received.
 357          */
 358         if (num_sack_blks == 0)
 359                 return;
 360
 361         /*
 362          * Sort the SACK blocks so we can update the scoreboard
 363          * with just one pass. The overhead of sorting upto 4+1 elements
 364          * is less than making upto 4+1 passes over the scoreboard.
 365          */
 366         for (i = 0; i < num_sack_blks; i++) {
 367                 for (j = i + 1; j < num_sack_blks; j++) {
 368                         if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
 369                                 sack = sack_blocks[i];
 370                                 sack_blocks[i] = sack_blocks[j];
 371                                 sack_blocks[j] = sack;
 372                         }
 373                 }
 374         }
 375         if (TAILQ_EMPTY(&tp->snd_holes))
 376                 /*
 377                  * Empty scoreboard. Need to initialize snd_fack (it may be
 378                  * uninitialized or have a bogus value). Scoreboard holes
 379                  * (from the sack blocks received) are created later below (in
 380                  * the logic that adds holes to the tail of the scoreboard).
 381                  */
 382                 tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
 383         /*
 384          * In the while-loop below, incoming SACK blocks (sack_blocks[])
 385          * and SACK holes (snd_holes) are traversed from their tails with
 386          * just one pass in order to reduce the number of compares especially
 387          * when the bandwidth-delay product is large.
 388          * Note: Typically, in the first RTT of SACK recovery, the highest
 389          * three or four SACK blocks with the same ack number are received.
 390          * In the second RTT, if retransmitted data segments are not lost,
 391          * the highest three or four SACK blocks with ack number advancing
 392          * are received.
 393          */
 394         sblkp = &sack_blocks[num_sack_blks - 1];        /* Last SACK block */
 395         if (SEQ_LT(tp->snd_fack, sblkp->start)) {
 396                 /*
 397                  * The highest SACK block is beyond fack.
 398                  * Append new SACK hole at the tail.
 399                  * If the second or later highest SACK blocks are also
 400                  * beyond the current fack, they will be inserted by
 401                  * way of hole splitting in the while-loop below.
 402                  */
 403                 temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
 404                 if (temp != NULL) {
 405                         tp->snd_fack = sblkp->end;
 406                         /* Go to the previous sack block. */
 407                         sblkp--;
 408                 } else {
 409                         /*
 410                          * We failed to add a new hole based on the current
 411                          * sack block.  Skip over all the sack blocks that
 412                          * fall completely to the right of snd_fack and proceed
 413                          * to trim the scoreboard based on the remaining sack
 414                          * blocks. This also trims the scoreboard for th_ack
 415                          * (which is sack_blocks[0]).
 416                          */
 417                         while (sblkp >= sack_blocks &&
 418                                SEQ_LT(tp->snd_fack, sblkp->start))
 419                                 sblkp--;
 420                         if (sblkp >= sack_blocks &&
 421                             SEQ_LT(tp->snd_fack, sblkp->end))
 422                                 tp->snd_fack = sblkp->end;
 423                 }
 424         } else if (SEQ_LT(tp->snd_fack, sblkp->end))
 425                 /* fack is advanced. */
 426                 tp->snd_fack = sblkp->end;
 427         /* We must have at least one SACK hole in scoreboard */
 428         cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */
 429         /*
 430          * Since the incoming sack blocks are sorted, we can process them
 431          * making one sweep of the scoreboard.
 432          */
 433         while (sblkp >= sack_blocks  && cur != NULL) {
 434                 if (SEQ_GEQ(sblkp->start, cur->end)) {
 435                         /*
 436                          * SACKs data beyond the current hole.
 437                          * Go to the previous sack block.
 438                          */
 439                         sblkp--;
 440                         continue;
 441                 }
 442                 if (SEQ_LEQ(sblkp->end, cur->start)) {
 443                         /*
 444                          * SACKs data before the current hole.
 445                          * Go to the previous hole.
 446                          */
 447                         cur = TAILQ_PREV(cur, sackhole_head, scblink);
 448                         continue;
 449                 }
 450                 tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
 451                 if (SEQ_LEQ(sblkp->start, cur->start)) {
 452                         /* Data acks at least the beginning of hole */
 453                         if (SEQ_GEQ(sblkp->end, cur->end)) {
 454                                 /* Acks entire hole, so delete hole */
 455                                 temp = cur;
 456                                 cur = TAILQ_PREV(cur, sackhole_head, scblink);
 457                                 tcp_sackhole_remove(tp, temp);
 458                                 /*
 459                                  * The sack block may ack all or part of the next
 460                                  * hole too, so continue onto the next hole.
 461                                  */
 462                                 continue;
 463                         } else {
 464                                 /* Move start of hole forward */
 465                                 cur->start = sblkp->end;
 466                                 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
 467                         }
 468                 } else {
 469                         /* Data acks at least the end of hole */
 470                         if (SEQ_GEQ(sblkp->end, cur->end)) {
 471                                 /* Move end of hole backward */
 472                                 cur->end = sblkp->start;
 473                                 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
 474                         } else {
 475                                 /*
 476                                  * ACKs some data in middle of a hole; need to
 477                                  * split current hole
 478                                  */
 479                                 temp = tcp_sackhole_insert(tp, sblkp->end,
 480                                                            cur->end, cur);
 481                                 if (temp != NULL) {
 482                                         if (SEQ_GT(cur->rxmit, temp->rxmit)) {
 483                                                 temp->rxmit = cur->rxmit;
 484                                                 tp->sackhint.sack_bytes_rexmit
 485                                                         += (temp->rxmit
 486                                                             - temp->start);
 487                                         }
 488                                         cur->end = sblkp->start;
 489                                         cur->rxmit = SEQ_MIN(cur->rxmit,
 490                                                              cur->end);
 491                                 }
 492                         }
 493                 }
 494                 tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
 495                 /*
 496                  * Testing sblkp->start against cur->start tells us whether
 497                  * we're done with the sack block or the sack hole.
 498                  * Accordingly, we advance one or the other.
 499                  */
 500                 if (SEQ_LEQ(sblkp->start, cur->start))
 501                         cur = TAILQ_PREV(cur, sackhole_head, scblink);
 502                 else
 503                         sblkp--;
 504         }
 505 }
 506
 507 /*
 508  * Free all SACK holes to clear the scoreboard.
 509  */
 510 void
 511 tcp_free_sackholes(struct tcpcb *tp)
 512 {
 513         struct sackhole *q;
 514
 515         while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL)
 516                 tcp_sackhole_remove(tp, q);
 517         tp->sackhint.sack_bytes_rexmit = 0;
 518         tp->sackhint.nexthole = NULL;
 519         tp->sack_newdata = 0;
 520
 521 }
 522
 523 /*
 524  * Partial ack handling within a sack recovery episode.
 525  * Keeping this very simple for now. When a partial ack
 526  * is received, force snd_cwnd to a value that will allow
 527  * the sender to transmit no more than 2 segments.
 528  * If necessary, a better scheme can be adopted at a
 529  * later point, but for now, the goal is to prevent the
 530  * sender from bursting a large amount of data in the midst
 531  * of sack recovery.
 532  */
 533 void
 534 tcp_sack_partialack(tp, th)
 535         struct tcpcb *tp;
 536         struct tcphdr *th;
 537 {
 538         int num_segs = 1;
 539
 540         tp->t_timer[TCPT_REXMT] = 0;
 541         tp->t_rtttime = 0;
 542         /* send one or 2 segments based on how much new data was acked */
 543         if (((th->th_ack - tp->snd_una) / tp->t_maxseg) > 2)
 544                 num_segs = 2;
 545         tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
 546                 (tp->snd_nxt - tp->sack_newdata) +
 547                 num_segs * tp->t_maxseg);
 548         if (tp->snd_cwnd > tp->snd_ssthresh)
 549                 tp->snd_cwnd = tp->snd_ssthresh;
 550         tp->t_flags |= TF_ACKNOW;
 551         (void) tcp_output(tp);
 552 }
 553
 554 /*
 555  * Debug version of tcp_sack_output() that walks the scoreboard. Used for
 556  * now to sanity check the hint.
 557  */
 558 static struct sackhole *
 559 tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt)
 560 {
 561         struct sackhole *p;
 562
 563         *sack_bytes_rexmt = 0;
 564         TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
 565                 if (SEQ_LT(p->rxmit, p->end)) {
 566                         if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
 567                                 continue;
 568                         }
 569                         *sack_bytes_rexmt += (p->rxmit - p->start);
 570                         break;
 571                 }
 572                 *sack_bytes_rexmt += (p->rxmit - p->start);
 573         }
 574         return (p);
 575 }
 576
 577 /*
 578  * Returns the next hole to retransmit and the number of retransmitted bytes
 579  * from the scoreboard. We store both the next hole and the number of
 580  * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
 581  * reception). This avoids scoreboard traversals completely.
 582  *
 583  * The loop here will traverse *at most* one link. Here's the argument.
 584  * For the loop to traverse more than 1 link before finding the next hole to
 585  * retransmit, we would need to have at least 1 node following the current hint
 586  * with (rxmit == end). But, for all holes following the current hint,
 587  * (start == rxmit), since we have not yet retransmitted from them. Therefore,
 588  * in order to traverse more 1 link in the loop below, we need to have at least
 589  * one node following the current hint with (start == rxmit == end).
 590  * But that can't happen, (start == end) means that all the data in that hole
 591  * has been sacked, in which case, the hole would have been removed from the
 592  * scoreboard.
 593  */
 594 struct sackhole *
 595 tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
 596 {
 597         struct sackhole *hole = NULL, *dbg_hole = NULL;
 598         int dbg_bytes_rexmt;
 599
 600         dbg_hole = tcp_sack_output_debug(tp, &dbg_bytes_rexmt);
 601         *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
 602         hole = tp->sackhint.nexthole;
 603         if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
 604                 goto out;
 605         while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) {
 606                 if (SEQ_LT(hole->rxmit, hole->end)) {
 607                         tp->sackhint.nexthole = hole;
 608                         break;
 609                 }
 610         }
 611 out:
 612         if (dbg_hole != hole) {
 613                 printf("%s: Computed sack hole not the same as cached value\n", __func__);
 614                 hole = dbg_hole;
 615         }
 616         if (*sack_bytes_rexmt != dbg_bytes_rexmt) {
 617                 printf("%s: Computed sack_bytes_retransmitted (%d) not "
 618                        "the same as cached value (%d)\n",
 619                        __func__, dbg_bytes_rexmt, *sack_bytes_rexmt);
 620                 *sack_bytes_rexmt = dbg_bytes_rexmt;
 621         }
 622         return (hole);
 623 }
 624
 625 /*
 626  * After a timeout, the SACK list may be rebuilt.  This SACK information
 627  * should be used to avoid retransmitting SACKed data.  This function
 628  * traverses the SACK list to see if snd_nxt should be moved forward.
 629  */
 630 void
 631 tcp_sack_adjust(struct tcpcb *tp)
 632 {
 633         struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);
 634
 635         if (cur == NULL)
 636                 return; /* No holes */
 637         if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
 638                 return; /* We're already beyond any SACKed blocks */
 639         /*
 640          * Two cases for which we want to advance snd_nxt:
 641          * i) snd_nxt lies between end of one hole and beginning of another
 642          * ii) snd_nxt lies between end of last hole and snd_fack
 643          */
 644         while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
 645                 if (SEQ_LT(tp->snd_nxt, cur->end))
 646                         return;
 647                 if (SEQ_GEQ(tp->snd_nxt, p->start))
 648                         cur = p;
 649                 else {
 650                         tp->snd_nxt = p->start;
 651                         return;
 652                 }
 653         }
 654         if (SEQ_LT(tp->snd_nxt, cur->end))
 655                 return;
 656         tp->snd_nxt = tp->snd_fack;
 657         return;
 658 }