bsd/netinet/tcp_sack.c

   1 /*
   2  * Copyright (c) 2004-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  30  *      The Regents of the University of California.  All rights reserved.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed by the University of
  43  *      California, Berkeley and its contributors.
  44  * 4. Neither the name of the University nor the names of its contributors
  45  *    may be used to endorse or promote products derived from this software
  46  *    without specific prior written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  58  * SUCH DAMAGE.
  59  *
  60  */
  61
  62 #define _IP_VHL
  63
  64
  65 #include <sys/param.h>
  66 #include <sys/systm.h>
  67 #include <sys/kernel.h>
  68 #include <sys/sysctl.h>
  69 #include <sys/mbuf.h>
  70 #include <sys/domain.h>
  71 #include <sys/protosw.h>
  72 #include <sys/socket.h>
  73 #include <sys/socketvar.h>
  74
  75 #include <kern/zalloc.h>
  76
  77 #include <net/route.h>
  78
  79 #include <netinet/in.h>
  80 #include <netinet/in_systm.h>
  81 #include <netinet/ip.h>
  82 #include <netinet/in_pcb.h>
  83 #include <netinet/ip_var.h>
  84 #if INET6
  85 #include <netinet6/in6_pcb.h>
  86 #include <netinet/ip6.h>
  87 #include <netinet6/ip6_var.h>
  88 #endif
  89 #include <netinet/tcp.h>
  90 //#define       TCPOUTFLAGS
  91 #include <netinet/tcp_fsm.h>
  92 #include <netinet/tcp_seq.h>
  93 #include <netinet/tcp_timer.h>
  94 #include <netinet/tcp_var.h>
  95 #include <netinet/tcpip.h>
  96 #if TCPDEBUG
  97 #include <netinet/tcp_debug.h>
  98 #endif
  99 #include <sys/kdebug.h>
 100
 101 #if IPSEC
 102 #include <netinet6/ipsec.h>
 103 #endif /*IPSEC*/
 104
 105 int     tcp_do_sack = 1;
 106 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack, CTLFLAG_RW | CTLFLAG_LOCKED, &tcp_do_sack, 0,
 107         "Enable/Disable TCP SACK support");
 108 static int tcp_sack_maxholes = 128;
 109 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_maxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 110         &tcp_sack_maxholes, 0,
 111     "Maximum number of TCP SACK holes allowed per connection");
 112
 113 static int tcp_sack_globalmaxholes = 65536;
 114 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalmaxholes, CTLFLAG_RW | CTLFLAG_LOCKED,
 115         &tcp_sack_globalmaxholes, 0,
 116     "Global maximum number of TCP SACK holes");
 117
 118 static int tcp_sack_globalholes = 0;
 119 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sack_globalholes, CTLFLAG_RD | CTLFLAG_LOCKED,
 120     &tcp_sack_globalholes, 0,
 121     "Global number of TCP SACK holes currently allocated");
 122
 123 extern struct zone *sack_hole_zone;
 124
 125 /*
 126  * This function is called upon receipt of new valid data (while not in header
 127  * prediction mode), and it updates the ordered list of sacks.
 128  */
 129 void
 130 tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
 131 {
 132         /*
 133          * First reported block MUST be the most recent one.  Subsequent
 134          * blocks SHOULD be in the order in which they arrived at the
 135          * receiver.  These two conditions make the implementation fully
 136          * compliant with RFC 2018.
 137          */
 138         struct sackblk head_blk, saved_blks[MAX_SACK_BLKS];
 139         int num_head, num_saved, i;
 140
 141         /* SACK block for the received segment. */
 142         head_blk.start = rcv_start;
 143         head_blk.end = rcv_end;
 144
 145         /*
 146          * Merge updated SACK blocks into head_blk, and
 147          * save unchanged SACK blocks into saved_blks[].
 148          * num_saved will have the number of the saved SACK blocks.
 149          */
 150         num_saved = 0;
 151         for (i = 0; i < tp->rcv_numsacks; i++) {
 152                 tcp_seq start = tp->sackblks[i].start;
 153                 tcp_seq end = tp->sackblks[i].end;
 154                 if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
 155                         /*
 156                          * Discard this SACK block.
 157                          */
 158                 } else if (SEQ_LEQ(head_blk.start, end) &&
 159                            SEQ_GEQ(head_blk.end, start)) {
 160                         /*
 161                          * Merge this SACK block into head_blk.
 162                          * This SACK block itself will be discarded.
 163                          */
 164                         if (SEQ_GT(head_blk.start, start))
 165                                 head_blk.start = start;
 166                         if (SEQ_LT(head_blk.end, end))
 167                                 head_blk.end = end;
 168                 } else {
 169                         /*
 170                          * Save this SACK block.
 171                          */
 172                         saved_blks[num_saved].start = start;
 173                         saved_blks[num_saved].end = end;
 174                         num_saved++;
 175                 }
 176         }
 177
 178         /*
 179          * Update SACK list in tp->sackblks[].
 180          */
 181         num_head = 0;
 182         if (SEQ_GT(head_blk.start, tp->rcv_nxt)) {
 183                 /*
 184                  * The received data segment is an out-of-order segment.
 185                  * Put head_blk at the top of SACK list.
 186                  */
 187                 tp->sackblks[0] = head_blk;
 188                 num_head = 1;
 189                 /*
 190                  * If the number of saved SACK blocks exceeds its limit,
 191                  * discard the last SACK block.
 192                  */
 193                 if (num_saved >= MAX_SACK_BLKS)
 194                         num_saved--;
 195         }
 196         if (num_saved > 0) {
 197                 /*
 198                  * Copy the saved SACK blocks back.
 199                  */
 200                 bcopy(saved_blks, &tp->sackblks[num_head],
 201                       sizeof(struct sackblk) * num_saved);
 202         }
 203
 204         /* Save the number of SACK blocks. */
 205         tp->rcv_numsacks = num_head + num_saved;
 206
 207         /* If we are requesting SACK recovery, reset the stretch-ack state
 208          * so that connection will generate more acks after recovery and
 209          * sender's cwnd will open.
 210          */
 211         if ((tp->t_flags & TF_STRETCHACK) != 0 && tp->rcv_numsacks > 0)
 212                 tcp_reset_stretch_ack(tp);
 213
 214 #if TRAFFIC_MGT
 215         if (tp->acc_iaj > 0 && tp->rcv_numsacks > 0)
 216                 reset_acc_iaj(tp);
 217 #endif /* TRAFFIC_MGT */
 218 }
 219
 220 /*
 221  * Delete all receiver-side SACK information.
 222  */
 223 void
 224 tcp_clean_sackreport( struct tcpcb *tp)
 225 {
 226
 227         tp->rcv_numsacks = 0;
 228         bzero(&tp->sackblks[0], sizeof (struct sackblk) * MAX_SACK_BLKS);
 229 }
 230
 231 /*
 232  * Allocate struct sackhole.
 233  */
 234 static struct sackhole *
 235 tcp_sackhole_alloc(struct tcpcb *tp, tcp_seq start, tcp_seq end)
 236 {
 237         struct sackhole *hole;
 238
 239         if (tp->snd_numholes >= tcp_sack_maxholes ||
 240             tcp_sack_globalholes >= tcp_sack_globalmaxholes) {
 241                 tcpstat.tcps_sack_sboverflow++;
 242                 return NULL;
 243         }
 244
 245         hole = (struct sackhole *)zalloc_noblock(sack_hole_zone);
 246         if (hole == NULL)
 247                 return NULL;
 248
 249         hole->start = start;
 250         hole->end = end;
 251         hole->rxmit = start;
 252
 253         tp->snd_numholes++;
 254         tcp_sack_globalholes++;
 255
 256         return hole;
 257 }
 258
 259 /*
 260  * Free struct sackhole.
 261  */
 262 static void
 263 tcp_sackhole_free(struct tcpcb *tp, struct sackhole *hole)
 264 {
 265         zfree(sack_hole_zone, hole);
 266
 267         tp->snd_numholes--;
 268         tcp_sack_globalholes--;
 269 }
 270
 271 /*
 272  * Insert new SACK hole into scoreboard.
 273  */
 274 static struct sackhole *
 275 tcp_sackhole_insert(struct tcpcb *tp, tcp_seq start, tcp_seq end,
 276                     struct sackhole *after)
 277 {
 278         struct sackhole *hole;
 279
 280         /* Allocate a new SACK hole. */
 281         hole = tcp_sackhole_alloc(tp, start, end);
 282         if (hole == NULL)
 283                 return NULL;
 284
 285         /* Insert the new SACK hole into scoreboard */
 286         if (after != NULL)
 287                 TAILQ_INSERT_AFTER(&tp->snd_holes, after, hole, scblink);
 288         else
 289                 TAILQ_INSERT_TAIL(&tp->snd_holes, hole, scblink);
 290
 291         /* Update SACK hint. */
 292         if (tp->sackhint.nexthole == NULL)
 293                 tp->sackhint.nexthole = hole;
 294
 295         return hole;
 296 }
 297
 298 /*
 299  * Remove SACK hole from scoreboard.
 300  */
 301 static void
 302 tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole)
 303 {
 304         /* Update SACK hint. */
 305         if (tp->sackhint.nexthole == hole)
 306                 tp->sackhint.nexthole = TAILQ_NEXT(hole, scblink);
 307
 308         /* Remove this SACK hole. */
 309         TAILQ_REMOVE(&tp->snd_holes, hole, scblink);
 310
 311         /* Free this SACK hole. */
 312         tcp_sackhole_free(tp, hole);
 313 }
 314
 315 /*
 316  * Process cumulative ACK and the TCP SACK option to update the scoreboard.
 317  * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
 318  * the sequence space).
 319  */
 320 void
 321 tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack,
 322         u_int32_t *newbytes_acked)
 323 {
 324         struct sackhole *cur, *temp;
 325         struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
 326         int i, j, num_sack_blks;
 327
 328         num_sack_blks = 0;
 329         /*
 330          * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
 331          * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
 332          */
 333         if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
 334                 sack_blocks[num_sack_blks].start = tp->snd_una;
 335                 sack_blocks[num_sack_blks++].end = th_ack;
 336         }
 337         /*
 338          * Append received valid SACK blocks to sack_blocks[].
 339          * Check that the SACK block range is valid.
 340          */
 341         for (i = 0; i < to->to_nsacks; i++) {
 342                 bcopy((to->to_sacks + i * TCPOLEN_SACK),
 343                     &sack, sizeof(sack));
 344                 sack.start = ntohl(sack.start);
 345                 sack.end = ntohl(sack.end);
 346                 if (SEQ_GT(sack.end, sack.start) &&
 347                     SEQ_GT(sack.start, tp->snd_una) &&
 348                     SEQ_GT(sack.start, th_ack) &&
 349                     SEQ_LT(sack.start, tp->snd_max) &&
 350                     SEQ_GT(sack.end, tp->snd_una) &&
 351                     SEQ_LEQ(sack.end, tp->snd_max))
 352                         sack_blocks[num_sack_blks++] = sack;
 353         }
 354
 355         /*
 356          * Return if SND.UNA is not advanced and no valid SACK block
 357          * is received.
 358          */
 359         if (num_sack_blks == 0)
 360                 return;
 361
 362         /*
 363          * Sort the SACK blocks so we can update the scoreboard
 364          * with just one pass. The overhead of sorting upto 4+1 elements
 365          * is less than making upto 4+1 passes over the scoreboard.
 366          */
 367         for (i = 0; i < num_sack_blks; i++) {
 368                 for (j = i + 1; j < num_sack_blks; j++) {
 369                         if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) {
 370                                 sack = sack_blocks[i];
 371                                 sack_blocks[i] = sack_blocks[j];
 372                                 sack_blocks[j] = sack;
 373                         }
 374                 }
 375         }
 376         if (TAILQ_EMPTY(&tp->snd_holes)) {
 377                 /*
 378                  * Empty scoreboard. Need to initialize snd_fack (it may be
 379                  * uninitialized or have a bogus value). Scoreboard holes
 380                  * (from the sack blocks received) are created later below (in
 381                  * the logic that adds holes to the tail of the scoreboard).
 382                  */
 383                 tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
 384                 *newbytes_acked += (tp->snd_fack - tp->snd_una);
 385         }
 386
 387         /*
 388          * In the while-loop below, incoming SACK blocks (sack_blocks[])
 389          * and SACK holes (snd_holes) are traversed from their tails with
 390          * just one pass in order to reduce the number of compares especially
 391          * when the bandwidth-delay product is large.
 392          * Note: Typically, in the first RTT of SACK recovery, the highest
 393          * three or four SACK blocks with the same ack number are received.
 394          * In the second RTT, if retransmitted data segments are not lost,
 395          * the highest three or four SACK blocks with ack number advancing
 396          * are received.
 397          */
 398         sblkp = &sack_blocks[num_sack_blks - 1];        /* Last SACK block */
 399         if (SEQ_LT(tp->snd_fack, sblkp->start)) {
 400                 /*
 401                  * The highest SACK block is beyond fack.
 402                  * Append new SACK hole at the tail.
 403                  * If the second or later highest SACK blocks are also
 404                  * beyond the current fack, they will be inserted by
 405                  * way of hole splitting in the while-loop below.
 406                  */
 407                 temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
 408                 if (temp != NULL) {
 409                         tp->snd_fack = sblkp->end;
 410                         *newbytes_acked += (sblkp->end - sblkp->start);
 411
 412                         /* Go to the previous sack block. */
 413                         sblkp--;
 414                 } else {
 415                         /*
 416                          * We failed to add a new hole based on the current
 417                          * sack block.  Skip over all the sack blocks that
 418                          * fall completely to the right of snd_fack and proceed
 419                          * to trim the scoreboard based on the remaining sack
 420                          * blocks. This also trims the scoreboard for th_ack
 421                          * (which is sack_blocks[0]).
 422                          */
 423                         while (sblkp >= sack_blocks &&
 424                                SEQ_LT(tp->snd_fack, sblkp->start))
 425                                 sblkp--;
 426                         if (sblkp >= sack_blocks &&
 427                             SEQ_LT(tp->snd_fack, sblkp->end)) {
 428                                 *newbytes_acked += (sblkp->end - tp->snd_fack);
 429                                 tp->snd_fack = sblkp->end;
 430                         }
 431                 }
 432         } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
 433                 /* fack is advanced. */
 434                 *newbytes_acked += (sblkp->end - tp->snd_fack);
 435                 tp->snd_fack = sblkp->end;
 436         }
 437         /* We must have at least one SACK hole in scoreboard */
 438         cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole */
 439         /*
 440          * Since the incoming sack blocks are sorted, we can process them
 441          * making one sweep of the scoreboard.
 442          */
 443         while (sblkp >= sack_blocks  && cur != NULL) {
 444                 if (SEQ_GEQ(sblkp->start, cur->end)) {
 445                         /*
 446                          * SACKs data beyond the current hole.
 447                          * Go to the previous sack block.
 448                          */
 449                         sblkp--;
 450                         continue;
 451                 }
 452                 if (SEQ_LEQ(sblkp->end, cur->start)) {
 453                         /*
 454                          * SACKs data before the current hole.
 455                          * Go to the previous hole.
 456                          */
 457                         cur = TAILQ_PREV(cur, sackhole_head, scblink);
 458                         continue;
 459                 }
 460                 tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
 461                 if (SEQ_LEQ(sblkp->start, cur->start)) {
 462                         /* Data acks at least the beginning of hole */
 463                         if (SEQ_GEQ(sblkp->end, cur->end)) {
 464                                 /* Acks entire hole, so delete hole */
 465                                 *newbytes_acked += (cur->end - cur->start);
 466                                 temp = cur;
 467                                 cur = TAILQ_PREV(cur, sackhole_head, scblink);
 468                                 tcp_sackhole_remove(tp, temp);
 469                                 /*
 470                                  * The sack block may ack all or part of the next
 471                                  * hole too, so continue onto the next hole.
 472                                  */
 473                                 continue;
 474                         } else {
 475                                 /* Move start of hole forward */
 476                                 *newbytes_acked += (sblkp->end - cur->start);
 477                                 cur->start = sblkp->end;
 478                                 cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
 479                         }
 480                 } else {
 481                         /* Data acks at least the end of hole */
 482                         if (SEQ_GEQ(sblkp->end, cur->end)) {
 483                                 /* Move end of hole backward */
 484                                 *newbytes_acked += (cur->end - sblkp->start);
 485                                 cur->end = sblkp->start;
 486                                 cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
 487                         } else {
 488                                 /*
 489                                  * ACKs some data in middle of a hole; need to
 490                                  * split current hole
 491                                  */
 492                                 *newbytes_acked += (sblkp->end - sblkp->start);
 493                                 temp = tcp_sackhole_insert(tp, sblkp->end,
 494                                                            cur->end, cur);
 495                                 if (temp != NULL) {
 496                                         if (SEQ_GT(cur->rxmit, temp->rxmit)) {
 497                                                 temp->rxmit = cur->rxmit;
 498                                                 tp->sackhint.sack_bytes_rexmit
 499                                                         += (temp->rxmit
 500                                                             - temp->start);
 501                                         }
 502                                         cur->end = sblkp->start;
 503                                         cur->rxmit = SEQ_MIN(cur->rxmit,
 504                                                              cur->end);
 505                                 }
 506                         }
 507                 }
 508                 tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
 509                 /*
 510                  * Testing sblkp->start against cur->start tells us whether
 511                  * we're done with the sack block or the sack hole.
 512                  * Accordingly, we advance one or the other.
 513                  */
 514                 if (SEQ_LEQ(sblkp->start, cur->start))
 515                         cur = TAILQ_PREV(cur, sackhole_head, scblink);
 516                 else
 517                         sblkp--;
 518         }
 519 }
 520
 521 /*
 522  * Free all SACK holes to clear the scoreboard.
 523  */
 524 void
 525 tcp_free_sackholes(struct tcpcb *tp)
 526 {
 527         struct sackhole *q;
 528
 529         while ((q = TAILQ_FIRST(&tp->snd_holes)) != NULL)
 530                 tcp_sackhole_remove(tp, q);
 531         tp->sackhint.sack_bytes_rexmit = 0;
 532         tp->sackhint.nexthole = NULL;
 533         tp->sack_newdata = 0;
 534
 535 }
 536
 537 /*
 538  * Partial ack handling within a sack recovery episode.
 539  * Keeping this very simple for now. When a partial ack
 540  * is received, force snd_cwnd to a value that will allow
 541  * the sender to transmit no more than 2 segments.
 542  * If necessary, a better scheme can be adopted at a
 543  * later point, but for now, the goal is to prevent the
 544  * sender from bursting a large amount of data in the midst
 545  * of sack recovery.
 546  */
 547 void
 548 tcp_sack_partialack(tp, th)
 549         struct tcpcb *tp;
 550         struct tcphdr *th;
 551 {
 552         int num_segs = 1;
 553
 554         tp->t_timer[TCPT_REXMT] = 0;
 555         tp->t_rtttime = 0;
 556         /* send one or 2 segments based on how much new data was acked */
 557         if (((BYTES_ACKED(th, tp)) / tp->t_maxseg) > 2)
 558                 num_segs = 2;
 559         tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
 560                 (tp->snd_nxt - tp->sack_newdata) +
 561                 num_segs * tp->t_maxseg);
 562         if (tp->snd_cwnd > tp->snd_ssthresh)
 563                 tp->snd_cwnd = tp->snd_ssthresh;
 564         tp->t_flags |= TF_ACKNOW;
 565         (void) tcp_output(tp);
 566 }
 567
 568 /*
 569  * Debug version of tcp_sack_output() that walks the scoreboard. Used for
 570  * now to sanity check the hint.
 571  */
 572 static struct sackhole *
 573 tcp_sack_output_debug(struct tcpcb *tp, int *sack_bytes_rexmt)
 574 {
 575         struct sackhole *p;
 576
 577         *sack_bytes_rexmt = 0;
 578         TAILQ_FOREACH(p, &tp->snd_holes, scblink) {
 579                 if (SEQ_LT(p->rxmit, p->end)) {
 580                         if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */
 581                                 continue;
 582                         }
 583                         *sack_bytes_rexmt += (p->rxmit - p->start);
 584                         break;
 585                 }
 586                 *sack_bytes_rexmt += (p->rxmit - p->start);
 587         }
 588         return (p);
 589 }
 590
 591 /*
 592  * Returns the next hole to retransmit and the number of retransmitted bytes
 593  * from the scoreboard. We store both the next hole and the number of
 594  * retransmitted bytes as hints (and recompute these on the fly upon SACK/ACK
 595  * reception). This avoids scoreboard traversals completely.
 596  *
 597  * The loop here will traverse *at most* one link. Here's the argument.
 598  * For the loop to traverse more than 1 link before finding the next hole to
 599  * retransmit, we would need to have at least 1 node following the current hint
 600  * with (rxmit == end). But, for all holes following the current hint,
 601  * (start == rxmit), since we have not yet retransmitted from them. Therefore,
 602  * in order to traverse more 1 link in the loop below, we need to have at least
 603  * one node following the current hint with (start == rxmit == end).
 604  * But that can't happen, (start == end) means that all the data in that hole
 605  * has been sacked, in which case, the hole would have been removed from the
 606  * scoreboard.
 607  */
 608 struct sackhole *
 609 tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt)
 610 {
 611         struct sackhole *hole = NULL, *dbg_hole = NULL;
 612         int dbg_bytes_rexmt;
 613
 614         dbg_hole = tcp_sack_output_debug(tp, &dbg_bytes_rexmt);
 615         *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
 616         hole = tp->sackhint.nexthole;
 617         if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
 618                 goto out;
 619         while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) {
 620                 if (SEQ_LT(hole->rxmit, hole->end)) {
 621                         tp->sackhint.nexthole = hole;
 622                         break;
 623                 }
 624         }
 625 out:
 626         if (dbg_hole != hole) {
 627                 printf("%s: Computed sack hole not the same as cached value\n", __func__);
 628                 hole = dbg_hole;
 629         }
 630         if (*sack_bytes_rexmt != dbg_bytes_rexmt) {
 631                 printf("%s: Computed sack_bytes_retransmitted (%d) not "
 632                        "the same as cached value (%d)\n",
 633                        __func__, dbg_bytes_rexmt, *sack_bytes_rexmt);
 634                 *sack_bytes_rexmt = dbg_bytes_rexmt;
 635         }
 636         return (hole);
 637 }
 638
 639 /*
 640  * After a timeout, the SACK list may be rebuilt.  This SACK information
 641  * should be used to avoid retransmitting SACKed data.  This function
 642  * traverses the SACK list to see if snd_nxt should be moved forward.
 643  */
 644 void
 645 tcp_sack_adjust(struct tcpcb *tp)
 646 {
 647         struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);
 648
 649         if (cur == NULL)
 650                 return; /* No holes */
 651         if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack))
 652                 return; /* We're already beyond any SACKed blocks */
 653         /*
 654          * Two cases for which we want to advance snd_nxt:
 655          * i) snd_nxt lies between end of one hole and beginning of another
 656          * ii) snd_nxt lies between end of last hole and snd_fack
 657          */
 658         while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
 659                 if (SEQ_LT(tp->snd_nxt, cur->end))
 660                         return;
 661                 if (SEQ_GEQ(tp->snd_nxt, p->start))
 662                         cur = p;
 663                 else {
 664                         tp->snd_nxt = p->start;
 665                         return;
 666                 }
 667         }
 668         if (SEQ_LT(tp->snd_nxt, cur->end))
 669                 return;
 670         tp->snd_nxt = tp->snd_fack;
 671         return;
 672 }