+/*
+ * Inspect an incoming ACK for evidence about the last timer-driven send.
+ * If the previous packet was sent by the retransmission timer and
+ * tcp_detect_bad_rexmt() reports it was not needed, restore the congestion
+ * window to the state before that transmission.
+ *
+ * Otherwise, if a tail loss probe (TLP) was sent and this ACK shows it
+ * recovered the last packet, that is a real loss: lower the window.
+ */
+static void
+tcp_bad_rexmt_check(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
+{
+ if (tp->t_rxtshift > 0 && /* only evaluated when t_rxtshift is non-zero */
+ tcp_detect_bad_rexmt(tp, th, to, tp->t_rxtstart)) {
+ ++tcpstat.tcps_sndrexmitbad; /* count retransmits found unnecessary */
+ tcp_bad_rexmt_restore_state(tp, th);
+ tcp_ccdbg_trace(tp, th, TCP_CC_BAD_REXMT_RECOVERY);
+ } else if ((tp->t_flagsext & TF_SENT_TLPROBE) /* a TLP was sent */
+ && tp->t_tlphighrxt > 0
+ && SEQ_GEQ(th->th_ack, tp->t_tlphighrxt) /* ACK covers t_tlphighrxt */
+ && !tcp_detect_bad_rexmt(tp, th, to, tp->t_tlpstart)) { /* not a bad rexmt */
+ /*
+ * The tail loss probe recovered the last packet and
+ * we need to adjust the congestion window to take
+ * this loss into account.
+ */
+ ++tcpstat.tcps_tlp_recoverlastpkt;
+ if (!IN_FASTRECOVERY(tp)) { /* skipped when already in fast recovery */
+ tcp_reduce_congestion_window(tp);
+ EXIT_FASTRECOVERY(tp); /* NOTE(review): presumably undoes a recovery state set by the call above - confirm */
+ }
+ tcp_ccdbg_trace(tp, th, TCP_CC_TLP_RECOVER_LASTPACKET);
+ }
+
+ tp->t_flagsext &= ~(TF_SENT_TLPROBE); /* TLP state is cleared on every call */
+ tp->t_tlphighrxt = 0;
+ tp->t_tlpstart = 0;
+
+ /*
+ * Check if the latest ACK was for a segment sent during PMTU
+ * blackhole detection. If the timestamp echoed on the ACK is before
+ * the start of PMTU blackhole detection, revert the maximum
+ * segment size to its previous value.
+ */
+ if (tp->t_rxtshift > 0 && (tp->t_flags & TF_BLACKHOLE) &&
+ tp->t_pmtud_start_ts > 0 && TSTMP_SUPPORTED(tp)) {
+ if ((to->to_flags & TOF_TS) && to->to_tsecr != 0 /* ACK carries a non-zero echoed timestamp */
+ && TSTMP_LT(to->to_tsecr, tp->t_pmtud_start_ts)) {
+ tcp_pmtud_revert_segment_size(tp);
+ }
+ }
+ if (tp->t_pmtud_start_ts > 0)
+ tp->t_pmtud_start_ts = 0; /* reset whether or not the size was reverted */
+}
+
+/*
+ * Check if early retransmit (RFC 5827) can be attempted and, if so, lower
+ * tp->t_rexmtthresh. Tail loss probe state is cleared on every call.
+ *
+ * If packet reordering is detected on a connection, fast recovery will
+ * be delayed until it is clear that the packet was lost and not reordered.
+ * But reordering detection is done only when SACK is enabled.
+ *
+ * On connections that do not support SACK, there is a limit on the number
+ * of early retransmits that can be done per minute. This limit is needed
+ * to make sure that too many packets are not retransmitted when there is
+ * packet reordering.
+ */
+static void
+tcp_early_rexmt_check (struct tcpcb *tp, struct tcphdr *th)
+{
+ u_int32_t obytes, snd_off;
+ int32_t snd_len;
+ struct socket *so = tp->t_inpcb->inp_socket;
+
+ if (early_rexmt && (SACK_ENABLED(tp) ||
+ tp->t_early_rexmt_count < TCP_EARLY_REXMT_LIMIT) && /* count limit applies only without SACK */
+ SEQ_GT(tp->snd_max, tp->snd_una) && /* some sent data is still unacked */
+ (tp->t_dupacks == 1 || /* first duplicate ACK, or ... */
+ (SACK_ENABLED(tp) &&
+ !TAILQ_EMPTY(&tp->snd_holes)))) { /* ... SACK hole list is non-empty */
+ /*
+ * If there are only a few outstanding
+ * segments on the connection, we might need
+ * to lower the retransmit threshold. This
+ * will allow us to do Early Retransmit as
+ * described in RFC 5827.
+ */
+ if (SACK_ENABLED(tp) &&
+ !TAILQ_EMPTY(&tp->snd_holes)) {
+ obytes = (tp->snd_max - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit; /* bytes beyond snd_fack plus sack_bytes_rexmit */
+ } else {
+ obytes = (tp->snd_max - tp->snd_una); /* all bytes between snd_una and snd_max */
+ }
+
+ /*
+ * In order to lower retransmit threshold the
+ * following two conditions must be met.
+ * 1. the amount of outstanding data is less
+ * than 4*SMSS bytes
+ * 2. there is no unsent data ready for
+ * transmission or the advertised window
+ * will limit sending new segments.
+ */
+ snd_off = tp->snd_max - tp->snd_una;
+ snd_len = min(so->so_snd.sb_cc, tp->snd_wnd) - snd_off; /* sendable bytes beyond what is in flight */
+ if (obytes < (tp->t_maxseg << 2) && /* condition 1: t_maxseg << 2 is 4 * t_maxseg */
+ snd_len <= 0) { /* condition 2 */
+ u_int32_t osegs;
+
+ osegs = obytes / tp->t_maxseg;
+ if ((osegs * tp->t_maxseg) < obytes)
+ osegs++; /* round up to whole segments */
+
+ /*
+ * Since the connection might have already
+ * received some dupacks, we add them to
+ * the outstanding segments count to get
+ * the correct retransmit threshold.
+ *
+ * By checking for early retransmit after
+ * receiving some duplicate acks when SACK
+ * is supported, the connection will
+ * enter fast recovery even if multiple
+ * segments are lost in the same window.
+ */
+ osegs += tp->t_dupacks;
+ if (osegs < 4) {
+ tp->t_rexmtthresh =
+ ((osegs - 1) > 1) ? (osegs - 1) : 1; /* NOTE(review): osegs is unsigned; osegs == 0 would wrap here - confirm it cannot occur */
+ tp->t_rexmtthresh =
+ min(tp->t_rexmtthresh, tcprexmtthresh); /* cap at tcprexmtthresh */
+ tp->t_rexmtthresh =
+ max(tp->t_rexmtthresh, tp->t_dupacks); /* not below the dupacks already seen */
+
+ if (tp->t_early_rexmt_count == 0)
+ tp->t_early_rexmt_win = tcp_now; /* NOTE(review): presumably starts the rate-limit window - confirm */
+
+ if (tp->t_flagsext & TF_SENT_TLPROBE) { /* TLP case does not bump t_early_rexmt_count */
+ tcpstat.tcps_tlp_recovery++;
+ tcp_ccdbg_trace(tp, th,
+ TCP_CC_TLP_RECOVERY);
+ } else {
+ tcpstat.tcps_early_rexmt++;
+ tp->t_early_rexmt_count++;
+ tcp_ccdbg_trace(tp, th,
+ TCP_CC_EARLY_RETRANSMIT);
+ }
+ }
+ }
+ }
+
+ /*
+ * If we ever sent a TLP probe, the acknowledgement will trigger
+ * early retransmit because the value of snd_fack will be close
+ * to snd_max. This will take care of adjustments to the
+ * congestion window. So we can reset TF_SENT_TLPROBE flag.
+ */
+ tp->t_flagsext &= ~(TF_SENT_TLPROBE);
+ tp->t_tlphighrxt = 0;
+ tp->t_tlpstart = 0;
+}
+