+/* RFC 3465 variables */
+ u_int32_t t_bytes_acked; /* ABC "bytes_acked" parameter */
+
+ int t_lastchain; /* amount of packets chained last time around */
+ u_int16_t t_unacksegs; /* received but unacked segments for delaying acks */
+ u_int8_t t_rexmtthresh; /* duplicate ack threshold for entering fast recovery */
+ u_int8_t t_rtimo_probes; /* number of adaptive rtimo probes sent */
+ u_int32_t t_persist_timeout; /* ZWP persistence limit as set by PERSIST_TIMEOUT */
+ u_int32_t t_persist_stop; /* persistence limit deadline if triggered by ZWP */
+ u_int32_t t_notsent_lowat; /* Low water for not sent data */
+
+/* Receiver state for stretch-ack algorithm */
+ u_int32_t rcv_unackwin; /* to measure win for stretching acks */
+ u_int32_t rcv_by_unackwin; /* bytes seen during the last ack-stretching win */
+ u_int32_t rcv_nostrack_ts; /* timestamp when stretch ack was disabled automatically */
+ u_int16_t rcv_waitforss; /* wait for packets during slow-start */
+ u_int16_t ecn_flags;
+#define TE_SETUPSENT 0x01 /* Indicate we have sent ECN-SETUP SYN or SYN-ACK */
+#define TE_SETUPRECEIVED 0x02 /* Indicate we have received ECN-SETUP SYN or SYN-ACK */
+#define TE_SENDIPECT 0x04 /* Indicate we haven't sent or received non-ECN-setup SYN or SYN-ACK */
+#define TE_SENDCWR 0x08 /* Indicate that the next non-retransmit should have the TCP CWR flag set */
+#define TE_SENDECE 0x10 /* Indicate that the next packet should have the TCP ECE flag set */
+#define TE_INRECOVERY 0x20 /* connection entered recovery after receiving ECE */
+#define TE_RECV_ECN_CE 0x40 /* Received IPTOS_ECN_CE marking atleast once */
+#define TE_RECV_ECN_ECE 0x80 /* Received ECE marking atleast once */
+#define TE_ECN_ON (TE_SETUPSENT | TE_SETUPRECEIVED) /* Indicate ECN was successfully negotiated on a connection) */
+
+/* state for bad retransmit recovery */
+ u_int32_t snd_cwnd_prev; /* cwnd prior to retransmit */
+ u_int32_t snd_ssthresh_prev; /* ssthresh prior to retransmit */
+ tcp_seq snd_recover_prev; /* snd_recover prior to retransmit */
+ int t_srtt_prev; /* srtt prior to retransmit */
+ int t_rttvar_prev; /* rttvar prior to retransmit */
+ u_int32_t t_badrexmt_time; /* bad rexmt detection time */
+
+/* Packet reordering metric */
+ u_int16_t t_reorderwin; /* Reordering late time offset */
+
+/* SACK related state */
+ int16_t snd_numholes; /* number of holes seen by sender */
+ tcp_seq sack_newdata; /* New data xmitted in this recovery
+ episode starts at this seq number */
+ TAILQ_HEAD(sackhole_head, sackhole) snd_holes;
+ /* SACK scoreboard (sorted) */
+ tcp_seq snd_fack; /* last seq number(+1) sack'd by rcv'r*/
+ int rcv_numsacks; /* # distinct sack blks present */
+ struct sackblk sackblks[MAX_SACK_BLKS]; /* seq nos. of sack blocks */
+ struct sackhint sackhint; /* SACK scoreboard hint */
+
+ struct mbuf *t_pktlist_head; /* First packet in transmit chain */
+ struct mbuf *t_pktlist_tail; /* Last packet in transmit chain */
+ u_int32_t t_pktlist_sentlen; /* total bytes in transmit chain */
+
+ u_int32_t t_keepidle; /* keepalive idle timer (override global if > 0) */
+ u_int32_t t_keepinit; /* connection timeout, i.e. idle time
+ in SYN_SENT or SYN_RECV state */
+ u_int32_t t_keepintvl; /* interval between keepalives */
+ u_int32_t t_keepcnt; /* number of keepalives before close */
+
+ u_int32_t tso_max_segment_size; /* TSO maximum segment unit for NIC */
+ u_int16_t t_pmtud_lastseg_size; /* size of the last sent segment */
+ u_int16_t t_pmtud_saved_maxopd; /* MSS saved before performing PMTU-D BlackHole detection */
+ u_int32_t t_pmtud_start_ts; /* Time of PMTUD blackhole detection */
+
+ struct
+ {
+ u_int32_t rxduplicatebytes;
+ u_int32_t rxoutoforderbytes;
+ u_int32_t txretransmitbytes;
+ u_int8_t synrxtshift;
+ u_int8_t unused;
+ u_int16_t unused_pad_to_8;
+ } t_stat;
+
+ /* Background congestion related state */
+ uint32_t rtt_hist[N_RTT_BASE]; /* history of minimum RTT */
+ uint32_t rtt_count; /* Number of RTT samples in recent base history */
+ uint32_t bg_ssthresh; /* Slow start threshold until delay increases */
+ uint32_t t_flagsext; /* Another field to accommodate more flags */
+#define TF_RXTFINDROP 0x1 /* Drop conn after retransmitting FIN 3 times */
+#define TF_RCVUNACK_WAITSS 0x2 /* set when the receiver should not stretch acks */
+#define TF_BWMEAS_INPROGRESS 0x4 /* Indicate BW meas is happening */
+#define TF_MEASURESNDBW 0x8 /* Measure send bw on this connection */
+#define TF_LRO_OFFLOADED 0x10 /* Connection LRO offloaded */
+#define TF_SACK_ENABLE 0x20 /* SACK is enabled */
+#define TF_RECOMPUTE_RTT 0x40 /* recompute RTT after spurious retransmit */
+#define TF_DETECT_READSTALL 0x80 /* Used to detect a stall during read operation */
+#define TF_RECV_THROTTLE 0x100 /* Input throttling active */
+#define TF_NOSTRETCHACK 0x200 /* ack every other packet */
+#define TF_STREAMEOW 0x400 /* Last packet was small indicating end of write */
+#define TF_NOTIMEWAIT 0x800 /* Avoid going into time-wait */
+#define TF_SENT_TLPROBE 0x1000 /* Sent data in PTO */
+#define TF_PKTS_REORDERED 0x2000 /* Detected reordering */
+#define TF_DELAY_RECOVERY 0x4000 /* delay fast recovery */
+#define TF_FORCE 0x8000 /* force 1 byte out */
+#define TF_DISABLE_STRETCHACK 0x10000 /* auto-disable stretch ack */
+#define TF_NOBLACKHOLE_DETECTION 0x20000 /* Disable PMTU blackhole detection */
+#define TF_DISABLE_DSACK 0x40000 /* Ignore DSACK due to n/w duplication */
+#define TF_RESCUE_RXT 0x80000 /* SACK rescue retransmit */
+#define TF_CWND_NONVALIDATED 0x100000 /* cwnd non validated */
+#define TF_PROBING 0x200000 /* Trigger probe timeout */
+#define TF_FASTOPEN 0x400000 /* TCP Fastopen is enabled */
+
+#if TRAFFIC_MGT
+ /* Inter-arrival jitter related state */
+ uint32_t iaj_rcv_ts; /* tcp clock when the first packet was received */
+ uint16_t iaj_size; /* Size of packet for iaj measurement */
+ uint8_t iaj_small_pkt; /* Count of packets smaller than iaj_size */
+ uint8_t t_pipeack_ind; /* index for next pipeack sample */
+ uint16_t iaj_pktcnt; /* packet count, to avoid throttling initially */
+ uint16_t acc_iaj; /* Accumulated iaj */
+ tcp_seq iaj_rwintop; /* recent max advertised window */
+ uint32_t avg_iaj; /* Mean */
+ uint32_t std_dev_iaj; /* Standard deviation */
+#endif /* TRAFFIC_MGT */
+ struct bwmeas *t_bwmeas; /* State for bandwidth measurement */
+ uint32_t t_lropktlen; /* Bytes in a LRO frame */
+ tcp_seq t_idleat; /* rcv_nxt at idle time */
+ TAILQ_ENTRY(tcpcb) t_twentry; /* link for time wait queue */
+ struct tcp_ccstate *t_ccstate; /* congestion control related state */
+/* Tail loss probe related state */
+ tcp_seq t_tlphighrxt; /* snd_nxt after PTO */
+ u_int32_t t_tlpstart; /* timestamp at PTO */
+/* DSACK data receiver state */
+ tcp_seq t_dsack_lseq; /* DSACK left sequence */
+ tcp_seq t_dsack_rseq; /* DSACK right sequence */
+/* DSACK data sender state */
+ SLIST_HEAD(tcp_rxt_seghead, tcp_rxt_seg) t_rxt_segments;
+ tcp_seq t_dsack_lastuna; /* snd_una when last recovery episode started */
+/* state for congestion window validation (draft-ietf-tcpm-newcwv-07) */
+#define TCP_PIPEACK_SAMPLE_COUNT 3
+ u_int32_t t_pipeack_sample[TCP_PIPEACK_SAMPLE_COUNT]; /* pipeack, bytes acked within RTT */
+ tcp_seq t_pipeack_lastuna; /* una when pipeack measurement started */
+ u_int32_t t_pipeack;
+ u_int32_t t_lossflightsize;
+
+#if MPTCP
+ u_int32_t t_mpflags; /* flags for multipath TCP */
+
+#define TMPF_PREESTABLISHED 0x00000001 /* conn in pre-established state */
+#define TMPF_SENT_KEYS 0x00000002 /* indicates that keys were sent */
+#define TMPF_MPTCP_TRUE 0x00000004 /* negotiated MPTCP successfully */
+#define TMPF_MPTCP_RCVD_KEY 0x00000008 /* state for 3-way handshake */
+#define TMPF_SND_MPPRIO 0x00000010 /* send priority of subflow */
+#define TMPF_SND_REM_ADDR 0x00000020 /* initiate address removal */
+#define TMPF_UNUSED 0x00000040 /* address addition acked by peer */
+#define TMPF_JOINED_FLOW 0x00000080 /* Indicates additional flow */
+#define TMPF_BACKUP_PATH 0x00000100 /* Indicates backup path */
+#define TMPF_MPTCP_ACKNOW 0x00000200 /* Send Data ACK */
+#define TMPF_SEND_DSN 0x00000400 /* Send DSN mapping */
+#define TMPF_SEND_DFIN 0x00000800 /* Send Data FIN */
+#define TMPF_RECV_DFIN 0x00001000 /* Recv Data FIN */
+#define TMPF_SENT_JOIN 0x00002000 /* Sent Join */
+#define TMPF_RECVD_JOIN 0x00004000 /* Received Join */
+#define TMPF_RESET 0x00008000 /* Send RST */
+#define TMPF_TCP_FALLBACK 0x00010000 /* Fallback to TCP */
+#define TMPF_FASTCLOSE 0x00020000 /* Send Fastclose option */
+#define TMPF_EMBED_DSN 0x00040000 /* tp has DSN mapping */
+#define TMPF_MPTCP_READY 0x00080000 /* Can send DSS options on data */
+#define TMPF_INFIN_SENT 0x00100000 /* Sent infinite mapping */
+#define TMPF_SND_MPFAIL 0x00200000 /* Received mapping csum failure */
+#define TMPF_FASTJOIN_SEND 0x00400000 /* Fast join early data send */
+#define TMPF_FASTJOINBY2_SEND 0x00800000 /* Fast join send after 3 WHS */
+#define TMPF_MPCAP_RETRANSMIT 0x01000000 /* Retransmission of 3rd ACK */
+
+ tcp_seq t_mpuna; /* unacknowledged sequence */
+ void *t_mptcb; /* pointer to MPTCP TCB */
+ struct mpt_dsn_map t_rcv_map; /* Receive mapping list */
+ u_int8_t t_local_aid; /* Addr Id for authentication */
+ u_int8_t t_rem_aid; /* Addr ID of another subflow */
+ u_int8_t t_mprxtshift; /* join retransmission */
+#endif /* MPTCP */
+
+#define TFO_F_OFFER_COOKIE 0x01 /* We will offer a cookie */
+#define TFO_F_COOKIE_VALID 0x02 /* The received cookie is valid */
+#define TFO_F_COOKIE_REQ 0x04 /* Client requested a new cookie */
+#define TFO_F_COOKIE_SENT 0x08 /* Client did send a cookie in the SYN */
+#define TFO_F_SYN_LOSS 0x10 /* A SYN-loss triggered a fallback to regular TCP on the client-side */
+#define TFO_F_NO_RCVPROBING 0x20 /* This network is guaranteed to support TFO in the downstream direction */
+#define TFO_F_NO_SNDPROBING 0x40 /* This network is guaranteed to support TFO in the upstream direction */
+ u_int8_t t_tfo_flags;
+#define TFO_S_SYNDATA_RCV 0x01 /* SYN+data has been received */
+#define TFO_S_COOKIEREQ_RECV 0x02 /* TFO-cookie request received */
+#define TFO_S_COOKIE_SENT 0x04 /* TFO-cookie announced in SYN/ACK */
+#define TFO_S_COOKIE_INVALID 0x08 /* Received TFO-cookie is invalid */
+#define TFO_S_COOKIE_REQ 0x10 /* TFO-cookie requested within the SYN */
+#define TFO_S_COOKIE_RCV 0x20 /* TFO-cookie received in SYN/ACK */
+#define TFO_S_SYN_DATA_SENT 0x40 /* SYN+data sent */
+#define TFO_S_SYN_DATA_ACKED 0x80 /* SYN+data has been acknowledged in SYN/ACK */
+#define TFO_S_SYN_LOSS 0x0100 /* SYN+TFO has been lost - fallback to regular TCP */
+ u_int16_t t_tfo_stats;
+
+ u_int8_t t_tfo_probes; /* TFO-probes we did send */
+/*
+ * This is the TFO-probing state machine. Transitions are as follows:
+ *
+ * Current state: PROBE_NONE
+ * Event: SYN+DATA acknowledged
+ * Action: Transition to PROBE_PROBING and set keepalive-timer
+ *
+ * Current state: PROBE_PROBING (initial state)
+ * Event: Receive data
+ * Action: Transition to PROBE_NONE and cancel keepalive-timer
+ * Event: Receive ACK that does not indicate a hole
+ * Action: Transition to PROBE_NONE and cancel keepalive-timer
+ * Event: Receive ACK that indicates a hole
+ * Action: Transition to PROBE_WAIT_DATA and set a short timer
+ * to wait for the final segment.
+ * Event: Keepalive-timeout (did not receive any segment)
+ * Action: Signal ETIMEDOUT as with regular keepalive-timers
+ *
+ * Current state: PROBE_WAIT_DATA
+ * Event: Receive data
+ * Action: Transition to PROBE_NONE and cancel keepalive-timer
+ * Event: Data-timeout (did not receive the expected data)
+ * Action: Signal ENODATA up to the app and close everything.
+ */
+#define TFO_PROBE_NONE 0 /* Not probing now */
+#define TFO_PROBE_PROBING 1 /* Sending out TCP-keepalives waiting for reply */
+#define TFO_PROBE_WAIT_DATA 2 /* Received reply, waiting for data */
+ u_int8_t t_tfo_probe_state;
+};
+
+/*
+ * Non-zero when TF_FASTRECOVERY is set in the connection's t_flags.
+ * The argument is parenthesized so that any pointer expression can be
+ * passed safely.
+ */
+#define IN_FASTRECOVERY(_tp_)	((_tp_)->t_flags & TF_FASTRECOVERY)
+/*
+ * Non-zero when TF_SACK_ENABLE is set in the connection's t_flagsext.
+ * The argument is parenthesized so that any pointer expression can be
+ * passed safely.
+ */
+#define SACK_ENABLED(_tp_)	((_tp_)->t_flagsext & TF_SACK_ENABLE)
+
+/*
+ * Mark the connection as being in fast recovery.
+ *
+ * If the connection is in a throttled state due to advisory feedback from
+ * the interface output queue, reset that state. We do this in favor
+ * of entering recovery because the data transfer during recovery
+ * should be just a trickle and it will help to improve performance.
+ * We also do not want to back off twice in the same RTT.
+ *
+ * Any segments still recorded on t_rxt_segments are cleaned up as well.
+ *
+ * Every reference goes through the macro parameter, so the macro no
+ * longer depends on the caller having a variable literally named "tp".
+ * Note that the argument is evaluated more than once.
+ */
+#define ENTER_FASTRECOVERY(_tp_) do {					\
+	(_tp_)->t_flags |= TF_FASTRECOVERY;				\
+	if (INP_IS_FLOW_CONTROLLED((_tp_)->t_inpcb))			\
+		inp_reset_fc_state((_tp_)->t_inpcb);			\
+	if (!SLIST_EMPTY(&(_tp_)->t_rxt_segments))			\
+		tcp_rxtseg_clean((_tp_));				\
+} while(0)
+
+/*
+ * Leave fast recovery: clear the recovery-related flag bits, then reset
+ * the duplicate-ack count, retransmit threshold, ABC byte counter, PTO
+ * timer slot and loss flight size. The argument is evaluated more than
+ * once.
+ */
+#define EXIT_FASTRECOVERY(_tp_) do {					\
+	/* flag bits */							\
+	(_tp_)->t_flags &= ~TF_FASTRECOVERY;				\
+	(_tp_)->ecn_flags &= ~TE_INRECOVERY;				\
+	(_tp_)->t_flagsext &= ~TF_RESCUE_RXT;				\
+	/* counters, threshold and timer */				\
+	(_tp_)->t_dupacks = 0;						\
+	(_tp_)->t_rexmtthresh = tcprexmtthresh;				\
+	(_tp_)->t_bytes_acked = 0;					\
+	(_tp_)->t_timer[TCPT_PTO] = 0;					\
+	(_tp_)->t_lossflightsize = 0;					\
+} while (0)
+
+/*
+ * Limited Transmit is allowed while at least one duplicate ack has been
+ * received but the count is still below the connection's retransmit
+ * threshold, unless TF_PKTS_REORDERED and TF_DELAY_RECOVERY are both set
+ * in t_flagsext.
+ */
+extern int tcprexmtthresh;
+#define ALLOW_LIMITED_TRANSMIT(_tp_)					\
+	(((_tp_)->t_dupacks > 0) &&					\
+	 ((_tp_)->t_dupacks < (_tp_)->t_rexmtthresh) &&			\
+	 (((_tp_)->t_flagsext &						\
+	   (TF_PKTS_REORDERED | TF_DELAY_RECOVERY)) !=			\
+	  (TF_PKTS_REORDERED | TF_DELAY_RECOVERY)))
+
+/*
+ * True if the timestamp option is supported on a connection, i.e. both
+ * TF_REQ_TSTMP and TF_RCVD_TSTMP are set in t_flags.
+ */
+#define TSTMP_SUPPORTED(_tp_)						\
+	((TF_REQ_TSTMP | TF_RCVD_TSTMP) ==				\
+	 ((_tp_)->t_flags & (TF_REQ_TSTMP | TF_RCVD_TSTMP)))
+
+/*
+ * True if the window scale option is supported on a connection, i.e.
+ * both TF_REQ_SCALE and TF_RCVD_SCALE are set in t_flags.
+ */
+#define TCP_WINDOW_SCALE_ENABLED(_tp_)					\
+	((TF_REQ_SCALE | TF_RCVD_SCALE) ==				\
+	 ((_tp_)->t_flags & (TF_REQ_SCALE | TF_RCVD_SCALE)))
+
+/*
+ * Is ECN enabled end-to-end: true only when every bit of TE_ECN_ON is
+ * set in ecn_flags.
+ */
+#define TCP_ECN_ENABLED(_tp_)						\
+	((TE_ECN_ON) == ((_tp_)->ecn_flags & (TE_ECN_ON)))
+
+/*
+ * Number of bytes acknowledged by this ack: the header's th_ack minus
+ * the connection's snd_una.
+ */
+#define BYTES_ACKED(_th_, _tp_)	(((_th_)->th_ack) - ((_tp_)->snd_una))
+
+/*
+ * True if a DSACK option should be sent, i.e. both the left and the
+ * right DSACK sequence recorded on the connection are greater than zero.
+ */
+#define TCP_SEND_DSACK_OPT(_tp_)					\
+	(((_tp_)->t_dsack_lseq > 0) && ((_tp_)->t_dsack_rseq > 0))
+
+/*
+ * Check if the DSACK option should be processed: tcp_dsack_enable must be
+ * 1 and TF_DISABLE_DSACK must not be set in the connection's t_flagsext.
+ * The argument is parenthesized so that any pointer expression can be
+ * passed safely.
+ */
+#define TCP_DSACK_ENABLED(_tp_)	(tcp_dsack_enable == 1 &&		\
+	!((_tp_)->t_flagsext & TF_DISABLE_DSACK))
+
+/*
+ * TCP_DSACK_SEQ_IN_WINDOW is true if a DSACK sequence number lies within
+ * the maximum send window that will be accepted. To have a window against
+ * which sequence numbers can be validated, the send window within which a
+ * DSACK option is processed is capped.
+ *
+ * The cap is an arbitrary choice of 256 * MSS: if the window is as large
+ * as 256 segments it might be big enough to ignore the DSACK option.
+ * Choosing a much larger limit means that the memory for retransmit
+ * segments can be held for a longer time.
+ *
+ * A sequence is accepted when it is at or before snd_max and at or after
+ * (_una_ - TCP_DSACK_MAX_SEND_WINDOW), compared with SEQ_LEQ / SEQ_GEQ.
+ */
+#define TCP_DSACK_MAX_SEND_WINDOW(_tp_)	(((_tp_)->t_maxseg) << 8)
+#define TCP_DSACK_SEQ_IN_WINDOW(_tp_, _seq_, _una_)			\
+	((SEQ_LEQ((_seq_), (_tp_)->snd_max)) &&				\
+	 (SEQ_GEQ((_seq_),						\
+	    ((_una_) - TCP_DSACK_MAX_SEND_WINDOW(_tp_)))))
+
+
+enum tcp_cc_event {
+ TCP_CC_CWND_INIT, /* 0 */
+ TCP_CC_INSEQ_ACK_RCVD, /* 1 */
+ TCP_CC_ACK_RCVD, /* 2 */
+ TCP_CC_ENTER_FASTRECOVERY, /* 3 */
+ TCP_CC_IN_FASTRECOVERY, /* 4 */
+ TCP_CC_EXIT_FASTRECOVERY, /* 5 */
+ TCP_CC_PARTIAL_ACK, /* 6 */
+ TCP_CC_IDLE_TIMEOUT, /* 7 */
+ TCP_CC_REXMT_TIMEOUT, /* 8 */
+ TCP_CC_ECN_RCVD, /* 9 */
+ TCP_CC_BAD_REXMT_RECOVERY, /* 10 */
+ TCP_CC_OUTPUT_ERROR, /* 11 */
+ TCP_CC_CHANGE_ALGO, /* 12 */
+ TCP_CC_FLOW_CONTROL, /* 13 */
+ TCP_CC_SUSPEND, /* 14 */
+ TCP_CC_LIMITED_TRANSMIT, /* 15 */
+ TCP_CC_EARLY_RETRANSMIT, /* 16 */
+ TCP_CC_TLP_RECOVERY, /* 17 */
+ TCP_CC_TLP_RECOVER_LASTPACKET, /* 18 */
+ TCP_CC_DELAY_FASTRECOVERY, /* 19 */
+ TCP_CC_TLP_IN_FASTRECOVERY, /* 20 */
+ TCP_CC_DSACK_BAD_REXMT /* 21 */